[
  {
    "path": ".github/workflows/python-app.yml",
    "content": "# This workflow installs py2ipynb, converts every */*.py script into a Jupyter notebook, and commits the generated */*.ipynb files\n# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions\n\nname: Python application\n\non:\n  push:\n    branches: [ master ]\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps:\n    - uses: actions/checkout@v2\n    - name: Set up Python 3.8\n      uses: actions/setup-python@v2\n      with:\n        python-version: 3.8\n    - name: Install dependencies\n      run: |\n        python -m pip install --upgrade pip\n        pip install py2ipynb==0.0.5\n    - name: Test with py2ipynb\n      run: |\n        py2ipynb '*/*.py'\n    - name: Commit changes\n      uses: EndBug/add-and-commit@v4\n      with:\n        author_name: graykode\n        author_email: nlkey2022@gmail.com\n        message: \"Automatic convert from py to ipynb\"\n        add: \"*/*.ipynb\"\n      env:\n        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n"
  },
  {
    "path": ".gitignore",
    "content": ".idea\n"
  },
  {
    "path": "1-1.NNLM/NNLM.ipynb",
    "content": "{\n  \"cells\": [\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {},\n      \"source\": [\n        \"# code by Tae Hwan Jung @graykode\\n\",\n        \"import torch\\n\",\n        \"import torch.nn as nn\\n\",\n        \"import torch.optim as optim\\n\",\n        \"\\n\",\n        \"def make_batch():\\n\",\n        \"    input_batch = []\\n\",\n        \"    target_batch = []\\n\",\n        \"\\n\",\n        \"    for sen in sentences:\\n\",\n        \"        word = sen.split() # space tokenizer\\n\",\n        \"        input = [word_dict[n] for n in word[:-1]] # create (1~n-1) as input\\n\",\n        \"        target = word_dict[word[-1]] # create (n) as target, We usually call this 'causal language model'\\n\",\n        \"\\n\",\n        \"        input_batch.append(input)\\n\",\n        \"        target_batch.append(target)\\n\",\n        \"\\n\",\n        \"    return input_batch, target_batch\\n\",\n        \"\\n\",\n        \"# Model\\n\",\n        \"class NNLM(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(NNLM, self).__init__()\\n\",\n        \"        self.C = nn.Embedding(n_class, m)\\n\",\n        \"        self.H = nn.Linear(n_step * m, n_hidden, bias=False)\\n\",\n        \"        self.d = nn.Parameter(torch.ones(n_hidden))\\n\",\n        \"        self.U = nn.Linear(n_hidden, n_class, bias=False)\\n\",\n        \"        self.W = nn.Linear(n_step * m, n_class, bias=False)\\n\",\n        \"        self.b = nn.Parameter(torch.ones(n_class))\\n\",\n        \"\\n\",\n        \"    def forward(self, X):\\n\",\n        \"        X = self.C(X) # X : [batch_size, n_step, m]\\n\",\n        \"        X = X.view(-1, n_step * m) # [batch_size, n_step * m]\\n\",\n        \"        tanh = torch.tanh(self.d + self.H(X)) # [batch_size, n_hidden]\\n\",\n        \"        output = self.b + self.W(X) + self.U(tanh) # [batch_size, n_class]\\n\",\n        \"        return output\\n\",\n        \"\\n\",\n  
      \"if __name__ == '__main__':\\n\",\n        \"    n_step = 2 # number of steps, n-1 in paper\\n\",\n        \"    n_hidden = 2 # number of hidden size, h in paper\\n\",\n        \"    m = 2 # embedding size, m in paper\\n\",\n        \"\\n\",\n        \"    sentences = [\\\"i like dog\\\", \\\"i love coffee\\\", \\\"i hate milk\\\"]\\n\",\n        \"\\n\",\n        \"    word_list = \\\" \\\".join(sentences).split()\\n\",\n        \"    word_list = list(set(word_list))\\n\",\n        \"    word_dict = {w: i for i, w in enumerate(word_list)}\\n\",\n        \"    number_dict = {i: w for i, w in enumerate(word_list)}\\n\",\n        \"    n_class = len(word_dict)  # number of Vocabulary\\n\",\n        \"\\n\",\n        \"    model = NNLM()\\n\",\n        \"\\n\",\n        \"    criterion = nn.CrossEntropyLoss()\\n\",\n        \"    optimizer = optim.Adam(model.parameters(), lr=0.001)\\n\",\n        \"\\n\",\n        \"    input_batch, target_batch = make_batch()\\n\",\n        \"    input_batch = torch.LongTensor(input_batch)\\n\",\n        \"    target_batch = torch.LongTensor(target_batch)\\n\",\n        \"\\n\",\n        \"    # Training\\n\",\n        \"    for epoch in range(5000):\\n\",\n        \"        optimizer.zero_grad()\\n\",\n        \"        output = model(input_batch)\\n\",\n        \"\\n\",\n        \"        # output : [batch_size, n_class], target_batch : [batch_size]\\n\",\n        \"        loss = criterion(output, target_batch)\\n\",\n        \"        if (epoch + 1) % 1000 == 0:\\n\",\n        \"            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\\n\",\n        \"\\n\",\n        \"        loss.backward()\\n\",\n        \"        optimizer.step()\\n\",\n        \"\\n\",\n        \"    # Predict\\n\",\n        \"    predict = model(input_batch).data.max(1, keepdim=True)[1]\\n\",\n        \"\\n\",\n        \"    # Test\\n\",\n        \"    print([sen.split()[:2] for sen in sentences], '->', 
[number_dict[n.item()] for n in predict.squeeze()])\"\n      ],\n      \"outputs\": [],\n      \"execution_count\": null\n    }\n  ],\n  \"metadata\": {\n    \"anaconda-cloud\": {},\n    \"kernelspec\": {\n      \"display_name\": \"Python 3\",\n      \"language\": \"python\",\n      \"name\": \"python3\"\n    },\n    \"language_info\": {\n      \"codemirror_mode\": {\n        \"name\": \"ipython\",\n        \"version\": 3\n      },\n      \"file_extension\": \".py\",\n      \"mimetype\": \"text/x-python\",\n      \"name\": \"python\",\n      \"nbconvert_exporter\": \"python\",\n      \"pygments_lexer\": \"ipython3\",\n      \"version\": \"3.6.1\"\n    }\n  },\n  \"nbformat\": 4,\n  \"nbformat_minor\": 4\n}"
  },
  {
    "path": "1-1.NNLM/NNLM.py",
    "content": "# %%\n# code by Tae Hwan Jung @graykode\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\n\ndef make_batch():\n    input_batch = []\n    target_batch = []\n\n    for sen in sentences:\n        word = sen.split() # space tokenizer\n        input = [word_dict[n] for n in word[:-1]] # create (1~n-1) as input\n        target = word_dict[word[-1]] # create (n) as target, We usually call this 'causal language model'\n\n        input_batch.append(input)\n        target_batch.append(target)\n\n    return input_batch, target_batch\n\n# Model\nclass NNLM(nn.Module):\n    def __init__(self):\n        super(NNLM, self).__init__()\n        self.C = nn.Embedding(n_class, m)\n        self.H = nn.Linear(n_step * m, n_hidden, bias=False)\n        self.d = nn.Parameter(torch.ones(n_hidden))\n        self.U = nn.Linear(n_hidden, n_class, bias=False)\n        self.W = nn.Linear(n_step * m, n_class, bias=False)\n        self.b = nn.Parameter(torch.ones(n_class))\n\n    def forward(self, X):\n        X = self.C(X) # X : [batch_size, n_step, m]\n        X = X.view(-1, n_step * m) # [batch_size, n_step * m]\n        tanh = torch.tanh(self.d + self.H(X)) # [batch_size, n_hidden]\n        output = self.b + self.W(X) + self.U(tanh) # [batch_size, n_class]\n        return output\n\nif __name__ == '__main__':\n    n_step = 2 # number of steps, n-1 in paper\n    n_hidden = 2 # number of hidden size, h in paper\n    m = 2 # embedding size, m in paper\n\n    sentences = [\"i like dog\", \"i love coffee\", \"i hate milk\"]\n\n    word_list = \" \".join(sentences).split()\n    word_list = list(set(word_list))\n    word_dict = {w: i for i, w in enumerate(word_list)}\n    number_dict = {i: w for i, w in enumerate(word_list)}\n    n_class = len(word_dict)  # number of Vocabulary\n\n    model = NNLM()\n\n    criterion = nn.CrossEntropyLoss()\n    optimizer = optim.Adam(model.parameters(), lr=0.001)\n\n    input_batch, target_batch = make_batch()\n    input_batch = 
torch.LongTensor(input_batch)\n    target_batch = torch.LongTensor(target_batch)\n\n    # Training\n    for epoch in range(5000):\n        optimizer.zero_grad()\n        output = model(input_batch)\n\n        # output : [batch_size, n_class], target_batch : [batch_size]\n        loss = criterion(output, target_batch)\n        if (epoch + 1) % 1000 == 0:\n            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n\n        loss.backward()\n        optimizer.step()\n\n    # Predict\n    predict = model(input_batch).data.max(1, keepdim=True)[1]\n\n    # Test\n    print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])"
  },
  {
    "path": "1-2.Word2Vec/Word2Vec-Skipgram(Softmax).ipynb",
    "content": "{\n  \"cells\": [\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {},\n      \"source\": [\n        \"# code by Tae Hwan Jung @graykode\\n\",\n        \"import numpy as np\\n\",\n        \"import torch\\n\",\n        \"import torch.nn as nn\\n\",\n        \"import torch.optim as optim\\n\",\n        \"import matplotlib.pyplot as plt\\n\",\n        \"\\n\",\n        \"def random_batch():\\n\",\n        \"    random_inputs = []\\n\",\n        \"    random_labels = []\\n\",\n        \"    random_index = np.random.choice(range(len(skip_grams)), batch_size, replace=False)\\n\",\n        \"\\n\",\n        \"    for i in random_index:\\n\",\n        \"        random_inputs.append(np.eye(voc_size)[skip_grams[i][0]])  # target\\n\",\n        \"        random_labels.append(skip_grams[i][1])  # context word\\n\",\n        \"\\n\",\n        \"    return random_inputs, random_labels\\n\",\n        \"\\n\",\n        \"# Model\\n\",\n        \"class Word2Vec(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(Word2Vec, self).__init__()\\n\",\n        \"        # W and WT is not Traspose relationship\\n\",\n        \"        self.W = nn.Linear(voc_size, embedding_size, bias=False) # voc_size > embedding_size Weight\\n\",\n        \"        self.WT = nn.Linear(embedding_size, voc_size, bias=False) # embedding_size > voc_size Weight\\n\",\n        \"\\n\",\n        \"    def forward(self, X):\\n\",\n        \"        # X : [batch_size, voc_size]\\n\",\n        \"        hidden_layer = self.W(X) # hidden_layer : [batch_size, embedding_size]\\n\",\n        \"        output_layer = self.WT(hidden_layer) # output_layer : [batch_size, voc_size]\\n\",\n        \"        return output_layer\\n\",\n        \"\\n\",\n        \"if __name__ == '__main__':\\n\",\n        \"    batch_size = 2 # mini-batch size\\n\",\n        \"    embedding_size = 2 # embedding size\\n\",\n        \"\\n\",\n        \"    sentences = [\\\"apple banana 
fruit\\\", \\\"banana orange fruit\\\", \\\"orange banana fruit\\\",\\n\",\n        \"                 \\\"dog cat animal\\\", \\\"cat monkey animal\\\", \\\"monkey dog animal\\\"]\\n\",\n        \"\\n\",\n        \"    word_sequence = \\\" \\\".join(sentences).split()\\n\",\n        \"    word_list = \\\" \\\".join(sentences).split()\\n\",\n        \"    word_list = list(set(word_list))\\n\",\n        \"    word_dict = {w: i for i, w in enumerate(word_list)}\\n\",\n        \"    voc_size = len(word_list)\\n\",\n        \"\\n\",\n        \"    # Make skip gram of one size window\\n\",\n        \"    skip_grams = []\\n\",\n        \"    for i in range(1, len(word_sequence) - 1):\\n\",\n        \"        target = word_dict[word_sequence[i]]\\n\",\n        \"        context = [word_dict[word_sequence[i - 1]], word_dict[word_sequence[i + 1]]]\\n\",\n        \"        for w in context:\\n\",\n        \"            skip_grams.append([target, w])\\n\",\n        \"\\n\",\n        \"    model = Word2Vec()\\n\",\n        \"\\n\",\n        \"    criterion = nn.CrossEntropyLoss()\\n\",\n        \"    optimizer = optim.Adam(model.parameters(), lr=0.001)\\n\",\n        \"\\n\",\n        \"    # Training\\n\",\n        \"    for epoch in range(5000):\\n\",\n        \"        input_batch, target_batch = random_batch()\\n\",\n        \"        input_batch = torch.Tensor(input_batch)\\n\",\n        \"        target_batch = torch.LongTensor(target_batch)\\n\",\n        \"\\n\",\n        \"        optimizer.zero_grad()\\n\",\n        \"        output = model(input_batch)\\n\",\n        \"\\n\",\n        \"        # output : [batch_size, voc_size], target_batch : [batch_size] (LongTensor, not one-hot)\\n\",\n        \"        loss = criterion(output, target_batch)\\n\",\n        \"        if (epoch + 1) % 1000 == 0:\\n\",\n        \"            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\\n\",\n        \"\\n\",\n        \"        loss.backward()\\n\",\n       
 \"        optimizer.step()\\n\",\n        \"\\n\",\n        \"    for i, label in enumerate(word_list):\\n\",\n        \"        W, WT = model.parameters()\\n\",\n        \"        x, y = W[0][i].item(), W[1][i].item()\\n\",\n        \"        plt.scatter(x, y)\\n\",\n        \"        plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom')\\n\",\n        \"    plt.show()\\n\"\n      ],\n      \"outputs\": [],\n      \"execution_count\": null\n    }\n  ],\n  \"metadata\": {\n    \"anaconda-cloud\": {},\n    \"kernelspec\": {\n      \"display_name\": \"Python 3\",\n      \"language\": \"python\",\n      \"name\": \"python3\"\n    },\n    \"language_info\": {\n      \"codemirror_mode\": {\n        \"name\": \"ipython\",\n        \"version\": 3\n      },\n      \"file_extension\": \".py\",\n      \"mimetype\": \"text/x-python\",\n      \"name\": \"python\",\n      \"nbconvert_exporter\": \"python\",\n      \"pygments_lexer\": \"ipython3\",\n      \"version\": \"3.6.1\"\n    }\n  },\n  \"nbformat\": 4,\n  \"nbformat_minor\": 4\n}"
  },
  {
    "path": "1-2.Word2Vec/Word2Vec-Skipgram(Softmax).py",
    "content": "# %%\n# code by Tae Hwan Jung @graykode\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nimport matplotlib.pyplot as plt\n\ndef random_batch():\n    random_inputs = []\n    random_labels = []\n    random_index = np.random.choice(range(len(skip_grams)), batch_size, replace=False)\n\n    for i in random_index:\n        random_inputs.append(np.eye(voc_size)[skip_grams[i][0]])  # target\n        random_labels.append(skip_grams[i][1])  # context word\n\n    return random_inputs, random_labels\n\n# Model\nclass Word2Vec(nn.Module):\n    def __init__(self):\n        super(Word2Vec, self).__init__()\n        # W and WT is not Traspose relationship\n        self.W = nn.Linear(voc_size, embedding_size, bias=False) # voc_size > embedding_size Weight\n        self.WT = nn.Linear(embedding_size, voc_size, bias=False) # embedding_size > voc_size Weight\n\n    def forward(self, X):\n        # X : [batch_size, voc_size]\n        hidden_layer = self.W(X) # hidden_layer : [batch_size, embedding_size]\n        output_layer = self.WT(hidden_layer) # output_layer : [batch_size, voc_size]\n        return output_layer\n\nif __name__ == '__main__':\n    batch_size = 2 # mini-batch size\n    embedding_size = 2 # embedding size\n\n    sentences = [\"apple banana fruit\", \"banana orange fruit\", \"orange banana fruit\",\n                 \"dog cat animal\", \"cat monkey animal\", \"monkey dog animal\"]\n\n    word_sequence = \" \".join(sentences).split()\n    word_list = \" \".join(sentences).split()\n    word_list = list(set(word_list))\n    word_dict = {w: i for i, w in enumerate(word_list)}\n    voc_size = len(word_list)\n\n    # Make skip gram of one size window\n    skip_grams = []\n    for i in range(1, len(word_sequence) - 1):\n        target = word_dict[word_sequence[i]]\n        context = [word_dict[word_sequence[i - 1]], word_dict[word_sequence[i + 1]]]\n        for w in context:\n            skip_grams.append([target, w])\n\n  
  model = Word2Vec()\n\n    criterion = nn.CrossEntropyLoss()\n    optimizer = optim.Adam(model.parameters(), lr=0.001)\n\n    # Training\n    for epoch in range(5000):\n        input_batch, target_batch = random_batch()\n        input_batch = torch.Tensor(input_batch)\n        target_batch = torch.LongTensor(target_batch)\n\n        optimizer.zero_grad()\n        output = model(input_batch)\n\n        # output : [batch_size, voc_size], target_batch : [batch_size] (LongTensor, not one-hot)\n        loss = criterion(output, target_batch)\n        if (epoch + 1) % 1000 == 0:\n            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n\n        loss.backward()\n        optimizer.step()\n\n    for i, label in enumerate(word_list):\n        W, WT = model.parameters()\n        x, y = W[0][i].item(), W[1][i].item()\n        plt.scatter(x, y)\n        plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom')\n    plt.show()\n"
  },
  {
    "path": "1-3.FastText/FastText.ipynb",
    "content": "{\n  \"nbformat\": 4,\n  \"nbformat_minor\": 0,\n  \"metadata\": {\n    \"colab\": {\n      \"name\": \"FastText.ipynb\",\n      \"version\": \"0.3.2\",\n      \"provenance\": [],\n      \"collapsed_sections\": []\n    },\n    \"kernelspec\": {\n      \"name\": \"python3\",\n      \"display_name\": \"Python 3\"\n    },\n    \"accelerator\": \"GPU\"\n  },\n  \"cells\": [\n    {\n      \"metadata\": {\n        \"id\": \"kg9kgMnGqYkU\",\n        \"colab_type\": \"text\"\n      },\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"## Install [FastText](https://fasttext.cc/docs/en/supervised-tutorial.html)\"\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"3Iod5UKTqZnC\",\n        \"colab_type\": \"code\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 11051\n        },\n        \"outputId\": \"b10c85c0-c4cf-4f0b-a30e-2207ae4512b2\"\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"!wget https://github.com/facebookresearch/fastText/archive/0.2.0.zip\\n\",\n        \"!unzip 0.2.0.zip\\n\",\n        \"%cd fastText-0.2.0\\n\",\n        \"!make\"\n      ],\n      \"execution_count\": 1,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"--2019-02-02 14:43:56--  https://github.com/facebookresearch/fastText/archive/0.2.0.zip\\n\",\n            \"Resolving github.com (github.com)... 140.82.118.3, 140.82.118.4\\n\",\n            \"Connecting to github.com (github.com)|140.82.118.3|:443... connected.\\n\",\n            \"HTTP request sent, awaiting response... 302 Found\\n\",\n            \"Location: https://codeload.github.com/facebookresearch/fastText/zip/0.2.0 [following]\\n\",\n            \"--2019-02-02 14:43:56--  https://codeload.github.com/facebookresearch/fastText/zip/0.2.0\\n\",\n            \"Resolving codeload.github.com (codeload.github.com)... 
192.30.253.121, 192.30.253.120\\n\",\n            \"Connecting to codeload.github.com (codeload.github.com)|192.30.253.121|:443... connected.\\n\",\n            \"HTTP request sent, awaiting response... 200 OK\\n\",\n            \"Length: unspecified [application/zip]\\n\",\n            \"Saving to: ‘0.2.0.zip’\\n\",\n            \"\\n\",\n            \"0.2.0.zip               [    <=>             ]   4.10M  6.17MB/s    in 0.7s    \\n\",\n            \"\\n\",\n            \"2019-02-02 14:43:57 (6.17 MB/s) - ‘0.2.0.zip’ saved [4304799]\\n\",\n            \"\\n\",\n            \"Archive:  0.2.0.zip\\n\",\n            \"7842495a4d64c7a3bb4339d45d6e64321d002ed8\\n\",\n            \"   creating: fastText-0.2.0/\\n\",\n            \"   creating: fastText-0.2.0/.circleci/\\n\",\n            \"  inflating: fastText-0.2.0/.circleci/cmake_test.sh  \\n\",\n            \"  inflating: fastText-0.2.0/.circleci/config.yml  \\n\",\n            \"  inflating: fastText-0.2.0/.circleci/gcc_test.sh  \\n\",\n            \"  inflating: fastText-0.2.0/.circleci/pip_test.sh  \\n\",\n            \"  inflating: fastText-0.2.0/.circleci/pull_data.sh  \\n\",\n            \"  inflating: fastText-0.2.0/.circleci/python_test.sh  \\n\",\n            \"  inflating: fastText-0.2.0/.circleci/run_locally.sh  \\n\",\n            \"  inflating: fastText-0.2.0/.circleci/setup_circleimg.sh  \\n\",\n            \"  inflating: fastText-0.2.0/.circleci/setup_debian.sh  \\n\",\n            \"  inflating: fastText-0.2.0/.gitignore  \\n\",\n            \"  inflating: fastText-0.2.0/CMakeLists.txt  \\n\",\n            \"  inflating: fastText-0.2.0/CONTRIBUTING.md  \\n\",\n            \"  inflating: fastText-0.2.0/LICENSE  \\n\",\n            \"  inflating: fastText-0.2.0/MANIFEST.in  \\n\",\n            \"  inflating: fastText-0.2.0/Makefile  \\n\",\n            \"  inflating: fastText-0.2.0/README.md  \\n\",\n            \"   creating: fastText-0.2.0/alignment/\\n\",\n            \"  inflating: 
fastText-0.2.0/alignment/README.md  \\n\",\n            \"  inflating: fastText-0.2.0/alignment/align.py  \\n\",\n            \"  inflating: fastText-0.2.0/alignment/eval.py  \\n\",\n            \"  inflating: fastText-0.2.0/alignment/example.sh  \\n\",\n            \"  inflating: fastText-0.2.0/alignment/utils.py  \\n\",\n            \"  inflating: fastText-0.2.0/classification-example.sh  \\n\",\n            \"  inflating: fastText-0.2.0/classification-results.sh  \\n\",\n            \"   creating: fastText-0.2.0/docs/\\n\",\n            \"  inflating: fastText-0.2.0/docs/aligned-vectors.md  \\n\",\n            \"  inflating: fastText-0.2.0/docs/api.md  \\n\",\n            \"  inflating: fastText-0.2.0/docs/cheatsheet.md  \\n\",\n            \"  inflating: fastText-0.2.0/docs/crawl-vectors.md  \\n\",\n            \"  inflating: fastText-0.2.0/docs/dataset.md  \\n\",\n            \"  inflating: fastText-0.2.0/docs/english-vectors.md  \\n\",\n            \"  inflating: fastText-0.2.0/docs/faqs.md  \\n\",\n            \"  inflating: fastText-0.2.0/docs/language-identification.md  \\n\",\n            \"  inflating: fastText-0.2.0/docs/options.md  \\n\",\n            \"  inflating: fastText-0.2.0/docs/pretrained-vectors.md  \\n\",\n            \"  inflating: fastText-0.2.0/docs/references.md  \\n\",\n            \"  inflating: fastText-0.2.0/docs/supervised-models.md  \\n\",\n            \"  inflating: fastText-0.2.0/docs/supervised-tutorial.md  \\n\",\n            \"  inflating: fastText-0.2.0/docs/support.md  \\n\",\n            \"  inflating: fastText-0.2.0/docs/unsupervised-tutorials.md  \\n\",\n            \"  inflating: fastText-0.2.0/eval.py  \\n\",\n            \"  inflating: fastText-0.2.0/get-wikimedia.sh  \\n\",\n            \"  inflating: fastText-0.2.0/pretrained-vectors.md  \\n\",\n            \"   creating: fastText-0.2.0/python/\\n\",\n            \"  inflating: fastText-0.2.0/python/README.md  \\n\",\n            \"  inflating: 
fastText-0.2.0/python/README.rst  \\n\",\n            \"   creating: fastText-0.2.0/python/benchmarks/\\n\",\n            \"  inflating: fastText-0.2.0/python/benchmarks/README.rst  \\n\",\n            \"  inflating: fastText-0.2.0/python/benchmarks/get_word_vector.py  \\n\",\n            \"   creating: fastText-0.2.0/python/doc/\\n\",\n            \"   creating: fastText-0.2.0/python/doc/examples/\\n\",\n            \"  inflating: fastText-0.2.0/python/doc/examples/FastTextEmbeddingBag.py  \\n\",\n            \"  inflating: fastText-0.2.0/python/doc/examples/bin_to_vec.py  \\n\",\n            \"  inflating: fastText-0.2.0/python/doc/examples/compute_accuracy.py  \\n\",\n            \"  inflating: fastText-0.2.0/python/doc/examples/get_vocab.py  \\n\",\n            \"  inflating: fastText-0.2.0/python/doc/examples/train_supervised.py  \\n\",\n            \"  inflating: fastText-0.2.0/python/doc/examples/train_unsupervised.py  \\n\",\n            \"   creating: fastText-0.2.0/python/fastText/\\n\",\n            \"  inflating: fastText-0.2.0/python/fastText/FastText.py  \\n\",\n            \"  inflating: fastText-0.2.0/python/fastText/__init__.py  \\n\",\n            \"   creating: fastText-0.2.0/python/fastText/pybind/\\n\",\n            \"  inflating: fastText-0.2.0/python/fastText/pybind/fasttext_pybind.cc  \\n\",\n            \"   creating: fastText-0.2.0/python/fastText/tests/\\n\",\n            \"  inflating: fastText-0.2.0/python/fastText/tests/__init__.py  \\n\",\n            \"  inflating: fastText-0.2.0/python/fastText/tests/test_configurations.py  \\n\",\n            \"  inflating: fastText-0.2.0/python/fastText/tests/test_script.py  \\n\",\n            \"   creating: fastText-0.2.0/python/fastText/util/\\n\",\n            \"  inflating: fastText-0.2.0/python/fastText/util/__init__.py  \\n\",\n            \"  inflating: fastText-0.2.0/python/fastText/util/util.py  \\n\",\n            \"  inflating: fastText-0.2.0/quantization-example.sh  \\n\",\n           
 \"  inflating: fastText-0.2.0/runtests.py  \\n\",\n            \"   creating: fastText-0.2.0/scripts/\\n\",\n            \"   creating: fastText-0.2.0/scripts/kbcompletion/\\n\",\n            \"  inflating: fastText-0.2.0/scripts/kbcompletion/README.md  \\n\",\n            \"  inflating: fastText-0.2.0/scripts/kbcompletion/data.sh  \\n\",\n            \"  inflating: fastText-0.2.0/scripts/kbcompletion/eval.cpp  \\n\",\n            \"  inflating: fastText-0.2.0/scripts/kbcompletion/fb15k.sh  \\n\",\n            \"  inflating: fastText-0.2.0/scripts/kbcompletion/fb15k237.sh  \\n\",\n            \"  inflating: fastText-0.2.0/scripts/kbcompletion/svo.sh  \\n\",\n            \"  inflating: fastText-0.2.0/scripts/kbcompletion/wn18.sh  \\n\",\n            \"   creating: fastText-0.2.0/scripts/quantization/\\n\",\n            \"  inflating: fastText-0.2.0/scripts/quantization/quantization-results.sh  \\n\",\n            \" extracting: fastText-0.2.0/setup.cfg  \\n\",\n            \"  inflating: fastText-0.2.0/setup.py  \\n\",\n            \"   creating: fastText-0.2.0/src/\\n\",\n            \"  inflating: fastText-0.2.0/src/args.cc  \\n\",\n            \"  inflating: fastText-0.2.0/src/args.h  \\n\",\n            \"  inflating: fastText-0.2.0/src/dictionary.cc  \\n\",\n            \"  inflating: fastText-0.2.0/src/dictionary.h  \\n\",\n            \"  inflating: fastText-0.2.0/src/fasttext.cc  \\n\",\n            \"  inflating: fastText-0.2.0/src/fasttext.h  \\n\",\n            \"  inflating: fastText-0.2.0/src/main.cc  \\n\",\n            \"  inflating: fastText-0.2.0/src/matrix.cc  \\n\",\n            \"  inflating: fastText-0.2.0/src/matrix.h  \\n\",\n            \"  inflating: fastText-0.2.0/src/meter.cc  \\n\",\n            \"  inflating: fastText-0.2.0/src/meter.h  \\n\",\n            \"  inflating: fastText-0.2.0/src/model.cc  \\n\",\n            \"  inflating: fastText-0.2.0/src/model.h  \\n\",\n            \"  inflating: fastText-0.2.0/src/productquantizer.cc  
\\n\",\n            \"  inflating: fastText-0.2.0/src/productquantizer.h  \\n\",\n            \"  inflating: fastText-0.2.0/src/qmatrix.cc  \\n\",\n            \"  inflating: fastText-0.2.0/src/qmatrix.h  \\n\",\n            \"  inflating: fastText-0.2.0/src/real.h  \\n\",\n            \"  inflating: fastText-0.2.0/src/utils.cc  \\n\",\n            \"  inflating: fastText-0.2.0/src/utils.h  \\n\",\n            \"  inflating: fastText-0.2.0/src/vector.cc  \\n\",\n            \"  inflating: fastText-0.2.0/src/vector.h  \\n\",\n            \"   creating: fastText-0.2.0/tests/\\n\",\n            \"  inflating: fastText-0.2.0/tests/fetch_test_data.sh  \\n\",\n            \"   creating: fastText-0.2.0/website/\\n\",\n            \"  inflating: fastText-0.2.0/website/README.md  \\n\",\n            \"   creating: fastText-0.2.0/website/blog/\\n\",\n            \"  inflating: fastText-0.2.0/website/blog/2016-08-18-blog-post.md  \\n\",\n            \"  inflating: fastText-0.2.0/website/blog/2017-05-02-blog-post.md  \\n\",\n            \"  inflating: fastText-0.2.0/website/blog/2017-10-02-blog-post.md  \\n\",\n            \"   creating: fastText-0.2.0/website/core/\\n\",\n            \"  inflating: fastText-0.2.0/website/core/Footer.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/package.json  \\n\",\n            \"   creating: fastText-0.2.0/website/pages/\\n\",\n            \"   creating: fastText-0.2.0/website/pages/en/\\n\",\n            \"  inflating: fastText-0.2.0/website/pages/en/index.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/sidebars.json  \\n\",\n            \"  inflating: fastText-0.2.0/website/siteConfig.js  \\n\",\n            \"   creating: fastText-0.2.0/website/static/\\n\",\n            \"   creating: fastText-0.2.0/website/static/docs/\\n\",\n            \"   creating: fastText-0.2.0/website/static/docs/en/\\n\",\n            \"   creating: fastText-0.2.0/website/static/docs/en/html/\\n\",\n            \" extracting: 
fastText-0.2.0/website/static/docs/en/html/.classfasttext_1_1QMatrix-members.html.i4eKqy  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/annotated.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/annotated_dup.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/args_8cc.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/args_8h.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/args_8h.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/args_8h_source.html  \\n\",\n            \" extracting: fastText-0.2.0/website/static/docs/en/html/bc_s.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/bdwn.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classes.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Args-members.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Args.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Args.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Dictionary.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Dictionary.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1FastText-members.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1FastText.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1FastText.js  \\n\",\n            \"  inflating: 
fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Matrix-members.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Matrix.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Matrix.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Model-members.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Model.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Model.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1QMatrix.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1QMatrix.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Vector-members.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Vector.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Vector.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/closed.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/dictionary_8cc.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/dictionary_8h.html  \\n\",\n            \"  inflating: 
fastText-0.2.0/website/static/docs/en/html/dictionary_8h.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/dictionary_8h_source.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js  \\n\",\n            \" extracting: fastText-0.2.0/website/static/docs/en/html/doc.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/doxygen.css  \\n\",\n            \" extracting: fastText-0.2.0/website/static/docs/en/html/doxygen.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/dynsections.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/fasttext_8cc.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/fasttext_8h.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/fasttext_8h.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/fasttext_8h_source.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/favicon.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/files.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/files.js  \\n\",\n            \" extracting: fastText-0.2.0/website/static/docs/en/html/folderclosed.png  \\n\",\n            \" extracting: fastText-0.2.0/website/static/docs/en/html/folderopen.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_0x7e.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_b.html  \\n\",\n            \"  inflating: 
fastText-0.2.0/website/static/docs/en/html/functions_c.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_d.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_dup.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_e.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_f.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_func.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_g.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_h.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_i.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_k.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_l.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_m.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_n.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_o.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_p.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_q.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_r.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_s.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_t.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_u.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_v.html  \\n\",\n        
    \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_vars.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_w.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/functions_z.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/globals.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/globals_defs.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/globals_func.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/index.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/jquery.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/main_8cc.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/main_8cc.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/matrix_8cc.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/matrix_8h.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/matrix_8h_source.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/menu.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/menudata.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/model_8cc.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/model_8h.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/model_8h.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/model_8h_source.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/namespacefasttext.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/namespacefasttext.js  \\n\",\n            \"  
inflating: fastText-0.2.0/website/static/docs/en/html/namespacefasttext_1_1utils.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/namespacemembers.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/namespacemembers_enum.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/namespacemembers_func.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/namespacemembers_type.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/namespaces.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/namespaces.js  \\n\",\n            \" extracting: fastText-0.2.0/website/static/docs/en/html/nav_f.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/nav_g.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/nav_h.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/navtree.css  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/navtree.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/navtreedata.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/navtreeindex0.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/navtreeindex1.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/open.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/productquantizer_8cc.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/productquantizer_8cc.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/productquantizer_8h.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/productquantizer_8h_source.html  \\n\",\n            \"  inflating: 
fastText-0.2.0/website/static/docs/en/html/qmatrix_8cc.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/qmatrix_8h.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/qmatrix_8h_source.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/real_8h.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/real_8h.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/real_8h_source.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/resize.js  \\n\",\n            \"   creating: fastText-0.2.0/website/static/docs/en/html/search/\\n\",\n            \" extracting: fastText-0.2.0/website/static/docs/en/html/search/.files_7.html.StRRNc  \\n\",\n            \" extracting: fastText-0.2.0/website/static/docs/en/html/search/.variables_a.html.1MGQ27  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_0.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_0.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_1.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_1.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_10.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_10.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_11.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_11.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_12.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_12.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_13.html  
\\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_13.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_14.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_14.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_15.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_15.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_16.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_16.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_17.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_17.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_2.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_2.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_3.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_3.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_4.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_4.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_5.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_5.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_6.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_6.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_7.html  \\n\",\n            \"  inflating: 
fastText-0.2.0/website/static/docs/en/html/search/all_7.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_8.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_8.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_9.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_9.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_a.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_a.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_b.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_b.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_c.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_c.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_d.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_d.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_e.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_e.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_f.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/all_f.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_0.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_0.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_1.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_1.js  
\\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_2.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_2.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_3.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_3.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_4.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_4.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_5.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_5.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_6.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_6.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_7.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_7.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_8.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_8.js  \\n\",\n            \" extracting: fastText-0.2.0/website/static/docs/en/html/search/close.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_0.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_0.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_1.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_1.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_2.html  
\\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_2.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_3.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_3.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/enums_0.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/enums_0.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/enums_1.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/enums_1.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/enums_2.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/enums_2.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_0.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_0.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_1.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_1.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_2.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_2.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_3.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_3.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_4.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_4.js  \\n\",\n            \"  inflating: 
fastText-0.2.0/website/static/docs/en/html/search/enumvalues_5.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_5.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/files_0.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/files_0.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/files_1.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/files_1.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/files_2.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/files_2.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/files_3.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/files_3.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/files_4.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/files_4.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/files_5.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/files_5.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/files_6.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/files_6.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/files_7.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/files_7.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/files_8.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/files_8.js  \\n\",\n            \"  inflating: 
fastText-0.2.0/website/static/docs/en/html/search/functions_0.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_0.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_1.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_1.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_10.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_10.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_11.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_11.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_12.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_12.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_13.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_13.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_14.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_14.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_15.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_15.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_16.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_16.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_17.html  \\n\",\n            \"  inflating: 
fastText-0.2.0/website/static/docs/en/html/search/functions_17.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_2.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_2.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_3.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_3.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_4.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_4.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_5.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_5.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_6.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_6.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_7.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_7.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_8.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_8.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_9.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_9.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_a.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_a.js  \\n\",\n            \"  inflating: 
fastText-0.2.0/website/static/docs/en/html/search/functions_b.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_b.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_c.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_c.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_d.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_d.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_e.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_e.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_f.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_f.js  \\n\",\n            \" extracting: fastText-0.2.0/website/static/docs/en/html/search/mag_sel.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/namespaces_0.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/namespaces_0.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/nomatches.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/search.css  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/search.js  \\n\",\n            \" extracting: fastText-0.2.0/website/static/docs/en/html/search/search_l.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/search_m.png  \\n\",\n            \" extracting: fastText-0.2.0/website/static/docs/en/html/search/search_r.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/searchdata.js  \\n\",\n            
\"  inflating: fastText-0.2.0/website/static/docs/en/html/search/typedefs_0.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/typedefs_0.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/typedefs_1.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/typedefs_1.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_0.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_0.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_1.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_1.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_10.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_10.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_11.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_11.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_12.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_12.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_13.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_13.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_2.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_2.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_3.html  \\n\",\n            \"  inflating: 
fastText-0.2.0/website/static/docs/en/html/search/variables_3.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_4.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_4.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_5.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_5.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_6.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_6.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_7.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_7.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_8.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_8.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_9.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_9.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_a.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_a.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_b.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_b.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_c.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_c.js  \\n\",\n            \"  inflating: 
fastText-0.2.0/website/static/docs/en/html/search/variables_d.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_d.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_e.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_e.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_f.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_f.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/splitbar.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/structfasttext_1_1Node-members.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/structfasttext_1_1Node.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/structfasttext_1_1Node.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/structfasttext_1_1entry-members.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/structfasttext_1_1entry.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/structfasttext_1_1entry.js  \\n\",\n            \" extracting: fastText-0.2.0/website/static/docs/en/html/sync_off.png  \\n\",\n            \" extracting: fastText-0.2.0/website/static/docs/en/html/sync_on.png  \\n\",\n            \" extracting: fastText-0.2.0/website/static/docs/en/html/tab_a.png  \\n\",\n            \" extracting: fastText-0.2.0/website/static/docs/en/html/tab_b.png  \\n\",\n            \" extracting: fastText-0.2.0/website/static/docs/en/html/tab_h.png  \\n\",\n            \" extracting: fastText-0.2.0/website/static/docs/en/html/tab_s.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/tabs.css  \\n\",\n            \"  inflating: 
fastText-0.2.0/website/static/docs/en/html/utils_8cc.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/utils_8cc.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/utils_8h.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/utils_8h.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/utils_8h_source.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/vector_8cc.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/vector_8cc.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/vector_8h.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/vector_8h.js  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/docs/en/html/vector_8h_source.html  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/fasttext.css  \\n\",\n            \"   creating: fastText-0.2.0/website/static/img/\\n\",\n            \"   creating: fastText-0.2.0/website/static/img/authors/\\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/authors/armand_joulin.jpg  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/authors/christian_puhrsch.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/authors/edouard_grave.jpeg  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/authors/piotr_bojanowski.jpg  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/authors/tomas_mikolov.jpg  \\n\",\n            \"   creating: fastText-0.2.0/website/static/img/blog/\\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/blog/2016-08-18-blog-post-img1.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/blog/2016-08-18-blog-post-img2.png  \\n\",\n            \"  inflating: 
fastText-0.2.0/website/static/img/blog/2017-05-02-blog-post-img1.jpg  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/blog/2017-05-02-blog-post-img2.jpg  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/blog/2017-10-02-blog-post-img1.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/cbo_vs_skipgram.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/fasttext-icon-api.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/fasttext-icon-bg-web.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/fasttext-icon-color-square.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/fasttext-icon-color-web.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/fasttext-icon-faq.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/fasttext-icon-tutorial.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/fasttext-icon-white-web.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/fasttext-logo-color-web.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/fasttext-logo-white-web.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/logo-color.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/model-black.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/model-blue.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/model-red.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/ogimage.png  \\n\",\n            \"  inflating: fastText-0.2.0/website/static/img/oss_logo.png  \\n\",\n            \"  inflating: fastText-0.2.0/wikifil.pl  \\n\",\n            \"  inflating: fastText-0.2.0/word-vector-example.sh  \\n\",\n            \"/content/fastText-0.2.0\\n\",\n            \"c++ -pthread -std=c++0x -march=native -O3 
-funroll-loops -c src/args.cc\\n\",\n            \"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/dictionary.cc\\n\",\n            \"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/productquantizer.cc\\n\",\n            \"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/matrix.cc\\n\",\n            \"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/qmatrix.cc\\n\",\n            \"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/vector.cc\\n\",\n            \"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/model.cc\\n\",\n            \"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/utils.cc\\n\",\n            \"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/meter.cc\\n\",\n            \"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/fasttext.cc\\n\",\n            \"\\u001b[01m\\u001b[Ksrc/fasttext.cc:\\u001b[m\\u001b[K In member function ‘\\u001b[01m\\u001b[Kvoid fasttext::FastText::quantize(const fasttext::Args&)\\u001b[m\\u001b[K’:\\n\",\n            \"\\u001b[01m\\u001b[Ksrc/fasttext.cc:302:45:\\u001b[m\\u001b[K \\u001b[01;35m\\u001b[Kwarning: \\u001b[m\\u001b[K‘\\u001b[01m\\u001b[Kstd::vector<int> fasttext::FastText::selectEmbeddings(int32_t) const\\u001b[m\\u001b[K’ is deprecated: selectEmbeddings is being deprecated. 
[\\u001b[01;35m\\u001b[K-Wdeprecated-declarations\\u001b[m\\u001b[K]\\n\",\n            \"     auto idx = selectEmbeddings(qargs.cutoff\\u001b[01;35m\\u001b[K)\\u001b[m\\u001b[K;\\n\",\n            \"                                             \\u001b[01;35m\\u001b[K^\\u001b[m\\u001b[K\\n\",\n            \"\\u001b[01m\\u001b[Ksrc/fasttext.cc:279:22:\\u001b[m\\u001b[K \\u001b[01;36m\\u001b[Knote: \\u001b[m\\u001b[Kdeclared here\\n\",\n            \" std::vector<int32_t> \\u001b[01;36m\\u001b[KFastText\\u001b[m\\u001b[K::selectEmbeddings(int32_t cutoff) const {\\n\",\n            \"                      \\u001b[01;36m\\u001b[K^~~~~~~~\\u001b[m\\u001b[K\\n\",\n            \"\\u001b[01m\\u001b[Ksrc/fasttext.cc:\\u001b[m\\u001b[K In member function ‘\\u001b[01m\\u001b[Kvoid fasttext::FastText::lazyComputeWordVectors()\\u001b[m\\u001b[K’:\\n\",\n            \"\\u001b[01m\\u001b[Ksrc/fasttext.cc:531:40:\\u001b[m\\u001b[K \\u001b[01;35m\\u001b[Kwarning: \\u001b[m\\u001b[K‘\\u001b[01m\\u001b[Kvoid fasttext::FastText::precomputeWordVectors(fasttext::Matrix&)\\u001b[m\\u001b[K’ is deprecated: precomputeWordVectors is being deprecated. 
[\\u001b[01;35m\\u001b[K-Wdeprecated-declarations\\u001b[m\\u001b[K]\\n\",\n            \"     precomputeWordVectors(*wordVectors_\\u001b[01;35m\\u001b[K)\\u001b[m\\u001b[K;\\n\",\n            \"                                        \\u001b[01;35m\\u001b[K^\\u001b[m\\u001b[K\\n\",\n            \"\\u001b[01m\\u001b[Ksrc/fasttext.cc:514:6:\\u001b[m\\u001b[K \\u001b[01;36m\\u001b[Knote: \\u001b[m\\u001b[Kdeclared here\\n\",\n            \" void \\u001b[01;36m\\u001b[KFastText\\u001b[m\\u001b[K::precomputeWordVectors(Matrix& wordVectors) {\\n\",\n            \"      \\u001b[01;36m\\u001b[K^~~~~~~~\\u001b[m\\u001b[K\\n\",\n            \"\\u001b[01m\\u001b[Ksrc/fasttext.cc:\\u001b[m\\u001b[K In member function ‘\\u001b[01m\\u001b[Kvoid fasttext::FastText::trainThread(int32_t)\\u001b[m\\u001b[K’:\\n\",\n            \"\\u001b[01m\\u001b[Ksrc/fasttext.cc:650:41:\\u001b[m\\u001b[K \\u001b[01;35m\\u001b[Kwarning: \\u001b[m\\u001b[K‘\\u001b[01m\\u001b[Kvoid fasttext::FastText::supervised(fasttext::Model&, fasttext::real, const std::vector<int>&, const std::vector<int>&)\\u001b[m\\u001b[K’ is deprecated: supervised is being deprecated. 
[\\u001b[01;35m\\u001b[K-Wdeprecated-declarations\\u001b[m\\u001b[K]\\n\",\n            \"       supervised(model, lr, line, labels\\u001b[01;35m\\u001b[K)\\u001b[m\\u001b[K;\\n\",\n            \"                                         \\u001b[01;35m\\u001b[K^\\u001b[m\\u001b[K\\n\",\n            \"\\u001b[01m\\u001b[Ksrc/fasttext.cc:338:6:\\u001b[m\\u001b[K \\u001b[01;36m\\u001b[Knote: \\u001b[m\\u001b[Kdeclared here\\n\",\n            \" void \\u001b[01;36m\\u001b[KFastText\\u001b[m\\u001b[K::supervised(\\n\",\n            \"      \\u001b[01;36m\\u001b[K^~~~~~~~\\u001b[m\\u001b[K\\n\",\n            \"\\u001b[01m\\u001b[Ksrc/fasttext.cc:653:27:\\u001b[m\\u001b[K \\u001b[01;35m\\u001b[Kwarning: \\u001b[m\\u001b[K‘\\u001b[01m\\u001b[Kvoid fasttext::FastText::cbow(fasttext::Model&, fasttext::real, const std::vector<int>&)\\u001b[m\\u001b[K’ is deprecated: cbow is being deprecated. [\\u001b[01;35m\\u001b[K-Wdeprecated-declarations\\u001b[m\\u001b[K]\\n\",\n            \"       cbow(model, lr, line\\u001b[01;35m\\u001b[K)\\u001b[m\\u001b[K;\\n\",\n            \"                           \\u001b[01;35m\\u001b[K^\\u001b[m\\u001b[K\\n\",\n            \"\\u001b[01m\\u001b[Ksrc/fasttext.cc:355:6:\\u001b[m\\u001b[K \\u001b[01;36m\\u001b[Knote: \\u001b[m\\u001b[Kdeclared here\\n\",\n            \" void \\u001b[01;36m\\u001b[KFastText\\u001b[m\\u001b[K::cbow(Model& model, real lr, const std::vector<int32_t>& line) {\\n\",\n            \"      \\u001b[01;36m\\u001b[K^~~~~~~~\\u001b[m\\u001b[K\\n\",\n            \"\\u001b[01m\\u001b[Ksrc/fasttext.cc:656:31:\\u001b[m\\u001b[K \\u001b[01;35m\\u001b[Kwarning: \\u001b[m\\u001b[K‘\\u001b[01m\\u001b[Kvoid fasttext::FastText::skipgram(fasttext::Model&, fasttext::real, const std::vector<int>&)\\u001b[m\\u001b[K’ is deprecated: skipgram is being deprecated. 
[\\u001b[01;35m\\u001b[K-Wdeprecated-declarations\\u001b[m\\u001b[K]\\n\",\n            \"       skipgram(model, lr, line\\u001b[01;35m\\u001b[K)\\u001b[m\\u001b[K;\\n\",\n            \"                               \\u001b[01;35m\\u001b[K^\\u001b[m\\u001b[K\\n\",\n            \"\\u001b[01m\\u001b[Ksrc/fasttext.cc:371:6:\\u001b[m\\u001b[K \\u001b[01;36m\\u001b[Knote: \\u001b[m\\u001b[Kdeclared here\\n\",\n            \" void \\u001b[01;36m\\u001b[KFastText\\u001b[m\\u001b[K::skipgram(\\n\",\n            \"      \\u001b[01;36m\\u001b[K^~~~~~~~\\u001b[m\\u001b[K\\n\",\n            \"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops args.o dictionary.o productquantizer.o matrix.o qmatrix.o vector.o model.o utils.o meter.o fasttext.o src/main.cc -o fasttext\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"5JauDviyqqL-\",\n        \"colab_type\": \"text\"\n      },\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"## Make simple dataset\"\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"ALMQ3gjFqqZS\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"# 1 is positive, 0 is negative\\n\",\n        \"f = open('train.txt', 'w')\\n\",\n        \"f.write('__label__1 i love you\\\\n')\\n\",\n        \"f.write('__label__1 he loves me\\\\n')\\n\",\n        \"f.write('__label__1 she likes baseball\\\\n')\\n\",\n        \"f.write('__label__0 i hate you\\\\n')\\n\",\n        \"f.write('__label__0 sorry for that\\\\n')\\n\",\n        \"f.write('__label__0 this is awful')\\n\",\n        \"f.close()\\n\",\n        \"\\n\",\n        \"f = open('test.txt', 'w')\\n\",\n        \"f.write('sorry hate you')\\n\",\n        \"f.close()\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"i3_PpexwsN_a\",\n        
\"colab_type\": \"text\"\n      },\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"## Training\"\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"q06m76JusOQ8\",\n        \"colab_type\": \"code\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 92\n        },\n        \"outputId\": \"4ed3502d-4aec-4d06-cb02-b8392978ce14\"\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"!./fasttext supervised -input train.txt -output model -dim 2\"\n      ],\n      \"execution_count\": 18,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\rRead 0M words\\n\",\n            \"Number of words:  17\\n\",\n            \"Number of labels: 2\\n\",\n            \"\\rProgress: 100.0% words/sec/thread:   17608 lr:  0.000000 loss:  0.672308 ETA:   0h 0m\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"C77MXO-GsOpi\",\n        \"colab_type\": \"text\"\n      },\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"## Predict\"\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"y1yDPCjVsO6x\",\n        \"colab_type\": \"code\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 36\n        },\n        \"outputId\": \"8963d7bd-01c8-40b9-e1ee-1446cb1b3454\"\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"!cat test.txt\\n\",\n        \"!./fasttext predict model.bin test.txt\"\n      ],\n      \"execution_count\": 22,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"sorry hate you__label__0\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    }\n  ]\n}"
  },
  {
    "path": "1-3.FastText/test.txt",
    "content": "sorry hate you"
  },
  {
    "path": "1-3.FastText/train.txt",
    "content": "__label__1 i love you\n__label__1 he loves me\n__label__1 she likes baseball\n__label__0 i hate you\n__label__0 sorry for that\n__label__0 this is awful"
  },
  {
    "path": "2-1.TextCNN/TextCNN.ipynb",
    "content": "{\n  \"cells\": [\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {},\n      \"source\": [\n        \"# code by Tae Hwan Jung @graykode\\n\",\n        \"import numpy as np\\n\",\n        \"import torch\\n\",\n        \"import torch.nn as nn\\n\",\n        \"import torch.optim as optim\\n\",\n        \"import torch.nn.functional as F\\n\",\n        \"\\n\",\n        \"class TextCNN(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(TextCNN, self).__init__()\\n\",\n        \"        self.num_filters_total = num_filters * len(filter_sizes)\\n\",\n        \"        self.W = nn.Embedding(vocab_size, embedding_size)\\n\",\n        \"        self.Weight = nn.Linear(self.num_filters_total, num_classes, bias=False)\\n\",\n        \"        self.Bias = nn.Parameter(torch.ones([num_classes]))\\n\",\n        \"        self.filter_list = nn.ModuleList([nn.Conv2d(1, num_filters, (size, embedding_size)) for size in filter_sizes])\\n\",\n        \"\\n\",\n        \"    def forward(self, X):\\n\",\n        \"        embedded_chars = self.W(X) # [batch_size, sequence_length, sequence_length]\\n\",\n        \"        embedded_chars = embedded_chars.unsqueeze(1) # add channel(=1) [batch, channel(=1), sequence_length, embedding_size]\\n\",\n        \"\\n\",\n        \"        pooled_outputs = []\\n\",\n        \"        for i, conv in enumerate(self.filter_list):\\n\",\n        \"            # conv : [input_channel(=1), output_channel(=3), (filter_height, filter_width), bias_option]\\n\",\n        \"            h = F.relu(conv(embedded_chars))\\n\",\n        \"            # mp : ((filter_height, filter_width))\\n\",\n        \"            mp = nn.MaxPool2d((sequence_length - filter_sizes[i] + 1, 1))\\n\",\n        \"            # pooled : [batch_size(=6), output_height(=1), output_width(=1), output_channel(=3)]\\n\",\n        \"            pooled = mp(h).permute(0, 3, 2, 1)\\n\",\n        \"            
pooled_outputs.append(pooled)\\n\",\n        \"\\n\",\n        \"        h_pool = torch.cat(pooled_outputs, len(filter_sizes)) # [batch_size(=6), output_height(=1), output_width(=1), output_channel(=3) * 3]\\n\",\n        \"        h_pool_flat = torch.reshape(h_pool, [-1, self.num_filters_total]) # [batch_size(=6), output_height * output_width * (output_channel * 3)]\\n\",\n        \"        model = self.Weight(h_pool_flat) + self.Bias # [batch_size, num_classes]\\n\",\n        \"        return model\\n\",\n        \"\\n\",\n        \"if __name__ == '__main__':\\n\",\n        \"    embedding_size = 2 # embedding size\\n\",\n        \"    sequence_length = 3 # sequence length\\n\",\n        \"    num_classes = 2 # number of classes\\n\",\n        \"    filter_sizes = [2, 2, 2] # n-gram windows\\n\",\n        \"    num_filters = 3 # number of filters\\n\",\n        \"\\n\",\n        \"    # 3 words sentences (=sequence_length is 3)\\n\",\n        \"    sentences = [\\\"i love you\\\", \\\"he loves me\\\", \\\"she likes baseball\\\", \\\"i hate you\\\", \\\"sorry for that\\\", \\\"this is awful\\\"]\\n\",\n        \"    labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.\\n\",\n        \"\\n\",\n        \"    word_list = \\\" \\\".join(sentences).split()\\n\",\n        \"    word_list = list(set(word_list))\\n\",\n        \"    word_dict = {w: i for i, w in enumerate(word_list)}\\n\",\n        \"    vocab_size = len(word_dict)\\n\",\n        \"\\n\",\n        \"    model = TextCNN()\\n\",\n        \"\\n\",\n        \"    criterion = nn.CrossEntropyLoss()\\n\",\n        \"    optimizer = optim.Adam(model.parameters(), lr=0.001)\\n\",\n        \"\\n\",\n        \"    inputs = torch.LongTensor([np.asarray([word_dict[n] for n in sen.split()]) for sen in sentences])\\n\",\n        \"    targets = torch.LongTensor([out for out in labels]) # To using Torch Softmax Loss function\\n\",\n        \"\\n\",\n        \"    # Training\\n\",\n        \"    for epoch in 
range(5000):\\n\",\n        \"        optimizer.zero_grad()\\n\",\n        \"        output = model(inputs)\\n\",\n        \"\\n\",\n        \"        # output : [batch_size, num_classes], target_batch : [batch_size] (LongTensor, not one-hot)\\n\",\n        \"        loss = criterion(output, targets)\\n\",\n        \"        if (epoch + 1) % 1000 == 0:\\n\",\n        \"            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\\n\",\n        \"\\n\",\n        \"        loss.backward()\\n\",\n        \"        optimizer.step()\\n\",\n        \"\\n\",\n        \"    # Test\\n\",\n        \"    test_text = 'sorry hate you'\\n\",\n        \"    tests = [np.asarray([word_dict[n] for n in test_text.split()])]\\n\",\n        \"    test_batch = torch.LongTensor(tests)\\n\",\n        \"\\n\",\n        \"    # Predict\\n\",\n        \"    predict = model(test_batch).data.max(1, keepdim=True)[1]\\n\",\n        \"    if predict[0][0] == 0:\\n\",\n        \"        print(test_text,\\\"is Bad Mean...\\\")\\n\",\n        \"    else:\\n\",\n        \"        print(test_text,\\\"is Good Mean!!\\\")\"\n      ],\n      \"outputs\": [],\n      \"execution_count\": null\n    }\n  ],\n  \"metadata\": {\n    \"anaconda-cloud\": {},\n    \"kernelspec\": {\n      \"display_name\": \"Python 3\",\n      \"language\": \"python\",\n      \"name\": \"python3\"\n    },\n    \"language_info\": {\n      \"codemirror_mode\": {\n        \"name\": \"ipython\",\n        \"version\": 3\n      },\n      \"file_extension\": \".py\",\n      \"mimetype\": \"text/x-python\",\n      \"name\": \"python\",\n      \"nbconvert_exporter\": \"python\",\n      \"pygments_lexer\": \"ipython3\",\n      \"version\": \"3.6.1\"\n    }\n  },\n  \"nbformat\": 4,\n  \"nbformat_minor\": 4\n}"
  },
  {
    "path": "2-1.TextCNN/TextCNN.py",
    "content": "# %%\n# code by Tae Hwan Jung @graykode\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nimport torch.nn.functional as F\n\nclass TextCNN(nn.Module):\n    def __init__(self):\n        super(TextCNN, self).__init__()\n        self.num_filters_total = num_filters * len(filter_sizes)\n        self.W = nn.Embedding(vocab_size, embedding_size)\n        self.Weight = nn.Linear(self.num_filters_total, num_classes, bias=False)\n        self.Bias = nn.Parameter(torch.ones([num_classes]))\n        self.filter_list = nn.ModuleList([nn.Conv2d(1, num_filters, (size, embedding_size)) for size in filter_sizes])\n\n    def forward(self, X):\n        embedded_chars = self.W(X) # [batch_size, sequence_length, embedding_size]\n        embedded_chars = embedded_chars.unsqueeze(1) # add channel(=1) [batch, channel(=1), sequence_length, embedding_size]\n\n        pooled_outputs = []\n        for i, conv in enumerate(self.filter_list):\n            # conv : [input_channel(=1), output_channel(=3), (filter_height, filter_width), bias_option]\n            h = F.relu(conv(embedded_chars))\n            # mp : ((filter_height, filter_width))\n            mp = nn.MaxPool2d((sequence_length - filter_sizes[i] + 1, 1))\n            # pooled : [batch_size(=6), output_height(=1), output_width(=1), output_channel(=3)]\n            pooled = mp(h).permute(0, 3, 2, 1)\n            pooled_outputs.append(pooled)\n\n        h_pool = torch.cat(pooled_outputs, len(filter_sizes)) # [batch_size(=6), output_height(=1), output_width(=1), output_channel(=3) * 3]\n        h_pool_flat = torch.reshape(h_pool, [-1, self.num_filters_total]) # [batch_size(=6), output_height * output_width * (output_channel * 3)]\n        model = self.Weight(h_pool_flat) + self.Bias # [batch_size, num_classes]\n        return model\n\nif __name__ == '__main__':\n    embedding_size = 2 # embedding size\n    sequence_length = 3 # sequence length\n    num_classes = 2 # number of 
classes\n    filter_sizes = [2, 2, 2] # n-gram windows\n    num_filters = 3 # number of filters\n\n    # 3 words sentences (=sequence_length is 3)\n    sentences = [\"i love you\", \"he loves me\", \"she likes baseball\", \"i hate you\", \"sorry for that\", \"this is awful\"]\n    labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.\n\n    word_list = \" \".join(sentences).split()\n    word_list = list(set(word_list))\n    word_dict = {w: i for i, w in enumerate(word_list)}\n    vocab_size = len(word_dict)\n\n    model = TextCNN()\n\n    criterion = nn.CrossEntropyLoss()\n    optimizer = optim.Adam(model.parameters(), lr=0.001)\n\n    inputs = torch.LongTensor([np.asarray([word_dict[n] for n in sen.split()]) for sen in sentences])\n    targets = torch.LongTensor([out for out in labels]) # To using Torch Softmax Loss function\n\n    # Training\n    for epoch in range(5000):\n        optimizer.zero_grad()\n        output = model(inputs)\n\n        # output : [batch_size, num_classes], target_batch : [batch_size] (LongTensor, not one-hot)\n        loss = criterion(output, targets)\n        if (epoch + 1) % 1000 == 0:\n            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n\n        loss.backward()\n        optimizer.step()\n\n    # Test\n    test_text = 'sorry hate you'\n    tests = [np.asarray([word_dict[n] for n in test_text.split()])]\n    test_batch = torch.LongTensor(tests)\n\n    # Predict\n    predict = model(test_batch).data.max(1, keepdim=True)[1]\n    if predict[0][0] == 0:\n        print(test_text,\"is Bad Mean...\")\n    else:\n        print(test_text,\"is Good Mean!!\")"
  },
  {
    "path": "3-1.TextRNN/TextRNN.ipynb",
    "content": "{\n  \"cells\": [\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {},\n      \"source\": [\n        \"# code by Tae Hwan Jung @graykode\\n\",\n        \"import numpy as np\\n\",\n        \"import torch\\n\",\n        \"import torch.nn as nn\\n\",\n        \"import torch.optim as optim\\n\",\n        \"\\n\",\n        \"def make_batch():\\n\",\n        \"    input_batch = []\\n\",\n        \"    target_batch = []\\n\",\n        \"\\n\",\n        \"    for sen in sentences:\\n\",\n        \"        word = sen.split()  # space tokenizer\\n\",\n        \"        input = [word_dict[n] for n in word[:-1]]  # create (1~n-1) as input\\n\",\n        \"        target = word_dict[word[-1]]  # create (n) as target, We usually call this 'casual language model'\\n\",\n        \"\\n\",\n        \"        input_batch.append(np.eye(n_class)[input])\\n\",\n        \"        target_batch.append(target)\\n\",\n        \"\\n\",\n        \"    return input_batch, target_batch\\n\",\n        \"\\n\",\n        \"class TextRNN(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(TextRNN, self).__init__()\\n\",\n        \"        self.rnn = nn.RNN(input_size=n_class, hidden_size=n_hidden)\\n\",\n        \"        self.W = nn.Linear(n_hidden, n_class, bias=False)\\n\",\n        \"        self.b = nn.Parameter(torch.ones([n_class]))\\n\",\n        \"\\n\",\n        \"    def forward(self, hidden, X):\\n\",\n        \"        X = X.transpose(0, 1) # X : [n_step, batch_size, n_class]\\n\",\n        \"        outputs, hidden = self.rnn(X, hidden)\\n\",\n        \"        # outputs : [n_step, batch_size, num_directions(=1) * n_hidden]\\n\",\n        \"        # hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\\n\",\n        \"        outputs = outputs[-1] # [batch_size, num_directions(=1) * n_hidden]\\n\",\n        \"        model = self.W(outputs) + self.b # model : [batch_size, n_class]\\n\",\n        \"        
return model\\n\",\n        \"\\n\",\n        \"if __name__ == '__main__':\\n\",\n        \"    n_step = 2 # number of cells(= number of Step)\\n\",\n        \"    n_hidden = 5 # number of hidden units in one cell\\n\",\n        \"\\n\",\n        \"    sentences = [\\\"i like dog\\\", \\\"i love coffee\\\", \\\"i hate milk\\\"]\\n\",\n        \"\\n\",\n        \"    word_list = \\\" \\\".join(sentences).split()\\n\",\n        \"    word_list = list(set(word_list))\\n\",\n        \"    word_dict = {w: i for i, w in enumerate(word_list)}\\n\",\n        \"    number_dict = {i: w for i, w in enumerate(word_list)}\\n\",\n        \"    n_class = len(word_dict)\\n\",\n        \"    batch_size = len(sentences)\\n\",\n        \"\\n\",\n        \"    model = TextRNN()\\n\",\n        \"\\n\",\n        \"    criterion = nn.CrossEntropyLoss()\\n\",\n        \"    optimizer = optim.Adam(model.parameters(), lr=0.001)\\n\",\n        \"\\n\",\n        \"    input_batch, target_batch = make_batch()\\n\",\n        \"    input_batch = torch.FloatTensor(input_batch)\\n\",\n        \"    target_batch = torch.LongTensor(target_batch)\\n\",\n        \"\\n\",\n        \"    # Training\\n\",\n        \"    for epoch in range(5000):\\n\",\n        \"        optimizer.zero_grad()\\n\",\n        \"\\n\",\n        \"        # hidden : [num_layers * num_directions, batch, hidden_size]\\n\",\n        \"        hidden = torch.zeros(1, batch_size, n_hidden)\\n\",\n        \"        # input_batch : [batch_size, n_step, n_class]\\n\",\n        \"        output = model(hidden, input_batch)\\n\",\n        \"\\n\",\n        \"        # output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)\\n\",\n        \"        loss = criterion(output, target_batch)\\n\",\n        \"        if (epoch + 1) % 1000 == 0:\\n\",\n        \"            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\\n\",\n        \"\\n\",\n        \"        loss.backward()\\n\",\n      
  \"        optimizer.step()\\n\",\n        \"\\n\",\n        \"    input = [sen.split()[:2] for sen in sentences]\\n\",\n        \"\\n\",\n        \"    # Predict\\n\",\n        \"    hidden = torch.zeros(1, batch_size, n_hidden)\\n\",\n        \"    predict = model(hidden, input_batch).data.max(1, keepdim=True)[1]\\n\",\n        \"    print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])\"\n      ],\n      \"outputs\": [],\n      \"execution_count\": null\n    }\n  ],\n  \"metadata\": {\n    \"anaconda-cloud\": {},\n    \"kernelspec\": {\n      \"display_name\": \"Python 3\",\n      \"language\": \"python\",\n      \"name\": \"python3\"\n    },\n    \"language_info\": {\n      \"codemirror_mode\": {\n        \"name\": \"ipython\",\n        \"version\": 3\n      },\n      \"file_extension\": \".py\",\n      \"mimetype\": \"text/x-python\",\n      \"name\": \"python\",\n      \"nbconvert_exporter\": \"python\",\n      \"pygments_lexer\": \"ipython3\",\n      \"version\": \"3.6.1\"\n    }\n  },\n  \"nbformat\": 4,\n  \"nbformat_minor\": 4\n}"
  },
  {
    "path": "3-1.TextRNN/TextRNN.py",
    "content": "# %%\n# code by Tae Hwan Jung @graykode\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\n\ndef make_batch():\n    input_batch = []\n    target_batch = []\n\n    for sen in sentences:\n        word = sen.split()  # space tokenizer\n        input = [word_dict[n] for n in word[:-1]]  # create (1~n-1) as input\n        target = word_dict[word[-1]]  # create (n) as target, We usually call this 'causal language model'\n\n        input_batch.append(np.eye(n_class)[input])\n        target_batch.append(target)\n\n    return input_batch, target_batch\n\nclass TextRNN(nn.Module):\n    def __init__(self):\n        super(TextRNN, self).__init__()\n        self.rnn = nn.RNN(input_size=n_class, hidden_size=n_hidden)\n        self.W = nn.Linear(n_hidden, n_class, bias=False)\n        self.b = nn.Parameter(torch.ones([n_class]))\n\n    def forward(self, hidden, X):\n        X = X.transpose(0, 1) # X : [n_step, batch_size, n_class]\n        outputs, hidden = self.rnn(X, hidden)\n        # outputs : [n_step, batch_size, num_directions(=1) * n_hidden]\n        # hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n        outputs = outputs[-1] # [batch_size, num_directions(=1) * n_hidden]\n        model = self.W(outputs) + self.b # model : [batch_size, n_class]\n        return model\n\nif __name__ == '__main__':\n    n_step = 2 # number of cells(= number of Step)\n    n_hidden = 5 # number of hidden units in one cell\n\n    sentences = [\"i like dog\", \"i love coffee\", \"i hate milk\"]\n\n    word_list = \" \".join(sentences).split()\n    word_list = list(set(word_list))\n    word_dict = {w: i for i, w in enumerate(word_list)}\n    number_dict = {i: w for i, w in enumerate(word_list)}\n    n_class = len(word_dict)\n    batch_size = len(sentences)\n\n    model = TextRNN()\n\n    criterion = nn.CrossEntropyLoss()\n    optimizer = optim.Adam(model.parameters(), lr=0.001)\n\n    input_batch, target_batch = 
make_batch()\n    input_batch = torch.FloatTensor(input_batch)\n    target_batch = torch.LongTensor(target_batch)\n\n    # Training\n    for epoch in range(5000):\n        optimizer.zero_grad()\n\n        # hidden : [num_layers * num_directions, batch, hidden_size]\n        hidden = torch.zeros(1, batch_size, n_hidden)\n        # input_batch : [batch_size, n_step, n_class]\n        output = model(hidden, input_batch)\n\n        # output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)\n        loss = criterion(output, target_batch)\n        if (epoch + 1) % 1000 == 0:\n            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n\n        loss.backward()\n        optimizer.step()\n\n    input = [sen.split()[:2] for sen in sentences]\n\n    # Predict\n    hidden = torch.zeros(1, batch_size, n_hidden)\n    predict = model(hidden, input_batch).data.max(1, keepdim=True)[1]\n    print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])"
  },
  {
    "path": "3-2.TextLSTM/TextLSTM.ipynb",
    "content": "{\n  \"cells\": [\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {},\n      \"source\": [\n        \"# code by Tae Hwan Jung @graykode\\n\",\n        \"import numpy as np\\n\",\n        \"import torch\\n\",\n        \"import torch.nn as nn\\n\",\n        \"import torch.optim as optim\\n\",\n        \"\\n\",\n        \"def make_batch():\\n\",\n        \"    input_batch, target_batch = [], []\\n\",\n        \"\\n\",\n        \"    for seq in seq_data:\\n\",\n        \"        input = [word_dict[n] for n in seq[:-1]] # 'm', 'a' , 'k' is input\\n\",\n        \"        target = word_dict[seq[-1]] # 'e' is target\\n\",\n        \"        input_batch.append(np.eye(n_class)[input])\\n\",\n        \"        target_batch.append(target)\\n\",\n        \"\\n\",\n        \"    return input_batch, target_batch\\n\",\n        \"\\n\",\n        \"class TextLSTM(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(TextLSTM, self).__init__()\\n\",\n        \"\\n\",\n        \"        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden)\\n\",\n        \"        self.W = nn.Linear(n_hidden, n_class, bias=False)\\n\",\n        \"        self.b = nn.Parameter(torch.ones([n_class]))\\n\",\n        \"\\n\",\n        \"    def forward(self, X):\\n\",\n        \"        input = X.transpose(0, 1)  # X : [n_step, batch_size, n_class]\\n\",\n        \"\\n\",\n        \"        hidden_state = torch.zeros(1, len(X), n_hidden)  # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\\n\",\n        \"        cell_state = torch.zeros(1, len(X), n_hidden)     # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\\n\",\n        \"\\n\",\n        \"        outputs, (_, _) = self.lstm(input, (hidden_state, cell_state))\\n\",\n        \"        outputs = outputs[-1]  # [batch_size, n_hidden]\\n\",\n        \"        model = self.W(outputs) + self.b  # model : [batch_size, n_class]\\n\",\n        \"        return 
model\\n\",\n        \"\\n\",\n        \"if __name__ == '__main__':\\n\",\n        \"    n_step = 3 # number of cells(= number of Step)\\n\",\n        \"    n_hidden = 128 # number of hidden units in one cell\\n\",\n        \"\\n\",\n        \"    char_arr = [c for c in 'abcdefghijklmnopqrstuvwxyz']\\n\",\n        \"    word_dict = {n: i for i, n in enumerate(char_arr)}\\n\",\n        \"    number_dict = {i: w for i, w in enumerate(char_arr)}\\n\",\n        \"    n_class = len(word_dict)  # number of class(=number of vocab)\\n\",\n        \"\\n\",\n        \"    seq_data = ['make', 'need', 'coal', 'word', 'love', 'hate', 'live', 'home', 'hash', 'star']\\n\",\n        \"\\n\",\n        \"    model = TextLSTM()\\n\",\n        \"\\n\",\n        \"    criterion = nn.CrossEntropyLoss()\\n\",\n        \"    optimizer = optim.Adam(model.parameters(), lr=0.001)\\n\",\n        \"\\n\",\n        \"    input_batch, target_batch = make_batch()\\n\",\n        \"    input_batch = torch.FloatTensor(input_batch)\\n\",\n        \"    target_batch = torch.LongTensor(target_batch)\\n\",\n        \"\\n\",\n        \"    # Training\\n\",\n        \"    for epoch in range(1000):\\n\",\n        \"        optimizer.zero_grad()\\n\",\n        \"\\n\",\n        \"        output = model(input_batch)\\n\",\n        \"        loss = criterion(output, target_batch)\\n\",\n        \"        if (epoch + 1) % 100 == 0:\\n\",\n        \"            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\\n\",\n        \"\\n\",\n        \"        loss.backward()\\n\",\n        \"        optimizer.step()\\n\",\n        \"\\n\",\n        \"    inputs = [sen[:3] for sen in seq_data]\\n\",\n        \"\\n\",\n        \"    predict = model(input_batch).data.max(1, keepdim=True)[1]\\n\",\n        \"    print(inputs, '->', [number_dict[n.item()] for n in predict.squeeze()])\"\n      ],\n      \"outputs\": [],\n      \"execution_count\": null\n    }\n  ],\n  \"metadata\": {\n    
\"anaconda-cloud\": {},\n    \"kernelspec\": {\n      \"display_name\": \"Python 3\",\n      \"language\": \"python\",\n      \"name\": \"python3\"\n    },\n    \"language_info\": {\n      \"codemirror_mode\": {\n        \"name\": \"ipython\",\n        \"version\": 3\n      },\n      \"file_extension\": \".py\",\n      \"mimetype\": \"text/x-python\",\n      \"name\": \"python\",\n      \"nbconvert_exporter\": \"python\",\n      \"pygments_lexer\": \"ipython3\",\n      \"version\": \"3.6.1\"\n    }\n  },\n  \"nbformat\": 4,\n  \"nbformat_minor\": 4\n}"
  },
  {
    "path": "3-2.TextLSTM/TextLSTM.py",
    "content": "# %%\n# code by Tae Hwan Jung @graykode\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\n\ndef make_batch():\n    input_batch, target_batch = [], []\n\n    for seq in seq_data:\n        input = [word_dict[n] for n in seq[:-1]] # 'm', 'a' , 'k' is input\n        target = word_dict[seq[-1]] # 'e' is target\n        input_batch.append(np.eye(n_class)[input])\n        target_batch.append(target)\n\n    return input_batch, target_batch\n\nclass TextLSTM(nn.Module):\n    def __init__(self):\n        super(TextLSTM, self).__init__()\n\n        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden)\n        self.W = nn.Linear(n_hidden, n_class, bias=False)\n        self.b = nn.Parameter(torch.ones([n_class]))\n\n    def forward(self, X):\n        input = X.transpose(0, 1)  # X : [n_step, batch_size, n_class]\n\n        hidden_state = torch.zeros(1, len(X), n_hidden)  # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n        cell_state = torch.zeros(1, len(X), n_hidden)     # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n\n        outputs, (_, _) = self.lstm(input, (hidden_state, cell_state))\n        outputs = outputs[-1]  # [batch_size, n_hidden]\n        model = self.W(outputs) + self.b  # model : [batch_size, n_class]\n        return model\n\nif __name__ == '__main__':\n    n_step = 3 # number of cells(= number of Step)\n    n_hidden = 128 # number of hidden units in one cell\n\n    char_arr = [c for c in 'abcdefghijklmnopqrstuvwxyz']\n    word_dict = {n: i for i, n in enumerate(char_arr)}\n    number_dict = {i: w for i, w in enumerate(char_arr)}\n    n_class = len(word_dict)  # number of class(=number of vocab)\n\n    seq_data = ['make', 'need', 'coal', 'word', 'love', 'hate', 'live', 'home', 'hash', 'star']\n\n    model = TextLSTM()\n\n    criterion = nn.CrossEntropyLoss()\n    optimizer = optim.Adam(model.parameters(), lr=0.001)\n\n    input_batch, target_batch = make_batch()\n   
 input_batch = torch.FloatTensor(input_batch)\n    target_batch = torch.LongTensor(target_batch)\n\n    # Training\n    for epoch in range(1000):\n        optimizer.zero_grad()\n\n        output = model(input_batch)\n        loss = criterion(output, target_batch)\n        if (epoch + 1) % 100 == 0:\n            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n\n        loss.backward()\n        optimizer.step()\n\n    inputs = [sen[:3] for sen in seq_data]\n\n    predict = model(input_batch).data.max(1, keepdim=True)[1]\n    print(inputs, '->', [number_dict[n.item()] for n in predict.squeeze()])"
  },
  {
    "path": "3-3.Bi-LSTM/Bi-LSTM.ipynb",
    "content": "{\n  \"cells\": [\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {},\n      \"source\": [\n        \"# code by Tae Hwan Jung @graykode\\n\",\n        \"import numpy as np\\n\",\n        \"import torch\\n\",\n        \"import torch.nn as nn\\n\",\n        \"import torch.optim as optim\\n\",\n        \"\\n\",\n        \"def make_batch():\\n\",\n        \"    input_batch = []\\n\",\n        \"    target_batch = []\\n\",\n        \"\\n\",\n        \"    words = sentence.split()\\n\",\n        \"    for i, word in enumerate(words[:-1]):\\n\",\n        \"        input = [word_dict[n] for n in words[:(i + 1)]]\\n\",\n        \"        input = input + [0] * (max_len - len(input))\\n\",\n        \"        target = word_dict[words[i + 1]]\\n\",\n        \"        input_batch.append(np.eye(n_class)[input])\\n\",\n        \"        target_batch.append(target)\\n\",\n        \"\\n\",\n        \"    return input_batch, target_batch\\n\",\n        \"\\n\",\n        \"class BiLSTM(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(BiLSTM, self).__init__()\\n\",\n        \"\\n\",\n        \"        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden, bidirectional=True)\\n\",\n        \"        self.W = nn.Linear(n_hidden * 2, n_class, bias=False)\\n\",\n        \"        self.b = nn.Parameter(torch.ones([n_class]))\\n\",\n        \"\\n\",\n        \"    def forward(self, X):\\n\",\n        \"        input = X.transpose(0, 1)  # input : [n_step, batch_size, n_class]\\n\",\n        \"\\n\",\n        \"        hidden_state = torch.zeros(1*2, len(X), n_hidden)   # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\\n\",\n        \"        cell_state = torch.zeros(1*2, len(X), n_hidden)     # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\\n\",\n        \"\\n\",\n        \"        outputs, (_, _) = self.lstm(input, (hidden_state, cell_state))\\n\",\n        \"        outputs = outputs[-1]  # 
[batch_size, n_hidden * 2]\\n\",\n        \"        model = self.W(outputs) + self.b  # model : [batch_size, n_class]\\n\",\n        \"        return model\\n\",\n        \"\\n\",\n        \"if __name__ == '__main__':\\n\",\n        \"    n_hidden = 5 # number of hidden units in one cell\\n\",\n        \"\\n\",\n        \"    sentence = (\\n\",\n        \"        'Lorem ipsum dolor sit amet consectetur adipisicing elit '\\n\",\n        \"        'sed do eiusmod tempor incididunt ut labore et dolore magna '\\n\",\n        \"        'aliqua Ut enim ad minim veniam quis nostrud exercitation'\\n\",\n        \"    )\\n\",\n        \"\\n\",\n        \"    word_dict = {w: i for i, w in enumerate(list(set(sentence.split())))}\\n\",\n        \"    number_dict = {i: w for i, w in enumerate(list(set(sentence.split())))}\\n\",\n        \"    n_class = len(word_dict)\\n\",\n        \"    max_len = len(sentence.split())\\n\",\n        \"\\n\",\n        \"    model = BiLSTM()\\n\",\n        \"\\n\",\n        \"    criterion = nn.CrossEntropyLoss()\\n\",\n        \"    optimizer = optim.Adam(model.parameters(), lr=0.001)\\n\",\n        \"\\n\",\n        \"    input_batch, target_batch = make_batch()\\n\",\n        \"    input_batch = torch.FloatTensor(input_batch)\\n\",\n        \"    target_batch = torch.LongTensor(target_batch)\\n\",\n        \"\\n\",\n        \"    # Training\\n\",\n        \"    for epoch in range(10000):\\n\",\n        \"        optimizer.zero_grad()\\n\",\n        \"        output = model(input_batch)\\n\",\n        \"        loss = criterion(output, target_batch)\\n\",\n        \"        if (epoch + 1) % 1000 == 0:\\n\",\n        \"            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\\n\",\n        \"\\n\",\n        \"        loss.backward()\\n\",\n        \"        optimizer.step()\\n\",\n        \"\\n\",\n        \"    predict = model(input_batch).data.max(1, keepdim=True)[1]\\n\",\n        \"    print(sentence)\\n\",\n        \" 
   print([number_dict[n.item()] for n in predict.squeeze()])\\n\"\n      ],\n      \"outputs\": [],\n      \"execution_count\": null\n    }\n  ],\n  \"metadata\": {\n    \"anaconda-cloud\": {},\n    \"kernelspec\": {\n      \"display_name\": \"Python 3\",\n      \"language\": \"python\",\n      \"name\": \"python3\"\n    },\n    \"language_info\": {\n      \"codemirror_mode\": {\n        \"name\": \"ipython\",\n        \"version\": 3\n      },\n      \"file_extension\": \".py\",\n      \"mimetype\": \"text/x-python\",\n      \"name\": \"python\",\n      \"nbconvert_exporter\": \"python\",\n      \"pygments_lexer\": \"ipython3\",\n      \"version\": \"3.6.1\"\n    }\n  },\n  \"nbformat\": 4,\n  \"nbformat_minor\": 4\n}"
  },
  {
    "path": "3-3.Bi-LSTM/Bi-LSTM.py",
    "content": "# %%\n# code by Tae Hwan Jung @graykode\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\n\ndef make_batch():\n    input_batch = []\n    target_batch = []\n\n    words = sentence.split()\n    for i, word in enumerate(words[:-1]):\n        input = [word_dict[n] for n in words[:(i + 1)]]\n        input = input + [0] * (max_len - len(input))\n        target = word_dict[words[i + 1]]\n        input_batch.append(np.eye(n_class)[input])\n        target_batch.append(target)\n\n    return input_batch, target_batch\n\nclass BiLSTM(nn.Module):\n    def __init__(self):\n        super(BiLSTM, self).__init__()\n\n        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden, bidirectional=True)\n        self.W = nn.Linear(n_hidden * 2, n_class, bias=False)\n        self.b = nn.Parameter(torch.ones([n_class]))\n\n    def forward(self, X):\n        input = X.transpose(0, 1)  # input : [n_step, batch_size, n_class]\n\n        hidden_state = torch.zeros(1*2, len(X), n_hidden)   # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n        cell_state = torch.zeros(1*2, len(X), n_hidden)     # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n\n        outputs, (_, _) = self.lstm(input, (hidden_state, cell_state))\n        outputs = outputs[-1]  # [batch_size, n_hidden * 2]\n        model = self.W(outputs) + self.b  # model : [batch_size, n_class]\n        return model\n\nif __name__ == '__main__':\n    n_hidden = 5 # number of hidden units in one cell\n\n    sentence = (\n        'Lorem ipsum dolor sit amet consectetur adipisicing elit '\n        'sed do eiusmod tempor incididunt ut labore et dolore magna '\n        'aliqua Ut enim ad minim veniam quis nostrud exercitation'\n    )\n\n    word_dict = {w: i for i, w in enumerate(list(set(sentence.split())))}\n    number_dict = {i: w for i, w in enumerate(list(set(sentence.split())))}\n    n_class = len(word_dict)\n    max_len = len(sentence.split())\n\n    
model = BiLSTM()\n\n    criterion = nn.CrossEntropyLoss()\n    optimizer = optim.Adam(model.parameters(), lr=0.001)\n\n    input_batch, target_batch = make_batch()\n    input_batch = torch.FloatTensor(input_batch)\n    target_batch = torch.LongTensor(target_batch)\n\n    # Training\n    for epoch in range(10000):\n        optimizer.zero_grad()\n        output = model(input_batch)\n        loss = criterion(output, target_batch)\n        if (epoch + 1) % 1000 == 0:\n            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n\n        loss.backward()\n        optimizer.step()\n\n    predict = model(input_batch).data.max(1, keepdim=True)[1]\n    print(sentence)\n    print([number_dict[n.item()] for n in predict.squeeze()])\n"
  },
  {
    "path": "4-1.Seq2Seq/Seq2Seq.ipynb",
    "content": "{\n  \"cells\": [\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {},\n      \"source\": [\n        \"# code by Tae Hwan Jung @graykode\\n\",\n        \"import numpy as np\\n\",\n        \"import torch\\n\",\n        \"import torch.nn as nn\\n\",\n        \"\\n\",\n        \"# S: Symbol that marks the start of the decoder input\\n\",\n        \"# E: Symbol that marks the end of the decoder output\\n\",\n        \"# P: Padding symbol that fills a sequence when it is shorter than the number of time steps\\n\",\n        \"\\n\",\n        \"def make_batch():\\n\",\n        \"    input_batch, output_batch, target_batch = [], [], []\\n\",\n        \"\\n\",\n        \"    for seq in seq_data:\\n\",\n        \"        for i in range(2):\\n\",\n        \"            seq[i] = seq[i] + 'P' * (n_step - len(seq[i]))\\n\",\n        \"\\n\",\n        \"        input = [num_dic[n] for n in seq[0]]\\n\",\n        \"        output = [num_dic[n] for n in ('S' + seq[1])]\\n\",\n        \"        target = [num_dic[n] for n in (seq[1] + 'E')]\\n\",\n        \"\\n\",\n        \"        input_batch.append(np.eye(n_class)[input])\\n\",\n        \"        output_batch.append(np.eye(n_class)[output])\\n\",\n        \"        target_batch.append(target) # not one-hot\\n\",\n        \"\\n\",\n        \"    # make tensor\\n\",\n        \"    return torch.FloatTensor(input_batch), torch.FloatTensor(output_batch), torch.LongTensor(target_batch)\\n\",\n        \"\\n\",\n        \"# make test batch\\n\",\n        \"def make_testbatch(input_word):\\n\",\n        \"    input_batch, output_batch = [], []\\n\",\n        \"\\n\",\n        \"    input_w = input_word + 'P' * (n_step - len(input_word))\\n\",\n        \"    input = [num_dic[n] for n in input_w]\\n\",\n        \"    output = [num_dic[n] for n in 'S' + 'P' * n_step]\\n\",\n        \"\\n\",\n        \"    input_batch = np.eye(n_class)[input]\\n\",\n        \"    output_batch = np.eye(n_class)[output]\\n\",\n        
\"\\n\",\n        \"    return torch.FloatTensor(input_batch).unsqueeze(0), torch.FloatTensor(output_batch).unsqueeze(0)\\n\",\n        \"\\n\",\n        \"# Model\\n\",\n        \"class Seq2Seq(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(Seq2Seq, self).__init__()\\n\",\n        \"\\n\",\n        \"        self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\\n\",\n        \"        self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\\n\",\n        \"        self.fc = nn.Linear(n_hidden, n_class)\\n\",\n        \"\\n\",\n        \"    def forward(self, enc_input, enc_hidden, dec_input):\\n\",\n        \"        enc_input = enc_input.transpose(0, 1) # enc_input: [max_len(=n_step, time step), batch_size, n_class]\\n\",\n        \"        dec_input = dec_input.transpose(0, 1) # dec_input: [max_len(=n_step, time step), batch_size, n_class]\\n\",\n        \"\\n\",\n        \"        # enc_states : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\\n\",\n        \"        _, enc_states = self.enc_cell(enc_input, enc_hidden)\\n\",\n        \"        # outputs : [max_len+1(=6), batch_size, num_directions(=1) * n_hidden(=128)]\\n\",\n        \"        outputs, _ = self.dec_cell(dec_input, enc_states)\\n\",\n        \"\\n\",\n        \"        model = self.fc(outputs) # model : [max_len+1(=6), batch_size, n_class]\\n\",\n        \"        return model\\n\",\n        \"\\n\",\n        \"if __name__ == '__main__':\\n\",\n        \"    n_step = 5\\n\",\n        \"    n_hidden = 128\\n\",\n        \"\\n\",\n        \"    char_arr = [c for c in 'SEPabcdefghijklmnopqrstuvwxyz']\\n\",\n        \"    num_dic = {n: i for i, n in enumerate(char_arr)}\\n\",\n        \"    seq_data = [['man', 'women'], ['black', 'white'], ['king', 'queen'], ['girl', 'boy'], ['up', 'down'], ['high', 'low']]\\n\",\n        \"\\n\",\n        \"    n_class = len(num_dic)\\n\",\n        \"    batch_size = 
len(seq_data)\\n\",\n        \"\\n\",\n        \"    model = Seq2Seq()\\n\",\n        \"\\n\",\n        \"    criterion = nn.CrossEntropyLoss()\\n\",\n        \"    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\\n\",\n        \"\\n\",\n        \"    input_batch, output_batch, target_batch = make_batch()\\n\",\n        \"\\n\",\n        \"    for epoch in range(5000):\\n\",\n        \"        # make hidden shape [num_layers * num_directions, batch_size, n_hidden]\\n\",\n        \"        hidden = torch.zeros(1, batch_size, n_hidden)\\n\",\n        \"\\n\",\n        \"        optimizer.zero_grad()\\n\",\n        \"        # input_batch : [batch_size, max_len(=n_step, time step), n_class]\\n\",\n        \"        # output_batch : [batch_size, max_len+1(=n_step, time step) (because of 'S' or 'E'), n_class]\\n\",\n        \"        # target_batch : [batch_size, max_len+1(=n_step, time step)], not one-hot\\n\",\n        \"        output = model(input_batch, hidden, output_batch)\\n\",\n        \"        # output : [max_len+1, batch_size, n_class]\\n\",\n        \"        output = output.transpose(0, 1) # [batch_size, max_len+1(=6), n_class]\\n\",\n        \"        loss = 0\\n\",\n        \"        for i in range(0, len(target_batch)):\\n\",\n        \"            # output[i] : [max_len+1, n_class], target_batch[i] : [max_len+1]\\n\",\n        \"            loss += criterion(output[i], target_batch[i])\\n\",\n        \"        if (epoch + 1) % 1000 == 0:\\n\",\n        \"            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\\n\",\n        \"        loss.backward()\\n\",\n        \"        optimizer.step()\\n\",\n        \"\\n\",\n        \"    # Test\\n\",\n        \"    def translate(word):\\n\",\n        \"        input_batch, output_batch = make_testbatch(word)\\n\",\n        \"\\n\",\n        \"        # make hidden shape [num_layers * num_directions, batch_size, n_hidden]\\n\",\n        \"        hidden = torch.zeros(1, 1, 
n_hidden)\\n\",\n        \"        output = model(input_batch, hidden, output_batch)\\n\",\n        \"        # output : [max_len+1(=6), batch_size(=1), n_class]\\n\",\n        \"\\n\",\n        \"        predict = output.data.max(2, keepdim=True)[1] # select n_class dimension\\n\",\n        \"        decoded = [char_arr[i] for i in predict]\\n\",\n        \"        end = decoded.index('E')\\n\",\n        \"        translated = ''.join(decoded[:end])\\n\",\n        \"\\n\",\n        \"        return translated.replace('P', '')\\n\",\n        \"\\n\",\n        \"    print('test')\\n\",\n        \"    print('man ->', translate('man'))\\n\",\n        \"    print('mans ->', translate('mans'))\\n\",\n        \"    print('king ->', translate('king'))\\n\",\n        \"    print('black ->', translate('black'))\\n\",\n        \"    print('upp ->', translate('upp'))\"\n      ],\n      \"outputs\": [],\n      \"execution_count\": null\n    }\n  ],\n  \"metadata\": {\n    \"anaconda-cloud\": {},\n    \"kernelspec\": {\n      \"display_name\": \"Python 3\",\n      \"language\": \"python\",\n      \"name\": \"python3\"\n    },\n    \"language_info\": {\n      \"codemirror_mode\": {\n        \"name\": \"ipython\",\n        \"version\": 3\n      },\n      \"file_extension\": \".py\",\n      \"mimetype\": \"text/x-python\",\n      \"name\": \"python\",\n      \"nbconvert_exporter\": \"python\",\n      \"pygments_lexer\": \"ipython3\",\n      \"version\": \"3.6.1\"\n    }\n  },\n  \"nbformat\": 4,\n  \"nbformat_minor\": 4\n}"
  },
  {
    "path": "4-1.Seq2Seq/Seq2Seq.py",
    "content": "# %%\n# code by Tae Hwan Jung @graykode\nimport numpy as np\nimport torch\nimport torch.nn as nn\n\n# S: Symbol that marks the start of the decoder input\n# E: Symbol that marks the end of the decoder output\n# P: Padding symbol that fills a sequence when it is shorter than the number of time steps\n\ndef make_batch():\n    input_batch, output_batch, target_batch = [], [], []\n\n    for seq in seq_data:\n        for i in range(2):\n            seq[i] = seq[i] + 'P' * (n_step - len(seq[i]))\n\n        input = [num_dic[n] for n in seq[0]]\n        output = [num_dic[n] for n in ('S' + seq[1])]\n        target = [num_dic[n] for n in (seq[1] + 'E')]\n\n        input_batch.append(np.eye(n_class)[input])\n        output_batch.append(np.eye(n_class)[output])\n        target_batch.append(target) # not one-hot\n\n    # make tensor\n    return torch.FloatTensor(input_batch), torch.FloatTensor(output_batch), torch.LongTensor(target_batch)\n\n# make test batch\ndef make_testbatch(input_word):\n    input_batch, output_batch = [], []\n\n    input_w = input_word + 'P' * (n_step - len(input_word))\n    input = [num_dic[n] for n in input_w]\n    output = [num_dic[n] for n in 'S' + 'P' * n_step]\n\n    input_batch = np.eye(n_class)[input]\n    output_batch = np.eye(n_class)[output]\n\n    return torch.FloatTensor(input_batch).unsqueeze(0), torch.FloatTensor(output_batch).unsqueeze(0)\n\n# Model\nclass Seq2Seq(nn.Module):\n    def __init__(self):\n        super(Seq2Seq, self).__init__()\n\n        self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\n        self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\n        self.fc = nn.Linear(n_hidden, n_class)\n\n    def forward(self, enc_input, enc_hidden, dec_input):\n        enc_input = enc_input.transpose(0, 1) # enc_input: [max_len(=n_step, time step), batch_size, n_class]\n        dec_input = dec_input.transpose(0, 1) # dec_input: [max_len(=n_step, time step), 
batch_size, n_class]\n\n        # enc_states : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n        _, enc_states = self.enc_cell(enc_input, enc_hidden)\n        # outputs : [max_len+1(=6), batch_size, num_directions(=1) * n_hidden(=128)]\n        outputs, _ = self.dec_cell(dec_input, enc_states)\n\n        model = self.fc(outputs) # model : [max_len+1(=6), batch_size, n_class]\n        return model\n\nif __name__ == '__main__':\n    n_step = 5\n    n_hidden = 128\n\n    char_arr = [c for c in 'SEPabcdefghijklmnopqrstuvwxyz']\n    num_dic = {n: i for i, n in enumerate(char_arr)}\n    seq_data = [['man', 'women'], ['black', 'white'], ['king', 'queen'], ['girl', 'boy'], ['up', 'down'], ['high', 'low']]\n\n    n_class = len(num_dic)\n    batch_size = len(seq_data)\n\n    model = Seq2Seq()\n\n    criterion = nn.CrossEntropyLoss()\n    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n\n    input_batch, output_batch, target_batch = make_batch()\n\n    for epoch in range(5000):\n        # make hidden shape [num_layers * num_directions, batch_size, n_hidden]\n        hidden = torch.zeros(1, batch_size, n_hidden)\n\n        optimizer.zero_grad()\n        # input_batch : [batch_size, max_len(=n_step, time step), n_class]\n        # output_batch : [batch_size, max_len+1(=n_step, time step) (because of 'S' or 'E'), n_class]\n        # target_batch : [batch_size, max_len+1(=n_step, time step)], not one-hot\n        output = model(input_batch, hidden, output_batch)\n        # output : [max_len+1, batch_size, n_class]\n        output = output.transpose(0, 1) # [batch_size, max_len+1(=6), n_class]\n        loss = 0\n        for i in range(0, len(target_batch)):\n            # output[i] : [max_len+1, n_class], target_batch[i] : [max_len+1]\n            loss += criterion(output[i], target_batch[i])\n        if (epoch + 1) % 1000 == 0:\n            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n        loss.backward()\n        
optimizer.step()\n\n    # Test\n    def translate(word):\n        input_batch, output_batch = make_testbatch(word)\n\n        # make hidden shape [num_layers * num_directions, batch_size, n_hidden]\n        hidden = torch.zeros(1, 1, n_hidden)\n        output = model(input_batch, hidden, output_batch)\n        # output : [max_len+1(=6), batch_size(=1), n_class]\n\n        predict = output.data.max(2, keepdim=True)[1] # select n_class dimension\n        decoded = [char_arr[i] for i in predict]\n        end = decoded.index('E')\n        translated = ''.join(decoded[:end])\n\n        return translated.replace('P', '')\n\n    print('test')\n    print('man ->', translate('man'))\n    print('mans ->', translate('mans'))\n    print('king ->', translate('king'))\n    print('black ->', translate('black'))\n    print('upp ->', translate('upp'))"
  },
  {
    "path": "4-2.Seq2Seq(Attention)/Seq2Seq(Attention).ipynb",
    "content": "{\n  \"cells\": [\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {},\n      \"source\": [\n        \"# code by Tae Hwan Jung @graykode\\n\",\n        \"# Reference : https://github.com/hunkim/PyTorchZeroToAll/blob/master/14_2_seq2seq_att.py\\n\",\n        \"import numpy as np\\n\",\n        \"import torch\\n\",\n        \"import torch.nn as nn\\n\",\n        \"import torch.nn.functional as F\\n\",\n        \"import matplotlib.pyplot as plt\\n\",\n        \"\\n\",\n        \"# S: Symbol that marks the start of the decoder input\\n\",\n        \"# E: Symbol that marks the end of the decoder output\\n\",\n        \"# P: Padding symbol that fills a sequence when it is shorter than the number of time steps\\n\",\n        \"\\n\",\n        \"def make_batch():\\n\",\n        \"    input_batch = [np.eye(n_class)[[word_dict[n] for n in sentences[0].split()]]]\\n\",\n        \"    output_batch = [np.eye(n_class)[[word_dict[n] for n in sentences[1].split()]]]\\n\",\n        \"    target_batch = [[word_dict[n] for n in sentences[2].split()]]\\n\",\n        \"\\n\",\n        \"    # make tensor\\n\",\n        \"    return torch.FloatTensor(input_batch), torch.FloatTensor(output_batch), torch.LongTensor(target_batch)\\n\",\n        \"\\n\",\n        \"class Attention(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(Attention, self).__init__()\\n\",\n        \"        self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\\n\",\n        \"        self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\\n\",\n        \"\\n\",\n        \"        # Linear for attention\\n\",\n        \"        self.attn = nn.Linear(n_hidden, n_hidden)\\n\",\n        \"        self.out = nn.Linear(n_hidden * 2, n_class)\\n\",\n        \"\\n\",\n        \"    def forward(self, enc_inputs, hidden, dec_inputs):\\n\",\n        \"        enc_inputs = enc_inputs.transpose(0, 1)  # enc_inputs: 
[n_step(=n_step, time step), batch_size, n_class]\\n\",\n        \"        dec_inputs = dec_inputs.transpose(0, 1)  # dec_inputs: [n_step(=n_step, time step), batch_size, n_class]\\n\",\n        \"\\n\",\n        \"        # enc_outputs : [n_step, batch_size, num_directions(=1) * n_hidden], matrix F\\n\",\n        \"        # enc_hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\\n\",\n        \"        enc_outputs, enc_hidden = self.enc_cell(enc_inputs, hidden)\\n\",\n        \"\\n\",\n        \"        trained_attn = []\\n\",\n        \"        hidden = enc_hidden\\n\",\n        \"        n_step = len(dec_inputs)\\n\",\n        \"        model = torch.empty([n_step, 1, n_class])\\n\",\n        \"\\n\",\n        \"        for i in range(n_step):  # each time step\\n\",\n        \"            # dec_output : [n_step(=1), batch_size(=1), num_directions(=1) * n_hidden]\\n\",\n        \"            # hidden : [num_layers(=1) * num_directions(=1), batch_size(=1), n_hidden]\\n\",\n        \"            dec_output, hidden = self.dec_cell(dec_inputs[i].unsqueeze(0), hidden)\\n\",\n        \"            attn_weights = self.get_att_weight(dec_output, enc_outputs)  # attn_weights : [1, 1, n_step]\\n\",\n        \"            trained_attn.append(attn_weights.squeeze().data.numpy())\\n\",\n        \"\\n\",\n        \"            # matrix-matrix product of matrices [1,1,n_step] x [1,n_step,n_hidden] = [1,1,n_hidden]\\n\",\n        \"            context = attn_weights.bmm(enc_outputs.transpose(0, 1))\\n\",\n        \"            dec_output = dec_output.squeeze(0)  # dec_output : [batch_size(=1), num_directions(=1) * n_hidden]\\n\",\n        \"            context = context.squeeze(1)  # [1, num_directions(=1) * n_hidden]\\n\",\n        \"            model[i] = self.out(torch.cat((dec_output, context), 1))\\n\",\n        \"\\n\",\n        \"        # make model shape [n_step, n_class]\\n\",\n        \"        return model.transpose(0, 1).squeeze(0), 
trained_attn\\n\",\n        \"\\n\",\n        \"    def get_att_weight(self, dec_output, enc_outputs):  # get attention weight one 'dec_output' with 'enc_outputs'\\n\",\n        \"        n_step = len(enc_outputs)\\n\",\n        \"        attn_scores = torch.zeros(n_step)  # attn_scores : [n_step]\\n\",\n        \"\\n\",\n        \"        for i in range(n_step):\\n\",\n        \"            attn_scores[i] = self.get_att_score(dec_output, enc_outputs[i])\\n\",\n        \"\\n\",\n        \"        # Normalize scores to weights in range 0 to 1\\n\",\n        \"        return F.softmax(attn_scores).view(1, 1, -1)\\n\",\n        \"\\n\",\n        \"    def get_att_score(self, dec_output, enc_output):  # enc_outputs [batch_size, num_directions(=1) * n_hidden]\\n\",\n        \"        score = self.attn(enc_output)  # score : [batch_size, n_hidden]\\n\",\n        \"        return torch.dot(dec_output.view(-1), score.view(-1))  # inner product make scalar value\\n\",\n        \"\\n\",\n        \"if __name__ == '__main__':\\n\",\n        \"    n_step = 5 # number of cells(= number of Step)\\n\",\n        \"    n_hidden = 128 # number of hidden units in one cell\\n\",\n        \"\\n\",\n        \"    sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']\\n\",\n        \"\\n\",\n        \"    word_list = \\\" \\\".join(sentences).split()\\n\",\n        \"    word_list = list(set(word_list))\\n\",\n        \"    word_dict = {w: i for i, w in enumerate(word_list)}\\n\",\n        \"    number_dict = {i: w for i, w in enumerate(word_list)}\\n\",\n        \"    n_class = len(word_dict)  # vocab list\\n\",\n        \"\\n\",\n        \"    # hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\\n\",\n        \"    hidden = torch.zeros(1, 1, n_hidden)\\n\",\n        \"\\n\",\n        \"    model = Attention()\\n\",\n        \"    criterion = nn.CrossEntropyLoss()\\n\",\n        \"    optimizer = torch.optim.Adam(model.parameters(), 
lr=0.001)\\n\",\n        \"\\n\",\n        \"    input_batch, output_batch, target_batch = make_batch()\\n\",\n        \"\\n\",\n        \"    # Train\\n\",\n        \"    for epoch in range(2000):\\n\",\n        \"        optimizer.zero_grad()\\n\",\n        \"        output, _ = model(input_batch, hidden, output_batch)\\n\",\n        \"\\n\",\n        \"        loss = criterion(output, target_batch.squeeze(0))\\n\",\n        \"        if (epoch + 1) % 400 == 0:\\n\",\n        \"            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\\n\",\n        \"\\n\",\n        \"        loss.backward()\\n\",\n        \"        optimizer.step()\\n\",\n        \"\\n\",\n        \"    # Test\\n\",\n        \"    test_batch = [np.eye(n_class)[[word_dict[n] for n in 'SPPPP']]]\\n\",\n        \"    test_batch = torch.FloatTensor(test_batch)\\n\",\n        \"    predict, trained_attn = model(input_batch, hidden, test_batch)\\n\",\n        \"    predict = predict.data.max(1, keepdim=True)[1]\\n\",\n        \"    print(sentences[0], '->', [number_dict[n.item()] for n in predict.squeeze()])\\n\",\n        \"\\n\",\n        \"    # Show Attention\\n\",\n        \"    fig = plt.figure(figsize=(5, 5))\\n\",\n        \"    ax = fig.add_subplot(1, 1, 1)\\n\",\n        \"    ax.matshow(trained_attn, cmap='viridis')\\n\",\n        \"    ax.set_xticklabels([''] + sentences[0].split(), fontdict={'fontsize': 14})\\n\",\n        \"    ax.set_yticklabels([''] + sentences[2].split(), fontdict={'fontsize': 14})\\n\",\n        \"    plt.show()\"\n      ],\n      \"outputs\": [],\n      \"execution_count\": null\n    }\n  ],\n  \"metadata\": {\n    \"anaconda-cloud\": {},\n    \"kernelspec\": {\n      \"display_name\": \"Python 3\",\n      \"language\": \"python\",\n      \"name\": \"python3\"\n    },\n    \"language_info\": {\n      \"codemirror_mode\": {\n        \"name\": \"ipython\",\n        \"version\": 3\n      },\n      \"file_extension\": \".py\",\n      
\"mimetype\": \"text/x-python\",\n      \"name\": \"python\",\n      \"nbconvert_exporter\": \"python\",\n      \"pygments_lexer\": \"ipython3\",\n      \"version\": \"3.6.1\"\n    }\n  },\n  \"nbformat\": 4,\n  \"nbformat_minor\": 4\n}"
  },
  {
    "path": "4-2.Seq2Seq(Attention)/Seq2Seq(Attention).py",
    "content": "# %%\n# code by Tae Hwan Jung @graykode\n# Reference : https://github.com/hunkim/PyTorchZeroToAll/blob/master/14_2_seq2seq_att.py\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport matplotlib.pyplot as plt\n\n# S: Symbol that marks the start of the decoder input\n# E: Symbol that marks the end of the decoder output\n# P: Padding symbol that fills a sequence when it is shorter than the number of time steps\n\ndef make_batch():\n    input_batch = [np.eye(n_class)[[word_dict[n] for n in sentences[0].split()]]]\n    output_batch = [np.eye(n_class)[[word_dict[n] for n in sentences[1].split()]]]\n    target_batch = [[word_dict[n] for n in sentences[2].split()]]\n\n    # make tensor\n    return torch.FloatTensor(input_batch), torch.FloatTensor(output_batch), torch.LongTensor(target_batch)\n\nclass Attention(nn.Module):\n    def __init__(self):\n        super(Attention, self).__init__()\n        self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\n        self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\n\n        # Linear for attention\n        self.attn = nn.Linear(n_hidden, n_hidden)\n        self.out = nn.Linear(n_hidden * 2, n_class)\n\n    def forward(self, enc_inputs, hidden, dec_inputs):\n        enc_inputs = enc_inputs.transpose(0, 1)  # enc_inputs: [n_step(=n_step, time step), batch_size, n_class]\n        dec_inputs = dec_inputs.transpose(0, 1)  # dec_inputs: [n_step(=n_step, time step), batch_size, n_class]\n\n        # enc_outputs : [n_step, batch_size, num_directions(=1) * n_hidden], matrix F\n        # enc_hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n        enc_outputs, enc_hidden = self.enc_cell(enc_inputs, hidden)\n\n        trained_attn = []\n        hidden = enc_hidden\n        n_step = len(dec_inputs)\n        model = torch.empty([n_step, 1, n_class])\n\n        for i in range(n_step):  # each time step\n        
    # dec_output : [n_step(=1), batch_size(=1), num_directions(=1) * n_hidden]\n            # hidden : [num_layers(=1) * num_directions(=1), batch_size(=1), n_hidden]\n            dec_output, hidden = self.dec_cell(dec_inputs[i].unsqueeze(0), hidden)\n            attn_weights = self.get_att_weight(dec_output, enc_outputs)  # attn_weights : [1, 1, n_step]\n            trained_attn.append(attn_weights.squeeze().data.numpy())\n\n            # matrix-matrix product of matrices [1,1,n_step] x [1,n_step,n_hidden] = [1,1,n_hidden]\n            context = attn_weights.bmm(enc_outputs.transpose(0, 1))\n            dec_output = dec_output.squeeze(0)  # dec_output : [batch_size(=1), num_directions(=1) * n_hidden]\n            context = context.squeeze(1)  # [1, num_directions(=1) * n_hidden]\n            model[i] = self.out(torch.cat((dec_output, context), 1))\n\n        # make model shape [n_step, n_class]\n        return model.transpose(0, 1).squeeze(0), trained_attn\n\n    def get_att_weight(self, dec_output, enc_outputs):  # get attention weight one 'dec_output' with 'enc_outputs'\n        n_step = len(enc_outputs)\n        attn_scores = torch.zeros(n_step)  # attn_scores : [n_step]\n\n        for i in range(n_step):\n            attn_scores[i] = self.get_att_score(dec_output, enc_outputs[i])\n\n        # Normalize scores to weights in range 0 to 1\n        return F.softmax(attn_scores).view(1, 1, -1)\n\n    def get_att_score(self, dec_output, enc_output):  # enc_outputs [batch_size, num_directions(=1) * n_hidden]\n        score = self.attn(enc_output)  # score : [batch_size, n_hidden]\n        return torch.dot(dec_output.view(-1), score.view(-1))  # inner product make scalar value\n\nif __name__ == '__main__':\n    n_step = 5 # number of cells(= number of Step)\n    n_hidden = 128 # number of hidden units in one cell\n\n    sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']\n\n    word_list = \" \".join(sentences).split()\n    word_list = 
list(set(word_list))\n    word_dict = {w: i for i, w in enumerate(word_list)}\n    number_dict = {i: w for i, w in enumerate(word_list)}\n    n_class = len(word_dict)  # vocab list\n\n    # hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n    hidden = torch.zeros(1, 1, n_hidden)\n\n    model = Attention()\n    criterion = nn.CrossEntropyLoss()\n    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n\n    input_batch, output_batch, target_batch = make_batch()\n\n    # Train\n    for epoch in range(2000):\n        optimizer.zero_grad()\n        output, _ = model(input_batch, hidden, output_batch)\n\n        loss = criterion(output, target_batch.squeeze(0))\n        if (epoch + 1) % 400 == 0:\n            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n\n        loss.backward()\n        optimizer.step()\n\n    # Test\n    test_batch = [np.eye(n_class)[[word_dict[n] for n in 'SPPPP']]]\n    test_batch = torch.FloatTensor(test_batch)\n    predict, trained_attn = model(input_batch, hidden, test_batch)\n    predict = predict.data.max(1, keepdim=True)[1]\n    print(sentences[0], '->', [number_dict[n.item()] for n in predict.squeeze()])\n\n    # Show Attention\n    fig = plt.figure(figsize=(5, 5))\n    ax = fig.add_subplot(1, 1, 1)\n    ax.matshow(trained_attn, cmap='viridis')\n    ax.set_xticklabels([''] + sentences[0].split(), fontdict={'fontsize': 14})\n    ax.set_yticklabels([''] + sentences[2].split(), fontdict={'fontsize': 14})\n    plt.show()"
  },
  {
    "path": "4-3.Bi-LSTM(Attention)/Bi-LSTM(Attention).ipynb",
    "content": "{\n  \"cells\": [\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {},\n      \"source\": [\n        \"# code by Tae Hwan Jung(Jeff Jung) @graykode\\n\",\n        \"# Reference : https://github.com/prakashpandey9/Text-Classification-Pytorch/blob/master/models/LSTM_Attn.py\\n\",\n        \"import numpy as np\\n\",\n        \"import torch\\n\",\n        \"import torch.nn as nn\\n\",\n        \"import torch.optim as optim\\n\",\n        \"import torch.nn.functional as F\\n\",\n        \"import matplotlib.pyplot as plt\\n\",\n        \"\\n\",\n        \"class BiLSTM_Attention(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(BiLSTM_Attention, self).__init__()\\n\",\n        \"\\n\",\n        \"        self.embedding = nn.Embedding(vocab_size, embedding_dim)\\n\",\n        \"        self.lstm = nn.LSTM(embedding_dim, n_hidden, bidirectional=True)\\n\",\n        \"        self.out = nn.Linear(n_hidden * 2, num_classes)\\n\",\n        \"\\n\",\n        \"    # lstm_output : [batch_size, n_step, n_hidden * num_directions(=2)], F matrix\\n\",\n        \"    def attention_net(self, lstm_output, final_state):\\n\",\n        \"        hidden = final_state.view(-1, n_hidden * 2, 1)   # hidden : [batch_size, n_hidden * num_directions(=2), 1(=n_layer)]\\n\",\n        \"        attn_weights = torch.bmm(lstm_output, hidden).squeeze(2) # attn_weights : [batch_size, n_step]\\n\",\n        \"        soft_attn_weights = F.softmax(attn_weights, 1)\\n\",\n        \"        # [batch_size, n_hidden * num_directions(=2), n_step] * [batch_size, n_step, 1] = [batch_size, n_hidden * num_directions(=2), 1]\\n\",\n        \"        context = torch.bmm(lstm_output.transpose(1, 2), soft_attn_weights.unsqueeze(2)).squeeze(2)\\n\",\n        \"        return context, soft_attn_weights.data.numpy() # context : [batch_size, n_hidden * num_directions(=2)]\\n\",\n        \"\\n\",\n        \"    def forward(self, X):\\n\",\n        \"        
input = self.embedding(X) # input : [batch_size, len_seq, embedding_dim]\\n\",\n        \"        input = input.permute(1, 0, 2) # input : [len_seq, batch_size, embedding_dim]\\n\",\n        \"\\n\",\n        \"        hidden_state = torch.zeros(1*2, len(X), n_hidden) # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\\n\",\n        \"        cell_state = torch.zeros(1*2, len(X), n_hidden) # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\\n\",\n        \"\\n\",\n        \"        # final_hidden_state, final_cell_state : [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\\n\",\n        \"        output, (final_hidden_state, final_cell_state) = self.lstm(input, (hidden_state, cell_state))\\n\",\n        \"        output = output.permute(1, 0, 2) # output : [batch_size, len_seq, n_hidden]\\n\",\n        \"        attn_output, attention = self.attention_net(output, final_hidden_state)\\n\",\n        \"        return self.out(attn_output), attention # model : [batch_size, num_classes], attention : [batch_size, n_step]\\n\",\n        \"\\n\",\n        \"if __name__ == '__main__':\\n\",\n        \"    embedding_dim = 2 # embedding size\\n\",\n        \"    n_hidden = 5  # number of hidden units in one cell\\n\",\n        \"    num_classes = 2  # 0 or 1\\n\",\n        \"\\n\",\n        \"    # 3 words sentences (=sequence_length is 3)\\n\",\n        \"    sentences = [\\\"i love you\\\", \\\"he loves me\\\", \\\"she likes baseball\\\", \\\"i hate you\\\", \\\"sorry for that\\\", \\\"this is awful\\\"]\\n\",\n        \"    labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.\\n\",\n        \"\\n\",\n        \"    word_list = \\\" \\\".join(sentences).split()\\n\",\n        \"    word_list = list(set(word_list))\\n\",\n        \"    word_dict = {w: i for i, w in enumerate(word_list)}\\n\",\n        \"    vocab_size = len(word_dict)\\n\",\n        \"\\n\",\n        \"    model = BiLSTM_Attention()\\n\",\n        \"\\n\",\n        \"    
criterion = nn.CrossEntropyLoss()\\n\",\n        \"    optimizer = optim.Adam(model.parameters(), lr=0.001)\\n\",\n        \"\\n\",\n        \"    inputs = torch.LongTensor([np.asarray([word_dict[n] for n in sen.split()]) for sen in sentences])\\n\",\n        \"    targets = torch.LongTensor([out for out in labels])  # To using Torch Softmax Loss function\\n\",\n        \"\\n\",\n        \"    # Training\\n\",\n        \"    for epoch in range(5000):\\n\",\n        \"        optimizer.zero_grad()\\n\",\n        \"        output, attention = model(inputs)\\n\",\n        \"        loss = criterion(output, targets)\\n\",\n        \"        if (epoch + 1) % 1000 == 0:\\n\",\n        \"            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\\n\",\n        \"\\n\",\n        \"        loss.backward()\\n\",\n        \"        optimizer.step()\\n\",\n        \"\\n\",\n        \"    # Test\\n\",\n        \"    test_text = 'sorry hate you'\\n\",\n        \"    tests = [np.asarray([word_dict[n] for n in test_text.split()])]\\n\",\n        \"    test_batch = torch.LongTensor(tests)\\n\",\n        \"\\n\",\n        \"    # Predict\\n\",\n        \"    predict, _ = model(test_batch)\\n\",\n        \"    predict = predict.data.max(1, keepdim=True)[1]\\n\",\n        \"    if predict[0][0] == 0:\\n\",\n        \"        print(test_text,\\\"is Bad Mean...\\\")\\n\",\n        \"    else:\\n\",\n        \"        print(test_text,\\\"is Good Mean!!\\\")\\n\",\n        \"\\n\",\n        \"    fig = plt.figure(figsize=(6, 3)) # [batch_size, n_step]\\n\",\n        \"    ax = fig.add_subplot(1, 1, 1)\\n\",\n        \"    ax.matshow(attention, cmap='viridis')\\n\",\n        \"    ax.set_xticklabels(['']+['first_word', 'second_word', 'third_word'], fontdict={'fontsize': 14}, rotation=90)\\n\",\n        \"    ax.set_yticklabels(['']+['batch_1', 'batch_2', 'batch_3', 'batch_4', 'batch_5', 'batch_6'], fontdict={'fontsize': 14})\\n\",\n        \"    plt.show()\"\n      
],\n      \"outputs\": [],\n      \"execution_count\": null\n    }\n  ],\n  \"metadata\": {\n    \"anaconda-cloud\": {},\n    \"kernelspec\": {\n      \"display_name\": \"Python 3\",\n      \"language\": \"python\",\n      \"name\": \"python3\"\n    },\n    \"language_info\": {\n      \"codemirror_mode\": {\n        \"name\": \"ipython\",\n        \"version\": 3\n      },\n      \"file_extension\": \".py\",\n      \"mimetype\": \"text/x-python\",\n      \"name\": \"python\",\n      \"nbconvert_exporter\": \"python\",\n      \"pygments_lexer\": \"ipython3\",\n      \"version\": \"3.6.1\"\n    }\n  },\n  \"nbformat\": 4,\n  \"nbformat_minor\": 4\n}"
  },
  {
    "path": "4-3.Bi-LSTM(Attention)/Bi-LSTM(Attention).py",
    "content": "# %%\n# code by Tae Hwan Jung(Jeff Jung) @graykode\n# Reference : https://github.com/prakashpandey9/Text-Classification-Pytorch/blob/master/models/LSTM_Attn.py\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nimport torch.nn.functional as F\nimport matplotlib.pyplot as plt\n\nclass BiLSTM_Attention(nn.Module):\n    def __init__(self):\n        super(BiLSTM_Attention, self).__init__()\n\n        self.embedding = nn.Embedding(vocab_size, embedding_dim)\n        self.lstm = nn.LSTM(embedding_dim, n_hidden, bidirectional=True)\n        self.out = nn.Linear(n_hidden * 2, num_classes)\n\n    # lstm_output : [batch_size, n_step, n_hidden * num_directions(=2)], F matrix\n    def attention_net(self, lstm_output, final_state):\n        hidden = final_state.view(-1, n_hidden * 2, 1)   # hidden : [batch_size, n_hidden * num_directions(=2), 1(=n_layer)]\n        attn_weights = torch.bmm(lstm_output, hidden).squeeze(2) # attn_weights : [batch_size, n_step]\n        soft_attn_weights = F.softmax(attn_weights, 1)\n        # [batch_size, n_hidden * num_directions(=2), n_step] * [batch_size, n_step, 1] = [batch_size, n_hidden * num_directions(=2), 1]\n        context = torch.bmm(lstm_output.transpose(1, 2), soft_attn_weights.unsqueeze(2)).squeeze(2)\n        return context, soft_attn_weights.data.numpy() # context : [batch_size, n_hidden * num_directions(=2)]\n\n    def forward(self, X):\n        input = self.embedding(X) # input : [batch_size, len_seq, embedding_dim]\n        input = input.permute(1, 0, 2) # input : [len_seq, batch_size, embedding_dim]\n\n        hidden_state = torch.zeros(1*2, len(X), n_hidden) # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n        cell_state = torch.zeros(1*2, len(X), n_hidden) # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n\n        # final_hidden_state, final_cell_state : [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n        output, 
(final_hidden_state, final_cell_state) = self.lstm(input, (hidden_state, cell_state))\n        output = output.permute(1, 0, 2) # output : [batch_size, len_seq, n_hidden]\n        attn_output, attention = self.attention_net(output, final_hidden_state)\n        return self.out(attn_output), attention # model : [batch_size, num_classes], attention : [batch_size, n_step]\n\nif __name__ == '__main__':\n    embedding_dim = 2 # embedding size\n    n_hidden = 5  # number of hidden units in one cell\n    num_classes = 2  # 0 or 1\n\n    # 3 words sentences (=sequence_length is 3)\n    sentences = [\"i love you\", \"he loves me\", \"she likes baseball\", \"i hate you\", \"sorry for that\", \"this is awful\"]\n    labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.\n\n    word_list = \" \".join(sentences).split()\n    word_list = list(set(word_list))\n    word_dict = {w: i for i, w in enumerate(word_list)}\n    vocab_size = len(word_dict)\n\n    model = BiLSTM_Attention()\n\n    criterion = nn.CrossEntropyLoss()\n    optimizer = optim.Adam(model.parameters(), lr=0.001)\n\n    inputs = torch.LongTensor([np.asarray([word_dict[n] for n in sen.split()]) for sen in sentences])\n    targets = torch.LongTensor([out for out in labels])  # To using Torch Softmax Loss function\n\n    # Training\n    for epoch in range(5000):\n        optimizer.zero_grad()\n        output, attention = model(inputs)\n        loss = criterion(output, targets)\n        if (epoch + 1) % 1000 == 0:\n            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n\n        loss.backward()\n        optimizer.step()\n\n    # Test\n    test_text = 'sorry hate you'\n    tests = [np.asarray([word_dict[n] for n in test_text.split()])]\n    test_batch = torch.LongTensor(tests)\n\n    # Predict\n    predict, _ = model(test_batch)\n    predict = predict.data.max(1, keepdim=True)[1]\n    if predict[0][0] == 0:\n        print(test_text,\"is Bad Mean...\")\n    else:\n        
print(test_text,\"is Good Mean!!\")\n\n    fig = plt.figure(figsize=(6, 3)) # [batch_size, n_step]\n    ax = fig.add_subplot(1, 1, 1)\n    ax.matshow(attention, cmap='viridis')\n    ax.set_xticklabels(['']+['first_word', 'second_word', 'third_word'], fontdict={'fontsize': 14}, rotation=90)\n    ax.set_yticklabels(['']+['batch_1', 'batch_2', 'batch_3', 'batch_4', 'batch_5', 'batch_6'], fontdict={'fontsize': 14})\n    plt.show()"
  },
  {
    "path": "5-1.Transformer/Transformer(Greedy_decoder).ipynb",
    "content": "{\n  \"cells\": [\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {},\n      \"source\": [\n        \"# code by Tae Hwan Jung(Jeff Jung) @graykode, Derek Miller @dmmiller612\\n\",\n        \"# Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch\\n\",\n        \"#           https://github.com/JayParks/transformer\\n\",\n        \"import numpy as np\\n\",\n        \"import torch\\n\",\n        \"import torch.nn as nn\\n\",\n        \"import torch.optim as optim\\n\",\n        \"import matplotlib.pyplot as plt\\n\",\n        \"\\n\",\n        \"# S: Symbol that shows starting of decoding input\\n\",\n        \"# E: Symbol that shows starting of decoding output\\n\",\n        \"# P: Symbol that will fill in blank sequence if current batch data size is short than time steps\\n\",\n        \"\\n\",\n        \"def make_batch():\\n\",\n        \"    input_batch = [[src_vocab[n] for n in sentences[0].split()]]\\n\",\n        \"    output_batch = [[tgt_vocab[n] for n in sentences[1].split()]]\\n\",\n        \"    target_batch = [[tgt_vocab[n] for n in sentences[2].split()]]\\n\",\n        \"    return torch.LongTensor(input_batch), torch.LongTensor(output_batch), torch.LongTensor(target_batch)\\n\",\n        \"\\n\",\n        \"def get_sinusoid_encoding_table(n_position, d_model):\\n\",\n        \"    def cal_angle(position, hid_idx):\\n\",\n        \"        return position / np.power(10000, 2 * (hid_idx // 2) / d_model)\\n\",\n        \"    def get_posi_angle_vec(position):\\n\",\n        \"        return [cal_angle(position, hid_j) for hid_j in range(d_model)]\\n\",\n        \"\\n\",\n        \"    sinusoid_table = np.array([get_posi_angle_vec(pos_i) for pos_i in range(n_position)])\\n\",\n        \"    sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2])  # dim 2i\\n\",\n        \"    sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2])  # dim 2i+1\\n\",\n        \"    return 
torch.FloatTensor(sinusoid_table)\\n\",\n        \"\\n\",\n        \"def get_attn_pad_mask(seq_q, seq_k):\\n\",\n        \"    # print(seq_q)\\n\",\n        \"    batch_size, len_q = seq_q.size()\\n\",\n        \"    batch_size, len_k = seq_k.size()\\n\",\n        \"    # eq(zero) is PAD token\\n\",\n        \"    pad_attn_mask = seq_k.data.eq(0).unsqueeze(1)  # batch_size x 1 x len_k(=len_q), one is masking\\n\",\n        \"    return pad_attn_mask.expand(batch_size, len_q, len_k)  # batch_size x len_q x len_k\\n\",\n        \"\\n\",\n        \"def get_attn_subsequent_mask(seq):\\n\",\n        \"    attn_shape = [seq.size(0), seq.size(1), seq.size(1)]\\n\",\n        \"    subsequent_mask = np.triu(np.ones(attn_shape), k=1)\\n\",\n        \"    subsequent_mask = torch.from_numpy(subsequent_mask).byte()\\n\",\n        \"    return subsequent_mask\\n\",\n        \"\\n\",\n        \"class ScaledDotProductAttention(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(ScaledDotProductAttention, self).__init__()\\n\",\n        \"\\n\",\n        \"    def forward(self, Q, K, V, attn_mask):\\n\",\n        \"        scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(d_k) # scores : [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\\n\",\n        \"        scores.masked_fill_(attn_mask, -1e9) # Fills elements of self tensor with value where mask is one.\\n\",\n        \"        attn = nn.Softmax(dim=-1)(scores)\\n\",\n        \"        context = torch.matmul(attn, V)\\n\",\n        \"        return context, attn\\n\",\n        \"\\n\",\n        \"class MultiHeadAttention(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(MultiHeadAttention, self).__init__()\\n\",\n        \"        self.W_Q = nn.Linear(d_model, d_k * n_heads)\\n\",\n        \"        self.W_K = nn.Linear(d_model, d_k * n_heads)\\n\",\n        \"        self.W_V = nn.Linear(d_model, d_v * n_heads)\\n\",\n        \"        self.linear = 
nn.Linear(n_heads * d_v, d_model)\\n\",\n        \"        self.layer_norm = nn.LayerNorm(d_model)\\n\",\n        \"\\n\",\n        \"    def forward(self, Q, K, V, attn_mask):\\n\",\n        \"        # q: [batch_size x len_q x d_model], k: [batch_size x len_k x d_model], v: [batch_size x len_k x d_model]\\n\",\n        \"        residual, batch_size = Q, Q.size(0)\\n\",\n        \"        # (B, S, D) -proj-> (B, S, D) -split-> (B, S, H, W) -trans-> (B, H, S, W)\\n\",\n        \"        q_s = self.W_Q(Q).view(batch_size, -1, n_heads, d_k).transpose(1,2)  # q_s: [batch_size x n_heads x len_q x d_k]\\n\",\n        \"        k_s = self.W_K(K).view(batch_size, -1, n_heads, d_k).transpose(1,2)  # k_s: [batch_size x n_heads x len_k x d_k]\\n\",\n        \"        v_s = self.W_V(V).view(batch_size, -1, n_heads, d_v).transpose(1,2)  # v_s: [batch_size x n_heads x len_k x d_v]\\n\",\n        \"\\n\",\n        \"        attn_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1) # attn_mask : [batch_size x n_heads x len_q x len_k]\\n\",\n        \"\\n\",\n        \"        # context: [batch_size x n_heads x len_q x d_v], attn: [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\\n\",\n        \"        context, attn = ScaledDotProductAttention()(q_s, k_s, v_s, attn_mask)\\n\",\n        \"        context = context.transpose(1, 2).contiguous().view(batch_size, -1, n_heads * d_v) # context: [batch_size x len_q x n_heads * d_v]\\n\",\n        \"        output = self.linear(context)\\n\",\n        \"        return self.layer_norm(output + residual), attn # output: [batch_size x len_q x d_model]\\n\",\n        \"\\n\",\n        \"class PoswiseFeedForwardNet(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(PoswiseFeedForwardNet, self).__init__()\\n\",\n        \"        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)\\n\",\n        \"        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, 
kernel_size=1)\\n\",\n        \"        self.layer_norm = nn.LayerNorm(d_model)\\n\",\n        \"\\n\",\n        \"    def forward(self, inputs):\\n\",\n        \"        residual = inputs # inputs : [batch_size, len_q, d_model]\\n\",\n        \"        output = nn.ReLU()(self.conv1(inputs.transpose(1, 2)))\\n\",\n        \"        output = self.conv2(output).transpose(1, 2)\\n\",\n        \"        return self.layer_norm(output + residual)\\n\",\n        \"\\n\",\n        \"class EncoderLayer(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(EncoderLayer, self).__init__()\\n\",\n        \"        self.enc_self_attn = MultiHeadAttention()\\n\",\n        \"        self.pos_ffn = PoswiseFeedForwardNet()\\n\",\n        \"\\n\",\n        \"    def forward(self, enc_inputs, enc_self_attn_mask):\\n\",\n        \"        enc_outputs, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask) # enc_inputs to same Q,K,V\\n\",\n        \"        enc_outputs = self.pos_ffn(enc_outputs) # enc_outputs: [batch_size x len_q x d_model]\\n\",\n        \"        return enc_outputs, attn\\n\",\n        \"\\n\",\n        \"class DecoderLayer(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(DecoderLayer, self).__init__()\\n\",\n        \"        self.dec_self_attn = MultiHeadAttention()\\n\",\n        \"        self.dec_enc_attn = MultiHeadAttention()\\n\",\n        \"        self.pos_ffn = PoswiseFeedForwardNet()\\n\",\n        \"\\n\",\n        \"    def forward(self, dec_inputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask):\\n\",\n        \"        dec_outputs, dec_self_attn = self.dec_self_attn(dec_inputs, dec_inputs, dec_inputs, dec_self_attn_mask)\\n\",\n        \"        dec_outputs, dec_enc_attn = self.dec_enc_attn(dec_outputs, enc_outputs, enc_outputs, dec_enc_attn_mask)\\n\",\n        \"        dec_outputs = self.pos_ffn(dec_outputs)\\n\",\n        \"        return dec_outputs, 
dec_self_attn, dec_enc_attn\\n\",\n        \"\\n\",\n        \"class Encoder(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(Encoder, self).__init__()\\n\",\n        \"        self.src_emb = nn.Embedding(src_vocab_size, d_model)\\n\",\n        \"        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(src_len+1, d_model),freeze=True)\\n\",\n        \"        self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])\\n\",\n        \"\\n\",\n        \"    def forward(self, enc_inputs): # enc_inputs : [batch_size x source_len]\\n\",\n        \"        enc_outputs = self.src_emb(enc_inputs) + self.pos_emb(torch.LongTensor([[1,2,3,4,0]]))\\n\",\n        \"        enc_self_attn_mask = get_attn_pad_mask(enc_inputs, enc_inputs)\\n\",\n        \"        enc_self_attns = []\\n\",\n        \"        for layer in self.layers:\\n\",\n        \"            enc_outputs, enc_self_attn = layer(enc_outputs, enc_self_attn_mask)\\n\",\n        \"            enc_self_attns.append(enc_self_attn)\\n\",\n        \"        return enc_outputs, enc_self_attns\\n\",\n        \"\\n\",\n        \"class Decoder(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(Decoder, self).__init__()\\n\",\n        \"        self.tgt_emb = nn.Embedding(tgt_vocab_size, d_model)\\n\",\n        \"        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(tgt_len+1, d_model),freeze=True)\\n\",\n        \"        self.layers = nn.ModuleList([DecoderLayer() for _ in range(n_layers)])\\n\",\n        \"\\n\",\n        \"    def forward(self, dec_inputs, enc_inputs, enc_outputs): # dec_inputs : [batch_size x target_len]\\n\",\n        \"        dec_outputs = self.tgt_emb(dec_inputs) + self.pos_emb(torch.LongTensor([[5,1,2,3,4]]))\\n\",\n        \"        dec_self_attn_pad_mask = get_attn_pad_mask(dec_inputs, dec_inputs)\\n\",\n        \"        dec_self_attn_subsequent_mask = 
get_attn_subsequent_mask(dec_inputs)\\n\",\n        \"        dec_self_attn_mask = torch.gt((dec_self_attn_pad_mask + dec_self_attn_subsequent_mask), 0)\\n\",\n        \"\\n\",\n        \"        dec_enc_attn_mask = get_attn_pad_mask(dec_inputs, enc_inputs)\\n\",\n        \"\\n\",\n        \"        dec_self_attns, dec_enc_attns = [], []\\n\",\n        \"        for layer in self.layers:\\n\",\n        \"            dec_outputs, dec_self_attn, dec_enc_attn = layer(dec_outputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask)\\n\",\n        \"            dec_self_attns.append(dec_self_attn)\\n\",\n        \"            dec_enc_attns.append(dec_enc_attn)\\n\",\n        \"        return dec_outputs, dec_self_attns, dec_enc_attns\\n\",\n        \"\\n\",\n        \"class Transformer(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(Transformer, self).__init__()\\n\",\n        \"        self.encoder = Encoder()\\n\",\n        \"        self.decoder = Decoder()\\n\",\n        \"        self.projection = nn.Linear(d_model, tgt_vocab_size, bias=False)\\n\",\n        \"    def forward(self, enc_inputs, dec_inputs):\\n\",\n        \"        enc_outputs, enc_self_attns = self.encoder(enc_inputs)\\n\",\n        \"        dec_outputs, dec_self_attns, dec_enc_attns = self.decoder(dec_inputs, enc_inputs, enc_outputs)\\n\",\n        \"        dec_logits = self.projection(dec_outputs) # dec_logits : [batch_size x tgt_len x tgt_vocab_size]\\n\",\n        \"        return dec_logits.view(-1, dec_logits.size(-1)), enc_self_attns, dec_self_attns, dec_enc_attns\\n\",\n        \"\\n\",\n        \"def greedy_decoder(model, enc_input, start_symbol):\\n\",\n        \"    \\\"\\\"\\\"\\n\",\n        \"    For simplicity, a Greedy Decoder is Beam search when K=1. This is necessary for inference as we don't know the\\n\",\n        \"    target sequence input. 
Therefore we try to generate the target input word by word, then feed it into the transformer.\\n\",\n        \"    Starting Reference: http://nlp.seas.harvard.edu/2018/04/03/attention.html#greedy-decoding\\n\",\n        \"    :param model: Transformer Model\\n\",\n        \"    :param enc_input: The encoder input\\n\",\n        \"    :param start_symbol: The start symbol. In this example it is 'S' which corresponds to index 5\\n\",\n        \"    :return: The target input\\n\",\n        \"    \\\"\\\"\\\"\\n\",\n        \"    enc_outputs, enc_self_attns = model.encoder(enc_input)\\n\",\n        \"    dec_input = torch.zeros(1, 5).type_as(enc_input.data)\\n\",\n        \"    next_symbol = start_symbol\\n\",\n        \"    for i in range(0, 5):\\n\",\n        \"        dec_input[0][i] = next_symbol\\n\",\n        \"        dec_outputs, _, _ = model.decoder(dec_input, enc_input, enc_outputs)\\n\",\n        \"        projected = model.projection(dec_outputs)\\n\",\n        \"        prob = projected.squeeze(0).max(dim=-1, keepdim=False)[1]\\n\",\n        \"        next_word = prob.data[i]\\n\",\n        \"        next_symbol = next_word.item()\\n\",\n        \"    return dec_input\\n\",\n        \"\\n\",\n        \"def showgraph(attn):\\n\",\n        \"    attn = attn[-1].squeeze(0)[0]\\n\",\n        \"    attn = attn.squeeze(0).data.numpy()\\n\",\n        \"    fig = plt.figure(figsize=(n_heads, n_heads)) # [n_heads, n_heads]\\n\",\n        \"    ax = fig.add_subplot(1, 1, 1)\\n\",\n        \"    ax.matshow(attn, cmap='viridis')\\n\",\n        \"    ax.set_xticklabels(['']+sentences[0].split(), fontdict={'fontsize': 14}, rotation=90)\\n\",\n        \"    ax.set_yticklabels(['']+sentences[2].split(), fontdict={'fontsize': 14})\\n\",\n        \"    plt.show()\\n\",\n        \"\\n\",\n        \"if __name__ == '__main__':\\n\",\n        \"    sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']\\n\",\n        \"    # Transformer Parameters\\n\",\n  
      \"    # Padding Should be Zero index\\n\",\n        \"    src_vocab = {'P': 0, 'ich': 1, 'mochte': 2, 'ein': 3, 'bier': 4}\\n\",\n        \"    src_vocab_size = len(src_vocab)\\n\",\n        \"\\n\",\n        \"    tgt_vocab = {'P': 0, 'i': 1, 'want': 2, 'a': 3, 'beer': 4, 'S': 5, 'E': 6}\\n\",\n        \"    number_dict = {i: w for i, w in enumerate(tgt_vocab)}\\n\",\n        \"    tgt_vocab_size = len(tgt_vocab)\\n\",\n        \"\\n\",\n        \"    src_len = 5 # length of source\\n\",\n        \"    tgt_len = 5 # length of target\\n\",\n        \"\\n\",\n        \"    d_model = 512  # Embedding Size\\n\",\n        \"    d_ff = 2048  # FeedForward dimension\\n\",\n        \"    d_k = d_v = 64  # dimension of K(=Q), V\\n\",\n        \"    n_layers = 6  # number of Encoder of Decoder Layer\\n\",\n        \"    n_heads = 8  # number of heads in Multi-Head Attention\\n\",\n        \"\\n\",\n        \"    model = Transformer()\\n\",\n        \"\\n\",\n        \"    criterion = nn.CrossEntropyLoss()\\n\",\n        \"    optimizer = optim.Adam(model.parameters(), lr=0.001)\\n\",\n        \"\\n\",\n        \"    enc_inputs, dec_inputs, target_batch = make_batch()\\n\",\n        \"\\n\",\n        \"    for epoch in range(20):\\n\",\n        \"        optimizer.zero_grad()\\n\",\n        \"        outputs, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs, dec_inputs)\\n\",\n        \"        loss = criterion(outputs, target_batch.contiguous().view(-1))\\n\",\n        \"        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\\n\",\n        \"        loss.backward()\\n\",\n        \"        optimizer.step()\\n\",\n        \"\\n\",\n        \"    # Test\\n\",\n        \"    greedy_dec_input = greedy_decoder(model, enc_inputs, start_symbol=tgt_vocab[\\\"S\\\"])\\n\",\n        \"    predict, _, _, _ = model(enc_inputs, greedy_dec_input)\\n\",\n        \"    predict = predict.data.max(1, keepdim=True)[1]\\n\",\n        \"    
print(sentences[0], '->', [number_dict[n.item()] for n in predict.squeeze()])\\n\",\n        \"\\n\",\n        \"    print('first head of last state enc_self_attns')\\n\",\n        \"    showgraph(enc_self_attns)\\n\",\n        \"\\n\",\n        \"    print('first head of last state dec_self_attns')\\n\",\n        \"    showgraph(dec_self_attns)\\n\",\n        \"\\n\",\n        \"    print('first head of last state dec_enc_attns')\\n\",\n        \"    showgraph(dec_enc_attns)\"\n      ],\n      \"outputs\": [],\n      \"execution_count\": null\n    }\n  ],\n  \"metadata\": {\n    \"anaconda-cloud\": {},\n    \"kernelspec\": {\n      \"display_name\": \"Python 3\",\n      \"language\": \"python\",\n      \"name\": \"python3\"\n    },\n    \"language_info\": {\n      \"codemirror_mode\": {\n        \"name\": \"ipython\",\n        \"version\": 3\n      },\n      \"file_extension\": \".py\",\n      \"mimetype\": \"text/x-python\",\n      \"name\": \"python\",\n      \"nbconvert_exporter\": \"python\",\n      \"pygments_lexer\": \"ipython3\",\n      \"version\": \"3.6.1\"\n    }\n  },\n  \"nbformat\": 4,\n  \"nbformat_minor\": 4\n}"
  },
  {
    "path": "5-1.Transformer/Transformer(Greedy_decoder).py",
    "content": "# %%\n# code by Tae Hwan Jung(Jeff Jung) @graykode, Derek Miller @dmmiller612\n# Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch\n#           https://github.com/JayParks/transformer\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nimport matplotlib.pyplot as plt\n\n# S: Symbol that shows starting of decoding input\n# E: Symbol that shows starting of decoding output\n# P: Symbol that will fill in blank sequence if current batch data size is short than time steps\n\ndef make_batch():\n    input_batch = [[src_vocab[n] for n in sentences[0].split()]]\n    output_batch = [[tgt_vocab[n] for n in sentences[1].split()]]\n    target_batch = [[tgt_vocab[n] for n in sentences[2].split()]]\n    return torch.LongTensor(input_batch), torch.LongTensor(output_batch), torch.LongTensor(target_batch)\n\ndef get_sinusoid_encoding_table(n_position, d_model):\n    def cal_angle(position, hid_idx):\n        return position / np.power(10000, 2 * (hid_idx // 2) / d_model)\n    def get_posi_angle_vec(position):\n        return [cal_angle(position, hid_j) for hid_j in range(d_model)]\n\n    sinusoid_table = np.array([get_posi_angle_vec(pos_i) for pos_i in range(n_position)])\n    sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2])  # dim 2i\n    sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2])  # dim 2i+1\n    return torch.FloatTensor(sinusoid_table)\n\ndef get_attn_pad_mask(seq_q, seq_k):\n    # print(seq_q)\n    batch_size, len_q = seq_q.size()\n    batch_size, len_k = seq_k.size()\n    # eq(zero) is PAD token\n    pad_attn_mask = seq_k.data.eq(0).unsqueeze(1)  # batch_size x 1 x len_k(=len_q), one is masking\n    return pad_attn_mask.expand(batch_size, len_q, len_k)  # batch_size x len_q x len_k\n\ndef get_attn_subsequent_mask(seq):\n    attn_shape = [seq.size(0), seq.size(1), seq.size(1)]\n    subsequent_mask = np.triu(np.ones(attn_shape), k=1)\n    subsequent_mask = 
torch.from_numpy(subsequent_mask).byte()\n    return subsequent_mask\n\nclass ScaledDotProductAttention(nn.Module):\n    def __init__(self):\n        super(ScaledDotProductAttention, self).__init__()\n\n    def forward(self, Q, K, V, attn_mask):\n        scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(d_k) # scores : [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n        scores.masked_fill_(attn_mask, -1e9) # Fills elements of self tensor with value where mask is one.\n        attn = nn.Softmax(dim=-1)(scores)\n        context = torch.matmul(attn, V)\n        return context, attn\n\nclass MultiHeadAttention(nn.Module):\n    def __init__(self):\n        super(MultiHeadAttention, self).__init__()\n        self.W_Q = nn.Linear(d_model, d_k * n_heads)\n        self.W_K = nn.Linear(d_model, d_k * n_heads)\n        self.W_V = nn.Linear(d_model, d_v * n_heads)\n        self.linear = nn.Linear(n_heads * d_v, d_model)\n        self.layer_norm = nn.LayerNorm(d_model)\n\n    def forward(self, Q, K, V, attn_mask):\n        # q: [batch_size x len_q x d_model], k: [batch_size x len_k x d_model], v: [batch_size x len_k x d_model]\n        residual, batch_size = Q, Q.size(0)\n        # (B, S, D) -proj-> (B, S, D) -split-> (B, S, H, W) -trans-> (B, H, S, W)\n        q_s = self.W_Q(Q).view(batch_size, -1, n_heads, d_k).transpose(1,2)  # q_s: [batch_size x n_heads x len_q x d_k]\n        k_s = self.W_K(K).view(batch_size, -1, n_heads, d_k).transpose(1,2)  # k_s: [batch_size x n_heads x len_k x d_k]\n        v_s = self.W_V(V).view(batch_size, -1, n_heads, d_v).transpose(1,2)  # v_s: [batch_size x n_heads x len_k x d_v]\n\n        attn_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1) # attn_mask : [batch_size x n_heads x len_q x len_k]\n\n        # context: [batch_size x n_heads x len_q x d_v], attn: [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n        context, attn = ScaledDotProductAttention()(q_s, k_s, v_s, attn_mask)\n        context = 
context.transpose(1, 2).contiguous().view(batch_size, -1, n_heads * d_v) # context: [batch_size x len_q x n_heads * d_v]\n        output = self.linear(context)\n        return self.layer_norm(output + residual), attn # output: [batch_size x len_q x d_model]\n\nclass PoswiseFeedForwardNet(nn.Module):\n    def __init__(self):\n        super(PoswiseFeedForwardNet, self).__init__()\n        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)\n        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)\n        self.layer_norm = nn.LayerNorm(d_model)\n\n    def forward(self, inputs):\n        residual = inputs # inputs : [batch_size, len_q, d_model]\n        output = nn.ReLU()(self.conv1(inputs.transpose(1, 2)))\n        output = self.conv2(output).transpose(1, 2)\n        return self.layer_norm(output + residual)\n\nclass EncoderLayer(nn.Module):\n    def __init__(self):\n        super(EncoderLayer, self).__init__()\n        self.enc_self_attn = MultiHeadAttention()\n        self.pos_ffn = PoswiseFeedForwardNet()\n\n    def forward(self, enc_inputs, enc_self_attn_mask):\n        enc_outputs, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask) # enc_inputs to same Q,K,V\n        enc_outputs = self.pos_ffn(enc_outputs) # enc_outputs: [batch_size x len_q x d_model]\n        return enc_outputs, attn\n\nclass DecoderLayer(nn.Module):\n    def __init__(self):\n        super(DecoderLayer, self).__init__()\n        self.dec_self_attn = MultiHeadAttention()\n        self.dec_enc_attn = MultiHeadAttention()\n        self.pos_ffn = PoswiseFeedForwardNet()\n\n    def forward(self, dec_inputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask):\n        dec_outputs, dec_self_attn = self.dec_self_attn(dec_inputs, dec_inputs, dec_inputs, dec_self_attn_mask)\n        dec_outputs, dec_enc_attn = self.dec_enc_attn(dec_outputs, enc_outputs, enc_outputs, dec_enc_attn_mask)\n        dec_outputs = 
self.pos_ffn(dec_outputs)\n        return dec_outputs, dec_self_attn, dec_enc_attn\n\nclass Encoder(nn.Module):\n    def __init__(self):\n        super(Encoder, self).__init__()\n        self.src_emb = nn.Embedding(src_vocab_size, d_model)\n        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(src_len+1, d_model),freeze=True)\n        self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])\n\n    def forward(self, enc_inputs): # enc_inputs : [batch_size x source_len]\n        enc_outputs = self.src_emb(enc_inputs) + self.pos_emb(torch.LongTensor([[1,2,3,4,0]]))\n        enc_self_attn_mask = get_attn_pad_mask(enc_inputs, enc_inputs)\n        enc_self_attns = []\n        for layer in self.layers:\n            enc_outputs, enc_self_attn = layer(enc_outputs, enc_self_attn_mask)\n            enc_self_attns.append(enc_self_attn)\n        return enc_outputs, enc_self_attns\n\nclass Decoder(nn.Module):\n    def __init__(self):\n        super(Decoder, self).__init__()\n        self.tgt_emb = nn.Embedding(tgt_vocab_size, d_model)\n        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(tgt_len+1, d_model),freeze=True)\n        self.layers = nn.ModuleList([DecoderLayer() for _ in range(n_layers)])\n\n    def forward(self, dec_inputs, enc_inputs, enc_outputs): # dec_inputs : [batch_size x target_len]\n        dec_outputs = self.tgt_emb(dec_inputs) + self.pos_emb(torch.LongTensor([[5,1,2,3,4]]))\n        dec_self_attn_pad_mask = get_attn_pad_mask(dec_inputs, dec_inputs)\n        dec_self_attn_subsequent_mask = get_attn_subsequent_mask(dec_inputs)\n        dec_self_attn_mask = torch.gt((dec_self_attn_pad_mask + dec_self_attn_subsequent_mask), 0)\n\n        dec_enc_attn_mask = get_attn_pad_mask(dec_inputs, enc_inputs)\n\n        dec_self_attns, dec_enc_attns = [], []\n        for layer in self.layers:\n            dec_outputs, dec_self_attn, dec_enc_attn = layer(dec_outputs, enc_outputs, dec_self_attn_mask, 
dec_enc_attn_mask)\n            dec_self_attns.append(dec_self_attn)\n            dec_enc_attns.append(dec_enc_attn)\n        return dec_outputs, dec_self_attns, dec_enc_attns\n\nclass Transformer(nn.Module):\n    def __init__(self):\n        super(Transformer, self).__init__()\n        self.encoder = Encoder()\n        self.decoder = Decoder()\n        self.projection = nn.Linear(d_model, tgt_vocab_size, bias=False)\n    def forward(self, enc_inputs, dec_inputs):\n        enc_outputs, enc_self_attns = self.encoder(enc_inputs)\n        dec_outputs, dec_self_attns, dec_enc_attns = self.decoder(dec_inputs, enc_inputs, enc_outputs)\n        dec_logits = self.projection(dec_outputs) # dec_logits : [batch_size x tgt_len x tgt_vocab_size]\n        return dec_logits.view(-1, dec_logits.size(-1)), enc_self_attns, dec_self_attns, dec_enc_attns\n\ndef greedy_decoder(model, enc_input, start_symbol):\n    \"\"\"\n    For simplicity, a Greedy Decoder is Beam search when K=1. This is necessary for inference as we don't know the\n    target sequence input. Therefore we try to generate the target input word by word, then feed it into the transformer.\n    Starting Reference: http://nlp.seas.harvard.edu/2018/04/03/attention.html#greedy-decoding\n    :param model: Transformer Model\n    :param enc_input: The encoder input\n    :param start_symbol: The start symbol. 
In this example it is 'S' which corresponds to index 5\n    :return: The target input\n    \"\"\"\n    enc_outputs, enc_self_attns = model.encoder(enc_input)\n    dec_input = torch.zeros(1, 5).type_as(enc_input.data)\n    next_symbol = start_symbol\n    for i in range(0, 5):\n        dec_input[0][i] = next_symbol\n        dec_outputs, _, _ = model.decoder(dec_input, enc_input, enc_outputs)\n        projected = model.projection(dec_outputs)\n        prob = projected.squeeze(0).max(dim=-1, keepdim=False)[1]\n        next_word = prob.data[i]\n        next_symbol = next_word.item()\n    return dec_input\n\ndef showgraph(attn):\n    attn = attn[-1].squeeze(0)[0]\n    attn = attn.squeeze(0).data.numpy()\n    fig = plt.figure(figsize=(n_heads, n_heads)) # [n_heads, n_heads]\n    ax = fig.add_subplot(1, 1, 1)\n    ax.matshow(attn, cmap='viridis')\n    ax.set_xticklabels(['']+sentences[0].split(), fontdict={'fontsize': 14}, rotation=90)\n    ax.set_yticklabels(['']+sentences[2].split(), fontdict={'fontsize': 14})\n    plt.show()\n\nif __name__ == '__main__':\n    sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']\n    # Transformer Parameters\n    # Padding Should be Zero index\n    src_vocab = {'P': 0, 'ich': 1, 'mochte': 2, 'ein': 3, 'bier': 4}\n    src_vocab_size = len(src_vocab)\n\n    tgt_vocab = {'P': 0, 'i': 1, 'want': 2, 'a': 3, 'beer': 4, 'S': 5, 'E': 6}\n    number_dict = {i: w for i, w in enumerate(tgt_vocab)}\n    tgt_vocab_size = len(tgt_vocab)\n\n    src_len = 5 # length of source\n    tgt_len = 5 # length of target\n\n    d_model = 512  # Embedding Size\n    d_ff = 2048  # FeedForward dimension\n    d_k = d_v = 64  # dimension of K(=Q), V\n    n_layers = 6  # number of Encoder and Decoder layers\n    n_heads = 8  # number of heads in Multi-Head Attention\n\n    model = Transformer()\n\n    criterion = nn.CrossEntropyLoss()\n    optimizer = optim.Adam(model.parameters(), lr=0.001)\n\n    enc_inputs, dec_inputs, target_batch = 
make_batch()\n\n    for epoch in range(20):\n        optimizer.zero_grad()\n        outputs, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs, dec_inputs)\n        loss = criterion(outputs, target_batch.contiguous().view(-1))\n        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n        loss.backward()\n        optimizer.step()\n\n    # Test\n    greedy_dec_input = greedy_decoder(model, enc_inputs, start_symbol=tgt_vocab[\"S\"])\n    predict, _, _, _ = model(enc_inputs, greedy_dec_input)\n    predict = predict.data.max(1, keepdim=True)[1]\n    print(sentences[0], '->', [number_dict[n.item()] for n in predict.squeeze()])\n\n    print('first head of last state enc_self_attns')\n    showgraph(enc_self_attns)\n\n    print('first head of last state dec_self_attns')\n    showgraph(dec_self_attns)\n\n    print('first head of last state dec_enc_attns')\n    showgraph(dec_enc_attns)"
  },
  {
    "path": "5-1.Transformer/Transformer.ipynb",
    "content": "{\n  \"cells\": [\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {},\n      \"source\": [\n        \"# code by Tae Hwan Jung(Jeff Jung) @graykode, Derek Miller @dmmiller612\\n\",\n        \"# Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch\\n\",\n        \"#           https://github.com/JayParks/transformer\\n\",\n        \"import numpy as np\\n\",\n        \"import torch\\n\",\n        \"import torch.nn as nn\\n\",\n        \"import torch.optim as optim\\n\",\n        \"import matplotlib.pyplot as plt\\n\",\n        \"\\n\",\n        \"# S: Symbol that shows starting of decoding input\\n\",\n        \"# E: Symbol that shows starting of decoding output\\n\",\n        \"# P: Symbol that will fill in blank sequence if current batch data size is short than time steps\\n\",\n        \"\\n\",\n        \"def make_batch(sentences):\\n\",\n        \"    input_batch = [[src_vocab[n] for n in sentences[0].split()]]\\n\",\n        \"    output_batch = [[tgt_vocab[n] for n in sentences[1].split()]]\\n\",\n        \"    target_batch = [[tgt_vocab[n] for n in sentences[2].split()]]\\n\",\n        \"    return torch.LongTensor(input_batch), torch.LongTensor(output_batch), torch.LongTensor(target_batch)\\n\",\n        \"\\n\",\n        \"def get_sinusoid_encoding_table(n_position, d_model):\\n\",\n        \"    def cal_angle(position, hid_idx):\\n\",\n        \"        return position / np.power(10000, 2 * (hid_idx // 2) / d_model)\\n\",\n        \"    def get_posi_angle_vec(position):\\n\",\n        \"        return [cal_angle(position, hid_j) for hid_j in range(d_model)]\\n\",\n        \"\\n\",\n        \"    sinusoid_table = np.array([get_posi_angle_vec(pos_i) for pos_i in range(n_position)])\\n\",\n        \"    sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2])  # dim 2i\\n\",\n        \"    sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2])  # dim 2i+1\\n\",\n        \"    return 
torch.FloatTensor(sinusoid_table)\\n\",\n        \"\\n\",\n        \"def get_attn_pad_mask(seq_q, seq_k):\\n\",\n        \"    batch_size, len_q = seq_q.size()\\n\",\n        \"    batch_size, len_k = seq_k.size()\\n\",\n        \"    # eq(zero) is PAD token\\n\",\n        \"    pad_attn_mask = seq_k.data.eq(0).unsqueeze(1)  # batch_size x 1 x len_k(=len_q), one is masking\\n\",\n        \"    return pad_attn_mask.expand(batch_size, len_q, len_k)  # batch_size x len_q x len_k\\n\",\n        \"\\n\",\n        \"def get_attn_subsequent_mask(seq):\\n\",\n        \"    attn_shape = [seq.size(0), seq.size(1), seq.size(1)]\\n\",\n        \"    subsequent_mask = np.triu(np.ones(attn_shape), k=1)\\n\",\n        \"    subsequent_mask = torch.from_numpy(subsequent_mask).byte()\\n\",\n        \"    return subsequent_mask\\n\",\n        \"\\n\",\n        \"class ScaledDotProductAttention(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(ScaledDotProductAttention, self).__init__()\\n\",\n        \"\\n\",\n        \"    def forward(self, Q, K, V, attn_mask):\\n\",\n        \"        scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(d_k) # scores : [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\\n\",\n        \"        scores.masked_fill_(attn_mask, -1e9) # Fills elements of self tensor with value where mask is one.\\n\",\n        \"        attn = nn.Softmax(dim=-1)(scores)\\n\",\n        \"        context = torch.matmul(attn, V)\\n\",\n        \"        return context, attn\\n\",\n        \"\\n\",\n        \"class MultiHeadAttention(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(MultiHeadAttention, self).__init__()\\n\",\n        \"        self.W_Q = nn.Linear(d_model, d_k * n_heads)\\n\",\n        \"        self.W_K = nn.Linear(d_model, d_k * n_heads)\\n\",\n        \"        self.W_V = nn.Linear(d_model, d_v * n_heads)\\n\",\n        \"        self.linear = nn.Linear(n_heads * d_v, 
d_model)\\n\",\n        \"        self.layer_norm = nn.LayerNorm(d_model)\\n\",\n        \"\\n\",\n        \"    def forward(self, Q, K, V, attn_mask):\\n\",\n        \"        # q: [batch_size x len_q x d_model], k: [batch_size x len_k x d_model], v: [batch_size x len_k x d_model]\\n\",\n        \"        residual, batch_size = Q, Q.size(0)\\n\",\n        \"        # (B, S, D) -proj-> (B, S, D) -split-> (B, S, H, W) -trans-> (B, H, S, W)\\n\",\n        \"        q_s = self.W_Q(Q).view(batch_size, -1, n_heads, d_k).transpose(1,2)  # q_s: [batch_size x n_heads x len_q x d_k]\\n\",\n        \"        k_s = self.W_K(K).view(batch_size, -1, n_heads, d_k).transpose(1,2)  # k_s: [batch_size x n_heads x len_k x d_k]\\n\",\n        \"        v_s = self.W_V(V).view(batch_size, -1, n_heads, d_v).transpose(1,2)  # v_s: [batch_size x n_heads x len_k x d_v]\\n\",\n        \"\\n\",\n        \"        attn_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1) # attn_mask : [batch_size x n_heads x len_q x len_k]\\n\",\n        \"\\n\",\n        \"        # context: [batch_size x n_heads x len_q x d_v], attn: [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\\n\",\n        \"        context, attn = ScaledDotProductAttention()(q_s, k_s, v_s, attn_mask)\\n\",\n        \"        context = context.transpose(1, 2).contiguous().view(batch_size, -1, n_heads * d_v) # context: [batch_size x len_q x n_heads * d_v]\\n\",\n        \"        output = self.linear(context)\\n\",\n        \"        return self.layer_norm(output + residual), attn # output: [batch_size x len_q x d_model]\\n\",\n        \"\\n\",\n        \"class PoswiseFeedForwardNet(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(PoswiseFeedForwardNet, self).__init__()\\n\",\n        \"        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)\\n\",\n        \"        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)\\n\",\n        \"    
    self.layer_norm = nn.LayerNorm(d_model)\\n\",\n        \"\\n\",\n        \"    def forward(self, inputs):\\n\",\n        \"        residual = inputs # inputs : [batch_size, len_q, d_model]\\n\",\n        \"        output = nn.ReLU()(self.conv1(inputs.transpose(1, 2)))\\n\",\n        \"        output = self.conv2(output).transpose(1, 2)\\n\",\n        \"        return self.layer_norm(output + residual)\\n\",\n        \"\\n\",\n        \"class EncoderLayer(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(EncoderLayer, self).__init__()\\n\",\n        \"        self.enc_self_attn = MultiHeadAttention()\\n\",\n        \"        self.pos_ffn = PoswiseFeedForwardNet()\\n\",\n        \"\\n\",\n        \"    def forward(self, enc_inputs, enc_self_attn_mask):\\n\",\n        \"        enc_outputs, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask) # enc_inputs to same Q,K,V\\n\",\n        \"        enc_outputs = self.pos_ffn(enc_outputs) # enc_outputs: [batch_size x len_q x d_model]\\n\",\n        \"        return enc_outputs, attn\\n\",\n        \"\\n\",\n        \"class DecoderLayer(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(DecoderLayer, self).__init__()\\n\",\n        \"        self.dec_self_attn = MultiHeadAttention()\\n\",\n        \"        self.dec_enc_attn = MultiHeadAttention()\\n\",\n        \"        self.pos_ffn = PoswiseFeedForwardNet()\\n\",\n        \"\\n\",\n        \"    def forward(self, dec_inputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask):\\n\",\n        \"        dec_outputs, dec_self_attn = self.dec_self_attn(dec_inputs, dec_inputs, dec_inputs, dec_self_attn_mask)\\n\",\n        \"        dec_outputs, dec_enc_attn = self.dec_enc_attn(dec_outputs, enc_outputs, enc_outputs, dec_enc_attn_mask)\\n\",\n        \"        dec_outputs = self.pos_ffn(dec_outputs)\\n\",\n        \"        return dec_outputs, dec_self_attn, dec_enc_attn\\n\",\n  
      \"\\n\",\n        \"class Encoder(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(Encoder, self).__init__()\\n\",\n        \"        self.src_emb = nn.Embedding(src_vocab_size, d_model)\\n\",\n        \"        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(src_len+1, d_model),freeze=True)\\n\",\n        \"        self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])\\n\",\n        \"\\n\",\n        \"    def forward(self, enc_inputs): # enc_inputs : [batch_size x source_len]\\n\",\n        \"        enc_outputs = self.src_emb(enc_inputs) + self.pos_emb(torch.LongTensor([[1,2,3,4,0]]))\\n\",\n        \"        enc_self_attn_mask = get_attn_pad_mask(enc_inputs, enc_inputs)\\n\",\n        \"        enc_self_attns = []\\n\",\n        \"        for layer in self.layers:\\n\",\n        \"            enc_outputs, enc_self_attn = layer(enc_outputs, enc_self_attn_mask)\\n\",\n        \"            enc_self_attns.append(enc_self_attn)\\n\",\n        \"        return enc_outputs, enc_self_attns\\n\",\n        \"\\n\",\n        \"class Decoder(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(Decoder, self).__init__()\\n\",\n        \"        self.tgt_emb = nn.Embedding(tgt_vocab_size, d_model)\\n\",\n        \"        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(tgt_len+1, d_model),freeze=True)\\n\",\n        \"        self.layers = nn.ModuleList([DecoderLayer() for _ in range(n_layers)])\\n\",\n        \"\\n\",\n        \"    def forward(self, dec_inputs, enc_inputs, enc_outputs): # dec_inputs : [batch_size x target_len]\\n\",\n        \"        dec_outputs = self.tgt_emb(dec_inputs) + self.pos_emb(torch.LongTensor([[5,1,2,3,4]]))\\n\",\n        \"        dec_self_attn_pad_mask = get_attn_pad_mask(dec_inputs, dec_inputs)\\n\",\n        \"        dec_self_attn_subsequent_mask = get_attn_subsequent_mask(dec_inputs)\\n\",\n        \"  
      dec_self_attn_mask = torch.gt((dec_self_attn_pad_mask + dec_self_attn_subsequent_mask), 0)\\n\",\n        \"\\n\",\n        \"        dec_enc_attn_mask = get_attn_pad_mask(dec_inputs, enc_inputs)\\n\",\n        \"\\n\",\n        \"        dec_self_attns, dec_enc_attns = [], []\\n\",\n        \"        for layer in self.layers:\\n\",\n        \"            dec_outputs, dec_self_attn, dec_enc_attn = layer(dec_outputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask)\\n\",\n        \"            dec_self_attns.append(dec_self_attn)\\n\",\n        \"            dec_enc_attns.append(dec_enc_attn)\\n\",\n        \"        return dec_outputs, dec_self_attns, dec_enc_attns\\n\",\n        \"\\n\",\n        \"class Transformer(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(Transformer, self).__init__()\\n\",\n        \"        self.encoder = Encoder()\\n\",\n        \"        self.decoder = Decoder()\\n\",\n        \"        self.projection = nn.Linear(d_model, tgt_vocab_size, bias=False)\\n\",\n        \"    def forward(self, enc_inputs, dec_inputs):\\n\",\n        \"        enc_outputs, enc_self_attns = self.encoder(enc_inputs)\\n\",\n        \"        dec_outputs, dec_self_attns, dec_enc_attns = self.decoder(dec_inputs, enc_inputs, enc_outputs)\\n\",\n        \"        dec_logits = self.projection(dec_outputs) # dec_logits : [batch_size x src_vocab_size x tgt_vocab_size]\\n\",\n        \"        return dec_logits.view(-1, dec_logits.size(-1)), enc_self_attns, dec_self_attns, dec_enc_attns\\n\",\n        \"\\n\",\n        \"def showgraph(attn):\\n\",\n        \"    attn = attn[-1].squeeze(0)[0]\\n\",\n        \"    attn = attn.squeeze(0).data.numpy()\\n\",\n        \"    fig = plt.figure(figsize=(n_heads, n_heads)) # [n_heads, n_heads]\\n\",\n        \"    ax = fig.add_subplot(1, 1, 1)\\n\",\n        \"    ax.matshow(attn, cmap='viridis')\\n\",\n        \"    ax.set_xticklabels(['']+sentences[0].split(), fontdict={'fontsize': 
14}, rotation=90)\\n\",\n        \"    ax.set_yticklabels(['']+sentences[2].split(), fontdict={'fontsize': 14})\\n\",\n        \"    plt.show()\\n\",\n        \"\\n\",\n        \"if __name__ == '__main__':\\n\",\n        \"    sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']\\n\",\n        \"\\n\",\n        \"    # Transformer Parameters\\n\",\n        \"    # Padding Should be Zero\\n\",\n        \"    src_vocab = {'P': 0, 'ich': 1, 'mochte': 2, 'ein': 3, 'bier': 4}\\n\",\n        \"    src_vocab_size = len(src_vocab)\\n\",\n        \"\\n\",\n        \"    tgt_vocab = {'P': 0, 'i': 1, 'want': 2, 'a': 3, 'beer': 4, 'S': 5, 'E': 6}\\n\",\n        \"    number_dict = {i: w for i, w in enumerate(tgt_vocab)}\\n\",\n        \"    tgt_vocab_size = len(tgt_vocab)\\n\",\n        \"\\n\",\n        \"    src_len = 5 # length of source\\n\",\n        \"    tgt_len = 5 # length of target\\n\",\n        \"\\n\",\n        \"    d_model = 512  # Embedding Size\\n\",\n        \"    d_ff = 2048  # FeedForward dimension\\n\",\n        \"    d_k = d_v = 64  # dimension of K(=Q), V\\n\",\n        \"    n_layers = 6  # number of Encoder of Decoder Layer\\n\",\n        \"    n_heads = 8  # number of heads in Multi-Head Attention\\n\",\n        \"\\n\",\n        \"    model = Transformer()\\n\",\n        \"\\n\",\n        \"    criterion = nn.CrossEntropyLoss()\\n\",\n        \"    optimizer = optim.Adam(model.parameters(), lr=0.001)\\n\",\n        \"\\n\",\n        \"    enc_inputs, dec_inputs, target_batch = make_batch(sentences)\\n\",\n        \"\\n\",\n        \"    for epoch in range(20):\\n\",\n        \"        optimizer.zero_grad()\\n\",\n        \"        outputs, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs, dec_inputs)\\n\",\n        \"        loss = criterion(outputs, target_batch.contiguous().view(-1))\\n\",\n        \"        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\\n\",\n        \"        
loss.backward()\\n\",\n        \"        optimizer.step()\\n\",\n        \"\\n\",\n        \"    # Test\\n\",\n        \"    predict, _, _, _ = model(enc_inputs, dec_inputs)\\n\",\n        \"    predict = predict.data.max(1, keepdim=True)[1]\\n\",\n        \"    print(sentences[0], '->', [number_dict[n.item()] for n in predict.squeeze()])\\n\",\n        \"\\n\",\n        \"    print('first head of last state enc_self_attns')\\n\",\n        \"    showgraph(enc_self_attns)\\n\",\n        \"\\n\",\n        \"    print('first head of last state dec_self_attns')\\n\",\n        \"    showgraph(dec_self_attns)\\n\",\n        \"\\n\",\n        \"    print('first head of last state dec_enc_attns')\\n\",\n        \"    showgraph(dec_enc_attns)\"\n      ],\n      \"outputs\": [],\n      \"execution_count\": null\n    }\n  ],\n  \"metadata\": {\n    \"anaconda-cloud\": {},\n    \"kernelspec\": {\n      \"display_name\": \"Python 3\",\n      \"language\": \"python\",\n      \"name\": \"python3\"\n    },\n    \"language_info\": {\n      \"codemirror_mode\": {\n        \"name\": \"ipython\",\n        \"version\": 3\n      },\n      \"file_extension\": \".py\",\n      \"mimetype\": \"text/x-python\",\n      \"name\": \"python\",\n      \"nbconvert_exporter\": \"python\",\n      \"pygments_lexer\": \"ipython3\",\n      \"version\": \"3.6.1\"\n    }\n  },\n  \"nbformat\": 4,\n  \"nbformat_minor\": 4\n}"
  },
  {
    "path": "5-1.Transformer/Transformer.py",
    "content": "# %%\n# code by Tae Hwan Jung(Jeff Jung) @graykode, Derek Miller @dmmiller612\n# Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch\n#           https://github.com/JayParks/transformer\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nimport matplotlib.pyplot as plt\n\n# S: Symbol that shows starting of decoding input\n# E: Symbol that shows end of decoding output\n# P: Symbol that will fill in blank sequence if current batch data size is shorter than time steps\n\ndef make_batch(sentences):\n    input_batch = [[src_vocab[n] for n in sentences[0].split()]]\n    output_batch = [[tgt_vocab[n] for n in sentences[1].split()]]\n    target_batch = [[tgt_vocab[n] for n in sentences[2].split()]]\n    return torch.LongTensor(input_batch), torch.LongTensor(output_batch), torch.LongTensor(target_batch)\n\ndef get_sinusoid_encoding_table(n_position, d_model):\n    def cal_angle(position, hid_idx):\n        return position / np.power(10000, 2 * (hid_idx // 2) / d_model)\n    def get_posi_angle_vec(position):\n        return [cal_angle(position, hid_j) for hid_j in range(d_model)]\n\n    sinusoid_table = np.array([get_posi_angle_vec(pos_i) for pos_i in range(n_position)])\n    sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2])  # dim 2i\n    sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2])  # dim 2i+1\n    return torch.FloatTensor(sinusoid_table)\n\ndef get_attn_pad_mask(seq_q, seq_k):\n    batch_size, len_q = seq_q.size()\n    batch_size, len_k = seq_k.size()\n    # eq(zero) is PAD token\n    pad_attn_mask = seq_k.data.eq(0).unsqueeze(1)  # batch_size x 1 x len_k(=len_q), one is masking\n    return pad_attn_mask.expand(batch_size, len_q, len_k)  # batch_size x len_q x len_k\n\ndef get_attn_subsequent_mask(seq):\n    attn_shape = [seq.size(0), seq.size(1), seq.size(1)]\n    subsequent_mask = np.triu(np.ones(attn_shape), k=1)\n    subsequent_mask = 
torch.from_numpy(subsequent_mask).byte()\n    return subsequent_mask\n\nclass ScaledDotProductAttention(nn.Module):\n    def __init__(self):\n        super(ScaledDotProductAttention, self).__init__()\n\n    def forward(self, Q, K, V, attn_mask):\n        scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(d_k) # scores : [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n        scores.masked_fill_(attn_mask, -1e9) # Fills elements of self tensor with value where mask is one.\n        attn = nn.Softmax(dim=-1)(scores)\n        context = torch.matmul(attn, V)\n        return context, attn\n\nclass MultiHeadAttention(nn.Module):\n    def __init__(self):\n        super(MultiHeadAttention, self).__init__()\n        self.W_Q = nn.Linear(d_model, d_k * n_heads)\n        self.W_K = nn.Linear(d_model, d_k * n_heads)\n        self.W_V = nn.Linear(d_model, d_v * n_heads)\n        self.linear = nn.Linear(n_heads * d_v, d_model)\n        self.layer_norm = nn.LayerNorm(d_model)\n\n    def forward(self, Q, K, V, attn_mask):\n        # q: [batch_size x len_q x d_model], k: [batch_size x len_k x d_model], v: [batch_size x len_k x d_model]\n        residual, batch_size = Q, Q.size(0)\n        # (B, S, D) -proj-> (B, S, D) -split-> (B, S, H, W) -trans-> (B, H, S, W)\n        q_s = self.W_Q(Q).view(batch_size, -1, n_heads, d_k).transpose(1,2)  # q_s: [batch_size x n_heads x len_q x d_k]\n        k_s = self.W_K(K).view(batch_size, -1, n_heads, d_k).transpose(1,2)  # k_s: [batch_size x n_heads x len_k x d_k]\n        v_s = self.W_V(V).view(batch_size, -1, n_heads, d_v).transpose(1,2)  # v_s: [batch_size x n_heads x len_k x d_v]\n\n        attn_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1) # attn_mask : [batch_size x n_heads x len_q x len_k]\n\n        # context: [batch_size x n_heads x len_q x d_v], attn: [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n        context, attn = ScaledDotProductAttention()(q_s, k_s, v_s, attn_mask)\n        context = 
context.transpose(1, 2).contiguous().view(batch_size, -1, n_heads * d_v) # context: [batch_size x len_q x n_heads * d_v]\n        output = self.linear(context)\n        return self.layer_norm(output + residual), attn # output: [batch_size x len_q x d_model]\n\nclass PoswiseFeedForwardNet(nn.Module):\n    def __init__(self):\n        super(PoswiseFeedForwardNet, self).__init__()\n        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)\n        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)\n        self.layer_norm = nn.LayerNorm(d_model)\n\n    def forward(self, inputs):\n        residual = inputs # inputs : [batch_size, len_q, d_model]\n        output = nn.ReLU()(self.conv1(inputs.transpose(1, 2)))\n        output = self.conv2(output).transpose(1, 2)\n        return self.layer_norm(output + residual)\n\nclass EncoderLayer(nn.Module):\n    def __init__(self):\n        super(EncoderLayer, self).__init__()\n        self.enc_self_attn = MultiHeadAttention()\n        self.pos_ffn = PoswiseFeedForwardNet()\n\n    def forward(self, enc_inputs, enc_self_attn_mask):\n        enc_outputs, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask) # enc_inputs to same Q,K,V\n        enc_outputs = self.pos_ffn(enc_outputs) # enc_outputs: [batch_size x len_q x d_model]\n        return enc_outputs, attn\n\nclass DecoderLayer(nn.Module):\n    def __init__(self):\n        super(DecoderLayer, self).__init__()\n        self.dec_self_attn = MultiHeadAttention()\n        self.dec_enc_attn = MultiHeadAttention()\n        self.pos_ffn = PoswiseFeedForwardNet()\n\n    def forward(self, dec_inputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask):\n        dec_outputs, dec_self_attn = self.dec_self_attn(dec_inputs, dec_inputs, dec_inputs, dec_self_attn_mask)\n        dec_outputs, dec_enc_attn = self.dec_enc_attn(dec_outputs, enc_outputs, enc_outputs, dec_enc_attn_mask)\n        dec_outputs = 
self.pos_ffn(dec_outputs)\n        return dec_outputs, dec_self_attn, dec_enc_attn\n\nclass Encoder(nn.Module):\n    def __init__(self):\n        super(Encoder, self).__init__()\n        self.src_emb = nn.Embedding(src_vocab_size, d_model)\n        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(src_len+1, d_model),freeze=True)\n        self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])\n\n    def forward(self, enc_inputs): # enc_inputs : [batch_size x source_len]\n        enc_outputs = self.src_emb(enc_inputs) + self.pos_emb(torch.LongTensor([[1,2,3,4,0]]))\n        enc_self_attn_mask = get_attn_pad_mask(enc_inputs, enc_inputs)\n        enc_self_attns = []\n        for layer in self.layers:\n            enc_outputs, enc_self_attn = layer(enc_outputs, enc_self_attn_mask)\n            enc_self_attns.append(enc_self_attn)\n        return enc_outputs, enc_self_attns\n\nclass Decoder(nn.Module):\n    def __init__(self):\n        super(Decoder, self).__init__()\n        self.tgt_emb = nn.Embedding(tgt_vocab_size, d_model)\n        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(tgt_len+1, d_model),freeze=True)\n        self.layers = nn.ModuleList([DecoderLayer() for _ in range(n_layers)])\n\n    def forward(self, dec_inputs, enc_inputs, enc_outputs): # dec_inputs : [batch_size x target_len]\n        dec_outputs = self.tgt_emb(dec_inputs) + self.pos_emb(torch.LongTensor([[5,1,2,3,4]]))\n        dec_self_attn_pad_mask = get_attn_pad_mask(dec_inputs, dec_inputs)\n        dec_self_attn_subsequent_mask = get_attn_subsequent_mask(dec_inputs)\n        dec_self_attn_mask = torch.gt((dec_self_attn_pad_mask + dec_self_attn_subsequent_mask), 0)\n\n        dec_enc_attn_mask = get_attn_pad_mask(dec_inputs, enc_inputs)\n\n        dec_self_attns, dec_enc_attns = [], []\n        for layer in self.layers:\n            dec_outputs, dec_self_attn, dec_enc_attn = layer(dec_outputs, enc_outputs, dec_self_attn_mask, 
dec_enc_attn_mask)\n            dec_self_attns.append(dec_self_attn)\n            dec_enc_attns.append(dec_enc_attn)\n        return dec_outputs, dec_self_attns, dec_enc_attns\n\nclass Transformer(nn.Module):\n    def __init__(self):\n        super(Transformer, self).__init__()\n        self.encoder = Encoder()\n        self.decoder = Decoder()\n        self.projection = nn.Linear(d_model, tgt_vocab_size, bias=False)\n    def forward(self, enc_inputs, dec_inputs):\n        enc_outputs, enc_self_attns = self.encoder(enc_inputs)\n        dec_outputs, dec_self_attns, dec_enc_attns = self.decoder(dec_inputs, enc_inputs, enc_outputs)\n        dec_logits = self.projection(dec_outputs) # dec_logits : [batch_size x src_vocab_size x tgt_vocab_size]\n        return dec_logits.view(-1, dec_logits.size(-1)), enc_self_attns, dec_self_attns, dec_enc_attns\n\ndef showgraph(attn):\n    attn = attn[-1].squeeze(0)[0]\n    attn = attn.squeeze(0).data.numpy()\n    fig = plt.figure(figsize=(n_heads, n_heads)) # [n_heads, n_heads]\n    ax = fig.add_subplot(1, 1, 1)\n    ax.matshow(attn, cmap='viridis')\n    ax.set_xticklabels(['']+sentences[0].split(), fontdict={'fontsize': 14}, rotation=90)\n    ax.set_yticklabels(['']+sentences[2].split(), fontdict={'fontsize': 14})\n    plt.show()\n\nif __name__ == '__main__':\n    sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']\n\n    # Transformer Parameters\n    # Padding Should be Zero\n    src_vocab = {'P': 0, 'ich': 1, 'mochte': 2, 'ein': 3, 'bier': 4}\n    src_vocab_size = len(src_vocab)\n\n    tgt_vocab = {'P': 0, 'i': 1, 'want': 2, 'a': 3, 'beer': 4, 'S': 5, 'E': 6}\n    number_dict = {i: w for i, w in enumerate(tgt_vocab)}\n    tgt_vocab_size = len(tgt_vocab)\n\n    src_len = 5 # length of source\n    tgt_len = 5 # length of target\n\n    d_model = 512  # Embedding Size\n    d_ff = 2048  # FeedForward dimension\n    d_k = d_v = 64  # dimension of K(=Q), V\n    n_layers = 6  # number of Encoder of Decoder Layer\n  
  n_heads = 8  # number of heads in Multi-Head Attention\n\n    model = Transformer()\n\n    criterion = nn.CrossEntropyLoss()\n    optimizer = optim.Adam(model.parameters(), lr=0.001)\n\n    enc_inputs, dec_inputs, target_batch = make_batch(sentences)\n\n    for epoch in range(20):\n        optimizer.zero_grad()\n        outputs, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs, dec_inputs)\n        loss = criterion(outputs, target_batch.contiguous().view(-1))\n        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n        loss.backward()\n        optimizer.step()\n\n    # Test\n    predict, _, _, _ = model(enc_inputs, dec_inputs)\n    predict = predict.data.max(1, keepdim=True)[1]\n    print(sentences[0], '->', [number_dict[n.item()] for n in predict.squeeze()])\n\n    print('first head of last state enc_self_attns')\n    showgraph(enc_self_attns)\n\n    print('first head of last state dec_self_attns')\n    showgraph(dec_self_attns)\n\n    print('first head of last state dec_enc_attns')\n    showgraph(dec_enc_attns)"
  },
  {
    "path": "5-2.BERT/BERT.ipynb",
    "content": "{\n  \"cells\": [\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {},\n      \"source\": [\n        \"# code by Tae Hwan Jung(Jeff Jung) @graykode\\n\",\n        \"# Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch\\n\",\n        \"#           https://github.com/JayParks/transformer, https://github.com/dhlee347/pytorchic-bert\\n\",\n        \"import math\\n\",\n        \"import re\\n\",\n        \"from random import *\\n\",\n        \"import numpy as np\\n\",\n        \"import torch\\n\",\n        \"import torch.nn as nn\\n\",\n        \"import torch.optim as optim\\n\",\n        \"\\n\",\n        \"# sample IsNext and NotNext to be same in small batch size\\n\",\n        \"def make_batch():\\n\",\n        \"    batch = []\\n\",\n        \"    positive = negative = 0\\n\",\n        \"    while positive != batch_size/2 or negative != batch_size/2:\\n\",\n        \"        tokens_a_index, tokens_b_index= randrange(len(sentences)), randrange(len(sentences)) # sample random index in sentences\\n\",\n        \"        tokens_a, tokens_b= token_list[tokens_a_index], token_list[tokens_b_index]\\n\",\n        \"        input_ids = [word_dict['[CLS]']] + tokens_a + [word_dict['[SEP]']] + tokens_b + [word_dict['[SEP]']]\\n\",\n        \"        segment_ids = [0] * (1 + len(tokens_a) + 1) + [1] * (len(tokens_b) + 1)\\n\",\n        \"\\n\",\n        \"        # MASK LM\\n\",\n        \"        n_pred =  min(max_pred, max(1, int(round(len(input_ids) * 0.15)))) # 15 % of tokens in one sentence\\n\",\n        \"        cand_maked_pos = [i for i, token in enumerate(input_ids)\\n\",\n        \"                          if token != word_dict['[CLS]'] and token != word_dict['[SEP]']]\\n\",\n        \"        shuffle(cand_maked_pos)\\n\",\n        \"        masked_tokens, masked_pos = [], []\\n\",\n        \"        for pos in cand_maked_pos[:n_pred]:\\n\",\n        \"            masked_pos.append(pos)\\n\",\n        \"          
  masked_tokens.append(input_ids[pos])\\n\",\n        \"            if random() < 0.8:  # 80%\\n\",\n        \"                input_ids[pos] = word_dict['[MASK]'] # make mask\\n\",\n        \"            elif random() < 0.5:  # 10%\\n\",\n        \"                index = randint(0, vocab_size - 1) # random index in vocabulary\\n\",\n        \"                input_ids[pos] = word_dict[number_dict[index]] # replace\\n\",\n        \"\\n\",\n        \"        # Zero Paddings\\n\",\n        \"        n_pad = maxlen - len(input_ids)\\n\",\n        \"        input_ids.extend([0] * n_pad)\\n\",\n        \"        segment_ids.extend([0] * n_pad)\\n\",\n        \"\\n\",\n        \"        # Zero Padding (100% - 15%) tokens\\n\",\n        \"        if max_pred > n_pred:\\n\",\n        \"            n_pad = max_pred - n_pred\\n\",\n        \"            masked_tokens.extend([0] * n_pad)\\n\",\n        \"            masked_pos.extend([0] * n_pad)\\n\",\n        \"\\n\",\n        \"        if tokens_a_index + 1 == tokens_b_index and positive < batch_size/2:\\n\",\n        \"            batch.append([input_ids, segment_ids, masked_tokens, masked_pos, True]) # IsNext\\n\",\n        \"            positive += 1\\n\",\n        \"        elif tokens_a_index + 1 != tokens_b_index and negative < batch_size/2:\\n\",\n        \"            batch.append([input_ids, segment_ids, masked_tokens, masked_pos, False]) # NotNext\\n\",\n        \"            negative += 1\\n\",\n        \"    return batch\\n\",\n        \"# Proprecessing Finished\\n\",\n        \"\\n\",\n        \"def get_attn_pad_mask(seq_q, seq_k):\\n\",\n        \"    batch_size, len_q = seq_q.size()\\n\",\n        \"    batch_size, len_k = seq_k.size()\\n\",\n        \"    # eq(zero) is PAD token\\n\",\n        \"    pad_attn_mask = seq_k.data.eq(0).unsqueeze(1)  # batch_size x 1 x len_k(=len_q), one is masking\\n\",\n        \"    return pad_attn_mask.expand(batch_size, len_q, len_k)  # batch_size x len_q x len_k\\n\",\n   
     \"\\n\",\n        \"def gelu(x):\\n\",\n        \"    \\\"Implementation of the gelu activation function by Hugging Face\\\"\\n\",\n        \"    return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))\\n\",\n        \"\\n\",\n        \"class Embedding(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(Embedding, self).__init__()\\n\",\n        \"        self.tok_embed = nn.Embedding(vocab_size, d_model)  # token embedding\\n\",\n        \"        self.pos_embed = nn.Embedding(maxlen, d_model)  # position embedding\\n\",\n        \"        self.seg_embed = nn.Embedding(n_segments, d_model)  # segment(token type) embedding\\n\",\n        \"        self.norm = nn.LayerNorm(d_model)\\n\",\n        \"\\n\",\n        \"    def forward(self, x, seg):\\n\",\n        \"        seq_len = x.size(1)\\n\",\n        \"        pos = torch.arange(seq_len, dtype=torch.long)\\n\",\n        \"        pos = pos.unsqueeze(0).expand_as(x)  # (seq_len,) -> (batch_size, seq_len)\\n\",\n        \"        embedding = self.tok_embed(x) + self.pos_embed(pos) + self.seg_embed(seg)\\n\",\n        \"        return self.norm(embedding)\\n\",\n        \"\\n\",\n        \"class ScaledDotProductAttention(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(ScaledDotProductAttention, self).__init__()\\n\",\n        \"\\n\",\n        \"    def forward(self, Q, K, V, attn_mask):\\n\",\n        \"        scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(d_k) # scores : [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\\n\",\n        \"        scores.masked_fill_(attn_mask, -1e9) # Fills elements of self tensor with value where mask is one.\\n\",\n        \"        attn = nn.Softmax(dim=-1)(scores)\\n\",\n        \"        context = torch.matmul(attn, V)\\n\",\n        \"        return context, attn\\n\",\n        \"\\n\",\n        \"class MultiHeadAttention(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        
\"        super(MultiHeadAttention, self).__init__()\\n\",\n        \"        self.W_Q = nn.Linear(d_model, d_k * n_heads)\\n\",\n        \"        self.W_K = nn.Linear(d_model, d_k * n_heads)\\n\",\n        \"        self.W_V = nn.Linear(d_model, d_v * n_heads)\\n\",\n        \"    def forward(self, Q, K, V, attn_mask):\\n\",\n        \"        # q: [batch_size x len_q x d_model], k: [batch_size x len_k x d_model], v: [batch_size x len_k x d_model]\\n\",\n        \"        residual, batch_size = Q, Q.size(0)\\n\",\n        \"        # (B, S, D) -proj-> (B, S, D) -split-> (B, S, H, W) -trans-> (B, H, S, W)\\n\",\n        \"        q_s = self.W_Q(Q).view(batch_size, -1, n_heads, d_k).transpose(1,2)  # q_s: [batch_size x n_heads x len_q x d_k]\\n\",\n        \"        k_s = self.W_K(K).view(batch_size, -1, n_heads, d_k).transpose(1,2)  # k_s: [batch_size x n_heads x len_k x d_k]\\n\",\n        \"        v_s = self.W_V(V).view(batch_size, -1, n_heads, d_v).transpose(1,2)  # v_s: [batch_size x n_heads x len_k x d_v]\\n\",\n        \"\\n\",\n        \"        attn_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1) # attn_mask : [batch_size x n_heads x len_q x len_k]\\n\",\n        \"\\n\",\n        \"        # context: [batch_size x n_heads x len_q x d_v], attn: [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\\n\",\n        \"        context, attn = ScaledDotProductAttention()(q_s, k_s, v_s, attn_mask)\\n\",\n        \"        context = context.transpose(1, 2).contiguous().view(batch_size, -1, n_heads * d_v) # context: [batch_size x len_q x n_heads * d_v]\\n\",\n        \"        output = nn.Linear(n_heads * d_v, d_model)(context)\\n\",\n        \"        return nn.LayerNorm(d_model)(output + residual), attn # output: [batch_size x len_q x d_model]\\n\",\n        \"\\n\",\n        \"class PoswiseFeedForwardNet(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(PoswiseFeedForwardNet, self).__init__()\\n\",\n        \"        
self.fc1 = nn.Linear(d_model, d_ff)\\n\",\n        \"        self.fc2 = nn.Linear(d_ff, d_model)\\n\",\n        \"\\n\",\n        \"    def forward(self, x):\\n\",\n        \"        # (batch_size, len_seq, d_model) -> (batch_size, len_seq, d_ff) -> (batch_size, len_seq, d_model)\\n\",\n        \"        return self.fc2(gelu(self.fc1(x)))\\n\",\n        \"\\n\",\n        \"class EncoderLayer(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(EncoderLayer, self).__init__()\\n\",\n        \"        self.enc_self_attn = MultiHeadAttention()\\n\",\n        \"        self.pos_ffn = PoswiseFeedForwardNet()\\n\",\n        \"\\n\",\n        \"    def forward(self, enc_inputs, enc_self_attn_mask):\\n\",\n        \"        enc_outputs, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask) # enc_inputs to same Q,K,V\\n\",\n        \"        enc_outputs = self.pos_ffn(enc_outputs) # enc_outputs: [batch_size x len_q x d_model]\\n\",\n        \"        return enc_outputs, attn\\n\",\n        \"\\n\",\n        \"class BERT(nn.Module):\\n\",\n        \"    def __init__(self):\\n\",\n        \"        super(BERT, self).__init__()\\n\",\n        \"        self.embedding = Embedding()\\n\",\n        \"        self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])\\n\",\n        \"        self.fc = nn.Linear(d_model, d_model)\\n\",\n        \"        self.activ1 = nn.Tanh()\\n\",\n        \"        self.linear = nn.Linear(d_model, d_model)\\n\",\n        \"        self.activ2 = gelu\\n\",\n        \"        self.norm = nn.LayerNorm(d_model)\\n\",\n        \"        self.classifier = nn.Linear(d_model, 2)\\n\",\n        \"        # decoder is shared with embedding layer\\n\",\n        \"        embed_weight = self.embedding.tok_embed.weight\\n\",\n        \"        n_vocab, n_dim = embed_weight.size()\\n\",\n        \"        self.decoder = nn.Linear(n_dim, n_vocab, bias=False)\\n\",\n        \"        
self.decoder.weight = embed_weight\\n\",\n        \"        self.decoder_bias = nn.Parameter(torch.zeros(n_vocab))\\n\",\n        \"\\n\",\n        \"    def forward(self, input_ids, segment_ids, masked_pos):\\n\",\n        \"        output = self.embedding(input_ids, segment_ids)\\n\",\n        \"        enc_self_attn_mask = get_attn_pad_mask(input_ids, input_ids)\\n\",\n        \"        for layer in self.layers:\\n\",\n        \"            output, enc_self_attn = layer(output, enc_self_attn_mask)\\n\",\n        \"        # output : [batch_size, len, d_model], attn : [batch_size, n_heads, d_mode, d_model]\\n\",\n        \"        # it will be decided by first token(CLS)\\n\",\n        \"        h_pooled = self.activ1(self.fc(output[:, 0])) # [batch_size, d_model]\\n\",\n        \"        logits_clsf = self.classifier(h_pooled) # [batch_size, 2]\\n\",\n        \"\\n\",\n        \"        masked_pos = masked_pos[:, :, None].expand(-1, -1, output.size(-1)) # [batch_size, max_pred, d_model]\\n\",\n        \"        # get masked position from final output of transformer.\\n\",\n        \"        h_masked = torch.gather(output, 1, masked_pos) # masking position [batch_size, max_pred, d_model]\\n\",\n        \"        h_masked = self.norm(self.activ2(self.linear(h_masked)))\\n\",\n        \"        logits_lm = self.decoder(h_masked) + self.decoder_bias # [batch_size, max_pred, n_vocab]\\n\",\n        \"\\n\",\n        \"        return logits_lm, logits_clsf\\n\",\n        \"\\n\",\n        \"if __name__ == '__main__':\\n\",\n        \"    # BERT Parameters\\n\",\n        \"    maxlen = 30 # maximum of length\\n\",\n        \"    batch_size = 6\\n\",\n        \"    max_pred = 5  # max tokens of prediction\\n\",\n        \"    n_layers = 6 # number of Encoder of Encoder Layer\\n\",\n        \"    n_heads = 12 # number of heads in Multi-Head Attention\\n\",\n        \"    d_model = 768 # Embedding Size\\n\",\n        \"    d_ff = 768 * 4  # 4*d_model, FeedForward 
dimension\\n\",\n        \"    d_k = d_v = 64  # dimension of K(=Q), V\\n\",\n        \"    n_segments = 2\\n\",\n        \"\\n\",\n        \"    text = (\\n\",\n        \"        'Hello, how are you? I am Romeo.\\\\n'\\n\",\n        \"        'Hello, Romeo My name is Juliet. Nice to meet you.\\\\n'\\n\",\n        \"        'Nice meet you too. How are you today?\\\\n'\\n\",\n        \"        'Great. My baseball team won the competition.\\\\n'\\n\",\n        \"        'Oh Congratulations, Juliet\\\\n'\\n\",\n        \"        'Thanks you Romeo'\\n\",\n        \"    )\\n\",\n        \"    sentences = re.sub(\\\"[.,!?\\\\\\\\-]\\\", '', text.lower()).split('\\\\n')  # filter '.', ',', '?', '!'\\n\",\n        \"    word_list = list(set(\\\" \\\".join(sentences).split()))\\n\",\n        \"    word_dict = {'[PAD]': 0, '[CLS]': 1, '[SEP]': 2, '[MASK]': 3}\\n\",\n        \"    for i, w in enumerate(word_list):\\n\",\n        \"        word_dict[w] = i + 4\\n\",\n        \"    number_dict = {i: w for i, w in enumerate(word_dict)}\\n\",\n        \"    vocab_size = len(word_dict)\\n\",\n        \"\\n\",\n        \"    token_list = list()\\n\",\n        \"    for sentence in sentences:\\n\",\n        \"        arr = [word_dict[s] for s in sentence.split()]\\n\",\n        \"        token_list.append(arr)\\n\",\n        \"\\n\",\n        \"    model = BERT()\\n\",\n        \"    criterion = nn.CrossEntropyLoss()\\n\",\n        \"    optimizer = optim.Adam(model.parameters(), lr=0.001)\\n\",\n        \"\\n\",\n        \"    batch = make_batch()\\n\",\n        \"    input_ids, segment_ids, masked_tokens, masked_pos, isNext = map(torch.LongTensor, zip(*batch))\\n\",\n        \"\\n\",\n        \"    for epoch in range(100):\\n\",\n        \"        optimizer.zero_grad()\\n\",\n        \"        logits_lm, logits_clsf = model(input_ids, segment_ids, masked_pos)\\n\",\n        \"        loss_lm = criterion(logits_lm.transpose(1, 2), masked_tokens) # for masked LM\\n\",\n        \"    
    loss_lm = (loss_lm.float()).mean()\\n\",\n        \"        loss_clsf = criterion(logits_clsf, isNext) # for sentence classification\\n\",\n        \"        loss = loss_lm + loss_clsf\\n\",\n        \"        if (epoch + 1) % 10 == 0:\\n\",\n        \"            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\\n\",\n        \"        loss.backward()\\n\",\n        \"        optimizer.step()\\n\",\n        \"\\n\",\n        \"    # Predict mask tokens ans isNext\\n\",\n        \"    input_ids, segment_ids, masked_tokens, masked_pos, isNext = map(torch.LongTensor, zip(batch[0]))\\n\",\n        \"    print(text)\\n\",\n        \"    print([number_dict[w.item()] for w in input_ids[0] if number_dict[w.item()] != '[PAD]'])\\n\",\n        \"\\n\",\n        \"    logits_lm, logits_clsf = model(input_ids, segment_ids, masked_pos)\\n\",\n        \"    logits_lm = logits_lm.data.max(2)[1][0].data.numpy()\\n\",\n        \"    print('masked tokens list : ',[pos.item() for pos in masked_tokens[0] if pos.item() != 0])\\n\",\n        \"    print('predict masked tokens list : ',[pos for pos in logits_lm if pos != 0])\\n\",\n        \"\\n\",\n        \"    logits_clsf = logits_clsf.data.max(1)[1].data.numpy()[0]\\n\",\n        \"    print('isNext : ', True if isNext else False)\\n\",\n        \"    print('predict isNext : ',True if logits_clsf else False)\\n\"\n      ],\n      \"outputs\": [],\n      \"execution_count\": null\n    }\n  ],\n  \"metadata\": {\n    \"anaconda-cloud\": {},\n    \"kernelspec\": {\n      \"display_name\": \"Python 3\",\n      \"language\": \"python\",\n      \"name\": \"python3\"\n    },\n    \"language_info\": {\n      \"codemirror_mode\": {\n        \"name\": \"ipython\",\n        \"version\": 3\n      },\n      \"file_extension\": \".py\",\n      \"mimetype\": \"text/x-python\",\n      \"name\": \"python\",\n      \"nbconvert_exporter\": \"python\",\n      \"pygments_lexer\": \"ipython3\",\n      \"version\": \"3.6.1\"\n    
}\n  },\n  \"nbformat\": 4,\n  \"nbformat_minor\": 4\n}"
  },
  {
    "path": "5-2.BERT/BERT.py",
    "content": "# %%\n# code by Tae Hwan Jung(Jeff Jung) @graykode\n# Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch\n#           https://github.com/JayParks/transformer, https://github.com/dhlee347/pytorchic-bert\nimport math\nimport re\nfrom random import *\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\n\n# sample IsNext and NotNext to be same in small batch size\ndef make_batch():\n    batch = []\n    positive = negative = 0\n    while positive != batch_size/2 or negative != batch_size/2:\n        tokens_a_index, tokens_b_index= randrange(len(sentences)), randrange(len(sentences)) # sample random index in sentences\n        tokens_a, tokens_b= token_list[tokens_a_index], token_list[tokens_b_index]\n        input_ids = [word_dict['[CLS]']] + tokens_a + [word_dict['[SEP]']] + tokens_b + [word_dict['[SEP]']]\n        segment_ids = [0] * (1 + len(tokens_a) + 1) + [1] * (len(tokens_b) + 1)\n\n        # MASK LM\n        n_pred =  min(max_pred, max(1, int(round(len(input_ids) * 0.15)))) # 15 % of tokens in one sentence\n        cand_maked_pos = [i for i, token in enumerate(input_ids)\n                          if token != word_dict['[CLS]'] and token != word_dict['[SEP]']]\n        shuffle(cand_maked_pos)\n        masked_tokens, masked_pos = [], []\n        for pos in cand_maked_pos[:n_pred]:\n            masked_pos.append(pos)\n            masked_tokens.append(input_ids[pos])\n            if random() < 0.8:  # 80%\n                input_ids[pos] = word_dict['[MASK]'] # make mask\n            elif random() < 0.5:  # 10%\n                index = randint(0, vocab_size - 1) # random index in vocabulary\n                input_ids[pos] = word_dict[number_dict[index]] # replace\n\n        # Zero Paddings\n        n_pad = maxlen - len(input_ids)\n        input_ids.extend([0] * n_pad)\n        segment_ids.extend([0] * n_pad)\n\n        # Zero Padding (100% - 15%) tokens\n        if max_pred > n_pred:\n      
      n_pad = max_pred - n_pred\n            masked_tokens.extend([0] * n_pad)\n            masked_pos.extend([0] * n_pad)\n\n        if tokens_a_index + 1 == tokens_b_index and positive < batch_size/2:\n            batch.append([input_ids, segment_ids, masked_tokens, masked_pos, True]) # IsNext\n            positive += 1\n        elif tokens_a_index + 1 != tokens_b_index and negative < batch_size/2:\n            batch.append([input_ids, segment_ids, masked_tokens, masked_pos, False]) # NotNext\n            negative += 1\n    return batch\n# Proprecessing Finished\n\ndef get_attn_pad_mask(seq_q, seq_k):\n    batch_size, len_q = seq_q.size()\n    batch_size, len_k = seq_k.size()\n    # eq(zero) is PAD token\n    pad_attn_mask = seq_k.data.eq(0).unsqueeze(1)  # batch_size x 1 x len_k(=len_q), one is masking\n    return pad_attn_mask.expand(batch_size, len_q, len_k)  # batch_size x len_q x len_k\n\ndef gelu(x):\n    \"Implementation of the gelu activation function by Hugging Face\"\n    return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))\n\nclass Embedding(nn.Module):\n    def __init__(self):\n        super(Embedding, self).__init__()\n        self.tok_embed = nn.Embedding(vocab_size, d_model)  # token embedding\n        self.pos_embed = nn.Embedding(maxlen, d_model)  # position embedding\n        self.seg_embed = nn.Embedding(n_segments, d_model)  # segment(token type) embedding\n        self.norm = nn.LayerNorm(d_model)\n\n    def forward(self, x, seg):\n        seq_len = x.size(1)\n        pos = torch.arange(seq_len, dtype=torch.long)\n        pos = pos.unsqueeze(0).expand_as(x)  # (seq_len,) -> (batch_size, seq_len)\n        embedding = self.tok_embed(x) + self.pos_embed(pos) + self.seg_embed(seg)\n        return self.norm(embedding)\n\nclass ScaledDotProductAttention(nn.Module):\n    def __init__(self):\n        super(ScaledDotProductAttention, self).__init__()\n\n    def forward(self, Q, K, V, attn_mask):\n        scores = torch.matmul(Q, K.transpose(-1, -2)) 
/ np.sqrt(d_k) # scores : [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n        scores.masked_fill_(attn_mask, -1e9) # Fills elements of self tensor with value where mask is one.\n        attn = nn.Softmax(dim=-1)(scores)\n        context = torch.matmul(attn, V)\n        return context, attn\n\nclass MultiHeadAttention(nn.Module):\n    def __init__(self):\n        super(MultiHeadAttention, self).__init__()\n        self.W_Q = nn.Linear(d_model, d_k * n_heads)\n        self.W_K = nn.Linear(d_model, d_k * n_heads)\n        self.W_V = nn.Linear(d_model, d_v * n_heads)\n    def forward(self, Q, K, V, attn_mask):\n        # q: [batch_size x len_q x d_model], k: [batch_size x len_k x d_model], v: [batch_size x len_k x d_model]\n        residual, batch_size = Q, Q.size(0)\n        # (B, S, D) -proj-> (B, S, D) -split-> (B, S, H, W) -trans-> (B, H, S, W)\n        q_s = self.W_Q(Q).view(batch_size, -1, n_heads, d_k).transpose(1,2)  # q_s: [batch_size x n_heads x len_q x d_k]\n        k_s = self.W_K(K).view(batch_size, -1, n_heads, d_k).transpose(1,2)  # k_s: [batch_size x n_heads x len_k x d_k]\n        v_s = self.W_V(V).view(batch_size, -1, n_heads, d_v).transpose(1,2)  # v_s: [batch_size x n_heads x len_k x d_v]\n\n        attn_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1) # attn_mask : [batch_size x n_heads x len_q x len_k]\n\n        # context: [batch_size x n_heads x len_q x d_v], attn: [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n        context, attn = ScaledDotProductAttention()(q_s, k_s, v_s, attn_mask)\n        context = context.transpose(1, 2).contiguous().view(batch_size, -1, n_heads * d_v) # context: [batch_size x len_q x n_heads * d_v]\n        output = nn.Linear(n_heads * d_v, d_model)(context)\n        return nn.LayerNorm(d_model)(output + residual), attn # output: [batch_size x len_q x d_model]\n\nclass PoswiseFeedForwardNet(nn.Module):\n    def __init__(self):\n        super(PoswiseFeedForwardNet, self).__init__()\n        
self.fc1 = nn.Linear(d_model, d_ff)\n        self.fc2 = nn.Linear(d_ff, d_model)\n\n    def forward(self, x):\n        # (batch_size, len_seq, d_model) -> (batch_size, len_seq, d_ff) -> (batch_size, len_seq, d_model)\n        return self.fc2(gelu(self.fc1(x)))\n\nclass EncoderLayer(nn.Module):\n    def __init__(self):\n        super(EncoderLayer, self).__init__()\n        self.enc_self_attn = MultiHeadAttention()\n        self.pos_ffn = PoswiseFeedForwardNet()\n\n    def forward(self, enc_inputs, enc_self_attn_mask):\n        enc_outputs, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask) # enc_inputs to same Q,K,V\n        enc_outputs = self.pos_ffn(enc_outputs) # enc_outputs: [batch_size x len_q x d_model]\n        return enc_outputs, attn\n\nclass BERT(nn.Module):\n    def __init__(self):\n        super(BERT, self).__init__()\n        self.embedding = Embedding()\n        self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])\n        self.fc = nn.Linear(d_model, d_model)\n        self.activ1 = nn.Tanh()\n        self.linear = nn.Linear(d_model, d_model)\n        self.activ2 = gelu\n        self.norm = nn.LayerNorm(d_model)\n        self.classifier = nn.Linear(d_model, 2)\n        # decoder is shared with embedding layer\n        embed_weight = self.embedding.tok_embed.weight\n        n_vocab, n_dim = embed_weight.size()\n        self.decoder = nn.Linear(n_dim, n_vocab, bias=False)\n        self.decoder.weight = embed_weight\n        self.decoder_bias = nn.Parameter(torch.zeros(n_vocab))\n\n    def forward(self, input_ids, segment_ids, masked_pos):\n        output = self.embedding(input_ids, segment_ids)\n        enc_self_attn_mask = get_attn_pad_mask(input_ids, input_ids)\n        for layer in self.layers:\n            output, enc_self_attn = layer(output, enc_self_attn_mask)\n        # output : [batch_size, len, d_model], attn : [batch_size, n_heads, d_mode, d_model]\n        # it will be decided by first 
token(CLS)\n        h_pooled = self.activ1(self.fc(output[:, 0])) # [batch_size, d_model]\n        logits_clsf = self.classifier(h_pooled) # [batch_size, 2]\n\n        masked_pos = masked_pos[:, :, None].expand(-1, -1, output.size(-1)) # [batch_size, max_pred, d_model]\n        # get masked position from final output of transformer.\n        h_masked = torch.gather(output, 1, masked_pos) # masking position [batch_size, max_pred, d_model]\n        h_masked = self.norm(self.activ2(self.linear(h_masked)))\n        logits_lm = self.decoder(h_masked) + self.decoder_bias # [batch_size, max_pred, n_vocab]\n\n        return logits_lm, logits_clsf\n\nif __name__ == '__main__':\n    # BERT Parameters\n    maxlen = 30 # maximum of length\n    batch_size = 6\n    max_pred = 5  # max tokens of prediction\n    n_layers = 6 # number of Encoder of Encoder Layer\n    n_heads = 12 # number of heads in Multi-Head Attention\n    d_model = 768 # Embedding Size\n    d_ff = 768 * 4  # 4*d_model, FeedForward dimension\n    d_k = d_v = 64  # dimension of K(=Q), V\n    n_segments = 2\n\n    text = (\n        'Hello, how are you? I am Romeo.\\n'\n        'Hello, Romeo My name is Juliet. Nice to meet you.\\n'\n        'Nice meet you too. How are you today?\\n'\n        'Great. 
My baseball team won the competition.\\n'\n        'Oh Congratulations, Juliet\\n'\n        'Thanks you Romeo'\n    )\n    sentences = re.sub(\"[.,!?\\\\-]\", '', text.lower()).split('\\n')  # filter '.', ',', '?', '!'\n    word_list = list(set(\" \".join(sentences).split()))\n    word_dict = {'[PAD]': 0, '[CLS]': 1, '[SEP]': 2, '[MASK]': 3}\n    for i, w in enumerate(word_list):\n        word_dict[w] = i + 4\n    number_dict = {i: w for i, w in enumerate(word_dict)}\n    vocab_size = len(word_dict)\n\n    token_list = list()\n    for sentence in sentences:\n        arr = [word_dict[s] for s in sentence.split()]\n        token_list.append(arr)\n\n    model = BERT()\n    criterion = nn.CrossEntropyLoss()\n    optimizer = optim.Adam(model.parameters(), lr=0.001)\n\n    batch = make_batch()\n    input_ids, segment_ids, masked_tokens, masked_pos, isNext = map(torch.LongTensor, zip(*batch))\n\n    for epoch in range(100):\n        optimizer.zero_grad()\n        logits_lm, logits_clsf = model(input_ids, segment_ids, masked_pos)\n        loss_lm = criterion(logits_lm.transpose(1, 2), masked_tokens) # for masked LM\n        loss_lm = (loss_lm.float()).mean()\n        loss_clsf = criterion(logits_clsf, isNext) # for sentence classification\n        loss = loss_lm + loss_clsf\n        if (epoch + 1) % 10 == 0:\n            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n        loss.backward()\n        optimizer.step()\n\n    # Predict mask tokens ans isNext\n    input_ids, segment_ids, masked_tokens, masked_pos, isNext = map(torch.LongTensor, zip(batch[0]))\n    print(text)\n    print([number_dict[w.item()] for w in input_ids[0] if number_dict[w.item()] != '[PAD]'])\n\n    logits_lm, logits_clsf = model(input_ids, segment_ids, masked_pos)\n    logits_lm = logits_lm.data.max(2)[1][0].data.numpy()\n    print('masked tokens list : ',[pos.item() for pos in masked_tokens[0] if pos.item() != 0])\n    print('predict masked tokens list : ',[pos for pos in 
logits_lm if pos != 0])\n\n    logits_clsf = logits_clsf.data.max(1)[1].data.numpy()[0]\n    print('isNext : ', True if isNext else False)\n    print('predict isNext : ',True if logits_clsf else False)\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "## Contribution Guidelines\n\nThank you to everyone who contributes. Here are some rules to follow before contributing.\n1. Contributions are open to the smallest details such as typos, comments and code refactors.\n2. Do not commit the jupyter notebook file(*.ipynb). When the modified python code is merged into the master branch, the github action automatically generates an ipynb.\n3. Please attach a commit message appropriate to the modified code."
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2019 TaeHwan Jung\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "README.md",
    "content": "## nlp-tutorial\n\n<p align=\"center\"><img width=\"100\" src=\"https://upload.wikimedia.org/wikipedia/commons/thumb/1/11/TensorFlowLogo.svg/225px-TensorFlowLogo.svg.png\" />  <img width=\"100\" src=\"https://media-thumbs.golden.com/OLqzmrmwAzY1P7Sl29k2T9WjJdM=/200x200/smart/golden-storage-production.s3.amazonaws.com/topic_images/e08914afa10a4179893eeb07cb5e4713.png\" /></p>\n\n`nlp-tutorial` is a tutorial for who is studying NLP(Natural Language Processing) using **Pytorch**. Most of the models in NLP were implemented with less than **100 lines** of code.(except comments or blank lines)\n\n- [08-14-2020] Old TensorFlow v1 code is archived in [the archive folder](archive). For beginner readability, only pytorch version 1.0 or higher is supported.\n\n\n## Curriculum - (Example Purpose)\n\n#### 1. Basic Embedding Model\n\n- 1-1. [NNLM(Neural Network Language Model)](1-1.NNLM) - **Predict Next Word**\n  - Paper -  [A Neural Probabilistic Language Model(2003)](http://www.jmlr.org/papers/volume3/bengio03a/bengio03a.pdf)\n  - Colab - [NNLM.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/1-1.NNLM/NNLM.ipynb)\n- 1-2. [Word2Vec(Skip-gram)](1-2.Word2Vec) - **Embedding Words and Show Graph**\n  - Paper - [Distributed Representations of Words and Phrases\n    and their Compositionality(2013)](https://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf)\n  - Colab - [Word2Vec.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/1-2.Word2Vec/Word2Vec_Skipgram(Softmax).ipynb)\n- 1-3. [FastText(Application Level)](1-3.FastText) - **Sentence Classification**\n  - Paper - [Bag of Tricks for Efficient Text Classification(2016)](https://arxiv.org/pdf/1607.01759.pdf)\n  - Colab - [FastText.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/1-3.FastText/FastText.ipynb)\n\n\n\n#### 2. 
CNN(Convolutional Neural Network)\n\n- 2-1. [TextCNN](2-1.TextCNN) - **Binary Sentiment Classification**\n  - Paper - [Convolutional Neural Networks for Sentence Classification(2014)](http://www.aclweb.org/anthology/D14-1181)\n  - [TextCNN.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/2-1.TextCNN/TextCNN.ipynb)\n\n\n\n#### 3. RNN(Recurrent Neural Network)\n\n- 3-1. [TextRNN](3-1.TextRNN) - **Predict Next Step**\n  - Paper - [Finding Structure in Time(1990)](http://psych.colorado.edu/~kimlab/Elman1990.pdf)\n  - Colab - [TextRNN.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/3-1.TextRNN/TextRNN.ipynb)\n- 3-2. [TextLSTM](https://github.com/graykode/nlp-tutorial/tree/master/3-2.TextLSTM) - **Autocomplete**\n  - Paper - [LONG SHORT-TERM MEMORY(1997)](https://www.bioinf.jku.at/publications/older/2604.pdf)\n  - Colab - [TextLSTM.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/3-2.TextLSTM/TextLSTM.ipynb)\n- 3-3. [Bi-LSTM](3-3.Bi-LSTM) - **Predict Next Word in Long Sentence**\n  - Colab - [Bi_LSTM.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/3-3.Bi-LSTM/Bi_LSTM.ipynb)\n\n\n\n#### 4. Attention Mechanism\n\n- 4-1. [Seq2Seq](4-1.Seq2Seq) - **Change Word**\n  - Paper - [Learning Phrase Representations using RNN Encoder–Decoder\n    for Statistical Machine Translation(2014)](https://arxiv.org/pdf/1406.1078.pdf)\n  - Colab - [Seq2Seq.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/4-1.Seq2Seq/Seq2Seq.ipynb)\n- 4-2. [Seq2Seq with Attention](4-2.Seq2Seq(Attention)) - **Translate**\n  - Paper - [Neural Machine Translation by Jointly Learning to Align and Translate(2014)](https://arxiv.org/abs/1409.0473)\n  - Colab - [Seq2Seq(Attention).ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/4-2.Seq2Seq(Attention)/Seq2Seq(Attention).ipynb)\n- 4-3. 
[Bi-LSTM with Attention](4-3.Bi-LSTM(Attention)) - **Binary Sentiment Classification**\n  - Colab - [Bi_LSTM(Attention).ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/4-3.Bi-LSTM(Attention)/Bi_LSTM(Attention).ipynb)\n\n\n\n#### 5. Model based on Transformer\n\n- 5-1.  [The Transformer](5-1.Transformer) - **Translate**\n  - Paper - [Attention Is All You Need(2017)](https://arxiv.org/abs/1706.03762)\n  - Colab - [Transformer.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/5-1.Transformer/Transformer.ipynb), [Transformer(Greedy_decoder).ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/5-1.Transformer/Transformer(Greedy_decoder).ipynb)\n- 5-2. [BERT](5-2.BERT) - **Classification Next Sentence & Predict Masked Tokens**\n  - Paper - [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding(2018)](https://arxiv.org/abs/1810.04805)\n  - Colab - [BERT.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/5-2.BERT/BERT.ipynb)\n\n\n\n## Dependencies\n\n- Python 3.5+\n- Pytorch 1.0.0+\n\n\n\n## Author\n\n- Tae Hwan Jung(Jeff Jung) @graykode\n- Author Email : nlkey2022@gmail.com\n- Acknowledgements to [mojitok](http://mojitok.com/) as NLP Research Internship.\n"
  },
  {
    "path": "archive/tensorflow/v1/1-1.NNLM/NNLM.py",
    "content": "# code by Tae Hwan Jung @graykode\nimport tensorflow as tf\nimport numpy as np\n\ntf.reset_default_graph()\n\nsentences = [ \"i like dog\", \"i love coffee\", \"i hate milk\"]\n\nword_list = \" \".join(sentences).split()\nword_list = list(set(word_list))\nword_dict = {w: i for i, w in enumerate(word_list)}\nnumber_dict = {i: w for i, w in enumerate(word_list)}\nn_class = len(word_dict) # number of Vocabulary\n\n# NNLM Parameter\nn_step = 2 # number of steps ['i like', 'i love', 'i hate']\nn_hidden = 2 # number of hidden units\n\ndef make_batch(sentences):\n    input_batch = []\n    target_batch = []\n\n    for sen in sentences:\n        word = sen.split()\n        input = [word_dict[n] for n in word[:-1]]\n        target = word_dict[word[-1]]\n\n        input_batch.append(np.eye(n_class)[input])\n        target_batch.append(np.eye(n_class)[target])\n\n    return input_batch, target_batch\n\n# Model\nX = tf.placeholder(tf.float32, [None, n_step, n_class]) # [batch_size, number of steps, number of Vocabulary]\nY = tf.placeholder(tf.float32, [None, n_class])\n\ninput = tf.reshape(X, shape=[-1, n_step * n_class]) # [batch_size, n_step * n_class]\nH = tf.Variable(tf.random_normal([n_step * n_class, n_hidden]))\nd = tf.Variable(tf.random_normal([n_hidden]))\nU = tf.Variable(tf.random_normal([n_hidden, n_class]))\nb = tf.Variable(tf.random_normal([n_class]))\n\ntanh = tf.nn.tanh(d + tf.matmul(input, H)) # [batch_size, n_hidden]\nmodel = tf.matmul(tanh, U) + b # [batch_size, n_class]\n\ncost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y))\noptimizer = tf.train.AdamOptimizer(0.001).minimize(cost)\nprediction =tf.argmax(model, 1)\n\n# Training\ninit = tf.global_variables_initializer()\nsess = tf.Session()\nsess.run(init)\n\ninput_batch, target_batch = make_batch(sentences)\n\nfor epoch in range(5000):\n    _, loss = sess.run([optimizer, cost], feed_dict={X: input_batch, Y: target_batch})\n    if (epoch + 1)%1000 == 0:\n    
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n\n# Predict\npredict =  sess.run([prediction], feed_dict={X: input_batch})\n\n# Test\ninput = [sen.split()[:2] for sen in sentences]\nprint([sen.split()[:2] for sen in sentences], '->', [number_dict[n] for n in predict[0]])"
  },
  {
    "path": "archive/tensorflow/v1/1-2.Word2Vec/Word2Vec-Skipgram(NCE_loss).py",
    "content": "'''\n  code by Tae Hwan Jung(Jeff Jung) @graykode\n  reference : https://github.com/golbin/TensorFlow-Tutorials/blob/master/04%20-%20Neural%20Network%20Basic/03%20-%20Word2Vec.py\n'''\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\nimport numpy as np\n\ntf.reset_default_graph()\n\n# 3 Words Sentence\nsentences = [ \"i like dog\", \"i like cat\", \"i like animal\",\n              \"dog cat animal\", \"apple cat dog like\", \"dog fish milk like\",\n              \"dog cat eyes like\", \"i like apple\", \"apple i hate\",\n              \"apple i movie book music like\", \"cat dog hate\", \"cat dog like\"]\n\nword_sequence = \" \".join(sentences).split()\nword_list = \" \".join(sentences).split()\nword_list = list(set(word_list))\nword_dict = {w: i for i, w in enumerate(word_list)}\n\n# Word2Vec Parameter\nbatch_size = 20\nembedding_size = 2 # To show 2 dim embedding graph\nnum_sampled = 10 # for negative sampling, less than batch_size\nvoc_size = len(word_list)\n\ndef random_batch(data, size):\n    random_inputs = []\n    random_labels = []\n    random_index = np.random.choice(range(len(data)), size, replace=False)\n\n    for i in random_index:\n        random_inputs.append(data[i][0])  # target\n        random_labels.append([data[i][1]])  # context word\n\n    return random_inputs, random_labels\n\n# Make skip gram of one size window\nskip_grams = []\nfor i in range(1, len(word_sequence) - 1):\n    target = word_dict[word_sequence[i]]\n    context = [word_dict[word_sequence[i - 1]], word_dict[word_sequence[i + 1]]]\n\n    for w in context:\n        skip_grams.append([target, w])\n\n# Model\ninputs = tf.placeholder(tf.int32, shape=[batch_size])\nlabels = tf.placeholder(tf.int32, shape=[batch_size, 1]) # To use tf.nn.nce_loss, [batch_size, 1]\n\nembeddings = tf.Variable(tf.random_uniform([voc_size, embedding_size], -1.0, 1.0))\nselected_embed = tf.nn.embedding_lookup(embeddings, inputs)\n\nnce_weights = 
tf.Variable(tf.random_uniform([voc_size, embedding_size], -1.0, 1.0))\nnce_biases = tf.Variable(tf.zeros([voc_size]))\n\n# Loss and optimizer\ncost = tf.reduce_mean(tf.nn.nce_loss(nce_weights, nce_biases, labels, selected_embed, num_sampled, voc_size))\noptimizer = tf.train.AdamOptimizer(0.001).minimize(cost)\n\n# Training\nwith tf.Session() as sess:\n    init = tf.global_variables_initializer()\n    sess.run(init)\n\n    for epoch in range(5000):\n        batch_inputs, batch_labels = random_batch(skip_grams, batch_size)\n        _, loss = sess.run([optimizer, cost], feed_dict={inputs: batch_inputs, labels: batch_labels})\n\n        if (epoch + 1) % 1000 == 0:\n            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n\n    trained_embeddings = embeddings.eval()\n\nfor i, label in enumerate(word_list):\n    x, y = trained_embeddings[i]\n    plt.scatter(x, y)\n    plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom')\nplt.show()"
  },
  {
    "path": "archive/tensorflow/v1/1-2.Word2Vec/Word2Vec-Skipgram(Softmax).py",
    "content": "'''\n  code by Tae Hwan Jung(Jeff Jung) @graykode\n'''\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\nimport numpy as np\n\ntf.reset_default_graph()\n\n# 3 Words Sentence\nsentences = [ \"i like dog\", \"i like cat\", \"i like animal\",\n              \"dog cat animal\", \"apple cat dog like\", \"dog fish milk like\",\n              \"dog cat eyes like\", \"i like apple\", \"apple i hate\",\n              \"apple i movie book music like\", \"cat dog hate\", \"cat dog like\"]\n\nword_sequence = \" \".join(sentences).split()\nword_list = \" \".join(sentences).split()\nword_list = list(set(word_list))\nword_dict = {w: i for i, w in enumerate(word_list)}\n\n# Word2Vec Parameter\nbatch_size = 20\nembedding_size = 2 # To show 2 dim embedding graph\nvoc_size = len(word_list)\n\ndef random_batch(data, size):\n    random_inputs = []\n    random_labels = []\n    random_index = np.random.choice(range(len(data)), size, replace=False)\n\n    for i in random_index:\n        random_inputs.append(np.eye(voc_size)[data[i][0]])  # target\n        random_labels.append(np.eye(voc_size)[data[i][1]])  # context word\n\n    return random_inputs, random_labels\n\n# Make skip gram of one size window\nskip_grams = []\nfor i in range(1, len(word_sequence) - 1):\n    target = word_dict[word_sequence[i]]\n    context = [word_dict[word_sequence[i - 1]], word_dict[word_sequence[i + 1]]]\n\n    for w in context:\n        skip_grams.append([target, w])\n\n# Model\ninputs = tf.placeholder(tf.float32, shape=[None, voc_size])\nlabels = tf.placeholder(tf.float32, shape=[None, voc_size])\n\n# W and WT is not Traspose relationship\nW = tf.Variable(tf.random_uniform([voc_size, embedding_size], -1.0, 1.0))\nWT = tf.Variable(tf.random_uniform([embedding_size, voc_size], -1.0, 1.0))\n\nhidden_layer = tf.matmul(inputs, W) # [batch_size, embedding_size]\noutput_layer = tf.matmul(hidden_layer, WT) # [batch_size, voc_size]\n\ncost = 
tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=output_layer, labels=labels))\noptimizer = tf.train.AdamOptimizer(0.001).minimize(cost)\n\nwith tf.Session() as sess:\n    init = tf.global_variables_initializer()\n    sess.run(init)\n\n    for epoch in range(5000):\n        batch_inputs, batch_labels = random_batch(skip_grams, batch_size)\n        _, loss = sess.run([optimizer, cost], feed_dict={inputs: batch_inputs, labels: batch_labels})\n\n        if (epoch + 1)%1000 == 0:\n            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n\n        trained_embeddings = W.eval()\n\nfor i, label in enumerate(word_list):\n    x, y = trained_embeddings[i]\n    plt.scatter(x, y)\n    plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom')\nplt.show()"
  },
  {
    "path": "archive/tensorflow/v1/2-1.TextCNN/TextCNN.py",
    "content": "'''\n  code by Tae Hwan Jung(Jeff Jung) @graykode\n  Reference : https://github.com/ioatr/textcnn\n'''\nimport tensorflow as tf\nimport numpy as np\n\ntf.reset_default_graph()\n\n# Text-CNN Parameter\nembedding_size = 2 # n-gram\nsequence_length = 3\nnum_classes = 2 # 0 or 1\nfilter_sizes = [2,2,2] # n-gram window\nnum_filters = 3\n\n# 3 words sentences (=sequence_length is 3)\nsentences = [\"i love you\",\"he loves me\", \"she likes baseball\", \"i hate you\",\"sorry for that\", \"this is awful\"]\nlabels = [1,1,1,0,0,0] # 1 is good, 0 is not good.\n\nword_list = \" \".join(sentences).split()\nword_list = list(set(word_list))\nword_dict = {w: i for i, w in enumerate(word_list)}\nvocab_size = len(word_dict)\n\ninputs = []\nfor sen in sentences:\n    inputs.append(np.asarray([word_dict[n] for n in sen.split()]))\n\noutputs = []\nfor out in labels:\n    outputs.append(np.eye(num_classes)[out]) # ONE-HOT : To using Tensor Softmax Loss function\n\n# Model\nX = tf.placeholder(tf.int32, [None, sequence_length])\nY = tf.placeholder(tf.int32, [None, num_classes])\n\nW = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0))\nembedded_chars = tf.nn.embedding_lookup(W, X) # [batch_size, sequence_length, embedding_size]\nembedded_chars = tf.expand_dims(embedded_chars, -1) # add channel(=1) [batch_size, sequence_length, embedding_size, 1]\n\npooled_outputs = []\nfor i, filter_size in enumerate(filter_sizes):\n    filter_shape = [filter_size, embedding_size, 1, num_filters]\n    W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1))\n    b = tf.Variable(tf.constant(0.1, shape=[num_filters]))\n\n    conv = tf.nn.conv2d(embedded_chars, # [batch_size, sequence_length, embedding_size, 1]\n                        W,              # [filter_size(n-gram window), embedding_size, 1, num_filters(=3)]\n                        strides=[1, 1, 1, 1],\n                        padding='VALID')\n    h = tf.nn.relu(tf.nn.bias_add(conv, b))\n    pooled = 
tf.nn.max_pool(h,\n                            ksize=[1, sequence_length - filter_size + 1, 1, 1], # [batch_size, filter_height, filter_width, channel]\n                            strides=[1, 1, 1, 1],\n                            padding='VALID')\n    pooled_outputs.append(pooled) # dim of pooled : [batch_size(=6), output_height(=1), output_width(=1), channel(=1)]\n\nnum_filters_total = num_filters * len(filter_sizes)\nh_pool = tf.concat(pooled_outputs, num_filters) # h_pool : [batch_size(=6), output_height(=1), output_width(=1), channel(=1) * 3]\nh_pool_flat = tf.reshape(h_pool, [-1, num_filters_total]) # [batch_size, ]\n\n# Model-Training\nWeight = tf.get_variable('W', shape=[num_filters_total, num_classes], \n                    initializer=tf.contrib.layers.xavier_initializer())\nBias = tf.Variable(tf.constant(0.1, shape=[num_classes]))\nmodel = tf.nn.xw_plus_b(h_pool_flat, Weight, Bias)  \ncost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y))\noptimizer = tf.train.AdamOptimizer(0.001).minimize(cost)\n\n# Model-Predict\nhypothesis = tf.nn.softmax(model)\npredictions = tf.argmax(hypothesis, 1)\n# Training\ninit = tf.global_variables_initializer()\nsess = tf.Session()\nsess.run(init)\n\nfor epoch in range(5000):\n    _, loss = sess.run([optimizer, cost], feed_dict={X: inputs, Y: outputs})\n    if (epoch + 1)%1000 == 0:\n        print('Epoch:', '%06d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n\n# Test\ntest_text = 'sorry hate you'\ntests = []\ntests.append(np.asarray([word_dict[n] for n in test_text.split()]))\n\npredict = sess.run([predictions], feed_dict={X: tests})\nresult = predict[0][0]\nif result == 0:\n    print(test_text,\"is Bad Mean...\")\nelse:\n    print(test_text,\"is Good Mean!!\")"
  },
  {
    "path": "archive/tensorflow/v1/3-1.TextRNN/TextRNN.py",
    "content": "'''\n  code by Tae Hwan Jung(Jeff Jung) @graykode\n'''\nimport tensorflow as tf\nimport numpy as np\n\ntf.reset_default_graph()\n\nsentences = [ \"i like dog\", \"i love coffee\", \"i hate milk\"]\n\nword_list = \" \".join(sentences).split()\nword_list = list(set(word_list))\nword_dict = {w: i for i, w in enumerate(word_list)}\nnumber_dict = {i: w for i, w in enumerate(word_list)}\nn_class = len(word_dict)\n\n# TextRNN Parameter\nn_step = 2 # number of cells(= number of Step)\nn_hidden = 5 # number of hidden units in one cell\n\ndef make_batch(sentences):\n    input_batch = []\n    target_batch = []\n    \n    for sen in sentences:\n        word = sen.split()\n        input = [word_dict[n] for n in word[:-1]]\n        target = word_dict[word[-1]]\n\n        input_batch.append(np.eye(n_class)[input])\n        target_batch.append(np.eye(n_class)[target])\n\n    return input_batch, target_batch\n\n# Model\nX = tf.placeholder(tf.float32, [None, n_step, n_class]) # [batch_size, n_step, n_class]\nY = tf.placeholder(tf.float32, [None, n_class])         # [batch_size, n_class]\n\nW = tf.Variable(tf.random_normal([n_hidden, n_class]))\nb = tf.Variable(tf.random_normal([n_class]))\n\ncell = tf.nn.rnn_cell.BasicRNNCell(n_hidden)\noutputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)\n\n# outputs : [batch_size, n_step, n_hidden]\noutputs = tf.transpose(outputs, [1, 0, 2]) # [n_step, batch_size, n_hidden]\noutputs = outputs[-1] # [batch_size, n_hidden]\nmodel = tf.matmul(outputs, W) + b # model : [batch_size, n_class]\n\ncost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y))\noptimizer = tf.train.AdamOptimizer(0.001).minimize(cost)\n\nprediction = tf.cast(tf.argmax(model, 1), tf.int32)\n\n# Training\ninit = tf.global_variables_initializer()\nsess = tf.Session()\nsess.run(init)\n\ninput_batch, target_batch = make_batch(sentences)\n\nfor epoch in range(5000):\n    _, loss = sess.run([optimizer, cost], feed_dict={X: 
input_batch, Y: target_batch})\n    if (epoch + 1)%1000 == 0:\n        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n        \ninput = [sen.split()[:2] for sen in sentences]\n\npredict =  sess.run([prediction], feed_dict={X: input_batch})\nprint([sen.split()[:2] for sen in sentences], '->', [number_dict[n] for n in predict[0]])"
  },
  {
    "path": "archive/tensorflow/v1/3-2.TextLSTM/TextLSTM.py",
    "content": "'''\n  code by Tae Hwan Jung(Jeff Jung) @graykode\n'''\nimport tensorflow as tf\nimport numpy as np\n\ntf.reset_default_graph()\n\nchar_arr = [c for c in 'abcdefghijklmnopqrstuvwxyz']\nword_dict = {n: i for i, n in enumerate(char_arr)}\nnumber_dict = {i: w for i, w in enumerate(char_arr)}\nn_class = len(word_dict) # number of class(=number of vocab)\n\nseq_data = ['make', 'need', 'coal', 'word', 'love', 'hate', 'live', 'home', 'hash', 'star']\n\n# TextLSTM Parameters\nn_step = 3\nn_hidden = 128\n\ndef make_batch(seq_data):\n    input_batch, target_batch = [], []\n\n    for seq in seq_data:\n        input = [word_dict[n] for n in seq[:-1]] # 'm', 'a' , 'k' is input\n        target = word_dict[seq[-1]] # 'e' is target\n        input_batch.append(np.eye(n_class)[input])\n        target_batch.append(np.eye(n_class)[target])\n\n    return input_batch, target_batch\n\n# Model\nX = tf.placeholder(tf.float32, [None, n_step, n_class]) # [batch_size, n_step, n_class]\nY = tf.placeholder(tf.float32, [None, n_class])         # [batch_size, n_class]\n\nW = tf.Variable(tf.random_normal([n_hidden, n_class]))\nb = tf.Variable(tf.random_normal([n_class]))\n\ncell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)\noutputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)\n\n# outputs : [batch_size, n_step, n_hidden]\noutputs = tf.transpose(outputs, [1, 0, 2]) # [n_step, batch_size, n_hidden]\noutputs = outputs[-1] # [batch_size, n_hidden]\nmodel = tf.matmul(outputs, W) + b # model : [batch_size, n_class]\n\ncost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y))\noptimizer = tf.train.AdamOptimizer(0.001).minimize(cost)\n\nprediction = tf.cast(tf.argmax(model, 1), tf.int32)\n\n# Training\ninit = tf.global_variables_initializer()\nsess = tf.Session()\nsess.run(init)\n\ninput_batch, target_batch = make_batch(seq_data)\n\nfor epoch in range(1000):\n    _, loss = sess.run([optimizer, cost], feed_dict={X: input_batch, Y: target_batch})\n    if 
(epoch + 1)%100 == 0:\n        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n\ninputs = [sen[:3] for sen in seq_data]\n\npredict =  sess.run([prediction], feed_dict={X: input_batch})\nprint(inputs, '->', [number_dict[n] for n in predict[0]])"
  },
  {
    "path": "archive/tensorflow/v1/3-3.Bi-LSTM/Bi-LSTM.py",
    "content": "'''\n  code by Tae Hwan Jung(Jeff Jung) @graykode\n'''\nimport tensorflow as tf\nimport numpy as np\n\ntf.reset_default_graph()\n\nsentence = (\n    'Lorem ipsum dolor sit amet consectetur adipisicing elit '\n    'sed do eiusmod tempor incididunt ut labore et dolore magna '\n    'aliqua Ut enim ad minim veniam quis nostrud exercitation'\n)\n\nword_dict = {w: i for i, w in enumerate(list(set(sentence.split())))}\nnumber_dict = {i: w for i, w in enumerate(list(set(sentence.split())))}\nn_class = len(word_dict)\nn_step = len(sentence.split())\nn_hidden = 5\n\ndef make_batch(sentence):\n    input_batch = []\n    target_batch = []\n\n    words = sentence.split()\n    for i, word in enumerate(words[:-1]):\n        input = [word_dict[n] for n in words[:(i + 1)]]\n        input = input + [0] * (n_step - len(input))\n        target = word_dict[words[i + 1]]\n        input_batch.append(np.eye(n_class)[input])\n        target_batch.append(np.eye(n_class)[target])\n\n    return input_batch, target_batch\n\n# Bi-LSTM Model\nX = tf.placeholder(tf.float32, [None, n_step, n_class])\nY = tf.placeholder(tf.float32, [None, n_class])\n\nW = tf.Variable(tf.random_normal([n_hidden * 2, n_class]))\nb = tf.Variable(tf.random_normal([n_class]))\n\nlstm_fw_cell = tf.nn.rnn_cell.LSTMCell(n_hidden)\nlstm_bw_cell = tf.nn.rnn_cell.LSTMCell(n_hidden)\n\n# outputs : [batch_size, len_seq, n_hidden], states : [batch_size, n_hidden]\noutputs, _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell,lstm_bw_cell, X, dtype=tf.float32)\n\noutputs = tf.concat([outputs[0], outputs[1]], 2) # output[0] : lstm_fw, output[1] : lstm_bw\noutputs = tf.transpose(outputs, [1, 0, 2]) # [n_step, batch_size, n_hidden]\noutputs = outputs[-1] # [batch_size, n_hidden]\n\nmodel = tf.matmul(outputs, W) + b\n\ncost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y))\noptimizer = tf.train.AdamOptimizer(0.001).minimize(cost)\n\nprediction = tf.cast(tf.argmax(model, 1), tf.int32)\n\n# 
Training\ninit = tf.global_variables_initializer()\nsess = tf.Session()\nsess.run(init)\n\ninput_batch, target_batch = make_batch(sentence)\n\nfor epoch in range(10000):\n    _, loss = sess.run([optimizer, cost], feed_dict={X: input_batch, Y: target_batch})\n    if (epoch + 1)%1000 == 0:\n        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n\npredict =  sess.run([prediction], feed_dict={X: input_batch})\nprint(sentence)\nprint([number_dict[n] for n in [pre for pre in predict[0]]])"
  },
  {
    "path": "archive/tensorflow/v1/4-1.Seq2Seq/Seq2Seq.py",
    "content": "'''\n  code by Tae Hwan Jung(Jeff Jung) @graykode\n  reference : https://github.com/golbin/TensorFlow-Tutorials/blob/master/10%20-%20RNN/03%20-%20Seq2Seq.py\n'''\nimport tensorflow as tf\nimport numpy as np\n\ntf.reset_default_graph()\n# S: Symbol that shows starting of decoding input\n# E: Symbol that shows starting of decoding output\n# P: Symbol that will fill in blank sequence if current batch data size is short than time steps\n\nchar_arr = [c for c in 'SEPabcdefghijklmnopqrstuvwxyz']\nnum_dic = {n: i for i, n in enumerate(char_arr)}\n\nseq_data = [['man', 'women'], ['black', 'white'], ['king', 'queen'], ['girl', 'boy'], ['up', 'down'], ['high', 'low']]\n\n# Seq2Seq Parameter\nn_step = 5\nn_hidden = 128\nn_class = len(num_dic) # number of class(=number of vocab)\n\ndef make_batch(seq_data):\n    input_batch, output_batch, target_batch = [], [], []\n\n    for seq in seq_data:\n        for i in range(2):\n            seq[i] = seq[i] + 'P' * (n_step - len(seq[i]))\n\n        input = [num_dic[n] for n in seq[0]]\n        output = [num_dic[n] for n in ('S' + seq[1])]\n        target = [num_dic[n] for n in (seq[1] + 'E')]\n\n        input_batch.append(np.eye(n_class)[input])\n        output_batch.append(np.eye(n_class)[output])\n\n        target_batch.append(target)\n\n    return input_batch, output_batch, target_batch\n\n# Model\nenc_input = tf.placeholder(tf.float32, [None, None, n_class]) # [batch_size, max_len(=encoder_step), n_class]\ndec_input = tf.placeholder(tf.float32, [None, None, n_class]) # [batch_size, max_len+1(=decoder_step) (becase of 'S' or 'E'), n_class]\ntargets = tf.placeholder(tf.int64, [None, None]) # [batch_size, max_len+1], not one-hot\n\nwith tf.variable_scope('encode'):\n    enc_cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden)\n    enc_cell = tf.nn.rnn_cell.DropoutWrapper(enc_cell, output_keep_prob=0.5)\n    _, enc_states = tf.nn.dynamic_rnn(enc_cell, enc_input, dtype=tf.float32)\n    # encoder state will go to decoder 
initial_state, enc_states : [batch_size, n_hidden(=128)]\n\nwith tf.variable_scope('decode'):\n    dec_cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden)\n    dec_cell = tf.nn.rnn_cell.DropoutWrapper(dec_cell, output_keep_prob=0.5)\n    outputs, _ = tf.nn.dynamic_rnn(dec_cell, dec_input, initial_state=enc_states, dtype=tf.float32)\n    # outputs : [batch_size, max_len+1, n_hidden(=128)]\n\nmodel = tf.layers.dense(outputs, n_class, activation=None) # model : [batch_size, max_len+1, n_class]\n\ncost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=model, labels=targets))\noptimizer = tf.train.AdamOptimizer(0.001).minimize(cost)\n\n# Training\nsess = tf.Session()\nsess.run(tf.global_variables_initializer())\ninput_batch, output_batch, target_batch = make_batch(seq_data)\n\nfor epoch in range(5000):\n    _, loss = sess.run([optimizer, cost], feed_dict={enc_input: input_batch, dec_input: output_batch, targets: target_batch})\n    if (epoch + 1)%1000 == 0:\n        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n\n# Test\ndef translate(word):\n    seq_data = [word, 'P' * len(word)]\n\n    input_batch, output_batch, _ = make_batch([seq_data])\n    prediction = tf.argmax(model, 2)\n\n    result = sess.run(prediction, feed_dict={enc_input: input_batch, dec_input: output_batch})\n\n    decoded = [char_arr[i] for i in result[0]]\n    end = decoded.index('E')\n    translated = ''.join(decoded[:end])\n\n    return translated.replace('P','')\n\nprint('test')\nprint('man ->', translate('man'))\nprint('mans ->', translate('mans'))\nprint('king ->', translate('king'))\nprint('black ->', translate('black'))\nprint('upp ->', translate('upp'))"
  },
  {
    "path": "archive/tensorflow/v1/4-2.Seq2Seq(Attention)/Seq2Seq(Attention).py",
    "content": "# code by Tae Hwan Jung(Jeff Jung) @graykode\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\nimport numpy as np\n\ntf.reset_default_graph()\n# S: Symbol that shows starting of decoding input\n# E: Symbol that shows starting of decoding output\n# P: Symbol that will fill in blank sequence if current batch data size is short than time steps\nsentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']\n\nword_list = \" \".join(sentences).split()\nword_list = list(set(word_list))\nword_dict = {w: i for i, w in enumerate(word_list)}\nnumber_dict = {i: w for i, w in enumerate(word_list)}\nn_class = len(word_dict)  # vocab list\n\n# Parameter\nn_step = 5  # maxium number of words in one sentence(=number of time steps)\nn_hidden = 128\n\ndef make_batch(sentences):\n    input_batch = [np.eye(n_class)[[word_dict[n] for n in sentences[0].split()]]]\n    output_batch = [np.eye(n_class)[[word_dict[n] for n in sentences[1].split()]]]\n    target_batch = [[word_dict[n] for n in sentences[2].split()]]\n    return input_batch, output_batch, target_batch\n\n# Model\nenc_inputs = tf.placeholder(tf.float32, [None, None, n_class])  # [batch_size, n_step, n_class]\ndec_inputs = tf.placeholder(tf.float32, [None, None, n_class])  # [batch_size, n_step, n_class]\ntargets = tf.placeholder(tf.int64, [1, n_step])  # [batch_size, n_step], not one-hot\n\n# Linear for attention\nattn = tf.Variable(tf.random_normal([n_hidden, n_hidden]))\nout = tf.Variable(tf.random_normal([n_hidden * 2, n_class]))\n\ndef get_att_score(dec_output, enc_output):  # enc_output [n_step, n_hidden]\n    score = tf.squeeze(tf.matmul(enc_output, attn), 0)  # score : [n_hidden]\n    dec_output = tf.squeeze(dec_output, [0, 1])  # dec_output : [n_hidden]\n    return tf.tensordot(dec_output, score, 1)  # inner product make scalar value\n\ndef get_att_weight(dec_output, enc_outputs):\n    attn_scores = []  # list of attention scalar : [n_step]\n    enc_outputs = 
tf.transpose(enc_outputs, [1, 0, 2])  # enc_outputs : [n_step, batch_size, n_hidden]\n    for i in range(n_step):\n        attn_scores.append(get_att_score(dec_output, enc_outputs[i]))\n\n    # Normalize scores to weights in range 0 to 1\n    return tf.reshape(tf.nn.softmax(attn_scores), [1, 1, -1])  # [1, 1, n_step]\n\nmodel = []\nAttention = []\nwith tf.variable_scope('encode'):\n    enc_cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden)\n    enc_cell = tf.nn.rnn_cell.DropoutWrapper(enc_cell, output_keep_prob=0.5)\n    # enc_outputs : [batch_size(=1), n_step(=decoder_step), n_hidden(=128)]\n    # enc_hidden : [batch_size(=1), n_hidden(=128)]\n    enc_outputs, enc_hidden = tf.nn.dynamic_rnn(enc_cell, enc_inputs, dtype=tf.float32)\n\nwith tf.variable_scope('decode'):\n    dec_cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden)\n    dec_cell = tf.nn.rnn_cell.DropoutWrapper(dec_cell, output_keep_prob=0.5)\n\n    inputs = tf.transpose(dec_inputs, [1, 0, 2])\n    hidden = enc_hidden\n    for i in range(n_step):\n        # time_major True mean inputs shape: [max_time, batch_size, ...]\n        dec_output, hidden = tf.nn.dynamic_rnn(dec_cell, tf.expand_dims(inputs[i], 1),\n                                               initial_state=hidden, dtype=tf.float32, time_major=True)\n        attn_weights = get_att_weight(dec_output, enc_outputs)  # attn_weights : [1, 1, n_step]\n        Attention.append(tf.squeeze(attn_weights))\n\n        # matrix-matrix product of matrices [1, 1, n_step] x [1, n_step, n_hidden] = [1, 1, n_hidden]\n        context = tf.matmul(attn_weights, enc_outputs)\n        dec_output = tf.squeeze(dec_output, 0)  # [1, n_step]\n        context = tf.squeeze(context, 1)  # [1, n_hidden]\n\n        model.append(tf.matmul(tf.concat((dec_output, context), 1), out))  # [n_step, batch_size(=1), n_class]\n\ntrained_attn = tf.stack([Attention[0], Attention[1], Attention[2], Attention[3], Attention[4]], 0)  # to show attention matrix\nmodel = tf.transpose(model, [1, 0, 2])  # 
model : [batch_size(=1), n_step, n_class]\nprediction = tf.argmax(model, 2)\ncost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=model, labels=targets))\noptimizer = tf.train.AdamOptimizer(0.001).minimize(cost)\n\n# Training and Test\nwith tf.Session() as sess:\n    init = tf.global_variables_initializer()\n    sess.run(init)\n    for epoch in range(2000):\n        input_batch, output_batch, target_batch = make_batch(sentences)\n        _, loss, attention = sess.run([optimizer, cost, trained_attn],\n                                      feed_dict={enc_inputs: input_batch, dec_inputs: output_batch, targets: target_batch})\n\n        if (epoch + 1) % 400 == 0:\n            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n\n    predict_batch = [np.eye(n_class)[[word_dict[n] for n in 'P P P P P'.split()]]]\n    result = sess.run(prediction, feed_dict={enc_inputs: input_batch, dec_inputs: predict_batch})\n    print(sentences[0].split(), '->', [number_dict[n] for n in result[0]])\n\n    # Show Attention\n    fig = plt.figure(figsize=(5, 5))\n    ax = fig.add_subplot(1, 1, 1)\n    ax.matshow(attention, cmap='viridis')\n    ax.set_xticklabels([''] + sentences[0].split(), fontdict={'fontsize': 14})\n    ax.set_yticklabels([''] + sentences[2].split(), fontdict={'fontsize': 14})\n    plt.show()"
  },
  {
    "path": "archive/tensorflow/v1/4-3.Bi-LSTM(Attention)/Bi-LSTM(Attention).py",
    "content": "'''\n  code by Tae Hwan Jung(Jeff Jung) @graykode\n  Reference : https://github.com/prakashpandey9/Text-Classification-Pytorch/blob/master/models/LSTM_Attn.py\n'''\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\nimport numpy as np\n\ntf.reset_default_graph()\n\n# Bi-LSTM(Attention) Parameters\nembedding_dim = 2\nn_hidden = 5 # number of hidden units in one cell\nn_step = 3 # every sentence consists of 3 words\nn_class = 2  # 0 or 1\n\n# 3-word sentences (sequence_length = 3)\nsentences = [\"i love you\", \"he loves me\", \"she likes baseball\", \"i hate you\", \"sorry for that\", \"this is awful\"]\nlabels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.\n\nword_list = \" \".join(sentences).split()\nword_list = list(set(word_list))\nword_dict = {w: i for i, w in enumerate(word_list)}\nvocab_size = len(word_dict)\n\ninput_batch = []\nfor sen in sentences:\n    input_batch.append(np.asarray([word_dict[n] for n in sen.split()]))\n\ntarget_batch = []\nfor out in labels:\n    target_batch.append(np.eye(n_class)[out]) # ONE-HOT : required by the softmax cross-entropy loss function\n\n# LSTM Model\nX = tf.placeholder(tf.int32, [None, n_step])\nY = tf.placeholder(tf.int32, [None, n_class])\nout = tf.Variable(tf.random_normal([n_hidden * 2, n_class]))\n\nembedding = tf.Variable(tf.random_uniform([vocab_size, embedding_dim]))\ninput = tf.nn.embedding_lookup(embedding, X) # [batch_size, len_seq, embedding_dim]\n\nlstm_fw_cell = tf.nn.rnn_cell.LSTMCell(n_hidden)\nlstm_bw_cell = tf.nn.rnn_cell.LSTMCell(n_hidden)\n\n# output : [batch_size, len_seq, n_hidden], states : [batch_size, n_hidden]\noutput, final_state = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell,lstm_bw_cell, input, dtype=tf.float32)\n\n# Attention\noutput = tf.concat([output[0], output[1]], 2)                             # output[0] : lstm_fw, output[1] : lstm_bw\nfinal_hidden_state = tf.concat([final_state[1][0], final_state[1][1]], 1) # final_hidden_state : [batch_size, n_hidden * 
num_directions(=2)]\nfinal_hidden_state = tf.expand_dims(final_hidden_state, 2)                # final_hidden_state : [batch_size, n_hidden * num_directions(=2), 1]\n\nattn_weights = tf.squeeze(tf.matmul(output, final_hidden_state), 2) # attn_weights : [batch_size, n_step]\nsoft_attn_weights = tf.nn.softmax(attn_weights, 1)\ncontext = tf.matmul(tf.transpose(output, [0, 2, 1]), tf.expand_dims(soft_attn_weights, 2)) # context : [batch_size, n_hidden * num_directions(=2), 1]\ncontext = tf.squeeze(context, 2) # [batch_size, n_hidden * num_directions(=2)]\n\nmodel = tf.matmul(context, out)\n\ncost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y))\noptimizer = tf.train.AdamOptimizer(0.001).minimize(cost)\n\n# Model-Predict\nhypothesis = tf.nn.softmax(model)\npredictions = tf.argmax(hypothesis, 1)\n\n# Training\nwith tf.Session() as sess:\n    init = tf.global_variables_initializer()\n    sess.run(init)\n    for epoch in range(5000):\n        _, loss, attention = sess.run([optimizer, cost, soft_attn_weights], feed_dict={X: input_batch, Y: target_batch})\n        if (epoch + 1)%1000 == 0:\n            print('Epoch:', '%06d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n\n    # Test\n    test_text = 'sorry hate you'\n    tests = [np.asarray([word_dict[n] for n in test_text.split()])]\n\n    predict = sess.run([predictions], feed_dict={X: tests})\n    result = predict[0][0]\n    if result == 0:\n        print(test_text,\"is Bad Mean...\")\n    else:\n        print(test_text,\"is Good Mean!!\")\n\n    fig = plt.figure(figsize=(6, 3)) # [batch_size, n_step]\n    ax = fig.add_subplot(1, 1, 1)\n    ax.matshow(attention, cmap='viridis')\n    ax.set_xticklabels([''] + ['first_word', 'second_word', 'third_word'], fontdict={'fontsize': 14}, rotation=90)\n    ax.set_yticklabels([''] + ['batch_1', 'batch_2', 'batch_3', 'batch_4', 'batch_5', 'batch_6'], fontdict={'fontsize': 14})\n    plt.show()"
  }
]