[
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nenv/\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\n*.egg-info/\n.installed.cfg\n*.egg\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*,cover\n.hypothesis/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\ntarget/\n\n# IPython Notebook\n.ipynb_checkpoints\n\n# pyenv\n.python-version\n\n# celery beat schedule file\ncelerybeat-schedule\n\n# dotenv\n.env\n\n# virtualenv\nvenv/\nENV/\n\n# Spyder project settings\n.spyderproject\n\n# Rope project settings\n.ropeproject\n"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2017 Keon Kim\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "PointerLSTM.py",
    "content": "import keras.backend as K\nfrom keras.activations import tanh, softmax\nfrom keras.engine import InputSpec\nfrom keras.layers import LSTM\nimport keras\n\n\nclass Attention(keras.layers.Layer):\n    \"\"\"\n        Attention layer\n    \"\"\"\n\n    def __init__(self, hidden_dimensions, name='attention'):\n        super(Attention, self).__init__(name=name, trainable=True)\n        self.W1 = keras.layers.Dense(hidden_dimensions, use_bias=False)\n        self.W2 = keras.layers.Dense(hidden_dimensions, use_bias=False)\n        self.V = keras.layers.Dense(1, use_bias=False)\n\n    def call(self, encoder_outputs, dec_output, mask=None):\n\n        w1_e = self.W1(encoder_outputs)\n        w2_d = self.W2(dec_output)\n        tanh_output = tanh(w1_e + w2_d)\n        v_dot_tanh = self.V(tanh_output)\n        if mask is not None:\n            v_dot_tanh += (mask * -1e9)\n        attention_weights = softmax(v_dot_tanh, axis=1)\n        att_shape = K.shape(attention_weights)\n        return K.reshape(attention_weights, (att_shape[0], att_shape[1]))\n\n\nclass Decoder(keras.layers.Layer):\n    \"\"\"\n        Decoder class for PointerLayer\n    \"\"\"\n\n    def __init__(self, hidden_dimensions):\n        super(Decoder, self).__init__()\n        self.lstm = keras.layers.LSTM(\n            hidden_dimensions, return_sequences=False, return_state=True)\n\n    def call(self, x, hidden_states):\n        dec_output, state_h, state_c = self.lstm(\n            x, initial_state=hidden_states)\n        return dec_output, [state_h, state_c]\n\n    def get_initial_state(self, inputs):\n        return self.lstm.get_initial_state(inputs)\n\n    def process_inputs(self, x_input, initial_states, constants):\n        return self.lstm._process_inputs(x_input, initial_states, constants)\n\n\nclass PointerLSTM(keras.layers.Layer):\n    \"\"\"\n        PointerLSTM\n    \"\"\"\n\n    def __init__(self, hidden_dimensions, name='pointer', **kwargs):\n        super(PointerLSTM, self).__init__(\n            hidden_dimensions, name=name, **kwargs)\n        self.hidden_dimensions = hidden_dimensions\n        self.attention = Attention(hidden_dimensions)\n        self.decoder = Decoder(hidden_dimensions)\n\n    def build(self, input_shape):\n        super(PointerLSTM, self).build(input_shape)\n        self.input_spec = [InputSpec(shape=input_shape)]\n\n    def call(self, x, training=None, mask=None, states=None):\n        \"\"\"\n        :param Tensor x: Should be the output of the decoder\n        :param Tensor states: last state of the decoder\n        :param Tensor mask: The mask to apply\n        :return: Pointers probabilities\n        \"\"\"\n\n        input_shape = self.input_spec[0].shape\n        en_seq = x\n        x_input = x[:, input_shape[1] - 1, :]\n        x_input = K.repeat(x_input, input_shape[1])\n        if states:\n            initial_states = states\n        else:\n            initial_states = self.decoder.get_initial_state(x_input)\n\n        constants = []\n        preprocessed_input, _, constants = self.decoder.process_inputs(\n            x_input, initial_states, constants)\n        constants.append(en_seq)\n        last_output, outputs, states = K.rnn(self.step, preprocessed_input,\n                                             initial_states,\n                                             go_backwards=self.decoder.lstm.go_backwards,\n                                             constants=constants,\n                                             input_length=input_shape[1])\n\n        return outputs\n\n    def step(self, x_input, states):\n        x_input = K.expand_dims(x_input,1)\n        input_shape = self.input_spec[0].shape\n        en_seq = states[-1]\n        _, [h, c] = self.decoder(x_input, states[:-1])\n        dec_seq = K.repeat(h, input_shape[1])\n        probs = self.attention(dec_seq, en_seq)\n        return probs, [h, c]\n\n    def get_output_shape_for(self, input_shape):\n        # output shape is not affected by the attention component\n        return (input_shape[0], input_shape[1], input_shape[1])\n\n    def compute_output_shape(self, input_shape):\n        return (input_shape[0], input_shape[1], input_shape[1])\n"
  },
  {
    "path": "README.md",
    "content": "## Code upgrade of [Pointer Networks](http://arxiv.org/pdf/1511.06391v4.pdf) to run on keras>=2.4.3 and tensorflow>=2.2.0 \nThe original author code is at https://github.com/keon/pointer-networks.git.\n"
  },
  {
    "path": "requirement.txt",
    "content": "keras==2.4.3\ntensorflow==2.2.0"
  },
  {
    "path": "run.py",
    "content": "from keras.models import Model\nfrom keras.layers import LSTM, Input\nfrom keras.callbacks import LearningRateScheduler\nfrom keras.utils.np_utils import to_categorical\nfrom pointer import PointerLSTM\nimport pickle\nimport tsp_data as tsp\nimport numpy as np\nimport keras\n\n\ndef scheduler(epoch):\n    if epoch < nb_epochs/4:\n        return learning_rate\n    elif epoch < nb_epochs/2:\n        return learning_rate*0.5\n    return learning_rate*0.1\n\nprint(\"preparing dataset...\")\nt = tsp.Tsp()\nX, Y = t.next_batch(10000)\nx_test, y_test = t.next_batch(1000)\n\nYY = []\nfor y in Y:\n    YY.append(to_categorical(y))\nYY = np.asarray(YY)\n\nhidden_size = 128\nseq_len = 10\nnb_epochs = 10000\nlearning_rate = 0.1\n\nprint(\"building model...\")\nmain_input = Input(shape=(seq_len, 2), name='main_input')\n\nencoder,state_h, state_c = LSTM(hidden_size,return_sequences = True, name=\"encoder\",return_state=True)(main_input)\ndecoder = PointerLSTM(hidden_size, name=\"decoder\")(encoder,states=[state_h, state_c])\n\nmodel = Model(main_input, decoder)\nprint(model.summary())\nmodel.compile(optimizer='adam',\n              loss='categorical_crossentropy',\n              metrics=['accuracy'])\n\nmodel.fit(X, YY, epochs=nb_epochs, batch_size=64,)\nprint(model.predict(x_test))\nprint('evaluate : ',model.evaluate(x_test,to_categorical(y_test)))\nprint(\"------\")\nprint(to_categorical(y_test))\nmodel.save_weights('model_weight_100.hdf5')\n"
  },
  {
    "path": "sort_data.py",
    "content": "from __future__ import absolute_import, division, print_function\nimport numpy as np\n\n\nclass DataGenerator(object):\n    def next_batch(self, batch_size, N, train_mode=True):\n        \"\"\"Return the next `batch_size` examples from this data set.\"\"\"\n\n        # A sequence of random numbers from [0, 1]\n        encoder_batch = []\n\n        # Sorted sequence that we feed to encoder\n        # In inference we feed an unordered sequence again\n        decoder_batch = []\n\n        # Ordered sequence where one hot vector encodes\n        # position in the input array\n        target_batch = []\n        for _ in range(batch_size):\n            encoder_batch.append(np.zeros([N, 1]))\n        for _ in range(batch_size):\n            decoder_batch.append(np.zeros([N, 1]))\n            target_batch.append(np.zeros([N, N]))\n\n        encoder_batch = np.asarray(encoder_batch)\n        decoder_batch = np.asarray(decoder_batch)\n        target_batch = np.asarray(target_batch)\n\n        for b in range(batch_size):\n            shuffle = np.random.permutation(N)\n            sequence = np.sort(np.random.random(N))\n            shuffled_sequence = sequence[shuffle]\n\n            for i in range(N):\n                encoder_batch[b][i] = shuffled_sequence[i]\n                if train_mode:\n                    decoder_batch[b][i] = sequence[i]\n                else:\n                    decoder_batch[b][i] = shuffled_sequence[i]\n                target_batch[b, i][shuffle[i]] = 1.0\n\n            # Points to the stop symbol\n            # target_batch[b, N][0] = 1.0\n\n        return encoder_batch, decoder_batch, target_batch\n\n\nif __name__ == \"__main__\":\n    seq_len = 3\n    batch_size = 3\n    dataset = DataGenerator()\n    enc_input, dec_input, targets = dataset.next_batch(batch_size, seq_len)\n    print(\"batch_size\", batch_size, \"seq_len\", seq_len)\n    print(\"-------------encoder input-------------\")\n    print(enc_input.shape)\n    print(enc_input)\n    print(\"-------------decoder input-------------\")\n    print(dec_input.shape)\n    print(dec_input)\n    print(\"-------------   targets   -------------\")\n    print(targets.shape)\n    print(targets)\n"
  },
  {
    "path": "tsp_data.py",
    "content": "import math\nimport numpy as np\nimport random\nimport itertools\n\n\nclass Tsp:\n    def next_batch(self, batch_size=1):\n        X, Y = [], []\n        for b in range(batch_size):\n            print(\"preparing dataset... %s/%s\" % (b, batch_size))\n            points = self.generate_data()\n            solved = self.solve_tsp_dynamic(points)\n            X.append(points), Y.append(solved)\n        return np.asarray(X), np.asarray(Y)\n\n    def length(self, x, y):\n        return (math.sqrt((x[0]-y[0])**2 + (x[1]-y[1])**2))\n\n    def solve_tsp_dynamic(self, points):\n        # calc all lengths\n        all_distances = [[self.length(x, y) for y in points] for x in points]\n        # initial value - just distance from 0 to\n        # every other point + keep the track of edges\n        A = {(frozenset([0, idx+1]), idx+1): (dist, [0, idx+1])\n             for idx, dist in enumerate(all_distances[0][1:])}\n        cnt = len(points)\n        for m in range(2, cnt):\n            B = {}\n            for S in [frozenset(C) | {0}\n                      for C in itertools.combinations(range(1, cnt), m)]:\n                for j in S - {0}:\n                    B[(S, j)] = min([(A[(S-{j}, k)][0] + all_distances[k][j],\n                                      A[(S-{j}, k)][1] + [j])\n                                     for k in S if k != 0 and k != j])\n            A = B\n        res = min([(A[d][0] + all_distances[0][d[1]], A[d][1])\n                   for d in iter(A)])\n        return res[1]\n\n    def generate_data(self, N=10):\n        radius = 1\n        rangeX = (0, 10)\n        rangeY = (0, 10)\n        qty = N\n\n        deltas = set()\n        for x in range(-radius, radius+1):\n            for y in range(-radius, radius+1):\n                if x*x + y*y <= radius*radius:\n                    deltas.add((x, y))\n\n        randPoints = []\n        excluded = set()\n        i = 0\n        while i < qty:\n            x = random.randrange(*rangeX)\n            y = random.randrange(*rangeY)\n            if (x, y) in excluded:\n                continue\n            randPoints.append((x, y))\n            i += 1\n            excluded.update((x+dx, y+dy) for (dx, dy) in deltas)\n        return randPoints\n\nif __name__ == \"__main__\":\n    p = Tsp()\n    X, Y = p.next_batch(1)\n    print(X)\n    print(Y)\n"
  }
]