[
  {
    "path": ".gitignore",
    "content": "# Compiled source #\n###################\n*.com\n*.class\n*.dll\n*.exe\n*.o\n*.so\n*.pyc\n*.mat\n*.png\n*.jpg\n\n# Packages #\n############\n# it's better to unpack these files and commit the raw source\n# git has its own built in compression methods\n*.7z\n*.dmg\n*.gz\n*.iso\n*.jar\n*.rar\n*.tar\n*.zip\n*~\n\n.gitlab\n.github\ndata/label/*\ndata/tf_records/*\nmodel/*\n\n\n# Logs and databases #\n######################\n*.log\n*.sql\n*.sqlite\n*.out\n\n# OS generated files #\n######################\n.DS_Store\n.DS_Store?\n._*\n.Spotlight-V100\n.Trashes\nehthumbs.db\nThumbs.db\n"
  },
  {
    "path": "Dockerfile",
    "content": "FROM nvidia/cuda:8.0-cudnn5-devel\n\n# Pick up some TF dependencies\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n        build-essential \\\n        curl \\\n        libfreetype6-dev \\\n        libpng12-dev \\\n        libzmq3-dev \\\n        pkg-config \\\n        python \\\n        python-dev \\\n        rsync \\\n        software-properties-common \\\n        unzip \\\n        && \\\n    apt-get clean && \\\n    rm -rf /var/lib/apt/lists/*\n\nRUN curl -O https://bootstrap.pypa.io/get-pip.py && \\\n    python get-pip.py && \\\n    rm get-pip.py\n\nRUN pip --no-cache-dir install \\\n        ipykernel \\\n        jupyter \\\n        matplotlib \\\n        numpy \\\n        scipy \\\n\t\tscikit-learn \\\n        && \\\n    python -m ipykernel.kernelspec\n\nENV TENSORFLOW_VERSION 0.11.0\n\n# --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #\n# These lines will be edited automatically by parameterized_docker_build.sh. #\n# COPY _PIP_FILE_ /\n# RUN pip --no-cache-dir install /_PIP_FILE_\n# RUN rm -f /_PIP_FILE_\n\n# Install TensorFlow GPU version.\nRUN pip --no-cache-dir install \\\n    http://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-${TENSORFLOW_VERSION}-cp27-none-linux_x86_64.whl\n# --- ~ DO NOT EDIT OR DELETE BETWEEN THE LINES --- #\n\n# TensorBoard\nEXPOSE 6006\n# IPython\nEXPOSE 8888\n\nWORKDIR \"/root\"\n\nCMD [\"/bin/bash\"]\n"
  },
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"{}\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright 2017 Xintong Han\n   \n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "README.md",
    "content": "## Bi-LSTM model for learning fashion compatibility. \nCode for ACM MM'17 paper \"Learning Fashion Compatibility with Bidirectional LSTMs\" [[paper]](https://arxiv.org/pdf/1707.05691.pdf).\n\nParts of the code are from an older version of Tensorflow's im2txt repo [GitHub](https://github.com/tensorflow/models/blob/master/research/im2txt).\n\n\nThe corresponding dataset can be found on [GitHub](https://github.com/xthan/polyvore-dataset) or [Google Drive](https://drive.google.com/drive/folders/0B4Eo9mft9jwoVDNEWlhEbUNUSE0?resourcekey=0-vQg9TMSLKnmPCuuWwl5Ebw&usp=sharing).\n\n### Contact\nAuthor: Xintong Han\n\nContact: xintong@umd.edu\n\n### Polyvore.com\n\n[Polyvore.com](https://www.polyvore.com/outfits/search.sets?date=day&item_count.from=4&item_count.to=10) is a popular fashion website, where users can create and upload outfit data. Here is an [example](https://www.polyvore.com/striped_blazer/set?id=227166819).\n\n### Required Packages\n\n* **TensorFlow** ~~0.10.0~~ 0.11 ([instructions](https://www.tensorflow.org/install/))\n* **NumPy** ([instructions](http://www.scipy.org/install.html))\n* **scikit-learn**\n\nI actually used some version between r0.10 to r0.11 as the first commit of Tensorflow's im2txt, you might need to install r0.11 and modify some functions to run the code. Newer versions of Tensorflow prevent me from doing inference with my old code and restoring my models trained using this version. However, I have a commit that supports training using TensorFlow 1.0 or greater [dd1e03e](https://github.com/xthan/polyvore/tree/dd1e03e27fab12ef0051dd2a8ba7a61caaded499). 
I will create a new repo supporting TensorFlow version >= 1.0.\n\n\n#### Recommended Setup\n\n* [**docker-ce**](https://docs.docker.com/install/linux/docker-ce/ubuntu/)\n* [**nvidia-docker**](https://github.com/NVIDIA/nvidia-docker)\n* build TensorFlow image\n\nexecute the command below at this repository root: \n\n```sh\ndocker build -t tensorflow:0.11 .\n```\n\n* run container\n\n```sh\ndocker run -it \\\n    --runtime=nvidia \\\n    -p 8888:8888 \\\n    -p 6006:6006 \\\n    -v $CURRENT:/root/workdir \\\n\ttensorflow:0.11\n```\n\n### Prepare the Training Data\nDownload the dataset and put it in the ./data folder:\n\n0. Decompress polyvore.tar.gz into ./data/label/\n1. Decompress plyvore-images.tar.gz to ./data/, so all outfit image folders are in ./data/images/\n2. Run the following commands to generate TFRecords in ./data/tf_records/:\n```\npython data/build_polyvore_data.py\n```\n\n### Download the Inception v3 Checkpoint\n\nThis model requires a pretrained *Inception v3* checkpoint file to initialize the network.\n\n\nThis checkpoint file is provided by the\n[TensorFlow-Slim image classification library](https://github.com/tensorflow/models/tree/master/research/slim#tensorflow-slim-image-classification-library)\nwhich provides a suite of pre-trained image classification models. 
You can read\nmore about the models provided by the library\n[here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models).\n\nRun the following commands to download the *Inception v3* checkpoint.\n\n```shell\n# Save the Inception v3 checkpoint in model folder.\nwget \"http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz\"\ntar -xvf \"inception_v3_2016_08_28.tar.gz\" -C ${INCEPTION_DIR}\nrm \"inception_v3_2016_08_28.tar.gz\"\n```\n### Training\n```shell\n./train.sh\n```\nThe models will be saved in model/bi_lstm\n\n### Inference\n\n#### Trained model\nDownload the trained models from the final_model folder on [Google Drive](https://drive.google.com/drive/folders/0B4Eo9mft9jwoVDNEWlhEbUNUSE0) and put it in ./model/final_model/model.ckpt-34865.\n\n#### Extract features of test data\nTo do all three kinds of tasks mentioned in the paper, we first need to extract the features of test images:\n```\n./extract_features.sh\n```\nAnd the image features will be in data/features/test_features.pkl.\n\nYou can also perform end-to-end inference by modifying the corresponding code. For example, input a sequence of images and output a compatibility score. \n\n#### Fashion fill-in-the-blank\n```\n./fill_in_blank.sh\n```\nNote that we further optimized some design choices in the released model. It can achieve 73.5% accuracy, which is higher than the number reported in our paper.\n\n#### Compatibility prediction\n```\n./predict_compatibility.sh\n```\nDifferent from the training process where the loss is calculated in each mini batch, during testing, we get the loss against the whole test set. This is pretty slow, maybe a better method could be used (e.g., using distance between LSTM predicted representation and the target image embedding).\n\n\n#### Outfit generation\n```\n./outfit_generation.sh\n```\n\nIt generates an outfit given the image/text query in query.json, and saves the results in the results dir. 
For demo purposes, the query.json only contains one example:\n\n<img src=\"https://github.com/xthan/polyvore/raw/master/results/outfit.png\" height=\"300\">\n\nwhere green boxes indicate the image query, and the text query is \"blue\".\n\n\n#### Some notes\nWe found that a late fusion of different single models (Bi-LSTM w/o VSE + VSE + Siamese) can achieve superior results on all tasks. These models are also available in the same folder on  [Google Drive](https://drive.google.com/drive/folders/0B4Eo9mft9jwoVDNEWlhEbUNUSE0).\n\n### Todo list\n- [x] Add multiple choice inference code.\n- [x] Add compatibility prediction inference code.\n- [x] Add image outfit generation code. Very similar to compatibility prediction, you can try to do it yourself if in a hurry.\n- [x] Release trained models.\n- [x] Release Siamese/VSE models.\n- [ ] Polish the code.\n\n### Citation\n\nIf this code or the Polyvore dataset helps your research, please cite our paper:\n\n    @inproceedings{han2017learning,\n      author = {Han, Xintong and Wu, Zuxuan and Jiang, Yu-Gang and Davis, Larry S},\n      title = {Learning Fashion Compatibility with Bidirectional LSTMs},\n      booktitle = {ACM Multimedia},\n      year  = {2017},\n    }\n"
  },
  {
    "path": "data/build_polyvore_data.py",
    "content": "# Copyright 2017 Xintong Han. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"Prepare Polyvore outfit data.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n\nfrom datetime import datetime\nimport json\nimport os\nimport random\nimport sys\nimport threading\n\nimport numpy as np\nimport tensorflow as tf\n\ntf.app.flags.DEFINE_string('train_label', 'data/label/train_no_dup.json',\n                           'Training label file')\ntf.app.flags.DEFINE_string('test_label', 'data/label/test_no_dup.json',\n                           'Testing label file')\ntf.app.flags.DEFINE_string('valid_label','data/label/valid_no_dup.json',\n                           'Validation label file')\ntf.app.flags.DEFINE_string('output_directory', 'data/tf_records/',\n                           'Output data directory')\ntf.app.flags.DEFINE_string('image_dir', 'data/images/',\n                           'Directory of image patches')\ntf.app.flags.DEFINE_string('word_dict_file', 'data/final_word_dict.txt',\n                           'File containing the word dictionary.')\n\ntf.app.flags.DEFINE_integer('train_shards', 128,\n                            'Number of shards in training TFRecord files.')\ntf.app.flags.DEFINE_integer('test_shards', 16,\n                            'Number of 
shards in test TFRecord files.')\ntf.app.flags.DEFINE_integer('valid_shards', 8,\n                            'Number of shards in validation TFRecord files.')\ntf.app.flags.DEFINE_integer('num_threads', 8,\n                            'Number of threads to preprocess the images.')\n\nFLAGS = tf.flags.FLAGS\n\n\nclass Vocabulary(object):\n  \"\"\"Simple vocabulary wrapper.\"\"\"\n\n  def __init__(self, vocab, unk_id):\n    \"\"\"Initializes the vocabulary.\n    Args:\n      vocab: A dictionary of word to word_id.\n      unk_id: Id of the special 'unknown' word.\n    \"\"\"\n    self._vocab = vocab\n    self._unk_id = unk_id\n\n  def word_to_id(self, word):\n    \"\"\"Returns the integer id of a word string.\"\"\"\n    if word in self._vocab:\n      return self._vocab[word]\n    else:\n      print('unknow: ' + word)\n      return self._unk_id\n\n\ndef _is_png(filename):\n  \"\"\"Determine if a file contains a PNG format image.\n  Args:\n    filename: string, path of the image file.\n  Returns:\n    boolean indicating if the image is a PNG.\n  \"\"\"\n  return '.png' in filename\n\n\ndef _int64_feature(value):\n  \"\"\"Wrapper for inserting int64 features into Example proto.\"\"\"\n  if not isinstance(value, list):\n    value = [value]\n  return tf.train.Feature(int64_list=tf.train.Int64List(value=value))\n  \n\ndef _float_feature(value):\n  \"\"\"Wrapper for inserting float features into Example proto.\"\"\"\n  if not isinstance(value, list):\n    value = [value]\n  return tf.train.Feature(float_list=tf.train.FloatList(value=value))\n  \n  \ndef _bytes_feature(value):\n  \"\"\"Wrapper for inserting bytes features into Example proto.\"\"\"\n  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[str(value)]))\n\n\ndef _int64_feature_list(values):\n  \"\"\"Wrapper for inserting an int64 FeatureList into a SequenceExample proto.\"\"\"\n  return tf.train.FeatureList(feature=[_int64_feature(v) for v in values])\n\n\ndef _int64_list_feature_list(values):\n  
\"\"\"Wrapper for inserting an int64 list FeatureList into a SequenceExample proto.\"\"\"\n  return tf.train.FeatureList(feature=[_int64_feature(v) for v in values])\n\n\ndef _bytes_feature_list(values):\n  \"\"\"Wrapper for inserting a bytes FeatureList into a SequenceExample proto.\"\"\"\n  return tf.train.FeatureList(feature=[_bytes_feature(v) for v in values])\n\ndef _float_feature_list(values):\n  \"\"\"Wrapper for inserting a float FeatureList into a SequenceExample proto.\"\"\"\n  return tf.train.FeatureList(feature=[_float_feature(v) for v in values])\n\n\ndef _to_sequence_example(set_info, decoder, vocab):\n  \"\"\"Builds a SequenceExample proto for an outfit.\n  \"\"\"\n  set_id = set_info['set_id']\n  image_data = []\n  image_ids = []\n  caption_data = []\n  caption_ids = []\n  for image_info in set_info['items']:\n    filename = os.path.join(FLAGS.image_dir, set_id,\n                            str(image_info['index']) + '.jpg')\n    with open(filename, \"r\") as f:\n      encoded_image = f.read()\n    try:\n      decoded_image = decoder.decode_jpeg(encoded_image)\n    except (tf.errors.InvalidArgumentError, AssertionError):\n      print(\"Skipping file with invalid JPEG data: %s\" % filename)\n      return\n\n    image_data.append(encoded_image)\n    image_ids.append(image_info['index'])\n    caption = image_info['name'].encode('utf-8')\n    caption_data.append(caption)\n    caption_id = [vocab.word_to_id(word) + 1 for word in caption.split()]\n    caption_ids.append(caption_id)\n\n  feature = {}\n  # Only keep 8 images, if outfit has less than 8 items, repeat the last one.\n  for index in range(8):\n    if index >= len(image_data):\n      feature['images/' + str(index)] = _bytes_feature(image_data[-1])\n    else:\n      feature['images/' + str(index)] = _bytes_feature(image_data[index])\n    \n  feature[\"set_id\"] = _bytes_feature(set_id)\n  feature[\"set_url\"] = _bytes_feature(set_info['set_url'])\n  # Likes and Views are not used in our model, but 
we put it into TFRecords.\n  feature[\"likes\"] = _int64_feature(set_info['likes'])\n  feature[\"views\"] = _int64_feature(set_info['views'])\n\n  context = tf.train.Features(feature=feature)\n\n  feature_lists = tf.train.FeatureLists(feature_list={\n      \"caption\": _bytes_feature_list(caption_data),\n      \"caption_ids\": _int64_list_feature_list(caption_ids),\n      \"image_index\": _int64_feature_list(image_ids)\n  })\n\n  sequence_example = tf.train.SequenceExample(\n      context=context, feature_lists=feature_lists)\n\n  return sequence_example\n\n\nclass ImageCoder(object):\n  \"\"\"Helper class that provides TensorFlow image coding utilities.\"\"\"\n\n  def __init__(self):\n    # Create a single Session to run all image coding calls.\n    self._sess = tf.Session()\n\n    # Initializes function that converts PNG to JPEG data.\n    self._png_data = tf.placeholder(dtype=tf.string)\n    image = tf.image.decode_png(self._png_data, channels=3)\n    self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100)\n\n    # Initializes function that decodes RGB JPEG data.\n    self._decode_jpeg_data = tf.placeholder(dtype=tf.string)\n    self._decode_jpeg = tf.image.decode_jpeg(\n                                self._decode_jpeg_data, channels=3)\n\n  def png_to_jpeg(self, image_data):\n    return self._sess.run(self._png_to_jpeg,\n                          feed_dict={self._png_data: image_data})\n\n  def decode_jpeg(self, image_data):\n    image = self._sess.run(self._decode_jpeg,\n                           feed_dict={self._decode_jpeg_data: image_data})\n    assert len(image.shape) == 3\n    assert image.shape[2] == 3\n    return image\n\n\ndef _process_image_files_batch(coder, thread_index, ranges, name,\n                               all_sets, vocab, num_shards):\n  \"\"\"Processes and saves list of images as TFRecord in 1 thread.\n  \"\"\"\n  # Each thread produces N shards where N = int(num_shards / num_threads).\n  # For instance, if 
num_shards = 128, and the num_threads = 2, then the first\n  # thread would produce shards [0, 64).\n  num_threads = len(ranges)\n  assert not num_shards % num_threads\n  num_shards_per_batch = int(num_shards / num_threads)\n\n  shard_ranges = np.linspace(ranges[thread_index][0],\n                             ranges[thread_index][1],\n                             num_shards_per_batch + 1).astype(int)\n  num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0]\n\n  counter = 0\n  for s in xrange(num_shards_per_batch):\n    # Generate a sharded version of the file name, e.g. 'train-00002-of-00010'\n    shard = thread_index * num_shards_per_batch + s\n    output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards)\n    output_file = os.path.join(FLAGS.output_directory, output_filename)\n    writer = tf.python_io.TFRecordWriter(output_file)\n\n    shard_counter = 0\n    files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int)\n    for i in files_in_shard:\n      sequence_example = _to_sequence_example(all_sets[i], coder, vocab)\n      if not sequence_example:\n        print('fail for set: ' + all_sets[i]['set_id'])\n        continue\n      writer.write(sequence_example.SerializeToString())\n      shard_counter += 1\n      counter += 1\n\n      if not counter % 100:\n        print('%s [thread %d]: Processed %d of %d images in thread batch.' %\n              (datetime.now(), thread_index, counter, num_files_in_thread))\n        sys.stdout.flush()\n\n    writer.close()\n    print('%s [thread %d]: Wrote %d images to %s' %\n          (datetime.now(), thread_index, shard_counter, output_file))\n    sys.stdout.flush()\n    shard_counter = 0\n  print('%s [thread %d]: Wrote %d images to %d shards.' 
%\n        (datetime.now(), thread_index, counter, num_files_in_thread))\n  sys.stdout.flush()\n\n\ndef _process_image_files(name, all_sets, vocab, num_shards):\n  \"\"\"Process and save list of images as TFRecord of Example protos.\n  \"\"\"\n\n  # Break all images into batches with a [ranges[i][0], ranges[i][1]].\n  spacing = np.linspace(0, len(all_sets), FLAGS.num_threads + 1).astype(np.int)\n  ranges = []\n  for i in xrange(len(spacing) - 1):\n    ranges.append([spacing[i], spacing[i+1]])\n\n  # Launch a thread for each batch.\n  print('Launching %d threads for spacings: %s' % (FLAGS.num_threads, ranges))\n  sys.stdout.flush()\n\n  # Create a mechanism for monitoring when all threads are finished.\n  coord = tf.train.Coordinator()\n\n  # Create a generic TensorFlow-based utility for converting all image codings.\n  coder = ImageCoder()\n\n  threads = []\n  for thread_index in xrange(len(ranges)):\n    args = (coder, thread_index, ranges, name, all_sets, vocab, num_shards)\n    t = threading.Thread(target=_process_image_files_batch, args=args)\n    t.start()\n    threads.append(t)\n\n  # Wait for all the threads to terminate.\n  coord.join(threads)\n  print('%s: Finished writing all %d fashion sets in data set.' 
%\n        (datetime.now(), len(all_sets)))\n  sys.stdout.flush()\n\n\ndef _create_vocab(filename):\n  \"\"\"Creates the vocabulary of word to word_id.\n  \"\"\"\n  # Create the vocabulary dictionary.\n  word_counts = open(filename).read().splitlines()\n  reverse_vocab = [x.split()[0] for x in word_counts]\n  unk_id = len(reverse_vocab)\n  vocab_dict = dict([(x, y) for (y, x) in enumerate(reverse_vocab)])\n  vocab = Vocabulary(vocab_dict, unk_id)\n\n  return vocab\n\n\ndef _find_image_files(labels_file, name):\n  \"\"\"Build a list of all images files and labels in the data set.\n  \"\"\"\n  \n  # Read image ids\n  all_sets = json.load(open(labels_file))\n  \n  # Shuffle the ordering of all image files in order to guarantee\n  # random ordering of the images with respect to label in the\n  # saved TFRecord files. Make the randomization repeatable.\n  \n  shuffled_index = range(len(all_sets))\n  random.seed(12345)\n  random.shuffle(shuffled_index)\n\n  all_sets = [all_sets[i] for i in shuffled_index]  \n  print('Found %d fashion sets.' 
% (len(all_sets)))\n  return all_sets\n\ndef _process_dataset(name, label_file, vocab, num_shards):\n  \"\"\"Process a complete data set and save it as a TFRecord.\n  Args:\n    name: string, unique identifier specifying the data set.\n    directory: string, root path to the data set.\n    num_shards: integer number of shards for this data set.\n    labels_file: string, path to the labels file.\n  \"\"\"\n  print(label_file)\n  all_sets  = _find_image_files(label_file, name)\n  _process_image_files(name, all_sets, vocab, num_shards)\n\n\ndef main(unused_argv):\n  assert not FLAGS.train_shards % FLAGS.num_threads, (\n      'Please make the FLAGS.num_threads commensurate with FLAGS.train_shards')\n  assert not FLAGS.test_shards % FLAGS.num_threads, (\n      'Please make the FLAGS.num_threads commensurate with '\n      'FLAGS.test_shards')\n  assert not FLAGS.valid_shards % FLAGS.num_threads, (\n      'Please make the FLAGS.num_threads commensurate with '\n      'FLAGS.valid_shards')\n  print('Saving results to %s' % FLAGS.output_directory)\n\n\n  vocab = _create_vocab(FLAGS.word_dict_file)\n  # Run it!\n  _process_dataset('valid-no-dup', FLAGS.valid_label, vocab, FLAGS.valid_shards)\n  _process_dataset('test-no-dup', FLAGS.test_label, vocab, FLAGS.test_shards)\n  _process_dataset('train-no-dup', FLAGS.train_label, vocab, FLAGS.train_shards)\n  \n\nif __name__ == '__main__':\n  tf.app.run()\n"
  },
  {
    "path": "data/features/README.md",
    "content": "Extracted image features go here.\n"
  },
  {
    "path": "data/final_word_dict.txt",
    "content": "black   9909\nleather 8516\nbag 6350\nwomen's 5810\ntop 4504\njeans   4133\ndress   4100\ngold    4031\nwhite   3837\nearrings    3619\niphone  3613\nsunglasses  3382\nnecklace    3381\nskirt   3254\nboots   3142\nsuede   3004\njacket  2922\ncase    2871\ndenim   2763\nring    2703\nmini    2622\nyoins   2563\nhigh    2535\nblue    2533\nclutch  2497\nplus    2465\nbracelet    2418\nskinny  2164\ncoat    2127\nshoulder    2125\nsandals 2122\nlong    2112\nset 2106\nwomen   2106\nlace    2069\nred 2014\nnew 1996\nprint   1986\npink    1961\nsleeve  1954\nankle   1949\nsilver  1894\npre-owned   1877\nlipstick    1861\nshorts  1850\ntopshop 1818\nsweater 1788\nsize    1749\nfaux    1711\nvintage 1699\nshoes   1693\nrose    1689\npumps   1651\nde  1642\nmichael 1634\ncrop    1634\ncolor   1598\neye 1554\nwatch   1541\nshirt   1538\nround   1527\nbackpack    1526\ndiamond 1506\nbrown   1499\ntote    1477\nvelvet  1474\nfloral  1454\nneck    1382\nlip 1372\nsaint   1356\nlaurent 1328\ncropped 1299\nstud    1282\nwool    1278\nblouse  1277\nsheinside   1264\nshein   1260\nt-shirt 1209\nchanel  1199\nsmall   1191\ngucci   1187\ncrossbody   1185\nfashion 1184\nkate    1170\nshort   1156\nstrap   1156\nstar    1154\nclassic 1154\nsneakers    1153\nwomens  1151\nheel    1137\ncover   1101\ntoe 1075\nkors    1075\nhat 1066\nnail    1061\ngrey    1051\nchain   1044\nplatform    1043\nboho    1040\nalexander   1040\npants   1035\nflower  1033\npearl   1018\nhair    1009\ndolce   1007\ncrystal 996\nmetal   995\nmetallic    994\ncotton  988\nsilk    983\ndesign  979\ngreen   974\nlove    969\nmarc    969\nvalentino   951\ntank    951\nripped  935\nyork    927\nstriped 921\ncollection  919\nyellow  918\nconverse    914\ngabbana 912\nprinted 911\nembellished 910\nmascara 900\nheart   896\nknit    894\ndouble  885\nspade   876\nwaist   875\nfur 867\nh&m 862\nchoker  846\nlarge   841\nbow 830\neau 828\nmedium  826\nlight   819\npu  818\nflat    811\nlace-up 811\nboohoo 
 811\nmatte   807\njewelry 805\nembroidered 801\nheels   791\nstyle   790\ntee 788\npendant 772\npatent  769\ntaylor  766\nmiu 762\nwrap    761\ncasual  759\nzip 756\nbeauty  747\nslim    738\ncollar  737\ncharlotte   736\ndistressed  733\nnars    725\nsatchel 724\nchristian   722\nscarf   720\nblazer  719\ngivenchy    717\nsleeveless  709\nsandal  707\ndrop    702\njean    699\nmakeup  699\nframe   696\nisland  696\ncuff    696\nfront   696\nriver   691\nboot    683\npencil  682\nbobbi   664\nforever 663\njacobs  660\nliquid  647\ncream   646\nla  644\nback    641\nlook    641\ntassel  640\nshadow  633\nstripe  632\ncashmere    629\npleated 629\nboyfriend   626\nmiss    625\nlouboutin   624\ntrousers    624\ndior    620\noversized   619\nzipper  614\nmoto    614\nsterling    612\nsatin   607\nsweatshirt  607\neyeshadow   605\nnude    604\npalette 604\njumper  604\ncross   602\nchuck   601\nstella  599\nle  594\npump    592\nbutton  589\ncat 588\nbiker   587\nburberry    586\none 583\nrossi   583\nlondon  573\nchunky  572\nfringe  567\nstretch 566\ndark    564\nplaid   562\npowder  560\nsolid   557\ncut 555\nbelt    553\nparfum  551\nmidi    548\nwang    547\ngianvito    547\ncanvas  547\ncardigan    546\npocket  544\nadidas  542\nhandbag 536\nhem 533\nretro   532\nbeanie  531\ntie 531\nladies  529\nmen's   528\nbody    528\noz  521\nmcqueen 518\nstudded 516\nwide    516\nbox 515\n14k 515\nloose   515\nfit 513\ngold-tone   512\nbangle  507\npolish  506\nvans    505\ntrainers    504\nmccartney   504\nblock   503\nmac 499\nlow 498\nnails   496\nstone   494\nselfridge   490\nnavy    489\nnike    485\ndetail  482\nsummer  481\nbooties 479\nwallet  476\npointed 475\nflats   474\nglitter 473\nsuper   472\nmango   467\ngloss   466\nquilted 466\nblush   463\nchloé   460\nsquare  457\nbuckle  454\nray-ban 448\nopen    447\nx   447\npack    444\nbags    439\npetite  438\nribbed  438\nleggings    437\nleg 436\ncolour  436\nflap    435\nbeach   434\nsoft    434\njimmy   
429\nskater  428\nchiffon 428\ncami    427\nwash    423\nstiletto    422\nhot 421\nrouge   420\nv   417\nsteel   414\nturtleneck  414\nchoo    410\nclear   408\nnatural 405\nrag 404\nbone    403\norange  402\nrise    402\noz. 400\npattern 400\nrusse   399\npreowned    396\nrings   395\nbucket  393\nwaisted 392\nmid 392\nzara    391\neyeliner    386\ncrepe   384\nrhinestone  384\nbrush   384\nmesh    383\nbeige   383\ncosmetics   379\nknitted 377\nbomber  377\ngiuseppe    375\nclothing    374\ncharm   370\nzanotti 370\ndrawstring  369\nwedge   368\ntory    368\npure    368\nolivia  366\nmoschino    366\nmulti   365\nglasses 364\naccessories 362\nband    361\nburch   360\ncouture 359\nacne    359\nchic    358\nmaison  358\n18k 358\nvest    358\nlayered 356\njersey  355\nlogo    355\nknee    354\ntrim    350\nstatement   349\ngolden  348\nbalmain 348\nparis   346\nphone   346\nbeaded  345\nlapel   343\nacetate 342\nstrappy 341\naviator 340\nstainless   339\ncap 337\nsneaker 337\nspray   336\nsteve   336\nmaxi    335\ncrochet 333\nmadden  333\nfedora  331\nshoe    331\nsporty  329\nside    329\ntriangle    327\nearring 325\npom 324\nedition 321\nfringed 321\nlauren  320\nrebecca 318\nfendi   318\nwedding 315\neyes    315\nevening 314\nvictoria    314\ntextured    314\nstuds   314\nliner   313\ncircle  313\nfoundation  312\ngirl    312\nrockstud    311\nmonki   311\nsheer   310\nunisex  310\nface    309\nparty   307\nelastic 307\nbootie  307\nv-neck  306\nwaterproof  305\npullover    305\nsleeves 304\nhandbags    303\nprada   303\nalice   302\ndot 302\ndesigner    301\nhooded  299\nlimited 299\nmoon    298\nburgundy    297\nhoop    297\nstudios 295\ncontrast    294\nj.crew  292\npockets 291\nauthentic   291\npurple  290\nplated  289\nfeather 288\nsexy    288\nstraw   286\nlens    284\nstraight    283\nbra 281\nbling   281\ncandy   281\nstylish 280\nbrand   280\nmen 279\near 278\npreppy  278\nwool-blend  276\nleaf    276\nprom    275\ndorothy 274\nday 274\nmarni   
273\nsole    273\nhoodie  271\nquartz  271\nhandle  271\nperkins 271\npin 271\ncheck   271\nsecret  270\nmargiela    268\npurse   266\nart 265\nasos    264\nouterwear   263\nflared  262\nwoven   261\nbalenciaga  260\noscar   258\nbig 257\nfull    256\nclip    256\nbalm    256\noriginals   255\ngray    254\nhand    253\nswarovski   253\nenvelope    253\nlash    252\ngel 252\nlim 252\ngoop    252\nmessenger   251\nleopard 251\ngeometric   249\nsmith   248\nchristmas   247\ndaisy   246\ncoral   246\npro 244\ntrench  244\ntom 244\nkhaki   243\na-line  243\nsequin  243\nphillip 243\nheeled  242\nyves    240\nstore   240\nisabel  240\nsun 238\nminkoff 238\ncutout  237\ngift    237\ncamel   237\nrock    237\nj   237\nml  236\nrow 235\nlacquer 235\nklein   233\ntravel  232\nhollow  232\nformal  232\nrenta   232\nurban   231\nbelted  231\njane    230\nair 229\ntopic   228\ntan 228\ntone    227\nchicnova    227\nmirror  227\npeep    225\ntwo 225\nline    224\ncombat  224\nsingle  224\namazon.com  224\nmonogram    223\ncable   223\nguess   223\npant    222\nbodycon 222\nford    221\nchicwish    221\nmarant  220\ncoffee  219\nruffle  219\ndr. 
218\nproenza 218\nschouler    217\ncolors  217\nleather-look    217\nloafers 216\nslip    216\nmirrored    216\nnotebook    215\nchloe   215\nbeckham 215\nflowers 215\nschool  214\nhi  213\n6s  213\ncalf    213\naccessorize 213\nwinter  212\ncute    212\nheadband    212\nblend   211\nbaker   211\nskull   211\nplain   211\narmani  210\nbasic   208\npastel  207\nsweet   207\nmid-rise    207\njacquard    207\ndial    207\ncourt   206\ndsquared2   206\nchelsea 205\nmint    205\nhalter  205\nonline  205\ncrew    204\nembroidery  204\nembossed    203\nmartens 203\napple   203\ntoilette    202\nbutterfly   202\nbaseball    202\npatch   201\ngown    201\nvon 201\nfree    201\narrow   200\nflare   200\nvictoria's  199\nasymmetric  198\nolympia 197\nombre   197\nglass   197\nlips    196\nbreasted    195\njet 195\nlanvin  195\nsuperstar   195\nsaffiano    194\nlinda   194\ncotton-blend    193\nsocks   192\nrubber  192\namerican    191\ngraphic 191\nralph   191\nfloppy  190\nvolume  190\nspring  190\nkey 189\nletter  189\ncape    189\nfelt    189\npave    187\nbar 187\nartificial  186\npeach   186\npolka   185\ncalvin  185\nruffled 185\nboutique    184\ngalaxy  184\nluxe    184\nskin    184\npanel   184\ncat-eye 184\nsimple  184\nnyx 184\nbralet  183\nox  183\nkit 183\npunk    183\npaul    183\nlength  181\nfinish  181\nstreet  181\njames   181\nperfect 180\nsnake   180\ndresses 180\nfall    180\ntights  180\npatchwork   180\naquazzura   179\nvince   179\npouch   179\nstudio  178\nelizabeth   177\nultra   177\nmodern  176\nm   176\nbead    176\nfrayed  175\n6/6s    175\nonyx    175\nshine   175\njoseph  175\nball    174\nlime    174\ntable   174\noriginal    173\nelegant 173\nmaybelline  172\nversace 172\ncity    172\nsaddle  172\nwest    171\nmoda    171\nround-frame 171\nted 171\ndiane   171\ncrown   170\ninfinity    170\nmax 170\nlife    169\nballet  168\naeropostale 168\nhome    167\nbraided 167\nbrim    167\nbutter  167\nfarrow  167\nintense 167\nwashed  166\nbright  
166\nbikini  166\ntall    166\nshop    166\ngrunge  165\naustralia   165\neffect  165\ncocktail    165\nnoir    164\noversize    163\ntattoo  162\ngold-plated 162\nextreme 162\nivory   162\nswing   161\ntulle   160\n50ml    160\ntrue    160\nmixed   160\ndiamonds    160\ninspired    159\nice 159\nhouse   159\nwater   159\nexclusive   159\npremium 158\nglow    158\nwine    157\nturquoise   157\nbracelets   157\nbold    157\nshimmer 157\nneon    156\nlily    156\nvegan   156\nhalf    155\ngirls   154\ntweed   154\npieces  154\ntrio    154\nmens    153\npointy  153\nheadphones  153\npaige   152\nrivet   152\ngladiator   151\nsignature   151\nle3no   151\nshell   150\nbib 150\nantigona    149\npretty  149\nclosure 149\nmake    149\nwear    149\npeplum  149\nlinen   149\namazon  148\nenamel  148\ngarden  148\nend 148\nwood    148\nlock    147\ntextured-leather    147\nduo 147\nmade    147\nplastic 147\nlady    146\ntrendy  146\ngenuine 146\nfurstenberg 145\nco. 145\ngloves  145\npen 145\nkimono  145\nold 145\n3/4 145\nsplit   144\nspike   144\nsizes   143\nsapphire    143\nlipsy   143\noptical 143\nchoies  143\nrip 143\nreal    142\npatent-leather  142\nmsgm    142\n90s 142\nstretchy    141\nkenneth 141\nroll    140\nbell    140\nsilver-tone 140\nwayfarer    140\nbest    140\nariana  140\nmarble  140\nlashes  140\nmary    140\ngrande  140\nmilitary    139\nmom 139\nfine    139\nedge    139\nlong-wear   138\nkarl    138\ncrime   138\npolo    138\ntransparent 138\nugg 137\ntilbury 137\ncoco    137\njewellery   137\noval    137\ngoth    137\nwall    137\nbaby    136\njamie   136\ndestroyed   136\nshape   135\nmagnetic    134\nlambskin    134\ngaravani    134\nsport   134\nbamboo  134\nct. 
134\nresin   134\njohn    133\nsea 133\ndavid   133\ncz  133\nspf 133\nspaghetti   133\nwarehouse   133\njennifer    133\nbohemian    132\nedgy    132\nstila   131\ncompact 131\ncamera  131\ntribal  131\nruby    131\nlittle  131\n5s  131\nprofessional    131\nfrench  131\ncalfskin    131\nengagement  130\nbronze  130\nhandmade    130\ninch    130\nfake    130\nacrylic 130\nhipster 129\npalm    129\nw/  129\nshearling   129\nnylon   129\npaper   128\nhigh-top    128\nbridal  128\nlucluc  128\nglam    128\nbear    128\nqueen   128\nwild    128\nstars   127\nwide-leg    127\ncurrent/elliott 127\ntriple  127\nbetsey  127\njohnson 127\ndeep    127\nnight   126\noff-the-shoulder    126\ndesigns 126\nlaura   126\nhigh-rise   126\ntunic   126\nberry   126\npower   126\nlane    125\nfalse   125\nperfume 125\nhard    125\nlagerfeld   125\nstick   125\npoint   124\nessie   124\nmulticolor  124\nlenses  124\nbanana  124\nea  123\nhermes  123\napricot 123\nboy 122\npale    122\nslit    122\ndecay   122\nluxury  121\naldo    121\nmadewell    121\nsee 120\nantique 120\nnine    120\ntree    120\nkylie   120\nsociety 120\narmy    120\nscoop   119\ncut-out 119\nlo  119\ncord    119\nslouchy 119\noasis   119\nnly 119\noxford  119\nknot    119\nbottle  118\npinterest   118\nrib 118\ncasadei 117\nfox 117\ngrace   117\nkiss    117\nadjustable  117\nstripes 117\nchair   116\ngradient    116\n3d  116\nplant   116\nvera    115\nlayer   115\nlouis   115\nstrapless   115\ntumblr  115\nkim 115\ntartan  115\nclinique    114\nofficial    114\nlands   114\nbendel  114\nroberto 114\nferragamo   114\ncartier 114\nlauder  114\nlegging 114\nmaurices    113\ntops    113\ngavriel 113\ncold    113\ncoin    113\ncool    113\nrainbow 112\nsmooth  112\nfresh   112\nalexis  112\nwildfox 112\ntwo-tone    112\nl   111\nblossom 111\nsalvatore   111\nteardrop    111\nvero    111\nolive   111\npuma    111\nlamp    111\ntibi    111\nstand   111\nwork    111\nsparkle 111\nthree   111\nmusic   110\nfreshwater  
110\ndecor   110\nbouquet 110\ncard    110\nbrooch  110\ncosmetic    110\nsmashbox    110\nmara    110\nshopper 109\nkenzo   109\nsophie  109\nwomen’s 109\nlancome 109\nhenri   109\nkaren   109\npull    108\nballerina   108\nseconds 108\nhollister   108\n5/5s    108\nvase    108\nvelvetine   108\nfragrance   107\nbustier 107\nmansur  107\nmicro   107\nflip    107\nt-strap 107\ncamuto  107\npandora 107\nhead    107\naround  107\nflannel 107\nchevron 106\nzirconia    106\ncarven  106\nboxy    106\nparker  106\nnudes   106\ngemstone    106\nmineral 106\ncherry  105\ntube    105\nles 105\naudacious   105\nleather-trimmed 105\nhigh-waisted    105\nfitted  105\ncluster 105\nceramic 104\nsand    104\nscott   104\ntwill   104\nfestival    104\ncross-body  104\ncuffed  104\nbird    104\npython  104\ncc  104\ndrew    104\ninc 104\ncavalli 104\nsilk-blend  104\nbrass   104\nsophia  104\nlined   104\npillow  103\nallure  103\nwig 103\njessica 103\nmartin  103\ngypsy   102\nn   102\njeggings    102\nt.w.    
102\nmerino  102\nbleach  102\nstuart  102\nmidnight    102\nbook    102\nbralette    101\ndangle  101\nperforated  101\njoni    101\nchocolate   101\nwarm    101\nmix 101\ntime    100\njay 99\nfaye    99\npeople  99\ntwist   99\ncubic   99\nchecked 99\nthrow   99\nasymmetrical    99\nsmokey  99\nfabric  98\nbrogues 98\nemilio  98\nbeautiful   98\nunited  98\nskate   98\ndraped  98\npiece   98\naccent  98\navenue  97\nanimal  97\ncéline  97\ncarat   97\nreversible  97\nbardot  97\nsale    97\nribbon  97\nsky 97\nroyal   96\nloafer  96\nslip-on 96\nhippie  96\nstack   96\nclub    96\nlow-rise    96\ncheap   95\nbrow    95\nfloral-print    95\nceline  95\ndeluxe  95\nvuitton 95\nshades  94\nhappy   94\ncole    94\nnecklaces   94\nii  94\npaint   94\naztec   94\nathletic    94\nthong   94\nmankind 94\napparel 94\ndrape   94\nraw 94\nshawl   93\n100mm   93\nmulberry    93\nweitzman    93\nkendall 93\nmcq 93\ndream   93\nshift   93\njeffrey 93\nrolled  93\nchine   92\nstay    92\n5sos    92\nbeads   92\nsports  92\nbcbgmaxazria    92\ncampbell    92\neyewear 92\ncashmere-blend  92\nfossil  92\ngem 92\nxl  92\ngiorgio 92\npierre  92\nfly 92\nmark    92\nabercrombie 92\neyeglasses  92\nwatches 91\nespadrille  91\ntiffany 91\nfitch   91\nsilicone    91\nkendra  91\ncult    91\nguerlain    91\npop 91\npucci   91\nzimmermann  91\n5c  90\ndirection   90\nacid    90\nphilosophy  90\nextra   90\nfleece  90\nsuedette    90\nthick   90\nmono    90\nideas   90\ncheek   90\njour    90\nsans    89\nterry   89\nholiday 89\nwebster 89\nlink    89\npins    89\nwitchery    89\nallurez 89\npyramid 89\nessential   89\ncushion 89\noliver  89\nvogue   89\nthigh   88\nlightweight 88\nroses   88\nwoolen  88\ngorgeous    88\nmother  88\nsweat   88\nturtle  88\njil 87\nsnapback    87\nchronograph 87\nautumn  87\nsander  87\nhalo    87\nopi 87\nbrushes 87\njewel   87\nus  87\nsite    87\nshiny   87\ntopaz   87\npark    87\ntapered 87\niconic  87\ncustom  87\nuniqlo  87\ndkny    87\nfaced   
87\nsouci   86\nlana    86\nfaceted 86\nmm  86\nholder  86\nmagic   86\nbillabong   86\ncrystal-embellished 86\nlord    86\nsnakeskin   86\ntommy   86\nhilfiger    86\nover-the-knee   85\noutdoor 85\nculottes    85\nrug 85\ngothic  85\nhole    85\nwire    85\ntiny    85\ncaviar  85\ntarget  85\nmodcloth    85\nchampagne   85\niro 84\ncup 84\npeep-toe    84\nrental  84\nbandeau 84\nvernis  84\npiercing    84\nhelmut  84\npineapple   84\nkeds    84\npleat   84\nribkoff 84\nnaked   84\nclip-on 84\nvanessa 84\npadded  83\nnile    83\nbittar  83\nag  83\nagate   83\nmohair  83\nvinyl   83\nhardy   83\ntimberland  83\nrunway  83\nbella   83\ncoach   83\ntattoos 83\nanne    83\ncaged   82\nburton  82\ntrend   82\ntailored    82\ncore    82\npainted 82\nconvertible 82\ncrystals    82\nforever21   82\nslippers    82\nbradley 82\ntropical    81\nalex    81\ntassels 81\nrepublic    81\nlucy    81\nfunny   81\nestee   81\nheritage    81\nhobo    81\nhydrating   81\nhairstyles  81\nmink    81\neugenia 81\nbottega 81\nsac 81\nponte   80\nlang    80\nteal    80\nalloy   80\ninsert  80\ntea 80\nink 80\nparka   80\nsugar   80\npoppy   80\nveneta  80\nbackless    79\nperry   79\nleaves  79\nnappa   79\ndes 79\nedp 79\nplaysuit    79\nthin    79\n120mm   79\nstacking    79\nstraight-leg    79\nloeffler    78\npatterned   78\ncolorful    78\nvivienne    78\nmock    78\nnina    78\nblack/white 78\nnose    78\nkelly   78\ntrouser 78\nphoto   78\nnote    78\nrandall 78\ngap 78\ncandle  77\nlevi's  77\nselma   77\ntrends  77\nsicily  77\nmask    77\nscallop 77\nbuttons 77\ngoldtone    77\nlongline    77\ntshirt  77\ncotton-jersey   77\nchandelier  77\nhoney   77\njumpsuit    77\nshirts  76\ndetails 76\nslim-fit    76\n100ml   76\nget 76\nangel   76\npur 76\nwaterfall   76\nbodysuit    76\nwestwood    76\nanna    76\nstitch  75\nacross  75\norganic 75\nunique  75\nmercier 75\ngo  75\ndisney  75\nstraps  75\nemerald 75\nbatwing 75\nhudson  75\nirregular   75\nrihanna 75\nreligion    
75\nbowknot 75\nclean   75\nglamorous   75\nberricle    75\nbath    74\nelephant    74\ncapri   74\nco  74\nhighlighter 74\ngg  74\nfiligree    74\njaeger  74\nmonsoon 74\ncamo    74\ncontour 74\nzizzi   74\ndeborah 74\nhayden  74\nmonochrome  74\nnearly  74\nmuscle  74\npeoples 74\nmonday  74\nmarket  74\nst. 74\ndip 74\nmolly   74\n30ml    73\nlike    73\nmug 73\nm·a·c   73\nsequined    73\nruched  73\ndoll    73\nlasting 73\ntheory  73\nowl 73\narrangement 73\nsam 73\nsleeved 73\nbuttoned    73\nslingback   73\nfashionable 72\nbubble  72\npressed 72\nsplatter    72\ncorduroy    72\nshaped  72\nivy 72\ndisc    72\ngood    72\ndye 72\nstackable   71\nfurla   71\nbb  71\nca  71\nbennett 71\nhorn    71\nbarbara 71\noil 71\nmacbook 71\nipad    71\nelie    71\nblonde  70\nhoundstooth 70\nbeats   70\npcs 70\nanya    70\ntuxedo  70\njuicy   70\nsergio  70\nfoldover    70\ncrescent    70\nregular 70\nlow-top 70\ngoddess 70\nheather 70\nplunge  70\namber   70\nlulu    70\nnubuck  70\npanama  70\nsequins 69\ng   69\nscuba   69\ngreek   69\nrun 69\nslouch  69\n10k 69\nsnow    69\neasy    69\nlucky   69\nstones  69\nprincess    69\nsnap    69\nlilly   69\nbangles 69\ndiorshow    69\ncompany 68\nconcealer   68\nÉtoile  68\nlotion  68\nturn    68\ncultured    68\nswimsuit    68\ndonna   68\nlux 68\nbrushed 68\npearls  68\npusheen 68\npost    68\nfalabella   68\nny  68\nduster  68\nstitching   68\nmelissa 68\nstrand  68\neos 68\ncell    68\n18-karat    68\npilot   67\nlaser   67\nsucculent   67\nillesteva   67\npavé    67\nquay    67\nwolf    67\nmickey  67\noffice  67\nmist    67\nroksanda    67\nroshe   66\nassorted    66\nhammered    66\nshredded    66\namethyst    66\nwedges  66\nsonia   66\nscalloped   66\nb   66\nlippmann    66\nyurman  66\nclubmaster  66\nmermaid 66\nd'orsay 66\nduffle  66\nitalian 66\nbridesmaid  66\nteen    66\netro    66\nfive    66\nraglan  66\nharlow  66\nsouth   66\nhearts  66\nballerinas  66\npaisley 66\nopal    65\naway    65\nrare    
65\ncrocodile   65\nsoap    65\ntouch   65\nplatinum    65\ninstant 65\ncargo   65\nrope    65\ncopper  65\nkeychain    65\ndeco    65\nnyc 65\ndi  65\nfancy   65\npier    64\ninstagram   64\nbandana 64\ngeo 64\nluggage 64\nspliced 64\nflag    64\nct  64\nknitwear    64\nk.i.s.s.i.n.g   64\ntemporary   64\nhood    64\nmonster 64\nreading 64\npeace   64\nsteampunk   64\nrimmel  64\njeanne  64\naddict  64\nsnowflake   64\nsimpson 64\ncoast   64\nboss    64\npot 64\nsaab    64\nrich    63\nsinglet 63\ndiamante    63\nkisses  63\nrachel  63\nsmart   63\nfold    63\ncitizens    63\nhindmarch   63\nw   63\nwallpaper   63\ncm  63\nrebel   63\ndigital 63\nhumanity    63\nredvalentino    63\nbandage 63\nmotorcycle  63\ntennis  63\nleopard-print   63\nclips   63\nlaptop  63\nfactory 63\nopening 62\nframes  62\nunion   61\nwave    61\nashley  61\nstan    61\nbasket  61\nyeezy   61\nblackfive   61\nmissoni 61\nwestern 61\ndouble-breasted 61\nharry   61\na.l.c.  61\npeter   61\nsolitaire   61\nvila    61\nantonio 61\nfloor   61\ncollarless  61\nbronzer 61\nrunning 61\nrain    61\nmason   60\nplate   60\nemma    60\nzipped  60\nedie    60\ncharcoal    60\ntoms    60\npolished    60\nlee 60\noxfords 60\npair    60\nberet   60\ngarnet  60\ntypography  60\narden   60\nd   60\nknotted 60\nhardware    60\n4s  60\nrolex   60\nstates  60\nindie   60\nabstract    60\nmichel  60\nnicholas    60\naqua    60\nrick    59\nfrill   59\nno. 59\nquote   59\nversatile   59\ncases   59\ngoose   59\net  59\ncoconut 59\nblahnik 59\nfour    59\nmanolo  59\njumbo   59\nmustard 59\nrolling 59\ndecorative  59\ncambridge   59\ntopman  59\nwooden  58\nfaded   58\nletters 58\npuffer  58\nm&co    58\nsamsung 58\nmm6 58\nhidden  58\nwalker  58\nblanket 58\ntortoise    58\nl.k.    
58\nlemon   58\nedelman 58\ndelpozo 58\nsemi    58\njack    58\npassport    58\ndome    58\nboat    58\nshopping    58\nchains  58\npencils 58\nsensational 57\nrectangle   57\nkurt    57\neagle   57\nluminous    57\nankle-strap 57\napplique    57\nhalloween   57\npebbled 57\nbirkin  57\nuk  57\nspecs   57\nplum    57\nvirgin  57\nrelaxed 57\nespadrilles 57\nquotes  57\nplus/6/5/5s/5c  57\nseason  57\nzoe 56\n4/4s    56\nskool   56\nderek   56\nclarins 56\nandrew  56\nprecision   56\nlipgloss    56\nrochas  56\ni'm 56\nvacation    56\nbase    56\nanchor  56\nprimer  56\nponcho  56\nusa 56\nk   56\nflatform    56\npolarized   56\nfluffy  56\nrosie   56\nsoho    56\nedt 56\ndiesel  56\nbleached    56\ncelebrity   56\nnative  56\nwristlet    55\nclock   55\ncuffs   55\nscrunchie   55\nmarie   55\ndate    55\nleo 55\ngeiger  55\neyelashes   55\nseries  55\nquad    55\ndeer    55\nforest  55\ncartoon 55\nfaux-leather    55\nlolita  55\nminaudiere  55\nsofa    55\nethnic  55\n14kt    54\nchino   54\nzippers 54\nchambray    54\ngigi    54\ntaupe   54\njackets 54\nexpress 54\nmillen  54\ncamisole    54\nspace   54\ndoublju 54\ncharles 54\nvarsity 54\ncorset  54\nowens   54\ntext    54\nschutz  54\nlevis   54\nsplicing    54\ncage    54\nkane    54\nrips    54\nneed    54\nmuse    54\nsk8-hi  54\nrocket  54\norganza 53\ncrewneck    53\namy 53\nbirger  53\nfinger  53\ndelicate    53\nbeverly 53\nhills   53\nflash   53\ndune    53\nmcm 53\nvermeil 53\nbun 53\ndots    53\njade    53\nneoprene    53\nmonica  53\nbelle   53\npeony   53\nweave   53\nfluid   53\nflowy   53\ncrisscross  53\nmarmont 53\ncalypso 53\nimports 53\nrabbit  53\nlocket  52\nrykiel  52\ncostume 52\nmaria   52\ncolorblock  52\nsearch  52\nlegendary   52\ninfinite    52\nbunny   52\nteaspoon    52\nadult   52\nvita    52\n21+ 52\ndionysus    52\ncurly   52\nperla   52\nflora   52\ndahlia  52\npvc 52\nviolet  52\nphotos  51\nbrunello    51\nbailey  51\nrobinson    51\ncurl    51\nwing    51\nneo 51\nsydney  
51\ncarved  51\njordan  51\ngrained 51\nrivets  51\nsupply  51\nhats    51\nmouret  51\nbaublebar   51\nxs  51\ndre 51\npoint-toe   51\ncucinelli   51\npadlock 51\nblack/gold  51\nbobby   51\nfleur   51\nwoman   51\norchid  51\npoplin  51\nroland  51\nlizzie  51\ndiana   51\nlewis   50\nequipment   50\nlabel   50\ntiered  50\nmoonstone   50\nmoisturizing    50\nstretch-jersey  50\nslim-leg    50\nsmoky   50\naudrey  50\nraffia  50\np   50\nnatasha 50\nsunset  50\nrhodium 50\nrupert  50\nysl 50\nilia    50\nleigh   50\ncara    50\nmouse   50\nrosa    50\njunior  50\nvan 50\nann 50\nsweetheart  50\nippolita    50\nintarsia    50\nillamasqua  50\nlorac   50\ngilet   49\namazing 49\njules   49\ninternational   49\nharrods 49\nsanderson   49\nalways  49\nplanter 49\nlongwear    49\nwhistles    49\nfallon  49\nmalene  49\nfriendship  49\nstructured  49\n38mm    49\never    49\ntwisted 49\nbackpacks   49\ncotton-poplin   49\nsynthetic   49\nfan 49\ngiambattista    48\nkensington  48\nbarrel  48\npulitzer    48\npetal   48\nunicorn 48\nmetro   48\nfirst   48\nbraid   48\nfunnel  48\nlavender    48\ngirly   48\nplus/7/6    48\nlipcolor    48\ncroc    48\nbui 48\npointed-toe 48\ndual    48\nindigo  48\nl'absolu    48\nfishnet 48\nounce   48\nvetements   48\ngraham  48\neffy    48\ngifts   48\nrhea    48\nally    48\noperandi    48\ntips    48\nnorman  48\nmarilyn 48\ncurved  48\nsleek   48\nvalli   48\ntinted  48\ntarte   48\nvarious 48\n18ct    47\naspinal 47\nmargot  47\nlisa    47\nwrist   47\ntrimmed 47\nspiral  47\nmaroon  47\natelier 47\nbo  47\nopaque  47\nproducts    47\nsunday  47\nholland 47\nbrocade 47\ncozy    47\nshower  47\nhulme   47\nsuit    47\nmatthew 47\nleisure 47\nloop    47\nminimal 47\nbrooks  47\nwireless    47\nchinese 47\nplants  47\ndance   47\nmessy   47\ntranslucent 47\nkirkwood    47\nforce   47\ncactus  47\ncreamy  47\nradiant 47\nbranch  47\nwaistcoat   46\nworld   46\nsarah   46\nhumble  46\nalmond  46\ntint    46\n80s 46\nkevyn   46\nrustic  
46\nblade   46\nscarves 46\n1/2 46\nbed 46\nbee 46\ngeorgia 46\nfun 46\nrx  46\nvertical    46\neva 46\nrefill  46\ngoldschmied 46\nthing   46\ngrid    46\nlow-tops    46\ntortoiseshell   46\ntemple  46\nscotch  46\nbronzing    46\naucoin  46\nhi-top  46\nwilliamson  46\nadriano 46\ntag 46\nsephora 46\ncomb    46\nlola    46\nwatercolor  46\nmanon   45\njar 45\naeo 45\nancient 45\ncolours 45\nicon    45\nengraved    45\npajama  45\nfacial  45\npotter  45\na5  45\ncomme   45\nromance 45\nfaith   45\nbuckled 45\ndreamcatcher    45\npigalle 45\niris    45\nmoney   45\nboys    45\npeekaboo    45\nsailor  45\nclasp   45\nchristopher 45\nelle    45\nrucksack    45\nsilk-satin  45\ndetachable  45\nessentials  45\nrim 45\nchan    44\ntotes   44\nwrapped 44\nsimons  44\nrb3025  44\nperformance 44\nmule    44\nmonogramme  44\nbruno   44\nalien   44\ncocoon  44\npress   44\nharris  44\neddie   44\ncloud   44\nricci   44\nnarrow  44\nlarger  44\nbuy 44\nbaroque 44\ncurve   44\nfrye    44\ntight   44\nshaping 44\nclassics    44\noff-shoulder    44\nverdugo 44\nmatt    44\ntutorial    44\nrhinestones 44\nswag    44\nhermÃ¨s 44\no   44\ntod's   44\nglossy  44\ngreat   44\n90's    44\ndesk    44\nradiance    44\nlam 44\ncrossover   44\narm 44\ndusty   43\nsock    43\nslimming    43\nears    43\nella    43\nultimate    43\nhour    43\njustin  43\nsoda    43\ntowel   43\nhunter  43\nmiller  43\nclutches    43\ndr  43\nspitfire    43\nnerd    43\nelectric    43\npaolo   43\nevan    43\nkitty   43\nperspex 43\ncharms  43\nlatest  43\nsign    43\nnautical    43\ncare    43\nbvlgari 43\ncrème   43\ncamouflage  43\njonathan    43\nfriends 43\ndisco   43\ncrocheted   43\nlion    43\nlamb    43\ntiger   43\npony    43\nbaptiste    43\nelyse   43\nlights  43\nlapis   43\nmoi 43\ntrapeze 43\ntoast   43\nlacoste 43\nstyles  43\ncutoff  43\nvelvetines  42\nnature  42\nweb 42\nvarnish 42\nbottoms 42\nseven   42\nocean   42\nmules   42\nalexa   42\ncurling 42\ncriss   42\nanastasia   42\npalazzo 
42\nlizard  42\npersonalized    42\nstrawberry  42\nbaked   42\nbrick   42\ncontinental 42\nbasics  42\nsimulated   42\nclassy  42\namazon.co.uk    42\nlos 42\n55mm    42\ngym 42\ncanada  42\nl.a.    42\nwonderland  42\nprinting    42\nphase   42\nzac 42\nsingle-breasted 42\nbetter  42\nstacked 42\nlovers  42\ngiant   42\narizona 42\njelly   42\nclosed  42\nkeyhole 42\n.   41\ntwo-piece   41\n18kt    41\npochette    41\nmoisture    41\nseam    41\ncomfort 41\nvalentine   41\nceremony    41\nclic    41\nvelour  41\ntwin    41\nbaguette    41\nfruit   41\ncolored 41\nhigh-waist  41\nskort   41\ny   41\nparadise    41\nchampion    41\nmilly   41\ncedar   41\nneutral 41\nbarth   41\nbareminerals    41\nbelly   41\nleonard 41\nembellishment   41\nctw 41\niron    41\npenny   41\nbriefcase   41\nbad 41\nplush   41\nsunscreen   41\nhip 41\nduffel  40\nblair   40\nvibrant 40\nerickson    40\nthierry 40\nfuchsia 40\nappliqué    40\nenvy    40\nbroken  40\ndolce&gabbana   40\ncameo   40\ncoats   40\nlengthening 40\nopenwork    40\nbrothers    40\nmichelle    40\nglittered   40\ncloth   40\nipod    40\npompom  40\nbrooklyn    40\nsnapmade.com    40\nbarneys 40\nquality 40\nlucite  40\nsling   40\nitaly   40\ncabochon    40\nsupreme 40\nnotes   40\nshoedazzle  40\nromper  39\ntech    39\ndyed    39\nbonded  39\nalexandre   39\nproduct 39\nsimone  39\nfemme   39\nflops   39\nmagazine    39\neyelet  39\nkatrantzou  39\nfl  39\nagent   39\nvictorian   39\ngunmetal    39\ndecoration  39\nseamed  39\nmiranda 39\npolka-dot   39\navailable   39\npizza   39\namanda  39\nwings   39\nlorenzo 39\npart    39\nmanicure    39\nyang    39\ndylan   39\ngorjana 39\ntimeless    39\nbrian   39\nraey    39\n15ml    39\nbarely  39\ndog 39\npour    39\nsieraden    39\nedited  39\noverall 39\n105mm   39\ncady    39\nappliquéd   39\nmovado  39\n10mm    39\nchiara  38\nvivier  38\npaneled 38\nfaux-fur    38\ndaniel  38\ngrand   38\ncharming    38\nsofia   38\nroom    38\nlovely  38\nr13 38\ngowns   
38\nsplendid    38\nknuckle 38\nking    38\nbezel   38\nnixon   38\ngauze   38\npeacock 38\ncarolina    38\nangeles 38\nlizzy   38\nmarco   38\nbuckles 38\neyebrow 38\nneckline    38\npom-pom 38\nj.w.anderson    38\nsylvie  38\nkitten  38\nknee-high   38\nnano    38\nstem    38\nkaty    38\nwellington  38\nromantic    38\npolyvore    38\n50s 38\nmake-up 38\nmademoiselle    38\nrails   38\nwharf   38\nshaggy  38\npan 38\ngazelle 38\n36mm    38\ntulip   37\nspectrum    37\nbottom  37\ncobalt  37\npassion 37\nyear    37\nstarbucks   37\niantorno    37\nnarciso 37\neyelash 37\nlive    37\n3x1 37\nstretch-cotton  37\n8mm 37\noff-white   37\nse  37\n/jean   37\neve 37\nluna    37\nwars    37\nshow    37\nfolding 37\nsteven  37\nsilvertone  37\nmia 37\nfabulous    37\nshadows 37\ntemperley   37\nanouk   37\nroman   37\nracerback   37\nlilac   37\nlaundry 37\ndainty  37\nhinge   37\nfrost   37\njeweled 37\n40mm    37\nsupra   37\nview    37\nhanging 36\none-shoulder    36\ngraffiti    36\nself    36\nalpaca  36\ncognac  36\nps1 36\nslide   36\nturn-down   36\nu   36\nborgo   36\nnicole  36\ntower   36\nlighting    36\njoy 36\nskirts  36\nkeyring 36\nflock   36\ndu  36\nemoji   36\nplexi   36\nash 36\nhenna   36\nmod 36\nedit    36\nharper  36\n100%    36\ncleansing   36\ncreme   36\nlotus   36\ngeorgette   36\ninches  36\nmid-length  36\nkat 36\nready   36\ndemi    36\nsafari  36\nmoss    36\nbing    36\nmother-of-pearl 36\nshade   36\nhamilton    36\nskyline 36\nroger   36\ncomplete    36\npierced 36\njogger  36\nmytheresa.com   36\nbieber  36\nlaque   36\ndaily   36\nmilk    36\nhomme   36\neveryday    36\nsimmons 36\nsilk-chiffon    36\nsmythson    36\ntab 36\npeacoat 35\nera 35\njudith  35\nbouclé  35\nsheath  35\nmatch   35\npamela  35\nspiked  35\nholographic 35\nm&s 35\njoe 35\nbabies  35\ncarvela 35\nsilky   35\nsharon  35\nrotita  35\nmonarch 35\nfling   35\nen  35\nchico's 35\nnordstrom   35\nmarciano    35\nstring  35\ngirlfriend  35\nbloom   35\ntwenty  
35\neight   35\nava 35\ns4  35\nbite    35\nzippered    35\nbrit    35\ngentle  35\ncitrine 35\nbarrette    35\nheavy   35\nstep    35\ncoated  35\njournal 35\ndry 35\nmat 35\nsimply  35\nextensions  35\nemporio 35\nwardrobe    35\nilluminating    35\nvioleta 35\nplein   35\nbrogue  35\ntools   35\nsticker 35\nbare    35\nbalance 35\naccessory   35\nirene   35\nstyling 35\ncable-knit  35\nlinea   35\nlooks   35\nwide-brim   35\ndon't   35\npigment 34\nminnie  34\nshahida 34\npumpkin 34\nspecial 34\njegging 34\nresistant   34\nweekend 34\nmineralize  34\nthicken 34\nselected    34\ncoachella   34\nfern    34\ncolor-block 34\ndannijo 34\nfair    34\nfuture  34\nrodriguez   34\nqupid   34\nnero    34\nrobert  34\nalice+olivia    34\nunderwear   34\nluke    34\nfollies 34\nmeyer   34\nheat    34\nani 34\ndining  34\nparides 34\nyoung   34\nbetty   34\n24k 34\nvoyage  34\nloewe   34\njungle  34\nautomatic   34\nrobyn   34\nrita    34\nh   34\npuff    34\nbaume   34\nbowler  34\nholly   34\njapanese    34\njennings    34\nhermès  34\njansport    34\npatches 34\nraf 34\nkill    34\nlancôme 34\ngrain   34\nkohl    33\nincase  33\nbooks   33\ngallery 33\nlost    33\nfoil    33\ncosmic  33\nzero    33\nbroderie    33\nbrief   33\ntabitha 33\ncube    33\ncuba    33\ntake    33\nsheet   33\nsheepskin   33\n1/4 33\nluu 33\nprecious    33\nbriefs  33\nthomas  33\npots    33\nhourglass   33\nbees    33\nzebra   33\nuse 33\nnirvana 33\nallen   33\nwink    33\ncatcher 33\nman 33\nsplash  33\ncynthia 33\nminiskirt   33\ncyber   33\nmagnolia    33\nlouise  33\nthings  33\nneuwirth    33\ntied    33\nlattice 33\nsparkling   33\nbebe    33\nslogan  33\nribbed-knit 33\nskagen  33\nbay 33\namelie  33\nshark   33\natwood  33\ntip 33\nsuperdry    33\nbarn    33\nvanilla 33\nslub    33\nmaster  32\npanelled    32\nbianca  32\nbabe    32\nwet 32\nvampire 32\nbroad   32\nnwt 32\nmorning 32\nspikes  32\nilluminator 32\nbridge  32\nlarsson 32\nzodiac  32\nwreath  32\nalaïa   32\nnovica  
32\nslipper 32\nplay    32\nfire    32\nhello   32\nhoops   32\njuniors 32\nleiber  32\nlingerie    32\nblock-heel  32\nrio 32\nclover  32\nkingdom 32\nblank   32\npr  32\nmartini 32\nbrilliant   32\nfootwear    32\nveau    32\nneiman  32\nbrightening 32\n1980s   32\nwillow  32\nchantecaille    32\ncredit  32\netched  32\nstylo   32\nmarcus  32\nright   32\nfenty   32\nedward  32\nanita   32\nself-tie    32\nmaya    32\nbrows   32\naerin   32\numbrella    32\nlurex   32\nbirman  32\nchest   32\nroad    32\nviva    32\nbirthday    32\ninspirational   32\nyoga    31\nanalog  31\nderby   31\nillusion    31\nstretch-crepe   31\nfalke   31\nheadpiece   31\nwavy    31\nhigh-low    31\nnorth   31\nmarchesa    31\nwatermelon  31\nbatman  31\nferragni    31\nstilettos   31\nbarbour 31\nliberty 31\nspirit  31\nmotif   31\njasmine 31\nae  31\niridescent  31\nresults 31\nhope    31\nwool-crepe  31\nmedusa  31\nsandy   31\nyoni    31\nstocking    31\nlasry   31\nromy    31\nnational    31\nmr  31\neiffel  31\ndatejust    31\nicing   31\nawesome 31\n75ml    31\nterre   31\nre/done 31\nglitz   31\npear    31\nway 31\nchestnut    31\npapier  31\nerin    31\nt-bar   31\nviparo  31\nray 31\nlara    31\nmorganite   31\nrocha   31\nhaider  31\npoison  31\nlariat  31\npush    31\nl'oreal 31\nbusiness    31\nutility 31\nsmoke   31\njones   31\nhigh-heel   31\ntrack   31\nmarl    31\nplease  31\ndesert  31\nlattori 31\nbean    31\na.p.c.  
31\nbidermann   31\nvelours 31\naluminum    31\nthread  31\npremiere    31\ncrossbar    30\nmenswear    30\nconnection  30\nflawless    30\ne   30\nsigma   30\nhigh-heeled 30\npatrick 30\npolyester   30\ncollections 30\ngold/black  30\nstretch-knit    30\nace 30\nlet 30\n1990s   30\nfamous  30\nbordeaux    30\ncicihot 30\npetits  30\nmonroe  30\ndakota  30\nlookbook    30\nmodel   30\n1970s   30\nstain   30\ncheckered   30\npolly   30\nmandala 30\nracer   30\ncrosby  30\nveil    30\nposh    30\nmessage 30\nskeleton    30\ncowl    30\nlazy    30\nvalentines  30\nsutton  30\ncollege 30\nbermuda 30\nreiss   30\nflex    30\neast    30\ntees    30\ndiorific    30\nkaran   30\nword    30\ncarpet  30\nfujifilm    30\ncut-off 30\narms    30\nsparkly 30\nmila    30"
  },
  {
    "path": "extract_feature.sh",
    "content": "#!/bin/bash\nCHECKPOINT_DIR=\"model/model_final/model.ckpt-34865\"\n\npython polyvore/run_inference.py \\\n  --checkpoint_path=${CHECKPOINT_DIR} \\\n  --json_file=\"data/label/test_no_dup.json\" \\\n  --image_dir=\"data/images/\" \\\n  --feature_file=\"data/features/test_features.pkl\" \\\n  --rnn_type=\"lstm\"\n\n# # Extract features of Bi-LSTM without VSE\n# CHECKPOINT_DIR=\"model/model_final/model_bi_no_emb.ckpt\"\n# python polyvore/run_inference.py \\\n#   --checkpoint_path=${CHECKPOINT_DIR} \\\n#   --json_file=\"data/label/test_no_dup.json\" \\\n#   --image_dir=\"data/images/\" \\\n#   --feature_file=\"data/features/test_features_bi_no_emb.pkl\" \\\n#   --rnn_type=\"lstm\"\n\n\n# # Extract features of VSE model without LSTM\n# CHECKPOINT_DIR=\"model/model_final/model_emb.ckpt\"\n# python polyvore/run_inference_vse.py \\\n#   --checkpoint_path=${CHECKPOINT_DIR} \\\n#   --json_file=\"data/label/test_no_dup.json\" \\\n#   --image_dir=\"data/images/\" \\\n#   --feature_file=\"data/features/test_features_emb.pkl\" \\\n\n# # Extract features of Siamese Network\n# CHECKPOINT_DIR=\"model/model_final/model_siamese.ckpt\"\n\n# python polyvore/run_inference_siamese.py \\\n#   --checkpoint_path=${CHECKPOINT_DIR} \\\n#   --json_file=\"data/label/test_no_dup.json\" \\\n#   --image_dir=\"data/images/\" \\\n#   --feature_file=\"data/features/test_features_siamese.pkl\"\n"
  },
  {
    "path": "fill_in_blank.sh",
    "content": "#!/bin/bash\nCHECKPOINT_DIR=\"model/model_final/model.ckpt-34865\"\n\npython polyvore/fill_in_blank.py \\\n  --checkpoint_path=${CHECKPOINT_DIR} \\\n  --json_file=\"data/label/fill_in_blank_test.json\" \\\n  --feature_file=\"data/features/test_features.pkl\" \\\n  --rnn_type=\"lstm\" \\\n  --direction=\"2\" \\\n  --result_file=\"fill_in_blank_result.pkl\"\n\n# # Fill in the blank Siamese Network\n# CHECKPOINT_DIR=\"model/model_final/model_siamese.ckpt\"\n\n# python polyvore/fill_in_blank_siamese.py \\\n#   --checkpoint_path=${CHECKPOINT_DIR} \\\n#   --json_file=\"data/label/fill_in_blank_test.json\" \\\n#   --feature_file=\"data/features/test_features_siamese.pkl\" \\\n#   --result_file=\"fill_in_blank_siamese_result.pkl\"\n"
  },
  {
    "path": "outfit_generation.sh",
    "content": "#!/bin/bash\nCHECKPOINT_DIR=\"model/model_final/model.ckpt-34865\"\n\n# Run inference on images.\npython polyvore/set_generation.py \\\n  --checkpoint_path=${CHECKPOINT_DIR} \\\n  --image_dir=\"data/images/test_no_dup/\" \\\n  --feature_file=\"data/features/test_features.pkl\" \\\n  --query_file=\"query.json\" \\\n  --word_dict_file=\"data/final_word_dict.txt\" \\\n  --result_dir=\"results/\"\n  "
  },
  {
    "path": "polyvore/configuration.py",
    "content": "# Copyright 2017 Xintong Han. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"Bi-LSTM Polyvore model and training configurations.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n\nclass ModelConfig(object):\n  \"\"\"Wrapper class for model hyperparameters.\"\"\"\n\n  def __init__(self):\n    \"\"\"Sets the default model hyperparameters.\"\"\"\n    # File pattern of sharded TFRecord file containing SequenceExample protos.\n    # Must be provided in training and evaluation modes.\n    self.input_file_pattern = None\n\n    # Image format (\"jpeg\" or \"png\").\n    self.image_format = \"jpeg\"\n\n    # Approximate number of values per input shard. 
Used to ensure sufficient\n    # mixing between shards in training.\n    self.values_per_input_shard = 135\n    # Minimum number of shards to keep in the input queue.\n    self.input_queue_capacity_factor = 2\n    # Number of threads for prefetching SequenceExample protos.\n    self.num_input_reader_threads = 1\n  \n    # Name of the SequenceExample context feature containing set ids.\n    self.set_id_name = \"set_id\"\n    \n    # Name of the SequenceExample feature list containing captions and images.\n    self.image_feature_name = \"images\"\n    self.image_index_name = \"image_index\"\n    self.caption_feature_name = \"caption_ids\"\n\n    # Number of unique words in the vocab (plus 1, for <UNK>).\n    # The default value is larger than the expected actual vocab size to allow\n    # for differences between tokenizer versions used in preprocessing. There is\n    # no harm in using a value greater than the actual vocab size, but using a\n    # value less than the actual vocab size will result in an error.\n    self.vocab_size = 2757\n\n    # Number of threads for image preprocessing.\n    self.num_preprocess_threads = 1\n\n    # Batch size.\n    self.batch_size = 10\n    \n    # File containing an Inception v3 checkpoint to initialize the variables\n    # of the Inception model. Must be provided when starting training for the\n    # first time.\n    self.inception_checkpoint_file = None\n\n    # Dimensions of Inception v3 input images.\n    self.image_height = 299\n    self.image_width = 299\n\n    # Scale used to initialize model variables.\n    self.initializer_scale = 0.08\n\n    # LSTM input and output dimensionality, respectively. 
embedding_size is also\n    # the embedding size in the visual-semantic joint space.\n    self.embedding_size = 512 \n    self.num_lstm_units = 512 \n\n    # If < 1.0, the dropout keep probability applied to LSTM variables.\n    self.lstm_dropout_keep_prob = 0.7\n\n    # Largest number of images in a fashion set.\n    self.number_set_images = 8\n    \n    # Margin for the embedding loss.\n    self.emb_margin = 0.2\n\n    # Balance factor of all losses.\n    self.emb_loss_factor = 1.0 # VSE loss\n    self.f_rnn_loss_factor = 1.0  # Forward LSTM\n    self.b_rnn_loss_factor = 1.0  # Backward LSTM, might give it a lower weight\n    # because it is harder to predict backward than forward in our scenario.\n    \n    # RNN type. \"lstm\", \"gru\", \"rnn\"\n    self.rnn_type = \"lstm\"\n\n\nclass TrainingConfig(object):\n  \"\"\"Wrapper class for training hyperparameters.\"\"\"\n\n  def __init__(self):\n    \"\"\"Sets the default training hyperparameters.\"\"\"\n    # Number of examples per epoch of training data.\n    self.num_examples_per_epoch = 17316\n\n    # Optimizer for training the model.\n    self.optimizer = \"SGD\"\n\n    # Learning rate for the initial phase of training.\n    # by the FLAGS in train.py\n    self.initial_learning_rate = 0.2\n    \n    self.learning_rate_decay_factor = 0.5\n    self.num_epochs_per_decay = 2.0\n\n    # If not None, clip gradients to this value.\n    self.clip_gradients = 5.0\n\n    # How many model checkpoints to keep.\n    self.max_checkpoints_to_keep = 10\n"
  },
  {
    "path": "polyvore/fashion_compatibility.py",
    "content": "# Copyright 2017 Xintong Han. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Predict the fashion compatibility of a given image sequence.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport json\n\nimport tensorflow as tf\nimport numpy as np\nimport pickle as pkl\nfrom sklearn import metrics\n\nimport configuration\nimport polyvore_model_bi as polyvore_model\n\nFLAGS = tf.flags.FLAGS\n\ntf.flags.DEFINE_string(\"checkpoint_path\", \"\",\n                       \"Model checkpoint file or directory containing a \"\n                       \"model checkpoint file.\")\ntf.flags.DEFINE_string(\"label_file\", \"\", \"Txt file containing test outfits.\")\ntf.flags.DEFINE_string(\"feature_file\", \"\", \"Files containing image features\")\ntf.flags.DEFINE_string(\"rnn_type\", \"\", \"Type of RNN.\")\ntf.flags.DEFINE_string(\"result_file\", \"\", \"File to store the results.\")\ntf.flags.DEFINE_integer(\"direction\", 2, \"2: bidirectional; 1: forward only;\"\n                        \"-1: backward only.\")\n\n\ndef run_compatibility_inference(sess, image_seqs, test_feat,\n                                num_lstm_units, model):\n  emb_seqs = test_feat[image_seqs,:]\n  num_images = float(len(image_seqs))\n  if FLAGS.rnn_type == \"lstm\":\n    zero_state = 
np.zeros([1, 2 * num_lstm_units])\n  else:\n    zero_state = np.zeros([1, num_lstm_units])\n  \n  f_score = 0\n  b_score = 0\n  if FLAGS.direction != -1:\n    # Forward RNN.\n    outputs = []\n    input_feed = np.reshape(emb_seqs[0], [1,-1])\n    # Run first step with all zeros initial state.\n    [lstm_state, lstm_output] = sess.run(\n          fetches=[\"lstm/f_state:0\",\"f_logits/f_logits/BiasAdd:0\"],\n          feed_dict={\"lstm/f_input_feed:0\":input_feed,\n                     \"lstm/f_state_feed:0\":zero_state})\n    outputs.append(lstm_output)\n\n    # Run remaining steps.\n    for step in range(int(num_images)-1):\n      input_feed = np.reshape(emb_seqs[step+1], [1,-1])\n      [lstm_state, lstm_output] = sess.run(\n                fetches=[\"lstm/f_state:0\",\"f_logits/f_logits/BiasAdd:0\"],\n                feed_dict={\"lstm/f_input_feed:0\":input_feed,\n                           \"lstm/f_state_feed:0\":lstm_state})\n      outputs.append(lstm_output)\n    \n    # Calculate the loss.\n    # Different from the training process where the loss is calculated in each\n    # mini batch, during testing, we get the loss against the whole test set.\n    # This is pretty slow, maybe a better method could be used.\n    s = np.squeeze(np.dot(np.asarray(outputs), np.transpose(test_feat)))\n    f_score = sess.run(model.lstm_xent_loss,\n         feed_dict={\"lstm/pred_feed:0\":s,\n         \"lstm/next_index_feed:0\":image_seqs[1:] + [test_feat.shape[0]-1]})\n    \n    f_score = - np.mean(f_score)\n    \n  if FLAGS.direction != 1:\n    # Backward RNN.\n    outputs = []\n    input_feed = np.reshape(emb_seqs[-1], [1,-1])\n    [lstm_state, lstm_output] = sess.run(\n                fetches=[\"lstm/b_state:0\",\"b_logits/b_logits/BiasAdd:0\"],\n                feed_dict={\"lstm/b_input_feed:0\":input_feed,\n                           \"lstm/b_state_feed:0\":zero_state})\n    outputs.append(lstm_output)\n    for step in range(int(num_images)-1):\n      input_feed = 
np.reshape(emb_seqs[int(num_images)-2-step], [1,-1])\n      [lstm_state, lstm_output] = sess.run(\n                fetches=[\"lstm/b_state:0\",\"b_logits/b_logits/BiasAdd:0\"],\n                feed_dict={\"lstm/b_input_feed:0\":input_feed,\n                           \"lstm/b_state_feed:0\":lstm_state})\n      outputs.append(lstm_output)\n    \n    # Calculate the loss.\n    s = np.squeeze(np.dot(np.asarray(outputs), np.transpose(test_feat)))\n    b_score = sess.run(model.lstm_xent_loss,\n        feed_dict={\"lstm/pred_feed:0\":s,\n        \"lstm/next_index_feed:0\": image_seqs[-2::-1] + [test_feat.shape[0]-1]})\n    b_score = - np.mean(b_score)\n  return [f_score, b_score]\n\n\n  \ndef main(_):\n  # Build the inference graph.\n  g = tf.Graph()\n  with g.as_default():\n    model_config = configuration.ModelConfig()\n    model_config.rnn_type = FLAGS.rnn_type\n    model = polyvore_model.PolyvoreModel(model_config, mode=\"inference\")\n    model.build()\n    saver = tf.train.Saver()\n    \n    # Load pre-computed image features.\n    with open(FLAGS.feature_file, \"rb\") as f:\n      test_data = pkl.load(f)\n    test_ids = test_data.keys()\n    test_feat = np.zeros((len(test_ids) + 1,\n                    len(test_data[test_ids[0]][\"image_rnn_feat\"])))\n    # test_feat has one more zero vector as the representation of END of\n    # RNN prediction.\n    for i, test_id in enumerate(test_ids):\n      # Image feature in the RNN space.\n      test_feat[i] = test_data[test_id][\"image_rnn_feat\"]\n    \n    g.finalize()\n    with tf.Session() as sess:\n      saver.restore(sess, FLAGS.checkpoint_path)\n      all_f_scores = []\n      all_b_scores = []\n      all_scores = []\n      all_labels = []\n      testset = open(FLAGS.label_file).read().splitlines()\n      k = 0\n      for test_outfit in testset:\n        k += 1\n        if k % 100 == 0:\n          print(\"Finish %d outfits.\" % k)\n        image_seqs = []\n        for test_image in test_outfit.split()[1:]:\n        
  image_seqs.append(test_ids.index(test_image))\n          \n        [f_score, b_score] = run_compatibility_inference(sess, image_seqs,\n                      test_feat, model_config.num_lstm_units, model)\n        \n        all_f_scores.append(f_score)\n        all_b_scores.append(b_score)\n        all_scores.append(f_score + b_score)\n        all_labels.append(int(test_outfit[0]))\n        \n      # calculate AUC and AP      \n      fpr, tpr, thresholds = metrics.roc_curve(all_labels,\n                                               all_scores,\n                                               pos_label=1)\n      print(\"Compatibility AUC: %f for %d outfits\" %\n              (metrics.auc(fpr, tpr), len(all_labels)))\n\n      with open(FLAGS.result_file, \"wb\") as f:\n        pkl.dump({\"all_labels\": all_labels, \"all_f_scores\": all_f_scores,\n                  \"all_b_scores\": all_b_scores}, f)\n\n      \nif __name__ == \"__main__\":\n  tf.app.run()\n"
  },
  {
    "path": "polyvore/fill_in_blank.py",
    "content": "# Copyright 2017 Xintong Han. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"Fill in blank evaluation.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport json\n\nimport tensorflow as tf\nimport numpy as np\nimport pickle as pkl\n\nimport configuration\nimport polyvore_model_bi as polyvore_model\n\nFLAGS = tf.flags.FLAGS\n\ntf.flags.DEFINE_string(\"checkpoint_path\", \"\",\n                       \"Model checkpoint file or directory containing a \"\n                       \"model checkpoint file.\")\ntf.flags.DEFINE_string(\"json_file\", \"\",\n                       \"Json file containing questions and answers.\")\ntf.flags.DEFINE_string(\"feature_file\", \"\", \"pkl files containing the features\")\ntf.flags.DEFINE_string(\"rnn_type\", \"lstm\", \"Type of RNN.\")\ntf.flags.DEFINE_string(\"result_file\", \"\", \"File to store the results.\")\ntf.flags.DEFINE_integer(\"direction\", 2, \"2: bidirectional; 1: forward only;\"\n                        \"-1: backward only; 0: Average pooling no RNN.\")\n\ndef run_question_inference(sess, question, test_ids, test_feat,\n                           test_rnn_feat, num_lstm_units):\n  question_ids = []\n  answer_ids = []\n  for q in question[\"question\"]:\n    try:\n      question_ids.append(test_ids.index(q))\n    
except:\n      return [], []\n  \n  for a in question[\"answers\"]:\n    try:\n      answer_ids.append(test_ids.index(a))\n    except:\n      return [], []\n      \n  blank_posi = question[\"blank_position\"]\n  \n  # Average pooling of the VSE embeddings\n  question_emb = np.reshape(np.mean(test_feat[question_ids], 0), [1,-1])\n  q_emb = question_emb / np.linalg.norm(question_emb, axis=1)[:, np.newaxis]\n  a_emb = (test_feat[answer_ids] /\n             np.linalg.norm(test_feat[answer_ids], axis=1)[:, np.newaxis])\n  vse_score = (np.dot(q_emb, np.transpose(a_emb)) + 1) / 2 # scale to [0,1]\n  vse_score = vse_score #/ np.sum(vse_score) # normalize to sum to 1.\n  \n  if FLAGS.direction == 0:\n    # Only use VSE\n    predicted_answer = np.argsort(-vse_score)[0]\n    return vse_score, predicted_answer\n    \n  if FLAGS.rnn_type == \"lstm\":\n    # LSTM has two states.\n    zero_state = np.zeros([1, 2 * num_lstm_units])\n  else:\n    zero_state = np.zeros([1, num_lstm_units])\n  \n  # Blank is the last item.\n  if blank_posi == len(question_ids) + 1:\n    if FLAGS.direction == -1:\n      return [], []\n    # Only do forward rnn\n    input_feed = np.reshape(test_rnn_feat[question_ids[0]], [1,-1])\n    # Run first step with all zeros initial state.\n    [lstm_state, lstm_output] = sess.run(\n          fetches=[\"lstm/f_state:0\",\"f_logits/f_logits/BiasAdd:0\"],\n          feed_dict={\"lstm/f_input_feed:0\":input_feed,\n                     \"lstm/f_state_feed:0\":zero_state})\n    \n    for step in range(len(question_ids)-1):\n      input_feed = np.reshape(test_rnn_feat[question_ids[step + 1]], [1,-1])\n      [lstm_state, lstm_output] = sess.run(\n          fetches=[\"lstm/f_state:0\",\"f_logits/f_logits/BiasAdd:0\"],\n          feed_dict={\"lstm/f_input_feed:0\":input_feed,\n                     \"lstm/f_state_feed:0\":lstm_state})\n                                           \n    # Search in answers\n    rnn_score = np.exp(np.dot(lstm_output,\n                         
     np.transpose(test_rnn_feat[answer_ids])))\n    rnn_score = rnn_score / np.sum(rnn_score)\n      \n  # Blank is the first item.\n  elif blank_posi == 1:\n    if FLAGS.direction == 1:\n      return [], []\n    # only do backward rnn\n    input_feed = np.reshape(test_rnn_feat[question_ids[-1]], [1,-1])\n    # Run first step with all zeros initial state.\n    [lstm_state, lstm_output] = sess.run(\n              fetches=[\"lstm/b_state:0\",\"b_logits/b_logits/BiasAdd:0\"],\n              feed_dict={\"lstm/b_input_feed:0\":input_feed,\n                         \"lstm/b_state_feed:0\":zero_state})\n    \n    for step in range(len(question_ids)-1):\n      input_feed = np.reshape(test_rnn_feat[question_ids[-step-2]], [1,-1])\n      [lstm_state, lstm_output] = sess.run(\n                fetches=[\"lstm/b_state:0\",\"b_logits/b_logits/BiasAdd:0\"],\n                feed_dict={\"lstm/b_input_feed:0\":input_feed,\n                           \"lstm/b_state_feed:0\":lstm_state})\n    rnn_score = np.exp(np.dot(lstm_output,\n                              np.transpose(test_rnn_feat[answer_ids])))\n    rnn_score = rnn_score / np.sum(rnn_score)\n  \n  # Blank is in the middle.\n  else:\n    # Do bidirectional rnn.\n    # Forward:\n    input_feed = np.reshape(test_rnn_feat[question_ids[0]], [1,-1])\n    # Run first step with all zeros initial state.\n    [lstm_state, lstm_output] = sess.run(\n              fetches=[\"lstm/f_state:0\",\"f_logits/f_logits/BiasAdd:0\"],\n              feed_dict={\"lstm/f_input_feed:0\":input_feed,\n                         \"lstm/f_state_feed:0\":zero_state})\n    \n    for step in range(blank_posi - 2):\n      input_feed = np.reshape(test_rnn_feat[question_ids[step+1]], [1,-1])\n      [lstm_state, lstm_output] = sess.run(\n              fetches=[\"lstm/f_state:0\",\"f_logits/f_logits/BiasAdd:0\"],\n              feed_dict={\"lstm/f_input_feed:0\":input_feed,\n                         \"lstm/f_state_feed:0\":lstm_state})\n                              
             \n    # Search in answers.\n    f_softmax = np.exp(np.dot(lstm_output,\n                              np.transpose(test_rnn_feat[answer_ids])))\n    # Backward:\n    input_feed = np.reshape(test_rnn_feat[question_ids[-1]], [1,-1])\n    # Run first step with all zeros initial state.\n    [lstm_state, lstm_output] = sess.run(\n            fetches=[\"lstm/b_state:0\",\"b_logits/b_logits/BiasAdd:0\"],\n            feed_dict={\"lstm/b_input_feed:0\":input_feed,\n                       \"lstm/b_state_feed:0\":zero_state})\n    \n    for step in range(len(question_ids)-blank_posi):\n      input_feed = np.reshape(test_rnn_feat[question_ids[-step-2]], [1,-1])\n      [lstm_state, lstm_output] = sess.run(\n                fetches=[\"lstm/b_state:0\",\"b_logits/b_logits/BiasAdd:0\"],\n                feed_dict={\"lstm/b_input_feed:0\":input_feed,\n                           \"lstm/b_state_feed:0\":lstm_state})\n                                          \n    b_softmax = np.exp(np.dot(lstm_output,\n                              np.transpose(test_rnn_feat[answer_ids])))\n    if FLAGS.direction == 2:\n      rnn_score = (f_softmax / np.sum(f_softmax) +\n                   b_softmax / np.sum(b_softmax))\n      rnn_score /= 2\n    elif FLAGS.direction == 1:\n      rnn_score = f_softmax / np.sum(f_softmax)\n    else:\n      rnn_score = b_softmax / np.sum(b_softmax)\n\n  predicted_answer = np.argsort(-rnn_score)[0]\n  return rnn_score, predicted_answer\n  \n\n  \ndef main(_):\n  # Build the inference graph.\n  top_k = 4 # Print the top_k accuracy.\n  true_pred = np.zeros(top_k)\n  # Load pre-computed image features.\n  with open(FLAGS.feature_file, \"rb\") as f:\n    test_data = pkl.load(f)\n  test_ids = test_data.keys()\n  test_feat = np.zeros((len(test_ids),\n                        len(test_data[test_ids[0]][\"image_feat\"])))\n  test_rnn_feat = np.zeros((len(test_ids),\n                            len(test_data[test_ids[0]][\"image_rnn_feat\"])))\n  for i, test_id in 
enumerate(test_ids):\n    # Image feature in visual-semantic embedding space.\n    test_feat[i] = test_data[test_id][\"image_feat\"]\n    # Image feature in the RNN space.\n    test_rnn_feat[i] = test_data[test_id][\"image_rnn_feat\"]\n\n  g = tf.Graph()\n  with g.as_default():\n    model_config = configuration.ModelConfig()\n    model_config.rnn_type = FLAGS.rnn_type\n    model = polyvore_model.PolyvoreModel(model_config, mode=\"inference\")\n    model.build()\n    saver = tf.train.Saver()\n    \n    g.finalize()\n    with tf.Session() as sess:\n      saver.restore(sess, FLAGS.checkpoint_path)\n      questions = json.load(open(FLAGS.json_file))\n      \n      all_pred = []\n      set_ids = []\n      all_scores = []\n      for question in questions:\n        score, pred = run_question_inference(sess, question, test_ids,\n                                             test_feat, test_rnn_feat,\n                                             model_config.num_lstm_units)\n        if pred != []:\n          all_pred.append(pred)\n          all_scores.append(score)\n          set_ids.append(question[\"question\"][0].split(\"_\")[0])\n          # 0 is the correct answer, iterate over top_k.\n          for i in range(top_k):\n            if 0 in pred[:i+1]:\n              true_pred[i] += 1\n\n      # Print all top-k accuracy.\n      for i in range(top_k):\n        print(\"Top %d Accuracy: \" % (i + 1))\n        print(\"%d correct answers in %d valid questions.\" %\n                  (true_pred[i], len(all_pred)))\n        print(\"Accuracy: %f\" % (true_pred[i] / len(all_pred)))\n        \n      s = np.empty((len(all_scores),), dtype=np.object)\n      for i in range(len(all_scores)):\n          s[i] = all_scores[i]\n\n      with open(FLAGS.result_file, \"wb\") as f:\n        pkl.dump({\"set_ids\": set_ids, \"pred\": all_pred, \"score\": s}, f)\n\nif __name__ == \"__main__\":\n  tf.app.run()\n"
  },
  {
    "path": "polyvore/fill_in_blank_siamese.py",
    "content": "# Copyright 2017 Xintong Han. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"Fill in blank evaluation.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport json\n\nimport tensorflow as tf\nimport numpy as np\nimport pickle as pkl\n\nimport configuration\nimport polyvore_model_siamese as polyvore_model\n\nFLAGS = tf.flags.FLAGS\n\ntf.flags.DEFINE_string(\"checkpoint_path\", \"\",\n                       \"Model checkpoint file or directory containing a \"\n                       \"model checkpoint file.\")\ntf.flags.DEFINE_string(\"json_file\", \"\",\n                       \"Json file containing questions and answers.\")\ntf.flags.DEFINE_string(\"feature_file\", \"\", \"pkl files containing the features\")\ntf.flags.DEFINE_string(\"result_file\", \"\", \"File to store the results.\")\n\ndef run_question_inference(sess, question, test_ids, test_feat):\n  question_ids = []\n  answer_ids = []\n  for q in question[\"question\"]:\n    try:\n      question_ids.append(test_ids.index(q))\n    except:\n      return [], []\n  \n  for a in question[\"answers\"]:\n    try:\n      answer_ids.append(test_ids.index(a))\n    except:\n      return [], []\n      \n  blank_posi = question[\"blank_position\"]\n  \n  # Average pooling of the VSE embeddings\n  question_emb = 
np.reshape(np.mean(test_feat[question_ids], 0), [1,-1])\n  q_emb = question_emb / np.linalg.norm(question_emb, axis=1)[:, np.newaxis]\n  a_emb = (test_feat[answer_ids] /\n             np.linalg.norm(test_feat[answer_ids], axis=1)[:, np.newaxis])\n  score = (np.dot(q_emb, np.transpose(a_emb)) + 1) / 2 # scale to [0,1]\n  \n  predicted_answer = np.argsort(-score)[0]\n  return score, predicted_answer\n  \n\n  \ndef main(_):\n  # Build the inference graph.\n  top_k = 4 # Print the top_k accuracy.\n  true_pred = np.zeros(top_k)\n  # Load pre-computed image features.\n  with open(FLAGS.feature_file, \"rb\") as f:\n    test_data = pkl.load(f)\n  test_ids = test_data.keys()\n  test_feat = np.zeros((len(test_ids),\n                        len(test_data[test_ids[0]][\"image_feat\"])))\n  for i, test_id in enumerate(test_ids):\n    # Image feature in visual-semantic embedding space.\n    test_feat[i] = test_data[test_id][\"image_feat\"]\n\n  g = tf.Graph()\n  with g.as_default():\n    model_config = configuration.ModelConfig()\n    model = polyvore_model.PolyvoreModel(model_config, mode=\"inference\")\n    model.build()\n    saver = tf.train.Saver()\n    \n    g.finalize()\n    with tf.Session() as sess:\n      saver.restore(sess, FLAGS.checkpoint_path)\n      questions = json.load(open(FLAGS.json_file))\n      \n      all_pred = []\n      set_ids = []\n      all_scores = []\n      for question in questions:\n        score, pred = run_question_inference(sess, question, test_ids,\n                                             test_feat)\n        if pred != []:\n          all_pred.append(pred)\n          all_scores.append(score)\n          set_ids.append(question[\"question\"][0].split(\"_\")[0])\n          # 0 is the correct answer, iterate over top_k.\n          for i in range(top_k):\n            if 0 in pred[:i+1]:\n              true_pred[i] += 1\n\n      # Print all top-k accuracy.\n      for i in range(top_k):\n        print(\"Top %d Accuracy: \" % (i + 1))\n        
print(\"%d correct answers in %d valid questions.\" %\n                  (true_pred[i], len(all_pred)))\n        print(\"Accuracy: %f\" % (true_pred[i] / len(all_pred)))\n        \n      s = np.empty((len(all_scores),), dtype=np.object)\n      for i in range(len(all_scores)):\n          s[i] = all_scores[i]\n\n      with open(FLAGS.result_file, \"wb\") as f:\n        pkl.dump({\"set_ids\": set_ids, \"pred\": all_pred, \"score\": s}, f)\n\nif __name__ == \"__main__\":\n  tf.app.run()\n"
  },
  {
    "path": "polyvore/ops/__init__.py",
    "content": ""
  },
  {
    "path": "polyvore/ops/image_embedding.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"Image embedding ops.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n\nimport tensorflow as tf\n\nfrom tensorflow.contrib.slim.python.slim.nets.inception_v3 import inception_v3_base\n\nslim = tf.contrib.slim\n\n\ndef inception_v3(images,\n                 trainable=True,\n                 is_training=True,\n                 weight_decay=0.00004,\n                 stddev=0.1,\n                 dropout_keep_prob=0.8,\n                 use_batch_norm=True,\n                 batch_norm_params=None,\n                 add_summaries=True,\n                 scope=\"InceptionV3\"):\n  \"\"\"Builds an Inception V3 subgraph for image embeddings.\n\n  Args:\n    images: A float32 Tensor of shape [batch, height, width, channels].\n    trainable: Whether the inception submodel should be trainable or not.\n    is_training: Boolean indicating training mode or not.\n    weight_decay: Coefficient for weight regularization.\n    stddev: The standard deviation of the trunctated normal weight initializer.\n    dropout_keep_prob: Dropout keep probability.\n    use_batch_norm: Whether to use batch normalization.\n    batch_norm_params: Parameters for batch normalization. 
See\n      tf.contrib.layers.batch_norm for details.\n    add_summaries: Whether to add activation summaries.\n    scope: Optional Variable scope.\n\n  Returns:\n    net: The flattened output of the final average-pooling and dropout layers,\n      one row per input image.\n  \"\"\"\n  # Only consider the inception model to be in training mode if it's trainable.\n  is_inception_model_training = trainable and is_training\n\n  if use_batch_norm:\n    # Default parameters for batch normalization.\n    if not batch_norm_params:\n      batch_norm_params = {\n          \"is_training\": is_inception_model_training,\n          \"trainable\": trainable,\n          # Decay for the moving averages.\n          \"decay\": 0.9997,\n          # Epsilon to prevent 0s in variance.\n          \"epsilon\": 0.001,\n          # Collection containing the moving mean and moving variance.\n          \"variables_collections\": {\n              \"beta\": None,\n              \"gamma\": None,\n              \"moving_mean\": [\"moving_vars\"],\n              \"moving_variance\": [\"moving_vars\"],\n          }\n      }\n  else:\n    batch_norm_params = None\n\n  if trainable:\n    weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay)\n  else:\n    weights_regularizer = None\n\n  with tf.variable_scope(scope, \"InceptionV3\", [images]) as scope:\n    with slim.arg_scope(\n        [slim.conv2d, slim.fully_connected],\n        weights_regularizer=weights_regularizer,\n        trainable=trainable):\n      with slim.arg_scope(\n          [slim.conv2d],\n          weights_initializer=tf.truncated_normal_initializer(stddev=stddev),\n          activation_fn=tf.nn.relu,\n          normalizer_fn=slim.batch_norm,\n          normalizer_params=batch_norm_params):\n        net, end_points = inception_v3_base(images, scope=scope)\n        with tf.variable_scope(\"logits\"):\n          shape = net.get_shape()\n          net = slim.avg_pool2d(net, shape[1:3], padding=\"VALID\", scope=\"pool\")\n          net = slim.dropout(\n           
   net,\n              keep_prob=dropout_keep_prob,\n              is_training=is_inception_model_training,\n              scope=\"dropout\")\n          net = slim.flatten(net, scope=\"flatten\")\n\n  # Add summaries.\n  if add_summaries:\n    for v in end_points.values():\n      tf.contrib.layers.summaries.summarize_activation(v)\n\n  return net\n"
  },
  {
    "path": "polyvore/ops/image_embedding_test.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"Tests for polyvore.ops.image_embedding.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n\nimport tensorflow as tf\n\nfrom polyvore.ops import image_embedding\n\n\nclass InceptionV3Test(tf.test.TestCase):\n\n  def setUp(self):\n    super(InceptionV3Test, self).setUp()\n\n    batch_size = 4\n    height = 299\n    width = 299\n    num_channels = 3\n    self._images = tf.placeholder(tf.float32,\n                                  [batch_size, height, width, num_channels])\n    self._batch_size = batch_size\n\n  def _countInceptionParameters(self):\n    \"\"\"Counts the number of parameters in the inception model at top scope.\"\"\"\n    counter = {}\n    for v in tf.all_variables():\n      name_tokens = v.op.name.split(\"/\")\n      if name_tokens[0] == \"InceptionV3\":\n        name = \"InceptionV3/\" + name_tokens[1]\n        num_params = v.get_shape().num_elements()\n        assert num_params\n        counter[name] = counter.get(name, 0) + num_params\n    return counter\n\n  def _verifyParameterCounts(self):\n    \"\"\"Verifies the number of parameters in the inception model.\"\"\"\n    param_counts = self._countInceptionParameters()\n    expected_param_counts 
= {\n        \"InceptionV3/Conv2d_1a_3x3\": 960,\n        \"InceptionV3/Conv2d_2a_3x3\": 9312,\n        \"InceptionV3/Conv2d_2b_3x3\": 18624,\n        \"InceptionV3/Conv2d_3b_1x1\": 5360,\n        \"InceptionV3/Conv2d_4a_3x3\": 138816,\n        \"InceptionV3/Mixed_5b\": 256368,\n        \"InceptionV3/Mixed_5c\": 277968,\n        \"InceptionV3/Mixed_5d\": 285648,\n        \"InceptionV3/Mixed_6a\": 1153920,\n        \"InceptionV3/Mixed_6b\": 1298944,\n        \"InceptionV3/Mixed_6c\": 1692736,\n        \"InceptionV3/Mixed_6d\": 1692736,\n        \"InceptionV3/Mixed_6e\": 2143872,\n        \"InceptionV3/Mixed_7a\": 1699584,\n        \"InceptionV3/Mixed_7b\": 5047872,\n        \"InceptionV3/Mixed_7c\": 6080064,\n    }\n    self.assertDictEqual(expected_param_counts, param_counts)\n\n  def _assertCollectionSize(self, expected_size, collection):\n    actual_size = len(tf.get_collection(collection))\n    if expected_size != actual_size:\n      self.fail(\"Found %d items in collection %s (expected %d).\" %\n                (actual_size, collection, expected_size))\n\n  def testTrainableTrueIsTrainingTrue(self):\n    embeddings = image_embedding.inception_v3(\n        self._images, trainable=True, is_training=True)\n    self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list())\n\n    self._verifyParameterCounts()\n    self._assertCollectionSize(376, tf.GraphKeys.VARIABLES)\n    self._assertCollectionSize(188, tf.GraphKeys.TRAINABLE_VARIABLES)\n    self._assertCollectionSize(188, tf.GraphKeys.UPDATE_OPS)\n    self._assertCollectionSize(94, tf.GraphKeys.REGULARIZATION_LOSSES)\n    self._assertCollectionSize(0, tf.GraphKeys.LOSSES)\n    self._assertCollectionSize(23, tf.GraphKeys.SUMMARIES)\n\n  def testTrainableTrueIsTrainingFalse(self):\n    embeddings = image_embedding.inception_v3(\n        self._images, trainable=True, is_training=False)\n    self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list())\n\n    
self._verifyParameterCounts()\n    self._assertCollectionSize(376, tf.GraphKeys.VARIABLES)\n    self._assertCollectionSize(188, tf.GraphKeys.TRAINABLE_VARIABLES)\n    self._assertCollectionSize(0, tf.GraphKeys.UPDATE_OPS)\n    self._assertCollectionSize(94, tf.GraphKeys.REGULARIZATION_LOSSES)\n    self._assertCollectionSize(0, tf.GraphKeys.LOSSES)\n    self._assertCollectionSize(23, tf.GraphKeys.SUMMARIES)\n\n  def testTrainableFalseIsTrainingTrue(self):\n    embeddings = image_embedding.inception_v3(\n        self._images, trainable=False, is_training=True)\n    self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list())\n\n    self._verifyParameterCounts()\n    self._assertCollectionSize(376, tf.GraphKeys.VARIABLES)\n    self._assertCollectionSize(0, tf.GraphKeys.TRAINABLE_VARIABLES)\n    self._assertCollectionSize(0, tf.GraphKeys.UPDATE_OPS)\n    self._assertCollectionSize(0, tf.GraphKeys.REGULARIZATION_LOSSES)\n    self._assertCollectionSize(0, tf.GraphKeys.LOSSES)\n    self._assertCollectionSize(23, tf.GraphKeys.SUMMARIES)\n\n  def testTrainableFalseIsTrainingFalse(self):\n    embeddings = image_embedding.inception_v3(\n        self._images, trainable=False, is_training=False)\n    self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list())\n\n    self._verifyParameterCounts()\n    self._assertCollectionSize(376, tf.GraphKeys.VARIABLES)\n    self._assertCollectionSize(0, tf.GraphKeys.TRAINABLE_VARIABLES)\n    self._assertCollectionSize(0, tf.GraphKeys.UPDATE_OPS)\n    self._assertCollectionSize(0, tf.GraphKeys.REGULARIZATION_LOSSES)\n    self._assertCollectionSize(0, tf.GraphKeys.LOSSES)\n    self._assertCollectionSize(23, tf.GraphKeys.SUMMARIES)\n\n\nif __name__ == \"__main__\":\n  tf.test.main()\n"
  },
  {
    "path": "polyvore/ops/image_processing.py",
    "content": "# Copyright 2017 Xintong Han. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"Helper functions for image preprocessing.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n\nimport tensorflow as tf\n\n\ndef distort_image(image):\n  \"\"\"Perform random distortions on an image.\n\n  Args:\n    image: A float32 Tensor of shape [height, width, 3] with values in [0, 1).\n    \n  Returns:\n    distorted_image: A float32 Tensor of shape [height, width, 3] with values in\n      [0, 1].\n  \"\"\"\n  # Randomly flip horizontally. 
No color distortion.\n  with tf.name_scope(\"flip_horizontal\", values=[image]):\n    image = tf.image.random_flip_left_right(image)\n\n  return image\n\n\ndef process_image(encoded_image,\n                  is_training,\n                  height,\n                  width,\n                  resize_height=299,\n                  resize_width=299,\n                  image_format=\"jpeg\",\n                  image_idx=0):\n  \"\"\"Decode an image, resize and apply random distortions.\n\n  Args:\n    encoded_image: String Tensor containing the image.\n    is_training: Boolean; whether preprocessing for training or eval.\n    height: Height of the output image.\n    width: Width of the output image.\n    resize_height: If > 0, resize height before crop to final dimensions.\n    resize_width: If > 0, resize width before crop to final dimensions.\n    image_format: \"jpeg\" or \"png\".\n    image_idx: image index of the image in an outfit.\n  Returns:\n    A float32 Tensor of shape [height, width, 3] with values in [-1, 1].\n\n  Raises:\n    ValueError: If image_format is invalid.\n  \"\"\"\n  # Helper function to log an image summary to the visualizer. 
Summaries are\n  # logged on every call and tagged with image_idx.\n  def image_summary(name, image):\n    tf.image_summary(name, tf.expand_dims(image, 0))\n\n  # Decode image into a float32 Tensor of shape [?, ?, 3] with values in [0, 1).\n  with tf.name_scope(\"decode\", values=[encoded_image]):\n    if image_format == \"jpeg\":\n      image = tf.image.decode_jpeg(encoded_image, channels=3)\n    elif image_format == \"png\":\n      image = tf.image.decode_png(encoded_image, channels=3)\n    else:\n      raise ValueError(\"Invalid image format: %s\" % image_format)\n  image = tf.image.convert_image_dtype(image, dtype=tf.float32)\n  image_summary(\"original_image/\" + str(image_idx), image)\n\n  # Resize image.\n  assert (resize_height > 0) == (resize_width > 0)\n  if resize_height:\n    image = tf.image.resize_images(image,\n                                   size=[resize_height, resize_width],\n                                   method=tf.image.ResizeMethod.BILINEAR)\n\n  # Crop to final dimensions. In the Polyvore model, no cropping is used\n  # since we set height=resize_height and width=resize_width\n  if is_training:\n    image = tf.random_crop(image, [height, width, 3])\n  else:\n    image = tf.image.resize_image_with_crop_or_pad(image, height, width)\n\n  image_summary(\"resized_image/\" + str(image_idx), image)\n\n  # Randomly distort the image.\n  if is_training:\n    image = distort_image(image)\n\n  image_summary(\"final_image/\" + str(image_idx), image)\n\n  # Rescale to [-1,1] instead of [0, 1]\n  image = tf.sub(image, 0.5)\n  image = tf.mul(image, 2.0)\n  return image\n"
  },
  {
    "path": "polyvore/ops/inputs.py",
    "content": "# Copyright 2017 Xintong Han. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"Input ops.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n\nimport tensorflow as tf\n\n\ndef parse_sequence_example(serialized, set_id, image_feature,\n                           image_index, caption_feature, number_set_images):\n  \"\"\"Parses a tensorflow.SequenceExample into a set of images and caption.\n\n  Args:\n    serialized: A scalar string Tensor; a single serialized SequenceExample.\n    set_id: Name of SequenceExample context feature containing the id of\n      the outfit.\n    image_feature: Prefix of the SequenceExample context features containing\n      image data; image i is read from '<image_feature>/<i>'.\n    image_index: Name of SequenceExample feature list containing the index of\n      the item in the outfit.\n    caption_feature: Name of SequenceExample feature list containing integer\n      captions.\n    number_set_images: Number of images in an outfit.\n  Returns:\n    set_id: Set id of the outfit.\n    encoded_images: A list of number_set_images scalar string Tensors holding\n      the encoded images of the outfit (empty string for a missing image).\n    image_ids: Image ids of the items in the outfit.\n    captions: A 2-D int64 Tensor with dynamically specified length.\n    likes: Number of likes of the outfit. 
Hard coded name,\n      not used in our model.\n  \"\"\"\n  \n  context_features = {}\n  context_features[set_id] = tf.FixedLenFeature([], dtype=tf.string)\n  context_features['likes'] = tf.FixedLenFeature([], dtype=tf.int64,\n                                                 default_value=0)\n  for i in range(number_set_images):\n    context_features[image_feature + '/' + str(i)] = tf.FixedLenFeature([],\n                                                         dtype=tf.string,\n                                                         default_value = '')\n            \n  context, sequence = tf.parse_single_sequence_example(\n      serialized,\n      context_features=context_features,\n      sequence_features={\n          image_index: tf.FixedLenSequenceFeature([], dtype=tf.int64),\n          caption_feature: tf.VarLenFeature(dtype=tf.int64),\n      })\n      \n  set_id = context[set_id]\n  likes = context['likes']\n  \n  encoded_images = []\n  for i in range(number_set_images):\n    encoded_images.append(context[image_feature + '/' + str(i)])\n  \n  captions = sequence[caption_feature]\n  captions = tf.sparse_tensor_to_dense(captions)\n  image_ids = sequence[image_index]\n  \n  return set_id, encoded_images, image_ids, captions, likes\n\n\ndef prefetch_input_data(reader,\n                        file_pattern,\n                        is_training,\n                        batch_size,\n                        values_per_shard,\n                        input_queue_capacity_factor=16,\n                        num_reader_threads=1,\n                        shard_queue_name=\"filename_queue\",\n                        value_queue_name=\"input_queue\"):\n  \"\"\"Prefetches string values from disk into an input queue.\n\n  In training the capacity of the queue is important because a larger queue\n  means better mixing of training examples between shards. 
The minimum number of\n  values kept in the queue is values_per_shard * input_queue_capacity_factor,\n  where input_queue_capacity_factor should be chosen to trade-off better mixing\n  with memory usage.\n\n  Args:\n    reader: Instance of tf.ReaderBase.\n    file_pattern: Comma-separated list of file patterns (e.g.\n        /tmp/train_data-?????-of-00100).\n    is_training: Boolean; whether prefetching for training or eval.\n    batch_size: Model batch size used to determine queue capacity.\n    values_per_shard: Approximate number of values per shard.\n    input_queue_capacity_factor: Minimum number of values to keep in the queue\n      in multiples of values_per_shard. See comments above.\n    num_reader_threads: Number of reader threads to fill the queue.\n    shard_queue_name: Name for the shards filename queue.\n    value_queue_name: Name for the values input queue.\n\n  Returns:\n    A Queue containing prefetched string values.\n  \"\"\"\n  data_files = []\n  for pattern in file_pattern.split(\",\"):\n    data_files.extend(tf.gfile.Glob(pattern))\n  if not data_files:\n    tf.logging.fatal(\"Found no input files matching %s\", file_pattern)\n  else:\n    tf.logging.info(\"Prefetching values from %d files matching %s\",\n                    len(data_files), file_pattern)\n\n  if is_training:\n    filename_queue = tf.train.string_input_producer(\n        data_files, shuffle=True, capacity=16, name=shard_queue_name)\n    min_queue_examples = values_per_shard * input_queue_capacity_factor\n    capacity = min_queue_examples + 100 * batch_size\n    values_queue = tf.RandomShuffleQueue(\n        capacity=capacity,\n        min_after_dequeue=min_queue_examples,\n        dtypes=[tf.string],\n        name=\"random_\" + value_queue_name)\n  else:\n    filename_queue = tf.train.string_input_producer(\n        data_files, shuffle=False, capacity=1, name=shard_queue_name)\n    capacity = values_per_shard + 3 * batch_size\n    values_queue = tf.FIFOQueue(\n        
capacity=capacity, dtypes=[tf.string], name=\"fifo_\" + value_queue_name)\n\n  enqueue_ops = []\n  for _ in range(num_reader_threads):\n    _, value = reader.read(filename_queue)\n    enqueue_ops.append(values_queue.enqueue([value]))\n  tf.train.queue_runner.add_queue_runner(tf.train.queue_runner.QueueRunner(\n      values_queue, enqueue_ops))\n  tf.scalar_summary(\n      \"queue/%s/fraction_of_%d_full\" % (values_queue.name, capacity),\n      tf.cast(values_queue.size(), tf.float32) * (1. / capacity))\n\n  return values_queue\n\n\ndef batch_with_dynamic_pad(images_and_captions,\n                           batch_size,\n                           queue_capacity,\n                           add_summaries=True):\n  \"\"\"Batches input images and captions.\n\n  This function splits the caption into an input sequence and a target sequence,\n  where the target sequence is the input sequence right-shifted by 1. Input and\n  target sequences are batched and padded up to the maximum length of sequences\n  in the batch. 
A mask is created to distinguish real words from padding words.\n  Similar sequence processing is used for images in an outfit.\n  Example:\n    Actual captions in the batch ('-' denotes padded character):\n      [\n        [ 1 2 3 4 5 ],\n        [ 1 2 3 4 - ],\n        [ 1 2 3 - - ],\n      ]\n\n    input_seqs:\n      [\n        [ 1 2 3 4 ],\n        [ 1 2 3 - ],\n        [ 1 2 - - ],\n      ]\n\n    target_seqs:\n      [\n        [ 2 3 4 5 ],\n        [ 2 3 4 - ],\n        [ 2 3 - - ],\n      ]\n\n    mask:\n      [\n        [ 1 1 1 1 ],\n        [ 1 1 1 0 ],\n        [ 1 1 0 0 ],\n      ]\n\n  Args:\n    images_and_captions: A list of image and caption meta data\n    batch_size: Batch size.\n    queue_capacity: Queue capacity.\n    add_summaries: If true, add caption length summaries.\n\n  Returns:\n    Padded image, captions, masks, etc.\n  \"\"\"\n  enqueue_list = []\n  for set_id, images, image_ids, captions, likes in images_and_captions:\n    image_seq_length = tf.shape(image_ids)[0]\n    input_length = tf.sub(image_seq_length, 0) # change 1 to 0\n    \n    cap_indicator = tf.cast(tf.not_equal(captions,\n                                         tf.zeros_like(captions)),\n                            tf.int32)\n    indicator = tf.ones(tf.expand_dims(input_length, 0), dtype=tf.int32)\n    loss_indicator = tf.ones(tf.expand_dims(image_seq_length, 0),\n                             dtype=tf.int32)\n    images = tf.pack(images)\n    \n    enqueue_list.append([set_id, images, indicator, loss_indicator,\n                        image_ids, captions, cap_indicator, likes])\n\n  (set_ids, images, mask, loss_mask, image_ids,\n    captions, cap_mask, likes) = tf.train.batch_join(enqueue_list,\n                                                     batch_size=batch_size,\n                                                     capacity=queue_capacity,\n                                                     dynamic_pad=True,\n                                                     
name=\"batch_and_pad\")\n\n  if add_summaries:\n    lengths = tf.add(tf.reduce_sum(mask, 1), 1)\n    tf.scalar_summary(\"caption_length/batch_min\", tf.reduce_min(lengths))\n    tf.scalar_summary(\"caption_length/batch_max\", tf.reduce_max(lengths))\n    tf.scalar_summary(\"caption_length/batch_mean\", tf.reduce_mean(lengths))\n\n  return (set_ids, images, image_ids, mask, loss_mask, captions, cap_mask, likes)\n"
  },
  {
    "path": "polyvore/polyvore_model_bi.py",
    "content": "# Copyright 2017 Xintong Han. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"\nPolyvore model used in ACM MM\"17 paper\n\"Learning Fashion Compatibility with Bidirectional LSTMs\"\nLink: https://arxiv.org/pdf/1707.05691.pdf\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np                                  \nimport tensorflow as tf\n\nfrom ops import image_embedding\nfrom ops import image_processing\nfrom ops import inputs as input_ops\n\nclass PolyvoreModel(object):\n  \"\"\" Model for fashion set on Polyvore dataset.\n  \"\"\"\n\n  def __init__(self, config, mode, train_inception=False):\n    \"\"\"Basic setup.\n\n    Args:\n      config: Object containing configuration parameters.\n      mode: \"train\", \"eval\" or \"inference\".\n      train_inception: Whether the inception submodel variables are trainable.\n    \"\"\"\n    assert mode in [\"train\", \"eval\", \"inference\"]\n    self.config = config\n    self.mode = mode\n    self.train_inception = train_inception\n\n    # Reader for the input data.\n    self.reader = tf.TFRecordReader()\n\n    # To match the \"Show and Tell\" paper we initialize all variables with a\n    # random uniform initializer.\n    self.initializer = tf.random_uniform_initializer(\n        
minval=-self.config.initializer_scale,\n        maxval=self.config.initializer_scale)\n\n    # A float32 Tensor with shape\n    # [batch_size, num_images, height, width, channels].\n    # num_images is the number of images in one outfit, default is 8.\n    self.images = None\n\n    # Forward RNN input and target sequences.\n    # An int32 Tensor with shape [batch_size, padded_length].\n    self.f_input_seqs = None\n    # An int32 Tensor with shape [batch_size, padded_length].\n    self.f_target_seqs = None\n    \n    # Backward RNN input and target sequences.\n    # An int32 Tensor with shape [batch_size, padded_length].\n    self.b_input_seqs = None\n    # An int32 Tensor with shape [batch_size, padded_length].\n    self.b_target_seqs = None\n    \n    # An int32 0/1 Tensor with shape [batch_size, padded_length].\n    self.input_mask = None\n  \n    # Image caption sequence and masks.\n    # An int32 Tensor with shape [batch_size, num_images, padded_length].\n    self.cap_seqs = None\n    # An int32 0/1 Tensor with shape [batch_size, padded_length].\n    self.cap_mask = None\n\n    # Caption sequence embeddings, we use simple bag of word model.\n    # A float32 Tensor with shape [batch_size, num_images, embedding_size].\n    self.seq_embeddings = None\n\n    # Image embeddings in the joint visual-semantic space\n    # A float32 Tensor with shape [batch_size, num_images, embedding_size].\n    self.image_embeddings = None\n\n    # Image embeddings in the RNN output/prediction space.\n    self.rnn_image_embeddings = None\n    \n    # Word embedding map.\n    self.embedding_map = None\n\n    # A float32 scalar Tensor; the total loss for the trainer to optimize.\n    self.total_loss = None\n\n    # Forward and backward RNN loss.\n    # A float32 Tensor with shape [batch_size * padded_length].\n    self.forward_losses = None\n    # A float32 Tensor with shape [batch_size * padded_length].\n    self.backward_losses = None\n    # RNN loss, forward + backward.\n    
self.lstm_losses = None\n    \n    # Loss mask for lstm loss.\n    self.loss_mask = None\n\n    # Visual Semantic Embedding loss.\n    # A float32 Tensor with shape [batch_size * padded_length].\n    self.emb_losses = None\n    \n    # A float32 Tensor with shape [batch_size * padded_length].\n    self.target_weights = None\n\n    # Collection of variables from the inception submodel.\n    self.inception_variables = []\n\n    # Function to restore the inception submodel from checkpoint.\n    self.init_fn = None\n\n    # Global step Tensor.\n    self.global_step = None\n    \n    # Some output for debugging purposes .\n    self.target_embeddings = None\n    self.input_embeddings = None\n    self.set_ids = None\n    self.f_lstm_state = None\n    self.b_lstm_state = None\n    self.lstm_output = None\n    self.lstm_xent_loss = None\n\n\n  def is_training(self):\n    \"\"\"Returns true if the model is built for training mode.\"\"\"\n    return self.mode == \"train\"\n\n  def process_image(self, encoded_image, thread_id=0, image_idx=0):\n    \"\"\"Decodes and processes an image string.\n\n    Args:\n      encoded_image: A scalar string Tensor; the encoded image.\n      thread_id: Preprocessing thread id used to select the ordering of color\n        distortions. Not used in our model.\n      image_idx: Index of the image in an outfit. 
Only used for summaries.\n    Returns:\n      A float32 Tensor of shape [height, width, 3]; the processed image.\n    \"\"\"\n    return image_processing.process_image(encoded_image,\n                                          is_training=self.is_training(),\n                                          height=self.config.image_height,\n                                          width=self.config.image_width,\n                                          image_format=self.config.image_format,\n                                          image_idx=image_idx)\n\n  def build_inputs(self):\n    \"\"\"Input prefetching, preprocessing and batching.\n\n    Outputs:\n      Inputs of the model.\n    \"\"\"\n    if self.mode == \"inference\":\n      # In inference mode, images and inputs are fed via placeholders.\n      image_feed = tf.placeholder(dtype=tf.string, shape=[], name=\"image_feed\")\n      # Process image and insert batch dimensions.\n      image_feed = self.process_image(image_feed)\n      \n      input_feed = tf.placeholder(dtype=tf.int64,\n                                  shape=[None],  # batch_size\n                                  name=\"input_feed\")\n\n      # Process image and insert batch dimensions.\n      image_seqs = tf.expand_dims(image_feed, 0)\n      cap_seqs = tf.expand_dims(input_feed, 1)\n\n      # No target sequences or input mask in inference mode.\n      input_mask = tf.placeholder(dtype=tf.int64,\n                                  shape=[1, 8],  # batch_size\n                                  name=\"input_mask\")\n      cap_mask = None\n      loss_mask = None\n      set_ids = None\n      \n    else:\n      # Prefetch serialized SequenceExample protos.\n      input_queue = input_ops.prefetch_input_data(\n          self.reader,\n          self.config.input_file_pattern,\n          is_training=self.is_training(),\n          batch_size=self.config.batch_size,\n          values_per_shard=self.config.values_per_input_shard,\n          
input_queue_capacity_factor=self.config.input_queue_capacity_factor,\n          num_reader_threads=self.config.num_input_reader_threads)\n\n      # Image processing and random distortion. Split across multiple threads\n      # with each thread applying a slightly different distortion. But we only\n      # use one thread in our Polyvore model. likes are not used.\n      images_and_captions = []\n      for thread_id in range(self.config.num_preprocess_threads):\n        serialized_sequence_example = input_queue.dequeue()\n        set_id, encoded_images, image_ids, captions, likes = (\n            input_ops.parse_sequence_example(\n            serialized_sequence_example,\n            set_id =self.config.set_id_name,\n            image_feature=self.config.image_feature_name,\n            image_index=self.config.image_index_name,\n            caption_feature=self.config.caption_feature_name,\n            number_set_images=self.config.number_set_images))\n        \n        images = []\n        for i in range(self.config.number_set_images):\n          images.append(self.process_image(encoded_images[i],image_idx=i))\n        \n        images_and_captions.append([set_id, images, image_ids, captions, likes])\n\n      # Batch inputs.\n      queue_capacity = (5 * self.config.num_preprocess_threads *\n                        self.config.batch_size)\n\n      (set_ids, image_seqs, image_ids, input_mask,\n       loss_mask, cap_seqs, cap_mask, likes) = (\n       input_ops.batch_with_dynamic_pad(images_and_captions,\n                                           batch_size=self.config.batch_size,\n                                           queue_capacity=queue_capacity))\n    \n    self.images = image_seqs\n    self.input_mask = input_mask\n    self.loss_mask = loss_mask\n    self.cap_seqs = cap_seqs\n    self.cap_mask = cap_mask\n    self.set_ids = set_ids\n\n  def build_image_embeddings(self):\n    \"\"\"Builds the image model subgraph and generates image embeddings\n      in visual 
semantic joint space and RNN prediction space.\n\n    Inputs:\n      self.images\n\n    Outputs:\n      self.image_embeddings\n      self.rnn_image_embeddings\n    \"\"\"\n    \n    # Reshape 5D image tensor.\n    images = tf.reshape(self.images, [-1,\n                                 self.config.image_height,\n                                 self.config.image_height,\n                                 3])\n    \n    inception_output = image_embedding.inception_v3(\n        images,\n        trainable=self.train_inception,\n        is_training=self.is_training())\n    self.inception_variables = tf.get_collection(\n        tf.GraphKeys.VARIABLES, scope=\"InceptionV3\")\n    \n    # Map inception output into embedding space.\n    with tf.variable_scope(\"image_embedding\") as scope:\n      image_embeddings = tf.contrib.layers.fully_connected(\n          inputs=inception_output,\n          num_outputs=self.config.embedding_size,\n          activation_fn=None,\n          weights_initializer=self.initializer,\n          biases_initializer=None,\n          scope=scope)\n    \n    with tf.variable_scope(\"rnn_image_embedding\") as scope:\n      rnn_image_embeddings = tf.contrib.layers.fully_connected(\n          inputs=inception_output,\n          num_outputs=self.config.embedding_size,\n          activation_fn=None,\n          weights_initializer=self.initializer,\n          biases_initializer=None,\n          scope=scope)\n\n    # Save the embedding size in the graph.\n    tf.constant(self.config.embedding_size, name=\"embedding_size\")\n    self.image_embeddings = tf.reshape(image_embeddings,\n                                       [tf.shape(self.images)[0],\n                                        -1,\n                                        self.config.embedding_size])\n\n    self.rnn_image_embeddings = tf.reshape(rnn_image_embeddings,\n                                           [tf.shape(self.images)[0],\n                                            -1,\n              
                              self.config.embedding_size])\n\n  def build_seq_embeddings(self):\n    \"\"\"Builds the input sequence embeddings.\n\n    Inputs:\n      self.input_seqs\n\n    Outputs:\n      self.seq_embeddings\n      self.embedding_map\n    \"\"\"\n    with tf.variable_scope(\"seq_embedding\"), tf.device(\"/cpu:0\"):\n      embedding_map = tf.get_variable(\n          name=\"map\",\n          shape=[self.config.vocab_size, self.config.embedding_size],\n          initializer=self.initializer)\n      seq_embeddings = tf.nn.embedding_lookup(embedding_map, self.cap_seqs)\n      \n      # Average pooling the seq_embeddings (bag of words). \n      if self.mode != \"inference\":\n        seq_embeddings = tf.batch_matmul(\n                                tf.cast(tf.expand_dims(self.cap_mask, 2),\n                                        tf.float32),\n                                seq_embeddings)\n        seq_embeddings = tf.squeeze(seq_embeddings, [2])\n    \n    self.embedding_map = embedding_map\n    self.seq_embeddings = seq_embeddings\n\n  def build_model(self):\n    \"\"\"Builds the model.\n      The original code is written with Tensorflow r0.10\n      for Tensorflow > r1.0, many functions can be simplified.\n      For example Tensors support slicing now, so no need to use tf.slice()\n    \"\"\"\n    norm_image_embeddings = tf.nn.l2_normalize(self.image_embeddings, 2,\n                                               name=\"norm_image_embeddings\")\n    norm_seq_embeddings = tf.nn.l2_normalize(self.seq_embeddings, 2)\n    \n    norm_seq_embeddings = (\n        tf.pad(norm_seq_embeddings, [[0, 0],\n               [0, self.config.number_set_images - tf.shape(norm_seq_embeddings)[1]],\n               [0, 0]], name=\"norm_seq_embeddings\"))\n    \n    if self.mode == \"inference\":\n      pass\n    else:\n      # Compute losses for joint embedding.\n      # Only look at the captions that have length >= 2.\n      emb_loss_mask = 
tf.greater(tf.reduce_sum(self.cap_mask, 2), 1)\n      # Image mask is padded it to max length.\n      emb_loss_mask = tf.pad(emb_loss_mask,\n          [[0,0],\n           [0, self.config.number_set_images - tf.shape(emb_loss_mask)[1]]])\n      \n      # Select the valid image-caption pair.\n      emb_loss_mask = tf.reshape(emb_loss_mask, [-1])\n      norm_image_embeddings = tf.reshape(norm_image_embeddings,\n          [self.config.number_set_images * self.config.batch_size,\n           self.config.embedding_size])\n      norm_image_embeddings = tf.boolean_mask(norm_image_embeddings,\n                                                emb_loss_mask)\n      norm_seq_embeddings = tf.reshape(norm_seq_embeddings,\n                [self.config.number_set_images * self.config.batch_size,\n                 self.config.embedding_size])\n\n      norm_seq_embeddings = tf.boolean_mask(norm_seq_embeddings, emb_loss_mask)\n\n      # The following defines contrastive loss in the joint space.   \n      # Reference: https://github.com/ryankiros/visual-semantic-embedding/blob/master/model.py#L39\n      scores = tf.matmul(norm_seq_embeddings, norm_image_embeddings,\n                         transpose_a=False, transpose_b=True, name=\"scores\")\n      \n      diagonal = tf.expand_dims(tf.diag_part(scores), 1)\n      cost_s = tf.maximum(0.0, self.config.emb_margin - diagonal + scores)\n      cost_im = tf.maximum(0.0,\n          self.config.emb_margin - tf.transpose(diagonal) + scores)\n      cost_s = cost_s - tf.diag(tf.diag_part(cost_s))\n      cost_im = cost_im - tf.diag(tf.diag_part(cost_im))\n      \n      emb_batch_loss = tf.reduce_sum(cost_s) + tf.reduce_sum(cost_im)\n      emb_batch_loss = (emb_batch_loss /\n              tf.cast(tf.shape(norm_seq_embeddings)[0], tf.float32) ** 2)\n\n      if self.config.emb_loss_factor > 0.0:\n        tf.contrib.losses.add_loss(emb_batch_loss * self.config.emb_loss_factor)\n      \n    # Compute image LSTM loss.\n    # Start with one direction.\n  
  tf.logging.info(\"Rnn_type: %s\" % self.config.rnn_type)\n    if self.config.rnn_type == \"lstm\":\n      tf.logging.info(\"----- RNN Type: LSTM ------\")\n      # Forward LSTM.\n      f_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(\n        num_units=self.config.num_lstm_units, state_is_tuple=True)\n      # Backward LSTM.\n      b_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(\n        num_units=self.config.num_lstm_units, state_is_tuple=True)\n    elif self.config.rnn_type == \"gru\":\n      tf.logging.info(\"----- RNN Type: GRU ------\")\n      # Forward GRU.\n      f_lstm_cell = tf.nn.rnn_cell.GRUCell(num_units=self.config.num_lstm_units)\n      # Backward GRU.\n      b_lstm_cell = tf.nn.rnn_cell.GRUCell(num_units=self.config.num_lstm_units)\n    else:\n      tf.logging.info(\"----- RNN Type: RNN ------\")\n      # Forward RNN.\n      f_lstm_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=self.config.num_lstm_units)\n      # Backward RNN.\n      b_lstm_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=self.config.num_lstm_units)\n   \n    if self.mode == \"train\":\n      f_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(\n          f_lstm_cell,\n          input_keep_prob=self.config.lstm_dropout_keep_prob,\n          output_keep_prob=self.config.lstm_dropout_keep_prob)\n      b_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(\n          b_lstm_cell,\n          input_keep_prob=self.config.lstm_dropout_keep_prob,\n          output_keep_prob=self.config.lstm_dropout_keep_prob)\n\n    with tf.variable_scope(\"lstm\", initializer=self.initializer) as lstm_scope:\n      if self.mode == \"inference\":\n        # Inference for Bi-LSTM.\n        pred_feed = tf.placeholder(dtype=tf.float32,\n                                   shape=[None, None],\n                                   name=\"pred_feed\")\n        next_index_feed = tf.placeholder(dtype=tf.int64,\n                                   shape=[None],\n                                   name=\"next_index_feed\")\n        \n        
self.lstm_xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(\n                                    logits=pred_feed,\n                                    labels=next_index_feed,\n                                    name=\"lstm_xent\")\n\n                    \n        if self.config.rnn_type == \"lstm\":\n          # In inference mode, use concatenated states for convenient feeding\n          # and fetching.\n          # Forward\n          # Placeholder for feeding a batch of concatenated states.\n          f_state_feed = tf.placeholder(dtype=tf.float32,\n                                    shape=[None, sum(f_lstm_cell.state_size)],\n                                    name=\"f_state_feed\")\n          f_input_feed = tf.placeholder(dtype=tf.float32,\n                                    shape=[None, self.config.embedding_size],\n                                    name=\"f_input_feed\")\n          # Backward:\n          # Placeholder for feeding a batch of concatenated states.\n          b_state_feed = tf.placeholder(dtype=tf.float32,\n                                    shape=[None, sum(b_lstm_cell.state_size)],\n                                    name=\"b_state_feed\")\n          b_input_feed = tf.placeholder(dtype=tf.float32,\n                                    shape=[None, self.config.embedding_size],\n                                    name=\"b_input_feed\")\n                                        \n          f_state_tuple = tf.split(1, 2, f_state_feed)\n          # Run a single LSTM step.\n          with tf.variable_scope(\"FW\"):\n            f_lstm_outputs, f_state_tuple = f_lstm_cell(\n                                              inputs=f_input_feed,\n                                              state=f_state_tuple)\n          # Concatentate the resulting state.\n          self.f_lstm_state = tf.concat(1, f_state_tuple, name=\"f_state\")\n\n          b_state_tuple = tf.split(1, 2, b_state_feed)\n\n          # Run a single LSTM step.\n          
with tf.variable_scope(\"BW\"):\n            b_lstm_outputs, b_state_tuple = b_lstm_cell(\n                                              inputs=b_input_feed,\n                                              state=b_state_tuple)\n          # Concatentate the resulting state.\n          self.b_lstm_state = tf.concat(1, b_state_tuple, name=\"b_state\")\n          \n        else:\n          # For non-LSTM RNN models, no tuple is used.\n          # Forward\n          # Placeholder for feeding a batch of concatenated states.\n          f_state_feed = tf.placeholder(dtype=tf.float32,\n                                    shape=[None, f_lstm_cell.state_size],\n                                    name=\"f_state_feed\")\n          f_input_feed = tf.placeholder(dtype=tf.float32,\n                                    shape=[None, self.config.embedding_size],\n                                    name=\"f_input_feed\")\n          # Backward:\n          # Placeholder for feeding a batch of concatenated states.\n          b_state_feed = tf.placeholder(dtype=tf.float32,\n                                    shape=[None, b_lstm_cell.state_size],\n                                    name=\"b_state_feed\")\n          b_input_feed = tf.placeholder(dtype=tf.float32,\n                                    shape=[None, self.config.embedding_size],\n                                    name=\"b_input_feed\")\n          # Run a single RNN step.\n          with tf.variable_scope(\"FW\"):\n            f_lstm_outputs, f_state_tuple = f_lstm_cell(\n                                              inputs=f_input_feed,\n                                              state=f_state_feed)\n          f_state_tuple = tf.identity(f_state_tuple, name=\"f_state\")\n            \n          with tf.variable_scope(\"BW\"):\n            b_lstm_outputs, b_state_tuple = b_lstm_cell(\n                                              inputs=b_input_feed,\n                                              state=b_state_feed)\n      
    b_state_tuple = tf.identity(b_state_tuple, name=\"b_state\")\n            \n        lstm_outputs = (f_lstm_outputs, b_lstm_outputs)\n        sequence_length = None\n      else:\n        # Run the batch of sequence embeddings through the LSTM.\n        sequence_length = tf.reduce_sum(self.input_mask, 1)\n        lstm_outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw=f_lstm_cell,\n                                            cell_bw=b_lstm_cell,\n                                            inputs=self.rnn_image_embeddings,\n                                            initial_state_fw=None,\n                                            initial_state_bw=None,\n                                            sequence_length=sequence_length,\n                                            dtype=tf.float32,\n                                            scope=lstm_scope)\n\n    # Stack batches vertically.\n    f_lstm_outputs = tf.reshape(lstm_outputs[0], [-1, f_lstm_cell.output_size])\n    if self.mode == \"inference\":\n      b_lstm_outputs = lstm_outputs[1]\n    else:\n      b_lstm_outputs = tf.reverse_sequence(lstm_outputs[1],\n                                           seq_lengths=sequence_length,\n                                           seq_dim=1,\n                                           batch_dim=0)\n    \n    b_lstm_outputs = tf.reshape(b_lstm_outputs, [-1, b_lstm_cell.output_size])\n    with tf.variable_scope(\"f_logits\") as logits_scope:\n      f_input_embeddings = tf.contrib.layers.fully_connected(\n          inputs=f_lstm_outputs,\n          num_outputs=self.config.embedding_size,\n          activation_fn=None,\n          weights_initializer=self.initializer,\n          scope=logits_scope)\n         \n    with tf.variable_scope(\"b_logits\") as logits_scope:\n      b_input_embeddings = tf.contrib.layers.fully_connected(\n          inputs=b_lstm_outputs,\n          num_outputs=self.config.embedding_size,\n          activation_fn=None,\n          
weights_initializer=self.initializer,\n          scope=logits_scope)\n    \n    if self.mode == \"inference\":\n      pass\n    else:\n      # Padding input_mask to match dimension.\n      input_mask = tf.pad(self.input_mask,\n        [[0,0],\n        [0, self.config.number_set_images + 1 - tf.shape(self.input_mask)[1]]])\n      input_mask = tf.to_float(\n          tf.reshape(tf.slice(input_mask, [0,1], [-1, -1]), [-1,1]))\n      loss_mask = tf.pad(self.loss_mask,\n        [[0,0],\n         [0, self.config.number_set_images - tf.shape(self.loss_mask)[1]]])\n      loss_mask = tf.reshape(tf.to_float(loss_mask),\n                    [self.config.number_set_images * self.config.batch_size,1])\n      \n      # Forward rnn.\n      f_target_embeddings = tf.slice(tf.pad(self.rnn_image_embeddings,\n              [[0,0], [0,1], [0,0]]), [0,1,0], [-1,-1,-1])\n      f_target_embeddings = tf.reshape(f_target_embeddings,\n              [self.config.number_set_images * self.config.batch_size,\n               self.config.embedding_size])\n      f_target_embeddings = tf.mul(f_target_embeddings,\n                                        input_mask,\n                                        name=\"target_embeddings\")\n      \n      # Softmax loss over all items in this minibatch.\n      loss_mask = tf.squeeze(loss_mask)\n      f_input_embeddings = tf.boolean_mask(f_input_embeddings,\n                                           tf.cast(loss_mask, tf.bool))\n      f_target_embeddings = tf.boolean_mask(f_target_embeddings,\n                                            tf.cast(loss_mask, tf.bool))\n      \n      f_lstm_scores = tf.matmul(f_input_embeddings,\n                                f_target_embeddings,\n                                transpose_a=False,\n                                transpose_b=True)\n      f_lstm_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(\n                        logits=f_lstm_scores,\n                        
labels=tf.range(tf.shape(f_lstm_scores)[0]))\n      f_lstm_loss = tf.div(tf.reduce_sum(f_lstm_loss),\n                           tf.reduce_sum(loss_mask),\n                           name=\"f_lstm_loss\")\n      \n      # Backward rnn.\n      # It would be better to put write a function to calcute lstm_loss from\n      # loss_mask, inputs, and targets, so the code can be reused, for now\n      # just copy and paste the forward to get the backward loss.  \n      reverse_embeddings = tf.reverse_sequence(self.rnn_image_embeddings,\n                                               seq_lengths=sequence_length,\n                                               seq_dim=1,\n                                               batch_dim=0)\n      b_target_embeddings = tf.slice(tf.pad(reverse_embeddings,\n                                            [[0,0], [0,1], [0,0]]),\n                                     [0,1,0], [-1,-1,-1])\n      b_target_embeddings = tf.reshape(b_target_embeddings,\n            [self.config.number_set_images * self.config.batch_size,\n             self.config.embedding_size])\n      b_target_embeddings = tf.mul(b_target_embeddings,\n                                        input_mask,\n                                        name=\"target_embeddings\")\n      \n      # Softmax loss over all items in this minibatch\n      b_input_embeddings = tf.boolean_mask(b_input_embeddings,\n                                           tf.cast(loss_mask, tf.bool))\n      b_target_embeddings = tf.boolean_mask(b_target_embeddings,\n                                            tf.cast(loss_mask, tf.bool))\n      \n      b_lstm_scores = tf.matmul(b_input_embeddings,\n                                b_target_embeddings,\n                                transpose_a=False,\n                                transpose_b=True)\n      b_lstm_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(\n              logits=b_lstm_scores,\n              
labels=tf.range(tf.shape(b_lstm_scores)[0]))\n      b_lstm_loss = tf.div(tf.reduce_sum(b_lstm_loss),\n                           tf.reduce_sum(loss_mask),\n                           name=\"b_lstm_loss\")\n      \n      if self.config.f_rnn_loss_factor > 0:\n        tf.contrib.losses.add_loss(f_lstm_loss * self.config.f_rnn_loss_factor)\n      if self.config.b_rnn_loss_factor > 0:\n        tf.contrib.losses.add_loss(b_lstm_loss * self.config.b_rnn_loss_factor)\n     \n      # Merge all losses and stats.\n      total_loss = tf.contrib.losses.get_total_loss()\n      \n      # Add summaries.\n      tf.scalar_summary(\"emb_batch_loss\", emb_batch_loss)\n      tf.scalar_summary(\"f_lstm_loss\", f_lstm_loss)\n      tf.scalar_summary(\"b_lstm_loss\", b_lstm_loss)\n      tf.scalar_summary(\"lstm_loss\",\n            (f_lstm_loss * self.config.f_rnn_loss_factor +\n             b_lstm_loss * self.config.b_rnn_loss_factor))\n      tf.scalar_summary(\"total_loss\", total_loss)\n      for var in tf.trainable_variables():\n        tf.histogram_summary(var.op.name, var)\n      \n      weights = tf.to_float(tf.reshape(emb_loss_mask, [-1]))\n    \n      self.loss_mask = loss_mask\n      self.input_mask = input_mask\n      self.target_embeddings = (f_target_embeddings, b_target_embeddings)\n      self.input_embeddings = (f_input_embeddings, b_input_embeddings)\n      self.total_loss = total_loss\n      self.emb_losses = emb_batch_loss  # Used in evaluation.\n      self.lstm_losses = (f_lstm_loss * self.config.f_rnn_loss_factor +\n             b_lstm_loss * self.config.b_rnn_loss_factor) # Used in evaluation.\n      self.target_weights = weights  # Used in evaluation.\n      \n  def setup_inception_initializer(self):\n    \"\"\"Sets up the function to restore inception variables from checkpoint.\"\"\"\n    if self.mode != \"inference\":\n      # Restore inception variables only.\n      saver = tf.train.Saver(self.inception_variables)\n\n      def restore_fn(sess):\n        
tf.logging.info(\"Restoring Inception variables from checkpoint %s\" %\n                        self.config.inception_checkpoint_file)\n        saver.restore(sess, self.config.inception_checkpoint_file)\n\n      self.init_fn = restore_fn\n\n  def setup_global_step(self):\n    \"\"\"Sets up the global step Tensor.\"\"\"\n    global_step = tf.Variable(\n        initial_value=0,\n        name=\"global_step\",\n        trainable=False,\n        collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.VARIABLES])\n\n    self.global_step = global_step\n\n  def build(self):\n    \"\"\"Creates all ops for training and evaluation.\"\"\"\n    self.build_inputs()\n    self.build_image_embeddings()\n    self.build_seq_embeddings()\n    self.build_model()\n    self.setup_inception_initializer()\n    self.setup_global_step()\n"
  },
  {
    "path": "polyvore/polyvore_model_siamese.py",
    "content": "# Copyright 2017 Xintong Han. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"Siamese Network for compatibility modeling/\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np                                  \nimport tensorflow as tf\nimport scipy.io as sio\nfrom scipy.linalg import block_diag\n\nfrom ops import image_embedding\nfrom ops import image_processing\nfrom ops import inputs as input_ops\n\nclass PolyvoreModel(object):\n  \"\"\" Model for fashion set on Polyvore dataset\n  \"\"\"\n\n  def __init__(self, config, mode, train_inception=False):\n    \"\"\"Basic setup.\n\n    Args:\n      config: Object containing configuration parameters.\n      mode: \"train\", \"eval\" or \"inference\".\n      train_inception: Whether the inception submodel variables are trainable.\n    \"\"\"\n    assert mode in [\"train\", \"eval\", \"inference\"]\n    self.config = config\n    self.mode = mode\n    self.train_inception = train_inception\n\n    # Reader for the input data.\n    self.reader = tf.TFRecordReader()\n\n    # To match the \"Show and Tell\" paper we initialize all variables with a\n    # random uniform initializer.\n    self.initializer = tf.random_uniform_initializer(\n        minval=-self.config.initializer_scale,\n        
maxval=self.config.initializer_scale)\n\n    # A float32 Tensor with shape [batch_size, num_images, height, width, channels].\n    self.images = None\n\n    # An int32 0/1 Tensor with shape [batch_size, padded_length].\n    self.input_mask = None\n  \n    # A float32 Tensor with shape [batch_size, num_images, embedding_size].\n    self.image_embeddings = None\n    \n    # A float32 scalar Tensor; the total loss for the trainer to optimize.\n    self.total_loss = None\n\n    # Collection of variables from the inception submodel.\n    self.inception_variables = []\n\n    # Function to restore the inception submodel from checkpoint.\n    self.init_fn = None\n\n    # Global step Tensor.\n    self.global_step = None\n    \n  def is_training(self):\n    \"\"\"Returns true if the model is built for training mode.\"\"\"\n    return self.mode == \"train\"\n\n  def process_image(self, encoded_image, thread_id=0, image_idx=0):\n    \"\"\"Decodes and processes an image string.\n\n    Args:\n      encoded_image: A scalar string Tensor; the encoded image.\n      thread_id: Preprocessing thread id used to select the ordering of color\n        distortions.\n\n    Returns:\n      A float32 Tensor of shape [height, width, 3]; the processed image.\n    \"\"\"\n    return image_processing.process_image(encoded_image,\n                                          is_training=self.is_training(),\n                                          height=self.config.image_height,\n                                          width=self.config.image_width,\n                                          image_format=self.config.image_format,\n                                          image_idx=image_idx)\n\n  def build_inputs(self):\n    \"\"\"Input prefetching, preprocessing and batching.\n\n    Outputs:\n      images and seqs\n    \"\"\"\n    if self.mode == \"inference\":\n      # In inference mode, images and inputs are fed via placeholders.\n      \n      image_feed = tf.placeholder(dtype=tf.string, 
shape=[], name=\"image_feed\")\n      # Process image and insert batch dimensions.\n      image_feed = self.process_image(image_feed)\n\n      # Process image and insert batch dimensions.\n      image_seqs = tf.expand_dims(image_feed, 0)\n\n      # No target sequences or input mask in inference mode.\n      input_mask = tf.placeholder(dtype=tf.int64,\n                                  shape=[1,8],  # batch_size\n                                  name=\"input_mask\")\n    else:\n      # Prefetch serialized SequenceExample protos.\n      input_queue = input_ops.prefetch_input_data(\n          self.reader,\n          self.config.input_file_pattern,\n          is_training=self.is_training(),\n          batch_size=self.config.batch_size,\n          values_per_shard=self.config.values_per_input_shard,\n          input_queue_capacity_factor=self.config.input_queue_capacity_factor,\n          num_reader_threads=self.config.num_input_reader_threads)\n\n      # Image processing and random distortion. 
Split across multiple threads\n      # with each thread applying a slightly different distortion.\n      # assert self.config.num_preprocess_threads % 2 == 0\n      images_and_captions = []\n      for thread_id in range(self.config.num_preprocess_threads):\n        serialized_sequence_example = input_queue.dequeue()\n        set_id, encoded_images, image_ids, captions, likes = (\n            input_ops.parse_sequence_example(\n            serialized_sequence_example,\n            set_id =self.config.set_id_name,\n            image_feature=self.config.image_feature_name,\n            image_index=self.config.image_index_name,\n            caption_feature=self.config.caption_feature_name,\n            number_set_images=self.config.number_set_images))\n        \n        images = []\n        for i in range(self.config.number_set_images):\n          images.append(self.process_image(encoded_images[i],image_idx=i))\n        \n        images_and_captions.append([set_id, images, image_ids, captions, likes])\n\n      # Batch inputs.\n      queue_capacity = (5 * self.config.num_preprocess_threads *\n                        self.config.batch_size)\n      #(set_ids, image_seqs, image_ids, f_input_seqs, f_target_seqs,\n      # b_input_seqs, b_target_seqs, input_mask, cap_seqs, cap_mask) = (\n      (set_ids, image_seqs, image_ids, input_mask,\n       loss_mask, cap_seqs, cap_mask, likes) = (\n       input_ops.batch_with_dynamic_pad(images_and_captions,\n                                           batch_size=self.config.batch_size,\n                                           queue_capacity=queue_capacity))\n    self.images = image_seqs\n    self.input_mask = input_mask\n\n\n  def build_image_embeddings(self):\n    \"\"\"Builds the image model subgraph and generates image embeddings.\n\n    Inputs:\n      self.images\n\n    Outputs:\n      self.image_embeddings\n    \"\"\"\n    \n    # Reshape 5D image tensor.\n    images = tf.reshape(self.images, [-1,\n                                
 self.config.image_height,\n                                 self.config.image_height,\n                                 3])\n    \n    inception_output = image_embedding.inception_v3(\n        images,\n        trainable=self.train_inception,\n        is_training=self.is_training())\n    self.inception_variables = tf.get_collection(\n        tf.GraphKeys.VARIABLES, scope=\"InceptionV3\")\n    \n    # Map inception output into embedding space.\n    with tf.variable_scope(\"image_embedding\") as scope:\n      image_embeddings = tf.contrib.layers.fully_connected(\n          inputs=inception_output,\n          num_outputs=self.config.embedding_size,\n          activation_fn=None,\n          weights_initializer=self.initializer,\n          biases_initializer=None,\n          scope=scope)\n\n    # Save the embedding size in the graph.\n    tf.constant(self.config.embedding_size, name=\"embedding_size\")\n      \n    self.image_embeddings = tf.reshape(image_embeddings,\n                                       [tf.shape(self.images)[0],\n                                        -1,\n                                        self.config.embedding_size])\n  \n\n  def build_model(self):\n    \"\"\"Builds the model.\n\n    Inputs:\n      self.image_embeddings\n      self.seq_embeddings\n      self.target_seqs (training and eval only)\n      self.input_mask (training and eval only)\n\n    Outputs:\n      self.total_loss (training and eval only)\n      self.target_cross_entropy_losses (training and eval only)\n      self.target_cross_entropy_loss_weights (training and eval only)\n    \"\"\"\n    norm_image_embeddings = tf.nn.l2_normalize(self.image_embeddings, 2,\n                                               name=\"norm_image_embeddings\")\n    \n    if self.mode == \"inference\":\n      pass\n    else:\n    \n      # Select the valid siamese pairs. 
Hacky for now!\n      emb_loss_mask = np.ones((self.config.number_set_images,\n                               self.config.number_set_images))\n      # Manually replicate for 8 times\n      emb_loss_mask = block_diag(emb_loss_mask, emb_loss_mask,\n                                 emb_loss_mask, emb_loss_mask,\n                                 emb_loss_mask, emb_loss_mask,\n                                 emb_loss_mask, emb_loss_mask,\n                                 emb_loss_mask, emb_loss_mask)\n\n      norm_image_embeddings = tf.reshape(norm_image_embeddings,\n              [self.config.number_set_images * self.config.batch_size,\n               self.config.embedding_size])\n      \n      scores = tf.matmul(norm_image_embeddings, norm_image_embeddings,\n                         transpose_a=False, transpose_b=True, name=\"scores\")\n      \n      posi_scores = tf.reduce_sum(tf.mul(scores, emb_loss_mask)) / np.sum(emb_loss_mask)\n      \n      emb_loss_mask = 1.0 - emb_loss_mask\n      m = 0.8 # magin in Siamese network\n      nega_scores = tf.maximum(tf.mul(scores, emb_loss_mask) - 0.8, 0.0) \n      nega_scores = tf.reduce_sum(nega_scores) / np.sum(emb_loss_mask)\n      \n      # nega_scores = (tf.reduce_sum(nega_scores) -\n      #                   m * np.sum(1 - emb_loss_mask)) / np.sum(emb_loss_mask)\n      \n      emb_batch_loss = tf.sub(nega_scores, posi_scores, name=\"emb_batch_loss\")\n      tf.contrib.losses.add_loss(emb_batch_loss)\n      \n      # Merge all losses and stats.\n      total_loss = tf.contrib.losses.get_total_loss()\n      \n      # Add summaries.\n      tf.scalar_summary(\"emb_batch_loss\", emb_batch_loss)\n      tf.scalar_summary(\"total_loss\", total_loss)\n      for var in tf.trainable_variables():\n        tf.histogram_summary(var.op.name, var)\n      self.total_loss = total_loss\n      \n  def setup_inception_initializer(self):\n    \"\"\"Sets up the function to restore inception variables from checkpoint.\"\"\"\n    if self.mode != 
\"inference\":\n      # Restore inception variables only.\n      saver = tf.train.Saver(self.inception_variables)\n\n      def restore_fn(sess):\n        tf.logging.info(\"Restoring Inception variables from checkpoint file %s\",\n                        self.config.inception_checkpoint_file)\n        saver.restore(sess, self.config.inception_checkpoint_file)\n\n      self.init_fn = restore_fn\n\n  def setup_global_step(self):\n    \"\"\"Sets up the global step Tensor.\"\"\"\n    global_step = tf.Variable(\n        initial_value=0,\n        name=\"global_step\",\n        trainable=False,\n        collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.VARIABLES])\n\n    self.global_step = global_step\n\n  def build(self):\n    \"\"\"Creates all ops for training and evaluation.\"\"\"\n    self.build_inputs()\n    self.build_image_embeddings()\n    self.build_model()\n    self.setup_inception_initializer()\n    self.setup_global_step()\n"
  },
  {
    "path": "polyvore/polyvore_model_vse.py",
    "content": "# Copyright 2017 Xintong Han. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"\nPolyvore model used in ACM MM\"17 paper\n\"Learning Fashion Compatibility with Bidirectional LSTMs\"\nLink: https://arxiv.org/pdf/1707.05691.pdf\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np                                  \nimport tensorflow as tf\n\nfrom ops import image_embedding\nfrom ops import image_processing\nfrom ops import inputs as input_ops\n\nclass PolyvoreModel(object):\n  \"\"\" Model for fashion set on Polyvore dataset.\n  \"\"\"\n\n  def __init__(self, config, mode, train_inception=False):\n    \"\"\"Basic setup.\n\n    Args:\n      config: Object containing configuration parameters.\n      mode: \"train\", \"eval\" or \"inference\".\n      train_inception: Whether the inception submodel variables are trainable.\n    \"\"\"\n    assert mode in [\"train\", \"eval\", \"inference\"]\n    self.config = config\n    self.mode = mode\n    self.train_inception = train_inception\n\n    # Reader for the input data.\n    self.reader = tf.TFRecordReader()\n\n    # To match the \"Show and Tell\" paper we initialize all variables with a\n    # random uniform initializer.\n    self.initializer = tf.random_uniform_initializer(\n        
minval=-self.config.initializer_scale,\n        maxval=self.config.initializer_scale)\n\n    # A float32 Tensor with shape\n    # [batch_size, num_images, height, width, channels].\n    # num_images is the number of images in one outfit, default is 8.\n    self.images = None\n\n    # Forward RNN input and target sequences.\n    # An int32 Tensor with shape [batch_size, padded_length].\n    self.f_input_seqs = None\n    # An int32 Tensor with shape [batch_size, padded_length].\n    self.f_target_seqs = None\n    \n    # Backward RNN input and target sequences.\n    # An int32 Tensor with shape [batch_size, padded_length].\n    self.b_input_seqs = None\n    # An int32 Tensor with shape [batch_size, padded_length].\n    self.b_target_seqs = None\n    \n    # An int32 0/1 Tensor with shape [batch_size, padded_length].\n    self.input_mask = None\n  \n    # Image caption sequence and masks.\n    # An int32 Tensor with shape [batch_size, num_images, padded_length].\n    self.cap_seqs = None\n    # An int32 0/1 Tensor with shape [batch_size, padded_length].\n    self.cap_mask = None\n\n    # Caption sequence embeddings, we use simple bag of word model.\n    # A float32 Tensor with shape [batch_size, num_images, embedding_size].\n    self.seq_embeddings = None\n\n    # Image embeddings in the joint visual-semantic space\n    # A float32 Tensor with shape [batch_size, num_images, embedding_size].\n    self.image_embeddings = None\n\n    # Image embeddings in the RNN output/prediction space.\n    self.rnn_image_embeddings = None\n    \n    # Word embedding map.\n    self.embedding_map = None\n\n    # A float32 scalar Tensor; the total loss for the trainer to optimize.\n    self.total_loss = None\n\n    # Forward and backward RNN loss.\n    # A float32 Tensor with shape [batch_size * padded_length].\n    self.forward_losses = None\n    # A float32 Tensor with shape [batch_size * padded_length].\n    self.backward_losses = None\n    # RNN loss, forward + backward.\n    
self.lstm_losses = None\n    \n    # Loss mask for lstm loss.\n    self.loss_mask = None\n\n    # Visual Semantic Embedding loss.\n    # A float32 Tensor with shape [batch_size * padded_length].\n    self.emb_losses = None\n    \n    # A float32 Tensor with shape [batch_size * padded_length].\n    self.target_weights = None\n\n    # Collection of variables from the inception submodel.\n    self.inception_variables = []\n\n    # Function to restore the inception submodel from checkpoint.\n    self.init_fn = None\n\n    # Global step Tensor.\n    self.global_step = None\n    \n\n\n  def is_training(self):\n    \"\"\"Returns true if the model is built for training mode.\"\"\"\n    return self.mode == \"train\"\n\n  def process_image(self, encoded_image, thread_id=0, image_idx=0):\n    \"\"\"Decodes and processes an image string.\n\n    Args:\n      encoded_image: A scalar string Tensor; the encoded image.\n      thread_id: Preprocessing thread id used to select the ordering of color\n        distortions. Not used in our model.\n      image_idx: Index of the image in an outfit. 
Only used for summaries.\n    Returns:\n      A float32 Tensor of shape [height, width, 3]; the processed image.\n    \"\"\"\n    return image_processing.process_image(encoded_image,\n                                          is_training=self.is_training(),\n                                          height=self.config.image_height,\n                                          width=self.config.image_width,\n                                          image_format=self.config.image_format,\n                                          image_idx=image_idx)\n\n  def build_inputs(self):\n    \"\"\"Input prefetching, preprocessing and batching.\n\n    Outputs:\n      Inputs of the model.\n    \"\"\"\n    if self.mode == \"inference\":\n      # In inference mode, images and inputs are fed via placeholders.\n      image_feed = tf.placeholder(dtype=tf.string, shape=[], name=\"image_feed\")\n      # Process image and insert batch dimensions.\n      image_feed = self.process_image(image_feed)\n      \n      input_feed = tf.placeholder(dtype=tf.int64,\n                                  shape=[None],  # batch_size\n                                  name=\"input_feed\")\n\n      # Process image and insert batch dimensions.\n      image_seqs = tf.expand_dims(image_feed, 0)\n      cap_seqs = tf.expand_dims(input_feed, 1)\n\n      # No target sequences or input mask in inference mode.\n      input_mask = tf.placeholder(dtype=tf.int64,\n                                  shape=[1, 8],  # batch_size\n                                  name=\"input_mask\")\n      cap_mask = None\n      loss_mask = None\n      set_ids = None\n      \n    else:\n      # Prefetch serialized SequenceExample protos.\n      input_queue = input_ops.prefetch_input_data(\n          self.reader,\n          self.config.input_file_pattern,\n          is_training=self.is_training(),\n          batch_size=self.config.batch_size,\n          values_per_shard=self.config.values_per_input_shard,\n          
input_queue_capacity_factor=self.config.input_queue_capacity_factor,\n          num_reader_threads=self.config.num_input_reader_threads)\n\n      # Image processing and random distortion. Split across multiple threads\n      # with each thread applying a slightly different distortion. But we only\n      # use one thread in our Polyvore model. likes are not used.\n      images_and_captions = []\n      for thread_id in range(self.config.num_preprocess_threads):\n        serialized_sequence_example = input_queue.dequeue()\n        set_id, encoded_images, image_ids, captions, likes = (\n            input_ops.parse_sequence_example(\n            serialized_sequence_example,\n            set_id =self.config.set_id_name,\n            image_feature=self.config.image_feature_name,\n            image_index=self.config.image_index_name,\n            caption_feature=self.config.caption_feature_name,\n            number_set_images=self.config.number_set_images))\n        \n        images = []\n        for i in range(self.config.number_set_images):\n          images.append(self.process_image(encoded_images[i],image_idx=i))\n        \n        images_and_captions.append([set_id, images, image_ids, captions, likes])\n\n      # Batch inputs.\n      queue_capacity = (5 * self.config.num_preprocess_threads *\n                        self.config.batch_size)\n\n      (set_ids, image_seqs, image_ids, input_mask,\n       loss_mask, cap_seqs, cap_mask, likes) = (\n       input_ops.batch_with_dynamic_pad(images_and_captions,\n                                           batch_size=self.config.batch_size,\n                                           queue_capacity=queue_capacity))\n    \n    self.images = image_seqs\n    self.input_mask = input_mask\n    self.loss_mask = loss_mask\n    self.cap_seqs = cap_seqs\n    self.cap_mask = cap_mask\n    self.set_ids = set_ids\n\n  def build_image_embeddings(self):\n    \"\"\"Builds the image model subgraph and generates image embeddings\n      in visual 
semantic joint space and RNN prediction space.\n\n    Inputs:\n      self.images\n\n    Outputs:\n      self.image_embeddings\n      self.rnn_image_embeddings\n    \"\"\"\n    \n    # Reshape 5D image tensor.\n    images = tf.reshape(self.images, [-1,\n                                 self.config.image_height,\n                                 self.config.image_height,\n                                 3])\n    \n    inception_output = image_embedding.inception_v3(\n        images,\n        trainable=self.train_inception,\n        is_training=self.is_training())\n    self.inception_variables = tf.get_collection(\n        tf.GraphKeys.VARIABLES, scope=\"InceptionV3\")\n    \n    # Map inception output into embedding space.\n    with tf.variable_scope(\"image_embedding\") as scope:\n      image_embeddings = tf.contrib.layers.fully_connected(\n          inputs=inception_output,\n          num_outputs=self.config.embedding_size,\n          activation_fn=None,\n          weights_initializer=self.initializer,\n          biases_initializer=None,\n          scope=scope)\n\n    # Save the embedding size in the graph.\n    tf.constant(self.config.embedding_size, name=\"embedding_size\")\n    self.image_embeddings = tf.reshape(image_embeddings,\n                                       [tf.shape(self.images)[0],\n                                        -1,\n                                        self.config.embedding_size])\n\n  def build_seq_embeddings(self):\n    \"\"\"Builds the input sequence embeddings.\n\n    Inputs:\n      self.cap_seqs\n      self.cap_mask\n\n    Outputs:\n      self.seq_embeddings\n      self.embedding_map\n    \"\"\"\n    with tf.variable_scope(\"seq_embedding\"), tf.device(\"/cpu:0\"):\n      embedding_map = tf.get_variable(\n          name=\"map\",\n          shape=[self.config.vocab_size, self.config.embedding_size],\n          initializer=self.initializer)\n      seq_embeddings = tf.nn.embedding_lookup(embedding_map, self.cap_seqs)\n      \n      # Average pooling 
the seq_embeddings (bag of words). \n      if self.mode != \"inference\":\n        seq_embeddings = tf.batch_matmul(\n                                tf.cast(tf.expand_dims(self.cap_mask, 2),\n                                        tf.float32),\n                                seq_embeddings)\n        seq_embeddings = tf.squeeze(seq_embeddings, [2])\n    \n    self.embedding_map = embedding_map\n    self.seq_embeddings = seq_embeddings\n\n  def build_model(self):\n    \"\"\"Builds the model.\n      The original code is written with Tensorflow r0.10\n      for Tensorflow > r1.0, many functions can be simplified.\n      For example Tensors support slicing now, so no need to use tf.slice()\n    \"\"\"\n    norm_image_embeddings = tf.nn.l2_normalize(self.image_embeddings, 2,\n                                               name=\"norm_image_embeddings\")\n    norm_seq_embeddings = tf.nn.l2_normalize(self.seq_embeddings, 2)\n    \n    norm_seq_embeddings = (\n        tf.pad(norm_seq_embeddings, [[0, 0],\n               [0, self.config.number_set_images - tf.shape(norm_seq_embeddings)[1]],\n               [0, 0]], name=\"norm_seq_embeddings\"))\n    \n    if self.mode == \"inference\":\n      pass\n    else:\n      # Compute losses for joint embedding.\n      # Only look at the captions that have length >= 2.\n      emb_loss_mask = tf.greater(tf.reduce_sum(self.cap_mask, 2), 1)\n      # The mask is padded to number_set_images.\n      emb_loss_mask = tf.pad(emb_loss_mask,\n          [[0,0],\n           [0, self.config.number_set_images - tf.shape(emb_loss_mask)[1]]])\n      \n      # Select the valid image-caption pair.\n      emb_loss_mask = tf.reshape(emb_loss_mask, [-1])\n      norm_image_embeddings = tf.reshape(norm_image_embeddings,\n          [self.config.number_set_images * self.config.batch_size,\n           self.config.embedding_size])\n      norm_image_embeddings = tf.boolean_mask(norm_image_embeddings,\n                                                
emb_loss_mask)\n      norm_seq_embeddings = tf.reshape(norm_seq_embeddings,\n                [self.config.number_set_images * self.config.batch_size,\n                 self.config.embedding_size])\n\n      norm_seq_embeddings = tf.boolean_mask(norm_seq_embeddings, emb_loss_mask)\n\n      # The following defines contrastive loss in the joint space.   \n      # Reference: https://github.com/ryankiros/visual-semantic-embedding/blob/master/model.py#L39\n      scores = tf.matmul(norm_seq_embeddings, norm_image_embeddings,\n                         transpose_a=False, transpose_b=True, name=\"scores\")\n      \n      diagonal = tf.expand_dims(tf.diag_part(scores), 1)\n      cost_s = tf.maximum(0.0, self.config.emb_margin - diagonal + scores)\n      cost_im = tf.maximum(0.0,\n          self.config.emb_margin - tf.transpose(diagonal) + scores)\n      cost_s = cost_s - tf.diag(tf.diag_part(cost_s))\n      cost_im = cost_im - tf.diag(tf.diag_part(cost_im))\n      \n      emb_batch_loss = tf.reduce_sum(cost_s) + tf.reduce_sum(cost_im)\n      emb_batch_loss = (emb_batch_loss /\n              tf.cast(tf.shape(norm_seq_embeddings)[0], tf.float32) ** 2)\n\n      tf.contrib.losses.add_loss(emb_batch_loss * self.config.emb_loss_factor)\n      \n      total_loss = tf.contrib.losses.get_total_loss()\n      \n      # Add summaries.\n      tf.scalar_summary(\"emb_batch_loss\", emb_batch_loss)\n      tf.scalar_summary(\"total_loss\", total_loss)\n      for var in tf.trainable_variables():\n        tf.histogram_summary(var.op.name, var)\n      \n      weights = tf.to_float(tf.reshape(emb_loss_mask, [-1]))\n    \n      self.loss_mask = loss_mask\n      self.input_mask = input_mask\n      self.total_loss = total_loss\n      self.emb_losses = emb_batch_loss  # Used in evaluation.\n      \n  def setup_inception_initializer(self):\n    \"\"\"Sets up the function to restore inception variables from checkpoint.\"\"\"\n    if self.mode != \"inference\":\n      # Restore inception variables 
only.\n      saver = tf.train.Saver(self.inception_variables)\n\n      def restore_fn(sess):\n        tf.logging.info(\"Restoring Inception variables from checkpoint %s\" %\n                        self.config.inception_checkpoint_file)\n        saver.restore(sess, self.config.inception_checkpoint_file)\n\n      self.init_fn = restore_fn\n\n  def setup_global_step(self):\n    \"\"\"Sets up the global step Tensor.\"\"\"\n    global_step = tf.Variable(\n        initial_value=0,\n        name=\"global_step\",\n        trainable=False,\n        collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.VARIABLES])\n\n    self.global_step = global_step\n\n  def build(self):\n    \"\"\"Creates all ops for training and evaluation.\"\"\"\n    self.build_inputs()\n    self.build_image_embeddings()\n    self.build_seq_embeddings()\n    self.build_model()\n    self.setup_inception_initializer()\n    self.setup_global_step()\n"
  },
  {
    "path": "polyvore/run_inference.py",
    "content": "# Copyright 2017 Xintong Han. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Run the inference of Bi-LSTM model given input images.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport json\n\nimport tensorflow as tf\nimport pickle as pkl\nimport numpy as np\nimport configuration\nimport polyvore_model_bi as polyvore_model\n\nFLAGS = tf.flags.FLAGS\n\ntf.flags.DEFINE_string(\"checkpoint_path\", \"\",\n                       \"Model checkpoint file or directory containing a \"\n                       \"model checkpoint file.\")\ntf.flags.DEFINE_string(\"json_file\", \"data/label/test-no-dup.json\",\n                       \"Json file containing the inference data.\")\ntf.flags.DEFINE_string(\"image_dir\", \"data/images\",\n                       \"Directory containing images.\")\ntf.flags.DEFINE_string(\"feature_file\", \"data/features/test_features.pkl\",\n                       \"Directory to save the features\")\ntf.flags.DEFINE_string(\"rnn_type\", \"\", \"Type of RNN.\")\n\n\ndef main(_):\n  if os.path.isfile(FLAGS.feature_file):\n    print(\"Feature file already exist.\")\n    return\n  # Build the inference graph.\n  g = tf.Graph()\n  with g.as_default():\n    model_config = configuration.ModelConfig()\n    model_config.rnn_type = 
FLAGS.rnn_type\n    model = polyvore_model.PolyvoreModel(model_config, mode=\"inference\")\n    model.build()\n    saver = tf.train.Saver()\n\n  g.finalize()\n  sess = tf.Session(graph=g)\n  saver.restore(sess, FLAGS.checkpoint_path)\n  test_json = json.load(open(FLAGS.json_file))\n  k = 0\n\n  # Save image ids and features in a dictionary.\n  test_features = dict()\n\n  for image_set in test_json:\n    set_id = image_set[\"set_id\"]\n    image_feat = []\n    image_rnn_feat = []\n    ids = []\n    k = k + 1\n    print(str(k) + \" : \" + set_id)\n    for image in image_set[\"items\"]:\n      filename = os.path.join(FLAGS.image_dir, set_id,\n                              str(image[\"index\"]) + \".jpg\")\n      with tf.gfile.GFile(filename, \"r\") as f:\n        image_feed = f.read()\n\n      [feat, rnn_feat] = sess.run([model.image_embeddings,\n                                   model.rnn_image_embeddings],\n                                  feed_dict={\"image_feed:0\": image_feed})\n      \n      image_name = set_id + \"_\" + str(image[\"index\"])\n      test_features[image_name] = dict()\n      test_features[image_name][\"image_feat\"] = np.squeeze(feat)\n      test_features[image_name][\"image_rnn_feat\"] = np.squeeze(rnn_feat)\n  \n  with open(FLAGS.feature_file, \"wb\") as f:\n    pkl.dump(test_features, f)\n\n\nif __name__ == \"__main__\":\n  tf.app.run()\n"
  },
  {
    "path": "polyvore/run_inference_siamese.py",
    "content": "# Copyright 2017 Xintong Han. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Run the inference of Siamese Network given input images.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport json\n\nimport tensorflow as tf\nimport pickle as pkl\nimport numpy as np\nimport configuration\nimport polyvore_model_siamese as polyvore_model\n\nFLAGS = tf.flags.FLAGS\n\ntf.flags.DEFINE_string(\"checkpoint_path\", \"\",\n                       \"Model checkpoint file or directory containing a \"\n                       \"model checkpoint file.\")\ntf.flags.DEFINE_string(\"json_file\", \"data/label/test-no-dup.json\",\n                       \"Json file containing the inference data.\")\ntf.flags.DEFINE_string(\"image_dir\", \"data/images\",\n                       \"Directory containing images.\")\ntf.flags.DEFINE_string(\"feature_file\",\n                       \"data/features/test_features_siamese.pkl\",\n                       \"Directory to save the features\")\n\n\ndef main(_):\n  if os.path.isfile(FLAGS.feature_file):\n    print(\"Feature file already exist.\")\n    return\n  # Build the inference graph.\n  g = tf.Graph()\n  with g.as_default():\n    model_config = configuration.ModelConfig()\n    model = polyvore_model.PolyvoreModel(model_config, 
mode=\"inference\")\n    model.build()\n    saver = tf.train.Saver()\n\n  g.finalize()\n  sess = tf.Session(graph=g)\n  saver.restore(sess, FLAGS.checkpoint_path)\n  test_json = json.load(open(FLAGS.json_file))\n  k = 0\n\n  # Save image ids and features in a dictionary.\n  test_features = dict()\n\n  for image_set in test_json:\n    set_id = image_set[\"set_id\"]\n    image_feat = []\n    image_rnn_feat = []\n    ids = []\n    k = k + 1\n    print(str(k) + \" : \" + set_id)\n    for image in image_set[\"items\"]:\n      filename = os.path.join(FLAGS.image_dir, set_id,\n                              str(image[\"index\"]) + \".jpg\")\n      with tf.gfile.GFile(filename, \"r\") as f:\n        image_feed = f.read()\n\n      [feat] = sess.run([model.image_embeddings],\n                         feed_dict={\"image_feed:0\": image_feed})\n      \n      image_name = set_id + \"_\" + str(image[\"index\"])\n      test_features[image_name] = dict()\n      test_features[image_name][\"image_feat\"] = np.squeeze(feat)\n  \n  with open(FLAGS.feature_file, \"wb\") as f:\n    pkl.dump(test_features, f)\n\n\nif __name__ == \"__main__\":\n  tf.app.run()\n"
  },
  {
    "path": "polyvore/run_inference_vse.py",
    "content": "# Copyright 2017 Xintong Han. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Run the inference of the VSE model given input images.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport json\n\nimport tensorflow as tf\nimport pickle as pkl\nimport numpy as np\nimport configuration\nimport polyvore_model_vse as polyvore_model\n\nFLAGS = tf.flags.FLAGS\n\ntf.flags.DEFINE_string(\"checkpoint_path\", \"\",\n                       \"Model checkpoint file or directory containing a \"\n                       \"model checkpoint file.\")\ntf.flags.DEFINE_string(\"json_file\", \"data/label/test-no-dup.json\",\n                       \"Json file containing the inference data.\")\ntf.flags.DEFINE_string(\"image_dir\", \"data/images\",\n                       \"Directory containing images.\")\ntf.flags.DEFINE_string(\"feature_file\",\n                       \"data/features/test_features_siamese.pkl\",\n                       \"Directory to save the features\")\n\n\ndef main(_):\n  if os.path.isfile(FLAGS.feature_file):\n    print(\"Feature file already exist.\")\n    return\n  # Build the inference graph.\n  g = tf.Graph()\n  with g.as_default():\n    model_config = configuration.ModelConfig()\n    model = polyvore_model.PolyvoreModel(model_config, 
mode=\"inference\")\n    model.build()\n    saver = tf.train.Saver()\n\n  g.finalize()\n  sess = tf.Session(graph=g)\n  saver.restore(sess, FLAGS.checkpoint_path)\n  test_json = json.load(open(FLAGS.json_file))\n  k = 0\n\n  # Save image ids and features in a dictionary.\n  test_features = dict()\n\n  for image_set in test_json:\n    set_id = image_set[\"set_id\"]\n    image_feat = []\n    image_rnn_feat = []\n    ids = []\n    k = k + 1\n    print(str(k) + \" : \" + set_id)\n    for image in image_set[\"items\"]:\n      filename = os.path.join(FLAGS.image_dir, set_id,\n                              str(image[\"index\"]) + \".jpg\")\n      with tf.gfile.GFile(filename, \"r\") as f:\n        image_feed = f.read()\n\n      [feat] = sess.run([model.image_embeddings],\n                         feed_dict={\"image_feed:0\": image_feed})\n      \n      image_name = set_id + \"_\" + str(image[\"index\"])\n      test_features[image_name] = dict()\n      test_features[image_name][\"image_feat\"] = np.squeeze(feat)\n  \n  with open(FLAGS.feature_file, \"wb\") as f:\n    pkl.dump(test_features, f)\n\n\nif __name__ == \"__main__\":\n  tf.app.run()\n"
  },
  {
    "path": "polyvore/set_generation.py",
    "content": "# Copyright 2017 Xintong Han. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\nr\"\"\"Given multimodal queries, complete the outfit with bi-LSTM and VSE model.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport json\nimport math\nimport os\n\nimport pickle as pkl\nimport tensorflow as tf\nimport numpy as np\nimport configuration\nimport polyvore_model_bi as polyvore_model\n\n\nFLAGS = tf.flags.FLAGS\n\ntf.flags.DEFINE_string(\"checkpoint_path\", \"\",\n                       \"Model checkpoint file or directory containing a \"\n                       \"model checkpoint file.\")\ntf.flags.DEFINE_string(\"image_dir\", \"\", \"Directory containing images.\")\ntf.flags.DEFINE_string(\"feature_file\", \"\", \"File which contains the features.\")\ntf.flags.DEFINE_string(\"word_dict_file\", \"\", \"File containing word list.\")\n\ntf.flags.DEFINE_string(\"query_file\", \"\",\n                       \"A json file containing the query to generate outfit.\")\ntf.flags.DEFINE_string(\"result_dir\", \"results\",\n                       \"Directory to save the results.\")\ntf.flags.DEFINE_float(\"balance_factor\", 2.0,\n        \"Trade off between image and text input.\"\n        \"Larger balance_factor encourages higher correlation with text query\")\n\n\ndef norm_row(a):\n  
\"\"\"L2 normalize each row of a given set.\"\"\"\n  try:\n    return a / np.linalg.norm(a, axis=1)[:, np.newaxis]\n  except:\n    return a / np.linalg.norm(a)\n\ndef rnn_one_step(sess, input_feed, lstm_state, direction='f'):\n  \"\"\"Run one step of the RNN.\"\"\"\n  if direction == 'f':\n    # Forward\n    [lstm_state, lstm_output] = sess.run(\n        fetches=['lstm/f_state:0', 'f_logits/f_logits/BiasAdd:0'],\n        feed_dict={'lstm/f_input_feed:0': input_feed,\n                   'lstm/f_state_feed:0': lstm_state})\n  else:\n    # Backward\n    [lstm_state, lstm_output] = sess.run(\n        fetches=['lstm/b_state:0', 'b_logits/b_logits/BiasAdd:0'],\n        feed_dict={'lstm/b_input_feed:0': input_feed,\n                   'lstm/b_state_feed:0': lstm_state})\n    \n  return lstm_state, lstm_output\n\n\ndef run_forward_rnn(sess, test_idx, test_feat, num_lstm_units):\n  \"\"\" Run forward RNN given a query.\"\"\"\n  res_set = []\n  lstm_state = np.zeros([1, 2 * num_lstm_units])\n  for test_id in test_idx:\n    input_feed = np.reshape(test_feat[test_id], [1, -1])\n    # Run first step with all zeros initial state.\n    [lstm_state, lstm_output] = rnn_one_step(\n          sess, input_feed, lstm_state, direction='f')\n\n  # Maximum length of the outfit is set to 10.\n  for step in range(10):\n    curr_score = np.exp(np.dot(lstm_output, np.transpose(test_feat)))\n    curr_score /= np.sum(curr_score)\n\n    next_image = np.argsort(-curr_score)[0][0]\n    # 0.00001 is used as a probability threshold to stop the generation.\n    # i.e., if the prob of end-of-set is larger than 0.00001, then stop.\n    if next_image == test_feat.shape[0] - 1 or curr_score[0][-1] > 0.00001:\n      # print('OVER')\n      break\n    else:\n      input_feed = np.reshape(test_feat[next_image], [1, -1])\n      [lstm_state, lstm_output] = rnn_one_step(\n            sess, input_feed, lstm_state, direction='f')\n      res_set.append(next_image)\n\n  return res_set\n\n\ndef run_backward_rnn(sess, 
test_idx, test_feat, num_lstm_units):\n  \"\"\" Run backward RNN given a query.\"\"\"\n  res_set = []\n  lstm_state = np.zeros([1, 2 * num_lstm_units])\n  for test_id in reversed(test_idx):\n    input_feed = np.reshape(test_feat[test_id], [1, -1])\n    [lstm_state, lstm_output] = rnn_one_step(\n          sess, input_feed, lstm_state, direction='b')\n  for step in range(10):\n    curr_score = np.exp(np.dot(lstm_output, np.transpose(test_feat)))\n    curr_score /= np.sum(curr_score)\n    next_image = np.argsort(-curr_score)[0][0]\n    # 0.00001 is used as a probability threshold to stop the generation.\n    # i.e., if the prob of end-of-set is larger than 0.00001, then stop.\n    if next_image == test_feat.shape[0] - 1 or curr_score[0][-1] > 0.00001:\n      # print('OVER')\n      break\n    else:\n      input_feed = np.reshape(test_feat[next_image], [1, -1])\n      [lstm_state, lstm_output] = rnn_one_step(\n          sess, input_feed, lstm_state, direction='b')\n      res_set.append(next_image)\n\n  return res_set\n\n\ndef run_fill_rnn(sess, start_id, end_id, num_blank, test_feat, num_lstm_units):\n  \"\"\"Fill in the blanks between start and end.\"\"\"\n  if num_blank == 0:\n    return [start_id, end_id]\n  lstm_f_outputs = []\n  lstm_state = np.zeros([1, 2 * num_lstm_units])\n  input_feed = np.reshape(test_feat[start_id], [1, -1])\n  [lstm_state, lstm_output] = rnn_one_step(\n        sess, input_feed, lstm_state, direction='f')\n\n  f_outputs = []\n  for i in range(num_blank):\n    f_outputs.append(lstm_output[0])\n    curr_score = np.exp(np.dot(lstm_output, np.transpose(test_feat)))\n    curr_score /= np.sum(curr_score)\n    next_image = np.argsort(-curr_score)[0][0]\n    input_feed = np.reshape(test_feat[next_image], [1, -1])\n    [lstm_state, lstm_output] = rnn_one_step(\n          sess, input_feed, lstm_state, direction='f')\n\n  lstm_state = np.zeros([1, 2 * num_lstm_units])\n  input_feed = np.reshape(test_feat[end_id], [1, -1])\n  [lstm_state, lstm_output] = 
rnn_one_step(\n        sess, input_feed, lstm_state, direction='b')\n\n  b_outputs = []\n  for i in range(num_blank):\n    b_outputs.insert(0, lstm_output[0])\n    curr_score = np.exp(np.dot(lstm_output, np.transpose(test_feat)))\n    curr_score /= np.sum(curr_score)\n    next_image = np.argsort(-curr_score)[0][0]\n    input_feed = np.reshape(test_feat[next_image], [1, -1])\n    [lstm_state, lstm_output] = rnn_one_step(\n          sess, input_feed, lstm_state, direction='b')\n\n  outputs = np.asarray(f_outputs) + np.asarray(b_outputs)\n  score = np.exp(np.dot(outputs, np.transpose(test_feat)))\n  score /= np.sum(score, axis=1)[:, np.newaxis]\n  blank_ids = np.argmax(score, axis=1)\n  return [start_id] + list(blank_ids) + [end_id]\n\n\ndef run_set_inference(sess, set_name, test_ids, test_feat, num_lstm_units):\n  test_idx = []\n  for name in set_name:\n    try:\n      test_idx.append(test_ids.index(name))\n    except:\n      print('not found')\n      return\n\n  # dynamic search\n  # run the whole bi-LSTM on the first item\n  first_f_set = run_forward_rnn(sess, test_idx[:1], test_feat, num_lstm_units)\n  first_b_set = run_backward_rnn(sess, test_idx[:1], test_feat, num_lstm_units)\n\n  first_posi = len(first_b_set)\n  first_set = first_b_set + test_idx[:1] + first_f_set\n\n  image_set = []\n  for i in first_set:\n    image_set.append(test_ids[i])\n\n  # # Write results into folder.\n  # os.system('mkdir %s/%s' % (FLAGS.result_dir, 'first'))\n  # for i, image in enumerate(image_set):\n  #   name = image.split('_')\n  #   os.system('cp %s/%s/%s.jpg %s/%s/%d_%s.jpg' % (FLAGS.image_dir,\n  #             name[0], name[1], FLAGS.result_dir, 'first', i, image))\n\n  if len(set_name) >= 2:\n    current_set = norm_row(test_feat[first_set, :])\n    all_position = [first_posi]\n    for test_id in test_idx[1:]:\n      # gradually adding items into it\n      # finding nn of the next item\n      insert_posi = np.argmax(\n          np.dot(norm_row(test_feat[test_id, :]), 
np.transpose(current_set)))\n      all_position.append(insert_posi)\n\n    # run bi LSTM to fill items between first item and this item\n    start_posi = np.min(all_position)\n    end_posi = np.max(all_position)\n\n    sets = run_fill_rnn(sess, test_idx[0], test_idx[1],\n                        end_posi - start_posi - 1, test_feat, num_lstm_units)\n\n  else:\n    # run bi LSTM again\n    sets = test_idx\n  f_set = run_forward_rnn(sess, sets, test_feat, num_lstm_units)\n  b_set = run_backward_rnn(sess, sets, test_feat, num_lstm_units)\n\n  image_set = []\n  for i in b_set[::-1] + sets+f_set:\n    image_set.append(test_ids[i])\n\n  # os.system('mkdir %s/%s' % (FLAGS.result_dir, 'final'))\n  # for i, image in enumerate(image_set):\n  #   name = image.split('_')\n  #   os.system('cp %s/%s/%s.jpg %s/%s/%d_%s.jpg' % (FLAGS.image_dir,\n  #                   name[0], name[1], FLAGS.result_dir, 'final', i, image))\n\n  return b_set[::-1] + sets + f_set\n\n\ndef nn_search(i, test_emb, word_vec):\n  # score = np.dot(test_emb, np.transpose(test_emb[i] + word_vec))\n  score = np.dot(test_emb,\n        np.transpose(test_emb[i] + FLAGS.balance_factor * word_vec))\n  return np.argmax(score)\n\n\ndef main(_):\n  # Build the inference graph.\n  g = tf.Graph()\n  with g.as_default():\n    model_config = configuration.ModelConfig()\n    model = polyvore_model.PolyvoreModel(model_config, mode=\"inference\")\n    model.build()\n    saver = tf.train.Saver()\n\n    g.finalize()\n    with tf.Session() as sess:\n      saver.restore(sess, FLAGS.checkpoint_path)\n      with open(FLAGS.feature_file, \"rb\") as f:\n        test_data = pkl.load(f)\n\n      test_ids = test_data.keys()\n      test_feat = np.zeros((len(test_ids) + 1,\n                            len(test_data[test_ids[0]][\"image_rnn_feat\"])))\n      test_emb = np.zeros((len(test_ids),\n                           len(test_data[test_ids[0]][\"image_feat\"])))\n\n      for i, test_id in enumerate(test_ids):\n        # Image feature 
in the RNN space.\n        test_feat[i] = test_data[test_id][\"image_rnn_feat\"]\n        # Image feature in the joint embedding space.\n        test_emb[i] = test_data[test_id][\"image_feat\"]\n\n      test_emb = norm_row(test_emb)\n\n      # load queries from JSON file\n      queries = json.load(open(FLAGS.query_file))\n      # Get the word embedding.\n      [word_emb] = sess.run([model.embedding_map])\n\n      # Read word name\n      words = open(FLAGS.word_dict_file).read().splitlines()\n      for i, w in enumerate(words):\n        words[i] = w.split()[0]\n\n      # Calculate the embedding of the word query\n      # only run the first query for demo\n      for q in queries[:1]:\n        set_name = q['image_query']\n        print(set_name)\n        # Run Bi-LSTM model using the image query.\n        rnn_sets = run_set_inference(sess, set_name, test_ids,\n                                     test_feat, model_config.num_lstm_units)\n        print(rnn_sets)\n\n        # Reranking the LSTM prediction with similarity with the text query        \n        word_query = str(q['text_query'])\n        print(word_query)\n        if word_query != \"\":\n          # Get the indices of images.\n          test_idx = []\n          for name in set_name:\n            try:\n              test_idx.append(test_ids.index(name))\n            except:\n              print('not found')\n              return\n\n          # Calculate the word embedding\n          word_query = [i+1 for i in range(len(words))\n                            if words[i] in word_query.split()]\n          print(word_query)\n          query_emb = norm_row(np.sum(word_emb[word_query], axis=0))\n          for i, j in enumerate(rnn_sets):\n            if j not in test_idx:\n              rnn_sets[i] = nn_search(j, test_emb, query_emb)\n          print(rnn_sets)\n\n        # write images          \n        image_set = []\n        for i in rnn_sets:\n          image_set.append(test_ids[i])\n\n        # write results\n    
    # os.system('mkdir %s/%s' % (FLAGS.result_dir, 'emb_final'))\n        # for i, image in enumerate(image_set):\n        #   name = image.split('_')\n        #   os.system('cp %s/%s/%s.jpg %s/%s/%d_%s.jpg' % (FLAGS.image_dir,\n        #       name[0], name[1], FLAGS.result_dir, 'emb_final', i, image))\n  \n        for i, image in enumerate(image_set):\n          name = image.split('_')\n          os.system('cp %s/%s/%s.jpg %s/%d_%s.jpg' % (FLAGS.image_dir,\n              name[0], name[1], FLAGS.result_dir, i, image))\n\nif __name__ == \"__main__\":\n  tf.app.run()\n"
  },
  {
    "path": "polyvore/train.py",
    "content": "# Copyright 2017 Xintong Han. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"Train the model.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nimport configuration\nimport polyvore_model_bi as polyvore_model\n\nFLAGS = tf.app.flags.FLAGS\n\ntf.flags.DEFINE_string(\"input_file_pattern\", \"\",\n                       \"File pattern of sharded TFRecord input files.\")\ntf.flags.DEFINE_string(\"inception_checkpoint_file\", \"\",\n                       \"Path to a pretrained inception_v3 model.\")\ntf.flags.DEFINE_string(\"train_dir\", \"\",\n                       \"Directory for saving and loading model checkpoints.\")\ntf.flags.DEFINE_boolean(\"train_inception\", False,\n                        \"Whether to train inception submodel variables.\")\ntf.flags.DEFINE_integer(\"number_of_steps\", 1000000, \"Number of training steps.\")\ntf.flags.DEFINE_integer(\"log_every_n_steps\", 1,\n                        \"Frequency at which loss and global step are logged.\")\n\ntf.logging.set_verbosity(tf.logging.INFO)\n\n\ndef main(unused_argv):\n  assert FLAGS.input_file_pattern, \"--input_file_pattern is required\"\n  assert FLAGS.train_dir, \"--train_dir is required\"\n\n  model_config = configuration.ModelConfig()\n  
model_config.input_file_pattern = FLAGS.input_file_pattern\n  model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file\n\n  training_config = configuration.TrainingConfig()\n\n  # Create training directory.\n  train_dir = FLAGS.train_dir\n  if not tf.gfile.IsDirectory(train_dir):\n    tf.logging.info(\"Creating training directory: %s\", train_dir)\n    tf.gfile.MakeDirs(train_dir)\n\n  # Build the TensorFlow graph.\n  g = tf.Graph()\n  with g.as_default():\n    # Build the model.\n    model = polyvore_model.PolyvoreModel(\n        model_config, mode=\"train\", train_inception=FLAGS.train_inception)\n    model.build()\n    learning_rate = tf.constant(training_config.initial_learning_rate)\n    \n    learning_rate_decay_fn = None\n    if training_config.learning_rate_decay_factor > 0:\n      num_batches_per_epoch = (training_config.num_examples_per_epoch /\n                               model_config.batch_size)\n      decay_steps = int(num_batches_per_epoch *\n                        training_config.num_epochs_per_decay)\n\n      def _learning_rate_decay_fn(learning_rate, global_step):\n        return tf.train.exponential_decay(\n            learning_rate,\n            global_step,\n            decay_steps=decay_steps,\n            decay_rate=training_config.learning_rate_decay_factor,\n            staircase=True)\n\n      learning_rate_decay_fn = _learning_rate_decay_fn\n\n    # Set up the training ops.\n    train_op = tf.contrib.layers.optimize_loss(\n        loss=model.total_loss,\n        global_step=model.global_step,\n        learning_rate=learning_rate,\n        optimizer=training_config.optimizer,\n        clip_gradients=training_config.clip_gradients,\n        learning_rate_decay_fn=learning_rate_decay_fn)\n\n    # Set up the Saver for saving and restoring model checkpoints.\n    saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep)\n\n  # Run training.\n  tf.contrib.slim.learning.train(\n      train_op,\n      
train_dir,\n      log_every_n_steps=FLAGS.log_every_n_steps,\n      graph=g,\n      global_step=model.global_step,\n      number_of_steps=FLAGS.number_of_steps,\n      init_fn=model.init_fn,\n      saver=saver)\n\n\nif __name__ == \"__main__\":\n  tf.app.run()\n"
  },
  {
    "path": "polyvore/train_siamese.py",
    "content": "# Copyright 2017 Xintong Han. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Train the Siamese Network.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nimport configuration\nimport polyvore_model_siamese as polyvore_model\n\nFLAGS = tf.app.flags.FLAGS\n\ntf.flags.DEFINE_string(\"input_file_pattern\", \"\",\n                       \"File pattern of sharded TFRecord input files.\")\ntf.flags.DEFINE_string(\"inception_checkpoint_file\", \"\",\n                       \"Path to a pretrained inception_v3 model.\")\ntf.flags.DEFINE_string(\"train_dir\", \"\",\n                       \"Directory for saving and loading model checkpoints.\")\ntf.flags.DEFINE_boolean(\"train_inception\", False,\n                        \"Whether to train inception submodel variables.\")\ntf.flags.DEFINE_integer(\"number_of_steps\", 1000000, \"Number of training steps.\")\ntf.flags.DEFINE_integer(\"log_every_n_steps\", 1,\n                        \"Frequency at which loss and global step are logged.\")\n\ntf.flags.DEFINE_float(\"learning_rate\", 0.2, \"Initial learning rate.\")\n\ntf.flags.DEFINE_string(\"rnn_type\", \"\",\n                       \"Types of rnn, lstm, gru or basic rnn.\")\n\n\ntf.logging.set_verbosity(tf.logging.INFO)\n\n\ndef main(unused_argv):\n  
assert FLAGS.input_file_pattern, \"--input_file_pattern is required\"\n  assert FLAGS.train_dir, \"--train_dir is required\"\n\n  model_config = configuration.ModelConfig()\n  model_config.input_file_pattern = FLAGS.input_file_pattern\n  model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file\n\n  training_config = configuration.TrainingConfig()\n  # May use a different learning rate\n  training_config.initial_learning_rate = FLAGS.learning_rate\n  \n  # Create training directory.\n  train_dir = FLAGS.train_dir\n  if not tf.gfile.IsDirectory(train_dir):\n    tf.logging.info(\"Creating training directory: %s\", train_dir)\n    tf.gfile.MakeDirs(train_dir)\n\n  # Build the TensorFlow graph.\n  g = tf.Graph()\n  with g.as_default():\n    # Build the model.\n    model = polyvore_model.PolyvoreModel(\n        model_config, mode=\"train\", train_inception=FLAGS.train_inception)\n    model.build()\n\n    \n    # Set up the learning rate.\n    \n    learning_rate = tf.constant(training_config.initial_learning_rate)\n    learning_rate_decay_fn = None\n    if training_config.learning_rate_decay_factor > 0:\n      num_batches_per_epoch = (training_config.num_examples_per_epoch /\n                               model_config.batch_size)\n      decay_steps = int(num_batches_per_epoch *\n                        training_config.num_epochs_per_decay)\n\n      def _learning_rate_decay_fn(learning_rate, global_step):\n        return tf.train.exponential_decay(\n            learning_rate,\n            global_step,\n            decay_steps=decay_steps,\n            decay_rate=training_config.learning_rate_decay_factor,\n            staircase=True)\n\n      learning_rate_decay_fn = _learning_rate_decay_fn\n\n    # Set up the training ops.\n    train_op = tf.contrib.layers.optimize_loss(\n        loss=model.total_loss,\n        global_step=model.global_step,\n        learning_rate=learning_rate,\n        optimizer=training_config.optimizer,\n        
clip_gradients=training_config.clip_gradients,\n        learning_rate_decay_fn=learning_rate_decay_fn)\n\n    # Set up the Saver for saving and restoring model checkpoints.\n    saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep)\n    # saver =  tf.train.Saver(keep_checkpoint_every_n_hours=0.1)\n\n  # Run training.\n  tf.contrib.slim.learning.train(\n      train_op,\n      train_dir,\n      log_every_n_steps=FLAGS.log_every_n_steps,\n      graph=g,\n      global_step=model.global_step,\n      number_of_steps=FLAGS.number_of_steps,\n      init_fn=model.init_fn,\n      saver=saver)\n\n\nif __name__ == \"__main__\":\n  tf.app.run()\n"
  },
  {
    "path": "predict_compatibility.sh",
    "content": "#!/bin/bash\nCHECKPOINT_DIR=\"model/model_final/model.ckpt-34865\"\n\npython polyvore/fashion_compatibility.py \\\n  --checkpoint_path=${CHECKPOINT_DIR} \\\n  --label_file=\"data/label/fashion_compatibility_prediction.txt\" \\\n  --feature_file=\"data/features/test_features.pkl\" \\\n  --rnn_type=\"lstm\" \\\n  --direction=\"2\" \\\n  --result_file=\"fashion_compatibility.pkl\"\n"
  },
  {
    "path": "query.json",
    "content": "[\n    {\n        \"image_query\": [\n            \"131138376_1\",\n            \"131138376_3\"\n        ],\n        \"text_query\": \"blue\"\n    }\n]\n"
  },
  {
    "path": "results/README.md",
    "content": "The generated outfit goes here.\n"
  },
  {
    "path": "train.sh",
    "content": "#!/bin/bash\n\n# Inception v3 checkpoint file.\nINCEPTION_CHECKPOINT=\"model/inception_v3.ckpt\"\n\n# Directory to save the model.\nMODEL_DIR=\"model/bi_lstm/\"\n\n# Run the training code.\npython polyvore/train.py \\\n  --input_file_pattern=\"data/tf_records/train-no-dup-?????-of-00128\" \\\n  --inception_checkpoint_file=\"${INCEPTION_CHECKPOINT}\" \\\n  --train_dir=\"${MODEL_DIR}/train\" \\\n  --train_inception=true \\\n  --number_of_steps=100000\n\n\n# # Training Siamese Network\n# # Directory to save the model.\n# MODEL_DIR=\"model/siamese/\"\n\n# # Run the training code.\n# python polyvore/train_siamese.py \\\n#   --input_file_pattern=\"data/tf_records/train-no-dup-?????-of-00128\" \\\n#   --inception_checkpoint_file=\"${INCEPTION_CHECKPOINT}\" \\\n#   --train_dir=\"${MODEL_DIR}/train\" \\\n#   --train_inception=true \\\n#   --number_of_steps=100000"
  }
]