Repository: xthan/polyvore
Branch: master
Commit: dd9e6cc450a6
Files: 32
Total size: 196.8 KB

Directory structure:
gitextract_bx3trk6u/

├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── data/
│   ├── build_polyvore_data.py
│   ├── features/
│   │   └── README.md
│   └── final_word_dict.txt
├── extract_feature.sh
├── fill_in_blank.sh
├── outfit_generation.sh
├── polyvore/
│   ├── configuration.py
│   ├── fashion_compatibility.py
│   ├── fill_in_blank.py
│   ├── fill_in_blank_siamese.py
│   ├── ops/
│   │   ├── __init__.py
│   │   ├── image_embedding.py
│   │   ├── image_embedding_test.py
│   │   ├── image_processing.py
│   │   └── inputs.py
│   ├── polyvore_model_bi.py
│   ├── polyvore_model_siamese.py
│   ├── polyvore_model_vse.py
│   ├── run_inference.py
│   ├── run_inference_siamese.py
│   ├── run_inference_vse.py
│   ├── set_generation.py
│   ├── train.py
│   └── train_siamese.py
├── predict_compatibility.sh
├── query.json
├── results/
│   └── README.md
└── train.sh

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
# Compiled source #
###################
*.com
*.class
*.dll
*.exe
*.o
*.so
*.pyc
*.mat
*.png
*.jpg

# Packages #
############
# it's better to unpack these files and commit the raw source
# git has its own built in compression methods
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip
*~

.gitlab
.github
data/label/*
data/tf_records/*
model/*


# Logs and databases #
######################
*.log
*.sql
*.sqlite
*.out

# OS generated files #
######################
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db


================================================
FILE: Dockerfile
================================================
# GPU image for running this repository's code on TensorFlow 0.11 (Python 2.7).
FROM nvidia/cuda:8.0-cudnn5-devel

# Pick up some TF dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        libfreetype6-dev \
        libpng12-dev \
        libzmq3-dev \
        pkg-config \
        python \
        python-dev \
        rsync \
        software-properties-common \
        unzip \
        && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Bootstrap pip for the system Python 2.7. The generic get-pip.py refuses to
# run on Python 2.7, so fetch the 2.7-specific script instead; -f makes curl
# fail the build on an HTTP error rather than saving an error page.
RUN curl -fsSL -O https://bootstrap.pypa.io/pip/2.7/get-pip.py && \
    python get-pip.py && \
    rm get-pip.py

# Python packages used alongside TensorFlow, plus the Jupyter kernel spec.
RUN pip --no-cache-dir install \
        ipykernel \
        jupyter \
        matplotlib \
        numpy \
        scipy \
        scikit-learn \
        && \
    python -m ipykernel.kernelspec

# Version substituted into the TensorFlow wheel URL below.
ENV TENSORFLOW_VERSION=0.11.0

# --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #
# These lines will be edited automatically by parameterized_docker_build.sh. #
# COPY _PIP_FILE_ /
# RUN pip --no-cache-dir install /_PIP_FILE_
# RUN rm -f /_PIP_FILE_

# Install TensorFlow GPU version.
RUN pip --no-cache-dir install \
    http://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-${TENSORFLOW_VERSION}-cp27-none-linux_x86_64.whl
# --- ~ DO NOT EDIT OR DELETE BETWEEN THE LINES --- #

# TensorBoard
EXPOSE 6006
# IPython
EXPOSE 8888

WORKDIR "/root"

CMD ["/bin/bash"]


================================================
FILE: LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "{}"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright 2017 Xintong Han
   
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: README.md
================================================
## Bi-LSTM model for learning fashion compatibility. 
Code for ACM MM'17 paper "Learning Fashion Compatibility with Bidirectional LSTMs" [[paper]](https://arxiv.org/pdf/1707.05691.pdf).

Parts of the code are from an older version of Tensorflow's im2txt repo [GitHub](https://github.com/tensorflow/models/blob/master/research/im2txt).


The corresponding dataset can be found on [GitHub](https://github.com/xthan/polyvore-dataset) or [Google Drive](https://drive.google.com/drive/folders/0B4Eo9mft9jwoVDNEWlhEbUNUSE0?resourcekey=0-vQg9TMSLKnmPCuuWwl5Ebw&usp=sharing).

### Contact
Author: Xintong Han

Contact: xintong@umd.edu

### Polyvore.com

[Polyvore.com](https://www.polyvore.com/outfits/search.sets?date=day&item_count.from=4&item_count.to=10) is a popular fashion website, where users can create and upload outfit data. Here is an [example](https://www.polyvore.com/striped_blazer/set?id=227166819).

### Required Packages

* **TensorFlow** ~~0.10.0~~ 0.11 ([instructions](https://www.tensorflow.org/install/))
* **NumPy** ([instructions](http://www.scipy.org/install.html))
* **scikit-learn**

I actually used some version between r0.10 and r0.11 (as of the first commit of TensorFlow's im2txt); you might need to install r0.11 and modify some functions to run the code. Newer versions of TensorFlow prevent me from doing inference with my old code and restoring my models trained using this version. However, I have a commit that supports training using TensorFlow 1.0 or greater: [dd1e03e](https://github.com/xthan/polyvore/tree/dd1e03e27fab12ef0051dd2a8ba7a61caaded499). I will create a new repo supporting TensorFlow version >= 1.0.


#### Recommended Setup

* [**docker-ce**](https://docs.docker.com/install/linux/docker-ce/ubuntu/)
* [**nvidia-docker**](https://github.com/NVIDIA/nvidia-docker)
* build TensorFlow image

Execute the command below at the repository root:

```sh
docker build -t tensorflow:0.11 .
```

* run container

```sh
docker run -it \
    --runtime=nvidia \
    -p 8888:8888 \
    -p 6006:6006 \
    -v $CURRENT:/root/workdir \
	tensorflow:0.11
```

### Prepare the Training Data
Download the dataset and put it in the ./data folder:

0. Decompress polyvore.tar.gz into ./data/label/
1. Decompress polyvore-images.tar.gz to ./data/, so all outfit image folders are in ./data/images/
2. Run the following commands to generate TFRecords in ./data/tf_records/:
```
python data/build_polyvore_data.py
```

### Download the Inception v3 Checkpoint

This model requires a pretrained *Inception v3* checkpoint file to initialize the network.


This checkpoint file is provided by the
[TensorFlow-Slim image classification library](https://github.com/tensorflow/models/tree/master/research/slim#tensorflow-slim-image-classification-library)
which provides a suite of pre-trained image classification models. You can read
more about the models provided by the library
[here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models).

Run the following commands to download the *Inception v3* checkpoint.

```shell
# Save the Inception v3 checkpoint in model folder.
wget "http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz"
tar -xvf "inception_v3_2016_08_28.tar.gz" -C ${INCEPTION_DIR}
rm "inception_v3_2016_08_28.tar.gz"
```
### Training
```shell
./train.sh
```
The models will be saved in model/bi_lstm

### Inference

#### Trained model
Download the trained models from the final_model folder on [Google Drive](https://drive.google.com/drive/folders/0B4Eo9mft9jwoVDNEWlhEbUNUSE0) and put it in ./model/final_model/model.ckpt-34865.

#### Extract features of test data
To perform all three tasks described in the paper, we first need to extract the features of the test images:
```
./extract_feature.sh
```
And the image features will be in data/features/test_features.pkl.

You can also perform end-to-end inference by modifying the corresponding code. For example, input a sequence of images and output a compatibility score. 

#### Fashion fill-in-the-blank
```
./fill_in_blank.sh
```
Note that we further optimized some design choices in the released model. It can achieve 73.5% accuracy, which is higher than the number reported in our paper.

#### Compatibility prediction
```
./predict_compatibility.sh
```
Different from the training process, where the loss is calculated in each mini-batch, during testing we compute the loss against the whole test set. This is pretty slow; a better method could probably be used (e.g., using the distance between the LSTM-predicted representation and the target image embedding).


#### Outfit generation
```
./outfit_generation.sh
```

It generates an outfit given the image/text query in query.json, and saves the results in the results dir. For demo purposes, the query.json only contains one example:

<img src="https://github.com/xthan/polyvore/raw/master/results/outfit.png" height="300">

where green boxes indicate the image query, and the text query is "blue".


#### Some notes
We found that a late fusion of different single models (Bi-LSTM w/o VSE + VSE + Siamese) can achieve superior results on all tasks. These models are also available in the same folder on  [Google Drive](https://drive.google.com/drive/folders/0B4Eo9mft9jwoVDNEWlhEbUNUSE0).

### Todo list
- [x] Add multiple choice inference code.
- [x] Add compatibility prediction inference code.
- [x] Add image outfit generation code. Very similar to compatibility prediction, you can try to do it yourself if in a hurry.
- [x] Release trained models.
- [x] Release Siamese/VSE models.
- [ ] Polish the code.

### Citation

If this code or the Polyvore dataset helps your research, please cite our paper:

    @inproceedings{han2017learning,
      author = {Han, Xintong and Wu, Zuxuan and Jiang, Yu-Gang and Davis, Larry S},
      title = {Learning Fashion Compatibility with Bidirectional LSTMs},
      booktitle = {ACM Multimedia},
      year  = {2017},
    }


================================================
FILE: data/build_polyvore_data.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Prepare Polyvore outfit data."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


from datetime import datetime
import json
import os
import random
import sys
import threading

import numpy as np
import tensorflow as tf

def _define_flags():
  """Registers every command-line flag this script reads through FLAGS."""
  # (flag name, default value, help text) for the string-valued flags.
  string_flags = (
      ('train_label', 'data/label/train_no_dup.json', 'Training label file'),
      ('test_label', 'data/label/test_no_dup.json', 'Testing label file'),
      ('valid_label', 'data/label/valid_no_dup.json',
       'Validation label file'),
      ('output_directory', 'data/tf_records/', 'Output data directory'),
      ('image_dir', 'data/images/', 'Directory of image patches'),
      ('word_dict_file', 'data/final_word_dict.txt',
       'File containing the word dictionary.'),
  )
  # (flag name, default value, help text) for the integer-valued flags.
  integer_flags = (
      ('train_shards', 128, 'Number of shards in training TFRecord files.'),
      ('test_shards', 16, 'Number of shards in test TFRecord files.'),
      ('valid_shards', 8, 'Number of shards in validation TFRecord files.'),
      ('num_threads', 8, 'Number of threads to preprocess the images.'),
  )

  for flag_name, default, description in string_flags:
    tf.app.flags.DEFINE_string(flag_name, default, description)
  for flag_name, default, description in integer_flags:
    tf.app.flags.DEFINE_integer(flag_name, default, description)


_define_flags()

FLAGS = tf.flags.FLAGS


class Vocabulary(object):
  """Maps word strings to integer ids, with a fallback id for unknown words."""

  def __init__(self, vocab, unk_id):
    """Stores the lookup table and the fallback id.

    Args:
      vocab: A dictionary mapping each word to its word_id.
      unk_id: Id returned for any word that is missing from `vocab`.
    """
    self._unk_id = unk_id
    self._vocab = vocab

  def word_to_id(self, word):
    """Returns the id of `word`; prints a notice and returns the unknown id on a miss."""
    if word not in self._vocab:
      print('unknow: ' + word)
      return self._unk_id
    return self._vocab[word]


def _is_png(filename):
  """Determine if a file contains a PNG format image.
  Args:
    filename: string, path of the image file.
  Returns:
    boolean indicating if the image is a PNG.
  """
  return '.png' in filename


def _int64_feature(value):
  """Builds an int64 Feature proto from one value or a list of values."""
  # A bare scalar is wrapped so the proto always receives a list.
  values = value if isinstance(value, list) else [value]
  int64_list = tf.train.Int64List(value=values)
  return tf.train.Feature(int64_list=int64_list)
  

def _float_feature(value):
  """Builds a float Feature proto from one value or a list of values."""
  # A bare scalar is wrapped so the proto always receives a list.
  values = value if isinstance(value, list) else [value]
  float_list = tf.train.FloatList(value=values)
  return tf.train.Feature(float_list=float_list)
  
  
def _bytes_feature(value):
  """Builds a bytes Feature proto whose single entry is `str(value)`."""
  bytes_list = tf.train.BytesList(value=[str(value)])
  return tf.train.Feature(bytes_list=bytes_list)


def _int64_feature_list(values):
  """Builds a FeatureList with one int64 Feature per element of `values`."""
  features = []
  for item in values:
    features.append(_int64_feature(item))
  return tf.train.FeatureList(feature=features)


def _int64_list_feature_list(values):
  """Builds a FeatureList whose entries are int64 Features made from each element.

  Each element of `values` is passed to _int64_feature, which accepts either
  a list of ints or a single int.
  """
  per_step_features = []
  for id_list in values:
    per_step_features.append(_int64_feature(id_list))
  return tf.train.FeatureList(feature=per_step_features)


def _bytes_feature_list(values):
  """Builds a FeatureList with one bytes Feature per element of `values`."""
  features = []
  for item in values:
    features.append(_bytes_feature(item))
  return tf.train.FeatureList(feature=features)

def _float_feature_list(values):
  """Builds a FeatureList with one float Feature per element of `values`."""
  features = []
  for item in values:
    features.append(_float_feature(item))
  return tf.train.FeatureList(feature=features)


def _to_sequence_example(set_info, decoder, vocab):
  """Builds a SequenceExample proto for an outfit.

  Args:
    set_info: dict describing one outfit. The keys read here are 'set_id',
      'items' (each item providing 'index' and 'name'), 'set_url', 'likes'
      and 'views'.
    decoder: object exposing decode_jpeg(encoded_image); it is used only to
      check that every image of the outfit is decodable JPEG data.
    vocab: object exposing word_to_id(word).

  Returns:
    A tf.train.SequenceExample, or None when the outfit has no items or one
    of its images is not valid JPEG data.
  """
  set_id = set_info['set_id']
  image_data = []
  image_ids = []
  caption_data = []
  caption_ids = []
  for image_info in set_info['items']:
    filename = os.path.join(FLAGS.image_dir, set_id,
                            str(image_info['index']) + '.jpg')
    # JPEG data is binary, so the file must be read in binary mode.
    with open(filename, "rb") as f:
      encoded_image = f.read()
    try:
      # Decode purely as a validity check; the decoded pixels are not stored.
      decoder.decode_jpeg(encoded_image)
    except (tf.errors.InvalidArgumentError, AssertionError):
      print("Skipping file with invalid JPEG data: %s" % filename)
      return None

    image_data.append(encoded_image)
    image_ids.append(image_info['index'])
    caption = image_info['name'].encode('utf-8')
    caption_data.append(caption)
    # Word ids are shifted up by one (presumably so that id 0 stays free for
    # padding -- confirm against the model's input pipeline).
    caption_id = [vocab.word_to_id(word) + 1 for word in caption.split()]
    caption_ids.append(caption_id)

  if not image_data:
    # Without at least one image there is nothing to repeat in the padding
    # step below; report failure the same way as for an invalid image.
    print("Skipping set without any items: %s" % set_id)
    return None

  feature = {}
  # Only keep 8 images, if outfit has less than 8 items, repeat the last one.
  for index in range(8):
    if index >= len(image_data):
      feature['images/' + str(index)] = _bytes_feature(image_data[-1])
    else:
      feature['images/' + str(index)] = _bytes_feature(image_data[index])

  feature["set_id"] = _bytes_feature(set_id)
  feature["set_url"] = _bytes_feature(set_info['set_url'])
  # Likes and Views are not used in our model, but we put it into TFRecords.
  feature["likes"] = _int64_feature(set_info['likes'])
  feature["views"] = _int64_feature(set_info['views'])

  context = tf.train.Features(feature=feature)

  feature_lists = tf.train.FeatureLists(feature_list={
      "caption": _bytes_feature_list(caption_data),
      "caption_ids": _int64_list_feature_list(caption_ids),
      "image_index": _int64_feature_list(image_ids)
  })

  sequence_example = tf.train.SequenceExample(
      context=context, feature_lists=feature_lists)

  return sequence_example


class ImageCoder(object):
  """Runs TensorFlow image encode/decode ops through one shared session."""

  def __init__(self):
    # One session serves every conversion request made through this object.
    self._sess = tf.Session()

    # Graph fragment: PNG bytes in, RGB JPEG bytes (quality 100) out.
    self._png_data = tf.placeholder(dtype=tf.string)
    rgb_from_png = tf.image.decode_png(self._png_data, channels=3)
    self._png_to_jpeg = tf.image.encode_jpeg(
        rgb_from_png, format='rgb', quality=100)

    # Graph fragment: JPEG bytes in, image decoded with 3 channels out.
    self._decode_jpeg_data = tf.placeholder(dtype=tf.string)
    self._decode_jpeg = tf.image.decode_jpeg(
        self._decode_jpeg_data, channels=3)

  def png_to_jpeg(self, image_data):
    """Re-encodes PNG `image_data` as JPEG and returns the session result."""
    feed = {self._png_data: image_data}
    return self._sess.run(self._png_to_jpeg, feed_dict=feed)

  def decode_jpeg(self, image_data):
    """Decodes JPEG `image_data` and returns the decoded image.

    Raises:
      AssertionError: if the decoded result is not 3-dimensional with a last
        dimension of size 3.
    """
    feed = {self._decode_jpeg_data: image_data}
    decoded = self._sess.run(self._decode_jpeg, feed_dict=feed)
    assert len(decoded.shape) == 3
    assert decoded.shape[2] == 3
    return decoded


def _process_image_files_batch(coder, thread_index, ranges, name,
                               all_sets, vocab, num_shards):
  """Processes and saves one thread's share of outfits as TFRecord shards.

  Args:
    coder: decoder object handed to _to_sequence_example for JPEG validation.
    thread_index: integer index of this thread into `ranges`.
    ranges: list of [start, end] index pairs into `all_sets`, one per thread;
      `end` is exclusive.
    name: string prefix of the output shard file names.
    all_sets: list of outfit dicts.
    vocab: vocabulary object handed to _to_sequence_example.
    num_shards: total number of shards across all threads; must be a multiple
      of the number of threads.
  """
  # Each thread produces N shards where N = int(num_shards / num_threads).
  # For instance, if num_shards = 128, and the num_threads = 2, then the first
  # thread would produce shards [0, 64).
  num_threads = len(ranges)
  assert not num_shards % num_threads
  num_shards_per_batch = int(num_shards / num_threads)

  # Boundaries that split this thread's index range evenly across its shards.
  shard_ranges = np.linspace(ranges[thread_index][0],
                             ranges[thread_index][1],
                             num_shards_per_batch + 1).astype(int)
  num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0]

  counter = 0
  for s in range(num_shards_per_batch):
    # Generate a sharded version of the file name, e.g. 'train-00002-of-00010'
    shard = thread_index * num_shards_per_batch + s
    output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards)
    output_file = os.path.join(FLAGS.output_directory, output_filename)
    writer = tf.python_io.TFRecordWriter(output_file)

    shard_counter = 0
    files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int)
    try:
      for i in files_in_shard:
        sequence_example = _to_sequence_example(all_sets[i], coder, vocab)
        if not sequence_example:
          # The outfit could not be converted; leave it out of the shard.
          print('fail for set: ' + all_sets[i]['set_id'])
          continue
        writer.write(sequence_example.SerializeToString())
        shard_counter += 1
        counter += 1

        if not counter % 100:
          print('%s [thread %d]: Processed %d of %d images in thread batch.' %
                (datetime.now(), thread_index, counter, num_files_in_thread))
          sys.stdout.flush()
    finally:
      # Close the shard even when a record fails, so the writer is not leaked.
      writer.close()

    print('%s [thread %d]: Wrote %d images to %s' %
          (datetime.now(), thread_index, shard_counter, output_file))
    sys.stdout.flush()
  # Report the number of shards this thread wrote (not the number of inputs).
  print('%s [thread %d]: Wrote %d images to %d shards.' %
        (datetime.now(), thread_index, counter, num_shards_per_batch))
  sys.stdout.flush()


def _process_image_files(name, all_sets, vocab, num_shards):
  """Converts all outfits to TFRecord shards using FLAGS.num_threads threads.

  Args:
    name: string prefix of the output shard file names.
    all_sets: list of outfit dicts to convert.
    vocab: vocabulary object forwarded to each worker thread.
    num_shards: total number of shard files to write for this data set.
  """

  # Break all images into batches with a [ranges[i][0], ranges[i][1]].
  # The builtin int is used as the dtype: the np.int alias is deprecated in
  # newer NumPy releases, and the per-thread code also uses plain int.
  spacing = np.linspace(0, len(all_sets), FLAGS.num_threads + 1).astype(int)
  ranges = []
  for i in range(len(spacing) - 1):
    ranges.append([spacing[i], spacing[i+1]])

  # Launch a thread for each batch.
  print('Launching %d threads for spacings: %s' % (FLAGS.num_threads, ranges))
  sys.stdout.flush()

  # Create a mechanism for monitoring when all threads are finished.
  coord = tf.train.Coordinator()

  # Create a generic TensorFlow-based utility for converting all image codings.
  # The same coder instance is handed to every worker thread.
  coder = ImageCoder()

  threads = []
  for thread_index in range(len(ranges)):
    args = (coder, thread_index, ranges, name, all_sets, vocab, num_shards)
    t = threading.Thread(target=_process_image_files_batch, args=args)
    t.start()
    threads.append(t)

  # Wait for all the threads to terminate.
  coord.join(threads)
  print('%s: Finished writing all %d fashion sets in data set.' %
        (datetime.now(), len(all_sets)))
  sys.stdout.flush()


def _create_vocab(filename):
  """Creates the vocabulary of word to word_id.

  The first whitespace-separated token of every non-blank line is taken as a
  word, and words are numbered in file order starting from 0. The id of the
  special 'unknown' word is the number of words read.

  Args:
    filename: string, path of the word dictionary file.
  Returns:
    A Vocabulary built from the file.
  """
  # Read through a context manager so the file handle is always closed.
  with open(filename) as dict_file:
    word_counts = dict_file.read().splitlines()

  # Blank lines carry no word; skipping them avoids an IndexError on split().
  reverse_vocab = [x.split()[0] for x in word_counts if x.strip()]
  unk_id = len(reverse_vocab)
  vocab_dict = dict([(x, y) for (y, x) in enumerate(reverse_vocab)])
  vocab = Vocabulary(vocab_dict, unk_id)

  return vocab


def _find_image_files(labels_file, name):
  """Build a list of all images files and labels in the data set.
  """
  
  # Read image ids
  all_sets = json.load(open(labels_file))
  
  # Shuffle the ordering of all image files in order to guarantee
  # random ordering of the images with respect to label in the
  # saved TFRecord files. Make the randomization repeatable.
  
  shuffled_index = range(len(all_sets))
  random.seed(12345)
  random.shuffle(shuffled_index)

  all_sets = [all_sets[i] for i in shuffled_index]  
  print('Found %d fashion sets.' % (len(all_sets)))
  return all_sets

def _process_dataset(name, label_file, vocab, num_shards):
  """Convert one complete data set into TFRecord files.

  Args:
    name: string, unique identifier specifying the data set.
    label_file: string, path to the label file listing the fashion sets.
    vocab: vocabulary object forwarded to _process_image_files.
    num_shards: integer number of shards for this data set.
  """
  print(label_file)
  # Load and shuffle the sets first, then hand them to the writer threads.
  _process_image_files(name, _find_image_files(label_file, name), vocab,
                       num_shards)


def main(unused_argv):
  """Build the validation, test and training TFRecord data sets.

  Verifies that every split's shard count is a multiple of FLAGS.num_threads,
  loads the vocabulary from FLAGS.word_dict_file, then converts the splits one
  after another.

  Args:
    unused_argv: ignored.
  """
  # Each shard count must divide evenly across the worker threads.
  for shards_flag in ('train_shards', 'test_shards', 'valid_shards'):
    assert not getattr(FLAGS, shards_flag) % FLAGS.num_threads, (
        'Please make the FLAGS.num_threads commensurate with FLAGS.%s' %
        shards_flag)
  print('Saving results to %s' % FLAGS.output_directory)

  vocab = _create_vocab(FLAGS.word_dict_file)

  # Run it! Splits are converted in this order: validation, test, train.
  datasets = (
      ('valid-no-dup', 'valid_label', 'valid_shards'),
      ('test-no-dup', 'test_label', 'test_shards'),
      ('train-no-dup', 'train_label', 'train_shards'),
  )
  for dataset_name, label_flag, shards_flag in datasets:
    _process_dataset(dataset_name, getattr(FLAGS, label_flag), vocab,
                     getattr(FLAGS, shards_flag))
  

# Script entry point: hand control to tf.app.run().
# NOTE(review): tf.app.run() presumably parses the command-line flags and then
# calls main() defined above -- confirm against the TensorFlow version in use.
if __name__ == '__main__':
  tf.app.run()


================================================
FILE: data/features/README.md
================================================
Extracted image features go here.


================================================
FILE: data/final_word_dict.txt
================================================
black   9909
leather 8516
bag 6350
women's 5810
top 4504
jeans   4133
dress   4100
gold    4031
white   3837
earrings    3619
iphone  3613
sunglasses  3382
necklace    3381
skirt   3254
boots   3142
suede   3004
jacket  2922
case    2871
denim   2763
ring    2703
mini    2622
yoins   2563
high    2535
blue    2533
clutch  2497
plus    2465
bracelet    2418
skinny  2164
coat    2127
shoulder    2125
sandals 2122
long    2112
set 2106
women   2106
lace    2069
red 2014
new 1996
print   1986
pink    1961
sleeve  1954
ankle   1949
silver  1894
pre-owned   1877
lipstick    1861
shorts  1850
topshop 1818
sweater 1788
size    1749
faux    1711
vintage 1699
shoes   1693
rose    1689
pumps   1651
de  1642
michael 1634
crop    1634
color   1598
eye 1554
watch   1541
shirt   1538
round   1527
backpack    1526
diamond 1506
brown   1499
tote    1477
velvet  1474
floral  1454
neck    1382
lip 1372
saint   1356
laurent 1328
cropped 1299
stud    1282
wool    1278
blouse  1277
sheinside   1264
shein   1260
t-shirt 1209
chanel  1199
small   1191
gucci   1187
crossbody   1185
fashion 1184
kate    1170
short   1156
strap   1156
star    1154
classic 1154
sneakers    1153
womens  1151
heel    1137
cover   1101
toe 1075
kors    1075
hat 1066
nail    1061
grey    1051
chain   1044
platform    1043
boho    1040
alexander   1040
pants   1035
flower  1033
pearl   1018
hair    1009
dolce   1007
crystal 996
metal   995
metallic    994
cotton  988
silk    983
design  979
green   974
love    969
marc    969
valentino   951
tank    951
ripped  935
york    927
striped 921
collection  919
yellow  918
converse    914
gabbana 912
printed 911
embellished 910
mascara 900
heart   896
knit    894
double  885
spade   876
waist   875
fur 867
h&m 862
choker  846
large   841
bow 830
eau 828
medium  826
light   819
pu  818
flat    811
lace-up 811
boohoo  811
matte   807
jewelry 805
embroidered 801
heels   791
style   790
tee 788
pendant 772
patent  769
taylor  766
miu 762
wrap    761
casual  759
zip 756
beauty  747
slim    738
collar  737
charlotte   736
distressed  733
nars    725
satchel 724
christian   722
scarf   720
blazer  719
givenchy    717
sleeveless  709
sandal  707
drop    702
jean    699
makeup  699
frame   696
island  696
cuff    696
front   696
river   691
boot    683
pencil  682
bobbi   664
forever 663
jacobs  660
liquid  647
cream   646
la  644
back    641
look    641
tassel  640
shadow  633
stripe  632
cashmere    629
pleated 629
boyfriend   626
miss    625
louboutin   624
trousers    624
dior    620
oversized   619
zipper  614
moto    614
sterling    612
satin   607
sweatshirt  607
eyeshadow   605
nude    604
palette 604
jumper  604
cross   602
chuck   601
stella  599
le  594
pump    592
button  589
cat 588
biker   587
burberry    586
one 583
rossi   583
london  573
chunky  572
fringe  567
stretch 566
dark    564
plaid   562
powder  560
solid   557
cut 555
belt    553
parfum  551
midi    548
wang    547
gianvito    547
canvas  547
cardigan    546
pocket  544
adidas  542
handbag 536
hem 533
retro   532
beanie  531
tie 531
ladies  529
men's   528
body    528
oz  521
mcqueen 518
studded 516
wide    516
box 515
14k 515
loose   515
fit 513
gold-tone   512
bangle  507
polish  506
vans    505
trainers    504
mccartney   504
block   503
mac 499
low 498
nails   496
stone   494
selfridge   490
navy    489
nike    485
detail  482
summer  481
booties 479
wallet  476
pointed 475
flats   474
glitter 473
super   472
mango   467
gloss   466
quilted 466
blush   463
chloé   460
square  457
buckle  454
ray-ban 448
open    447
x   447
pack    444
bags    439
petite  438
ribbed  438
leggings    437
leg 436
colour  436
flap    435
beach   434
soft    434
jimmy   429
skater  428
chiffon 428
cami    427
wash    423
stiletto    422
hot 421
rouge   420
v   417
steel   414
turtleneck  414
choo    410
clear   408
natural 405
rag 404
bone    403
orange  402
rise    402
oz. 400
pattern 400
russe   399
preowned    396
rings   395
bucket  393
waisted 392
mid 392
zara    391
eyeliner    386
crepe   384
rhinestone  384
brush   384
mesh    383
beige   383
cosmetics   379
knitted 377
bomber  377
giuseppe    375
clothing    374
charm   370
zanotti 370
drawstring  369
wedge   368
tory    368
pure    368
olivia  366
moschino    366
multi   365
glasses 364
accessories 362
band    361
burch   360
couture 359
acne    359
chic    358
maison  358
18k 358
vest    358
layered 356
jersey  355
logo    355
knee    354
trim    350
statement   349
golden  348
balmain 348
paris   346
phone   346
beaded  345
lapel   343
acetate 342
strappy 341
aviator 340
stainless   339
cap 337
sneaker 337
spray   336
steve   336
maxi    335
crochet 333
madden  333
fedora  331
shoe    331
sporty  329
side    329
triangle    327
earring 325
pom 324
edition 321
fringed 321
lauren  320
rebecca 318
fendi   318
wedding 315
eyes    315
evening 314
victoria    314
textured    314
studs   314
liner   313
circle  313
foundation  312
girl    312
rockstud    311
monki   311
sheer   310
unisex  310
face    309
party   307
elastic 307
bootie  307
v-neck  306
waterproof  305
pullover    305
sleeves 304
handbags    303
prada   303
alice   302
dot 302
designer    301
hooded  299
limited 299
moon    298
burgundy    297
hoop    297
studios 295
contrast    294
j.crew  292
pockets 291
authentic   291
purple  290
plated  289
feather 288
sexy    288
straw   286
lens    284
straight    283
bra 281
bling   281
candy   281
stylish 280
brand   280
men 279
ear 278
preppy  278
wool-blend  276
leaf    276
prom    275
dorothy 274
day 274
marni   273
sole    273
hoodie  271
quartz  271
handle  271
perkins 271
pin 271
check   271
secret  270
margiela    268
purse   266
art 265
asos    264
outerwear   263
flared  262
woven   261
balenciaga  260
oscar   258
big 257
full    256
clip    256
balm    256
originals   255
gray    254
hand    253
swarovski   253
envelope    253
lash    252
gel 252
lim 252
goop    252
messenger   251
leopard 251
geometric   249
smith   248
christmas   247
daisy   246
coral   246
pro 244
trench  244
tom 244
khaki   243
a-line  243
sequin  243
phillip 243
heeled  242
yves    240
store   240
isabel  240
sun 238
minkoff 238
cutout  237
gift    237
camel   237
rock    237
j   237
ml  236
row 235
lacquer 235
klein   233
travel  232
hollow  232
formal  232
renta   232
urban   231
belted  231
jane    230
air 229
topic   228
tan 228
tone    227
chicnova    227
mirror  227
peep    225
two 225
line    224
combat  224
single  224
amazon.com  224
monogram    223
cable   223
guess   223
pant    222
bodycon 222
ford    221
chicwish    221
marant  220
coffee  219
ruffle  219
dr. 218
proenza 218
schouler    217
colors  217
leather-look    217
loafers 216
slip    216
mirrored    216
notebook    215
chloe   215
beckham 215
flowers 215
school  214
hi  213
6s  213
calf    213
accessorize 213
winter  212
cute    212
headband    212
blend   211
baker   211
skull   211
plain   211
armani  210
basic   208
pastel  207
sweet   207
mid-rise    207
jacquard    207
dial    207
court   206
dsquared2   206
chelsea 205
mint    205
halter  205
online  205
crew    204
embroidery  204
embossed    203
martens 203
apple   203
toilette    202
butterfly   202
baseball    202
patch   201
gown    201
von 201
free    201
arrow   200
flare   200
victoria's  199
asymmetric  198
olympia 197
ombre   197
glass   197
lips    196
breasted    195
jet 195
lanvin  195
superstar   195
saffiano    194
linda   194
cotton-blend    193
socks   192
rubber  192
american    191
graphic 191
ralph   191
floppy  190
volume  190
spring  190
key 189
letter  189
cape    189
felt    189
pave    187
bar 187
artificial  186
peach   186
polka   185
calvin  185
ruffled 185
boutique    184
galaxy  184
luxe    184
skin    184
panel   184
cat-eye 184
simple  184
nyx 184
bralet  183
ox  183
kit 183
punk    183
paul    183
length  181
finish  181
street  181
james   181
perfect 180
snake   180
dresses 180
fall    180
tights  180
patchwork   180
aquazzura   179
vince   179
pouch   179
studio  178
elizabeth   177
ultra   177
modern  176
m   176
bead    176
frayed  175
6/6s    175
onyx    175
shine   175
joseph  175
ball    174
lime    174
table   174
original    173
elegant 173
maybelline  172
versace 172
city    172
saddle  172
west    171
moda    171
round-frame 171
ted 171
diane   171
crown   170
infinity    170
max 170
life    169
ballet  168
aeropostale 168
home    167
braided 167
brim    167
butter  167
farrow  167
intense 167
washed  166
bright  166
bikini  166
tall    166
shop    166
grunge  165
australia   165
effect  165
cocktail    165
noir    164
oversize    163
tattoo  162
gold-plated 162
extreme 162
ivory   162
swing   161
tulle   160
50ml    160
true    160
mixed   160
diamonds    160
inspired    159
ice 159
house   159
water   159
exclusive   159
premium 158
glow    158
wine    157
turquoise   157
bracelets   157
bold    157
shimmer 157
neon    156
lily    156
vegan   156
half    155
girls   154
tweed   154
pieces  154
trio    154
mens    153
pointy  153
headphones  153
paige   152
rivet   152
gladiator   151
signature   151
le3no   151
shell   150
bib 150
antigona    149
pretty  149
closure 149
make    149
wear    149
peplum  149
linen   149
amazon  148
enamel  148
garden  148
end 148
wood    148
lock    147
textured-leather    147
duo 147
made    147
plastic 147
lady    146
trendy  146
genuine 146
furstenberg 145
co. 145
gloves  145
pen 145
kimono  145
old 145
3/4 145
split   144
spike   144
sizes   143
sapphire    143
lipsy   143
optical 143
choies  143
rip 143
real    142
patent-leather  142
msgm    142
90s 142
stretchy    141
kenneth 141
roll    140
bell    140
silver-tone 140
wayfarer    140
best    140
ariana  140
marble  140
lashes  140
mary    140
grande  140
military    139
mom 139
fine    139
edge    139
long-wear   138
karl    138
crime   138
polo    138
transparent 138
ugg 137
tilbury 137
coco    137
jewellery   137
oval    137
goth    137
wall    137
baby    136
jamie   136
destroyed   136
shape   135
magnetic    134
lambskin    134
garavani    134
sport   134
bamboo  134
ct. 134
resin   134
john    133
sea 133
david   133
cz  133
spf 133
spaghetti   133
warehouse   133
jennifer    133
bohemian    132
edgy    132
stila   131
compact 131
camera  131
tribal  131
ruby    131
little  131
5s  131
professional    131
french  131
calfskin    131
engagement  130
bronze  130
handmade    130
inch    130
fake    130
acrylic 130
hipster 129
palm    129
w/  129
shearling   129
nylon   129
paper   128
high-top    128
bridal  128
lucluc  128
glam    128
bear    128
queen   128
wild    128
stars   127
wide-leg    127
current/elliott 127
triple  127
betsey  127
johnson 127
deep    127
night   126
off-the-shoulder    126
designs 126
laura   126
high-rise   126
tunic   126
berry   126
power   126
lane    125
false   125
perfume 125
hard    125
lagerfeld   125
stick   125
point   124
essie   124
multicolor  124
lenses  124
banana  124
ea  123
hermes  123
apricot 123
boy 122
pale    122
slit    122
decay   122
luxury  121
aldo    121
madewell    121
see 120
antique 120
nine    120
tree    120
kylie   120
society 120
army    120
scoop   119
cut-out 119
lo  119
cord    119
slouchy 119
oasis   119
nly 119
oxford  119
knot    119
bottle  118
pinterest   118
rib 118
casadei 117
fox 117
grace   117
kiss    117
adjustable  117
stripes 117
chair   116
gradient    116
3d  116
plant   116
vera    115
layer   115
louis   115
strapless   115
tumblr  115
kim 115
tartan  115
clinique    114
official    114
lands   114
bendel  114
roberto 114
ferragamo   114
cartier 114
lauder  114
legging 114
maurices    113
tops    113
gavriel 113
cold    113
coin    113
cool    113
rainbow 112
smooth  112
fresh   112
alexis  112
wildfox 112
two-tone    112
l   111
blossom 111
salvatore   111
teardrop    111
vero    111
olive   111
puma    111
lamp    111
tibi    111
stand   111
work    111
sparkle 111
three   111
music   110
freshwater  110
decor   110
bouquet 110
card    110
brooch  110
cosmetic    110
smashbox    110
mara    110
shopper 109
kenzo   109
sophie  109
women’s 109
lancome 109
henri   109
karen   109
pull    108
ballerina   108
seconds 108
hollister   108
5/5s    108
vase    108
velvetine   108
fragrance   107
bustier 107
mansur  107
micro   107
flip    107
t-strap 107
camuto  107
pandora 107
head    107
around  107
flannel 107
chevron 106
zirconia    106
carven  106
boxy    106
parker  106
nudes   106
gemstone    106
mineral 106
cherry  105
tube    105
les 105
audacious   105
leather-trimmed 105
high-waisted    105
fitted  105
cluster 105
ceramic 104
sand    104
scott   104
twill   104
festival    104
cross-body  104
cuffed  104
bird    104
python  104
cc  104
drew    104
inc 104
cavalli 104
silk-blend  104
brass   104
sophia  104
lined   104
pillow  103
allure  103
wig 103
jessica 103
martin  103
gypsy   102
n   102
jeggings    102
t.w.    102
merino  102
bleach  102
stuart  102
midnight    102
book    102
bralette    101
dangle  101
perforated  101
joni    101
chocolate   101
warm    101
mix 101
time    100
jay 99
faye    99
people  99
twist   99
cubic   99
checked 99
throw   99
asymmetrical    99
smokey  99
fabric  98
brogues 98
emilio  98
beautiful   98
united  98
skate   98
draped  98
piece   98
accent  98
avenue  97
animal  97
céline  97
carat   97
reversible  97
bardot  97
sale    97
ribbon  97
sky 97
royal   96
loafer  96
slip-on 96
hippie  96
stack   96
club    96
low-rise    96
cheap   95
brow    95
floral-print    95
celine  95
deluxe  95
vuitton 95
shades  94
happy   94
cole    94
necklaces   94
ii  94
paint   94
aztec   94
athletic    94
thong   94
mankind 94
apparel 94
drape   94
raw 94
shawl   93
100mm   93
mulberry    93
weitzman    93
kendall 93
mcq 93
dream   93
shift   93
jeffrey 93
rolled  93
chine   92
stay    92
5sos    92
beads   92
sports  92
bcbgmaxazria    92
campbell    92
eyewear 92
cashmere-blend  92
fossil  92
gem 92
xl  92
giorgio 92
pierre  92
fly 92
mark    92
abercrombie 92
eyeglasses  92
watches 91
espadrille  91
tiffany 91
fitch   91
silicone    91
kendra  91
cult    91
guerlain    91
pop 91
pucci   91
zimmermann  91
5c  90
direction   90
acid    90
philosophy  90
extra   90
fleece  90
suedette    90
thick   90
mono    90
ideas   90
cheek   90
jour    90
sans    89
terry   89
holiday 89
webster 89
link    89
pins    89
witchery    89
allurez 89
pyramid 89
essential   89
cushion 89
oliver  89
vogue   89
thigh   88
lightweight 88
roses   88
woolen  88
gorgeous    88
mother  88
sweat   88
turtle  88
jil 87
snapback    87
chronograph 87
autumn  87
sander  87
halo    87
opi 87
brushes 87
jewel   87
us  87
site    87
shiny   87
topaz   87
park    87
tapered 87
iconic  87
custom  87
uniqlo  87
dkny    87
faced   87
souci   86
lana    86
faceted 86
mm  86
holder  86
magic   86
billabong   86
crystal-embellished 86
lord    86
snakeskin   86
tommy   86
hilfiger    86
over-the-knee   85
outdoor 85
culottes    85
rug 85
gothic  85
hole    85
wire    85
tiny    85
caviar  85
target  85
modcloth    85
champagne   85
iro 84
cup 84
peep-toe    84
rental  84
bandeau 84
vernis  84
piercing    84
helmut  84
pineapple   84
keds    84
pleat   84
ribkoff 84
naked   84
clip-on 84
vanessa 84
padded  83
nile    83
bittar  83
ag  83
agate   83
mohair  83
vinyl   83
hardy   83
timberland  83
runway  83
bella   83
coach   83
tattoos 83
anne    83
caged   82
burton  82
trend   82
tailored    82
core    82
painted 82
convertible 82
crystals    82
forever21   82
slippers    82
bradley 82
tropical    81
alex    81
tassels 81
republic    81
lucy    81
funny   81
estee   81
heritage    81
hobo    81
hydrating   81
hairstyles  81
mink    81
eugenia 81
bottega 81
sac 81
ponte   80
lang    80
teal    80
alloy   80
insert  80
tea 80
ink 80
parka   80
sugar   80
poppy   80
veneta  80
backless    79
perry   79
leaves  79
nappa   79
des 79
edp 79
playsuit    79
thin    79
120mm   79
stacking    79
straight-leg    79
loeffler    78
patterned   78
colorful    78
vivienne    78
mock    78
nina    78
black/white 78
nose    78
kelly   78
trouser 78
photo   78
note    78
randall 78
gap 78
candle  77
levi's  77
selma   77
trends  77
sicily  77
mask    77
scallop 77
buttons 77
goldtone    77
longline    77
tshirt  77
cotton-jersey   77
chandelier  77
honey   77
jumpsuit    77
shirts  76
details 76
slim-fit    76
100ml   76
get 76
angel   76
pur 76
waterfall   76
bodysuit    76
westwood    76
anna    76
stitch  75
across  75
organic 75
unique  75
mercier 75
go  75
disney  75
straps  75
emerald 75
batwing 75
hudson  75
irregular   75
rihanna 75
religion    75
bowknot 75
clean   75
glamorous   75
berricle    75
bath    74
elephant    74
capri   74
co  74
highlighter 74
gg  74
filigree    74
jaeger  74
monsoon 74
camo    74
contour 74
zizzi   74
deborah 74
hayden  74
monochrome  74
nearly  74
muscle  74
peoples 74
monday  74
market  74
st. 74
dip 74
molly   74
30ml    73
like    73
mug 73
m·a·c   73
sequined    73
ruched  73
doll    73
lasting 73
theory  73
owl 73
arrangement 73
sam 73
sleeved 73
buttoned    73
slingback   73
fashionable 72
bubble  72
pressed 72
splatter    72
corduroy    72
shaped  72
ivy 72
disc    72
good    72
dye 72
stackable   71
furla   71
bb  71
ca  71
bennett 71
horn    71
barbara 71
oil 71
macbook 71
ipad    71
elie    71
blonde  70
houndstooth 70
beats   70
pcs 70
anya    70
tuxedo  70
juicy   70
sergio  70
foldover    70
crescent    70
regular 70
low-top 70
goddess 70
heather 70
plunge  70
amber   70
lulu    70
nubuck  70
panama  70
sequins 69
g   69
scuba   69
greek   69
run 69
slouch  69
10k 69
snow    69
easy    69
lucky   69
stones  69
princess    69
snap    69
lilly   69
bangles 69
diorshow    69
company 68
concealer   68
Étoile  68
lotion  68
turn    68
cultured    68
swimsuit    68
donna   68
lux 68
brushed 68
pearls  68
pusheen 68
post    68
falabella   68
ny  68
duster  68
stitching   68
melissa 68
strand  68
eos 68
cell    68
18-karat    68
pilot   67
laser   67
succulent   67
illesteva   67
pavé    67
quay    67
wolf    67
mickey  67
office  67
mist    67
roksanda    67
roshe   66
assorted    66
hammered    66
shredded    66
amethyst    66
wedges  66
sonia   66
scalloped   66
b   66
lippmann    66
yurman  66
clubmaster  66
mermaid 66
d'orsay 66
duffle  66
italian 66
bridesmaid  66
teen    66
etro    66
five    66
raglan  66
harlow  66
south   66
hearts  66
ballerinas  66
paisley 66
opal    65
away    65
rare    65
crocodile   65
soap    65
touch   65
platinum    65
instant 65
cargo   65
rope    65
copper  65
keychain    65
deco    65
nyc 65
di  65
fancy   65
pier    64
instagram   64
bandana 64
geo 64
luggage 64
spliced 64
flag    64
ct  64
knitwear    64
k.i.s.s.i.n.g   64
temporary   64
hood    64
monster 64
reading 64
peace   64
steampunk   64
rimmel  64
jeanne  64
addict  64
snowflake   64
simpson 64
coast   64
boss    64
pot 64
saab    64
rich    63
singlet 63
diamante    63
kisses  63
rachel  63
smart   63
fold    63
citizens    63
hindmarch   63
w   63
wallpaper   63
cm  63
rebel   63
digital 63
humanity    63
redvalentino    63
bandage 63
motorcycle  63
tennis  63
leopard-print   63
clips   63
laptop  63
factory 63
opening 62
frames  62
union   61
wave    61
ashley  61
stan    61
basket  61
yeezy   61
blackfive   61
missoni 61
western 61
double-breasted 61
harry   61
a.l.c.  61
peter   61
solitaire   61
vila    61
antonio 61
floor   61
collarless  61
bronzer 61
running 61
rain    61
mason   60
plate   60
emma    60
zipped  60
edie    60
charcoal    60
toms    60
polished    60
lee 60
oxfords 60
pair    60
beret   60
garnet  60
typography  60
arden   60
d   60
knotted 60
hardware    60
4s  60
rolex   60
states  60
indie   60
abstract    60
michel  60
nicholas    60
aqua    60
rick    59
frill   59
no. 59
quote   59
versatile   59
cases   59
goose   59
et  59
coconut 59
blahnik 59
four    59
manolo  59
jumbo   59
mustard 59
rolling 59
decorative  59
cambridge   59
topman  59
wooden  58
faded   58
letters 58
puffer  58
m&co    58
samsung 58
mm6 58
hidden  58
walker  58
blanket 58
tortoise    58
l.k.    58
lemon   58
edelman 58
delpozo 58
semi    58
jack    58
passport    58
dome    58
boat    58
shopping    58
chains  58
pencils 58
sensational 57
rectangle   57
kurt    57
eagle   57
luminous    57
ankle-strap 57
applique    57
halloween   57
pebbled 57
birkin  57
uk  57
specs   57
plum    57
virgin  57
relaxed 57
espadrilles 57
quotes  57
plus/6/5/5s/5c  57
season  57
zoe 56
4/4s    56
skool   56
derek   56
clarins 56
andrew  56
precision   56
lipgloss    56
rochas  56
i'm 56
vacation    56
base    56
anchor  56
primer  56
poncho  56
usa 56
k   56
flatform    56
polarized   56
fluffy  56
rosie   56
soho    56
edt 56
diesel  56
bleached    56
celebrity   56
native  56
wristlet    55
clock   55
cuffs   55
scrunchie   55
marie   55
date    55
leo 55
geiger  55
eyelashes   55
series  55
quad    55
deer    55
forest  55
cartoon 55
faux-leather    55
lolita  55
minaudiere  55
sofa    55
ethnic  55
14kt    54
chino   54
zippers 54
chambray    54
gigi    54
taupe   54
jackets 54
express 54
millen  54
camisole    54
space   54
doublju 54
charles 54
varsity 54
corset  54
owens   54
text    54
schutz  54
levis   54
splicing    54
cage    54
kane    54
rips    54
need    54
muse    54
sk8-hi  54
rocket  54
organza 53
crewneck    53
amy 53
birger  53
finger  53
delicate    53
beverly 53
hills   53
flash   53
dune    53
mcm 53
vermeil 53
bun 53
dots    53
jade    53
neoprene    53
monica  53
belle   53
peony   53
weave   53
fluid   53
flowy   53
crisscross  53
marmont 53
calypso 53
imports 53
rabbit  53
locket  52
rykiel  52
costume 52
maria   52
colorblock  52
search  52
legendary   52
infinite    52
bunny   52
teaspoon    52
adult   52
vita    52
21+ 52
dionysus    52
curly   52
perla   52
flora   52
dahlia  52
pvc 52
violet  52
photos  51
brunello    51
bailey  51
robinson    51
curl    51
wing    51
neo 51
sydney  51
carved  51
jordan  51
grained 51
rivets  51
supply  51
hats    51
mouret  51
baublebar   51
xs  51
dre 51
point-toe   51
cucinelli   51
padlock 51
black/gold  51
bobby   51
fleur   51
woman   51
orchid  51
poplin  51
roland  51
lizzie  51
diana   51
lewis   50
equipment   50
label   50
tiered  50
moonstone   50
moisturizing    50
stretch-jersey  50
slim-leg    50
smoky   50
audrey  50
raffia  50
p   50
natasha 50
sunset  50
rhodium 50
rupert  50
ysl 50
ilia    50
leigh   50
cara    50
mouse   50
rosa    50
junior  50
van 50
ann 50
sweetheart  50
ippolita    50
intarsia    50
illamasqua  50
lorac   50
gilet   49
amazing 49
jules   49
international   49
harrods 49
sanderson   49
always  49
planter 49
longwear    49
whistles    49
fallon  49
malene  49
friendship  49
structured  49
38mm    49
ever    49
twisted 49
backpacks   49
cotton-poplin   49
synthetic   49
fan 49
giambattista    48
kensington  48
barrel  48
pulitzer    48
petal   48
unicorn 48
metro   48
first   48
braid   48
funnel  48
lavender    48
girly   48
plus/7/6    48
lipcolor    48
croc    48
bui 48
pointed-toe 48
dual    48
indigo  48
l'absolu    48
fishnet 48
ounce   48
vetements   48
graham  48
effy    48
gifts   48
rhea    48
ally    48
operandi    48
tips    48
norman  48
marilyn 48
curved  48
sleek   48
valli   48
tinted  48
tarte   48
various 48
18ct    47
aspinal 47
margot  47
lisa    47
wrist   47
trimmed 47
spiral  47
maroon  47
atelier 47
bo  47
opaque  47
products    47
sunday  47
holland 47
brocade 47
cozy    47
shower  47
hulme   47
suit    47
matthew 47
leisure 47
loop    47
minimal 47
brooks  47
wireless    47
chinese 47
plants  47
dance   47
messy   47
translucent 47
kirkwood    47
force   47
cactus  47
creamy  47
radiant 47
branch  47
waistcoat   46
world   46
sarah   46
humble  46
almond  46
tint    46
80s 46
kevyn   46
rustic  46
blade   46
scarves 46
1/2 46
bed 46
bee 46
georgia 46
fun 46
rx  46
vertical    46
eva 46
refill  46
goldschmied 46
thing   46
grid    46
low-tops    46
tortoiseshell   46
temple  46
scotch  46
bronzing    46
aucoin  46
hi-top  46
williamson  46
adriano 46
tag 46
sephora 46
comb    46
lola    46
watercolor  46
manon   45
jar 45
aeo 45
ancient 45
colours 45
icon    45
engraved    45
pajama  45
facial  45
potter  45
a5  45
comme   45
romance 45
faith   45
buckled 45
dreamcatcher    45
pigalle 45
iris    45
money   45
boys    45
peekaboo    45
sailor  45
clasp   45
christopher 45
elle    45
rucksack    45
silk-satin  45
detachable  45
essentials  45
rim 45
chan    44
totes   44
wrapped 44
simons  44
rb3025  44
performance 44
mule    44
monogramme  44
bruno   44
alien   44
cocoon  44
press   44
harris  44
eddie   44
cloud   44
ricci   44
narrow  44
larger  44
buy 44
baroque 44
curve   44
frye    44
tight   44
shaping 44
classics    44
off-shoulder    44
verdugo 44
matt    44
tutorial    44
rhinestones 44
swag    44
hermÃ¨s 44
o   44
tod's   44
glossy  44
great   44
90's    44
desk    44
radiance    44
lam 44
crossover   44
arm 44
dusty   43
sock    43
slimming    43
ears    43
ella    43
ultimate    43
hour    43
justin  43
soda    43
towel   43
hunter  43
miller  43
clutches    43
dr  43
spitfire    43
nerd    43
electric    43
paolo   43
evan    43
kitty   43
perspex 43
charms  43
latest  43
sign    43
nautical    43
care    43
bvlgari 43
crème   43
camouflage  43
jonathan    43
friends 43
disco   43
crocheted   43
lion    43
lamb    43
tiger   43
pony    43
baptiste    43
elyse   43
lights  43
lapis   43
moi 43
trapeze 43
toast   43
lacoste 43
styles  43
cutoff  43
velvetines  42
nature  42
web 42
varnish 42
bottoms 42
seven   42
ocean   42
mules   42
alexa   42
curling 42
criss   42
anastasia   42
palazzo 42
lizard  42
personalized    42
strawberry  42
baked   42
brick   42
continental 42
basics  42
simulated   42
classy  42
amazon.co.uk    42
los 42
55mm    42
gym 42
canada  42
l.a.    42
wonderland  42
printing    42
phase   42
zac 42
single-breasted 42
better  42
stacked 42
lovers  42
giant   42
arizona 42
jelly   42
closed  42
keyhole 42
.   41
two-piece   41
18kt    41
pochette    41
moisture    41
seam    41
comfort 41
valentine   41
ceremony    41
clic    41
velour  41
twin    41
baguette    41
fruit   41
colored 41
high-waist  41
skort   41
y   41
paradise    41
champion    41
milly   41
cedar   41
neutral 41
barth   41
bareminerals    41
belly   41
leonard 41
embellishment   41
ctw 41
iron    41
penny   41
briefcase   41
bad 41
plush   41
sunscreen   41
hip 41
duffel  40
blair   40
vibrant 40
erickson    40
thierry 40
fuchsia 40
appliqué    40
envy    40
broken  40
dolce&gabbana   40
cameo   40
coats   40
lengthening 40
openwork    40
brothers    40
michelle    40
glittered   40
cloth   40
ipod    40
pompom  40
brooklyn    40
snapmade.com    40
barneys 40
quality 40
lucite  40
sling   40
italy   40
cabochon    40
supreme 40
notes   40
shoedazzle  40
romper  39
tech    39
dyed    39
bonded  39
alexandre   39
product 39
simone  39
femme   39
flops   39
magazine    39
eyelet  39
katrantzou  39
fl  39
agent   39
victorian   39
gunmetal    39
decoration  39
seamed  39
miranda 39
polka-dot   39
available   39
pizza   39
amanda  39
wings   39
lorenzo 39
part    39
manicure    39
yang    39
dylan   39
gorjana 39
timeless    39
brian   39
raey    39
15ml    39
barely  39
dog 39
pour    39
sieraden    39
edited  39
overall 39
105mm   39
cady    39
appliquéd   39
movado  39
10mm    39
chiara  38
vivier  38
paneled 38
faux-fur    38
daniel  38
grand   38
charming    38
sofia   38
room    38
lovely  38
r13 38
gowns   38
splendid    38
knuckle 38
king    38
bezel   38
nixon   38
gauze   38
peacock 38
carolina    38
angeles 38
lizzy   38
marco   38
buckles 38
eyebrow 38
neckline    38
pom-pom 38
j.w.anderson    38
sylvie  38
kitten  38
knee-high   38
nano    38
stem    38
katy    38
wellington  38
romantic    38
polyvore    38
50s 38
make-up 38
mademoiselle    38
rails   38
wharf   38
shaggy  38
pan 38
gazelle 38
36mm    38
tulip   37
spectrum    37
bottom  37
cobalt  37
passion 37
year    37
starbucks   37
iantorno    37
narciso 37
eyelash 37
live    37
3x1 37
stretch-cotton  37
8mm 37
off-white   37
se  37
/jean   37
eve 37
luna    37
wars    37
show    37
folding 37
steven  37
silvertone  37
mia 37
fabulous    37
shadows 37
temperley   37
anouk   37
roman   37
racerback   37
lilac   37
laundry 37
dainty  37
hinge   37
frost   37
jeweled 37
40mm    37
supra   37
view    37
hanging 36
one-shoulder    36
graffiti    36
self    36
alpaca  36
cognac  36
ps1 36
slide   36
turn-down   36
u   36
borgo   36
nicole  36
tower   36
lighting    36
joy 36
skirts  36
keyring 36
flock   36
du  36
emoji   36
plexi   36
ash 36
henna   36
mod 36
edit    36
harper  36
100%    36
cleansing   36
creme   36
lotus   36
georgette   36
inches  36
mid-length  36
kat 36
ready   36
demi    36
safari  36
moss    36
bing    36
mother-of-pearl 36
shade   36
hamilton    36
skyline 36
roger   36
complete    36
pierced 36
jogger  36
mytheresa.com   36
bieber  36
laque   36
daily   36
milk    36
homme   36
everyday    36
simmons 36
silk-chiffon    36
smythson    36
tab 36
peacoat 35
era 35
judith  35
bouclé  35
sheath  35
match   35
pamela  35
spiked  35
holographic 35
m&s 35
joe 35
babies  35
carvela 35
silky   35
sharon  35
rotita  35
monarch 35
fling   35
en  35
chico's 35
nordstrom   35
marciano    35
string  35
girlfriend  35
bloom   35
twenty  35
eight   35
ava 35
s4  35
bite    35
zippered    35
brit    35
gentle  35
citrine 35
barrette    35
heavy   35
step    35
coated  35
journal 35
dry 35
mat 35
simply  35
extensions  35
emporio 35
wardrobe    35
illuminating    35
violeta 35
plein   35
brogue  35
tools   35
sticker 35
bare    35
balance 35
accessory   35
irene   35
styling 35
cable-knit  35
linea   35
looks   35
wide-brim   35
don't   35
pigment 34
minnie  34
shahida 34
pumpkin 34
special 34
jegging 34
resistant   34
weekend 34
mineralize  34
thicken 34
selected    34
coachella   34
fern    34
color-block 34
dannijo 34
fair    34
future  34
rodriguez   34
qupid   34
nero    34
robert  34
alice+olivia    34
underwear   34
luke    34
follies 34
meyer   34
heat    34
ani 34
dining  34
parides 34
young   34
betty   34
24k 34
voyage  34
loewe   34
jungle  34
automatic   34
robyn   34
rita    34
h   34
puff    34
baume   34
bowler  34
holly   34
japanese    34
jennings    34
hermès  34
jansport    34
patches 34
raf 34
kill    34
lancôme 34
grain   34
kohl    33
incase  33
books   33
gallery 33
lost    33
foil    33
cosmic  33
zero    33
broderie    33
brief   33
tabitha 33
cube    33
cuba    33
take    33
sheet   33
sheepskin   33
1/4 33
luu 33
precious    33
briefs  33
thomas  33
pots    33
hourglass   33
bees    33
zebra   33
use 33
nirvana 33
allen   33
wink    33
catcher 33
man 33
splash  33
cynthia 33
miniskirt   33
cyber   33
magnolia    33
louise  33
things  33
neuwirth    33
tied    33
lattice 33
sparkling   33
bebe    33
slogan  33
ribbed-knit 33
skagen  33
bay 33
amelie  33
shark   33
atwood  33
tip 33
superdry    33
barn    33
vanilla 33
slub    33
master  32
panelled    32
bianca  32
babe    32
wet 32
vampire 32
broad   32
nwt 32
morning 32
spikes  32
illuminator 32
bridge  32
larsson 32
zodiac  32
wreath  32
alaïa   32
novica  32
slipper 32
play    32
fire    32
hello   32
hoops   32
juniors 32
leiber  32
lingerie    32
block-heel  32
rio 32
clover  32
kingdom 32
blank   32
pr  32
martini 32
brilliant   32
footwear    32
veau    32
neiman  32
brightening 32
1980s   32
willow  32
chantecaille    32
credit  32
etched  32
stylo   32
marcus  32
right   32
fenty   32
edward  32
anita   32
self-tie    32
maya    32
brows   32
aerin   32
umbrella    32
lurex   32
birman  32
chest   32
road    32
viva    32
birthday    32
inspirational   32
yoga    31
analog  31
derby   31
illusion    31
stretch-crepe   31
falke   31
headpiece   31
wavy    31
high-low    31
north   31
marchesa    31
watermelon  31
batman  31
ferragni    31
stilettos   31
barbour 31
liberty 31
spirit  31
motif   31
jasmine 31
ae  31
iridescent  31
results 31
hope    31
wool-crepe  31
medusa  31
sandy   31
yoni    31
stocking    31
lasry   31
romy    31
national    31
mr  31
eiffel  31
datejust    31
icing   31
awesome 31
75ml    31
terre   31
re/done 31
glitz   31
pear    31
way 31
chestnut    31
papier  31
erin    31
t-bar   31
viparo  31
ray 31
lara    31
morganite   31
rocha   31
haider  31
poison  31
lariat  31
push    31
l'oreal 31
business    31
utility 31
smoke   31
jones   31
high-heel   31
track   31
marl    31
please  31
desert  31
lattori 31
bean    31
a.p.c.  31
bidermann   31
velours 31
aluminum    31
thread  31
premiere    31
crossbar    30
menswear    30
connection  30
flawless    30
e   30
sigma   30
high-heeled 30
patrick 30
polyester   30
collections 30
gold/black  30
stretch-knit    30
ace 30
let 30
1990s   30
famous  30
bordeaux    30
cicihot 30
petits  30
monroe  30
dakota  30
lookbook    30
model   30
1970s   30
stain   30
checkered   30
polly   30
mandala 30
racer   30
crosby  30
veil    30
posh    30
message 30
skeleton    30
cowl    30
lazy    30
valentines  30
sutton  30
college 30
bermuda 30
reiss   30
flex    30
east    30
tees    30
diorific    30
karan   30
word    30
carpet  30
fujifilm    30
cut-off 30
arms    30
sparkly 30
mila    30

================================================
FILE: extract_feature.sh
================================================
#!/bin/bash
# Runs polyvore/run_inference.py on data/label/test_no_dup.json and stores the
# extracted image features in data/features/test_features.pkl.
#
# NOTE: despite its name, CHECKPOINT_DIR holds the checkpoint path that is
# passed to --checkpoint_path. It is quoted at every use so that a path
# containing spaces or glob characters is passed through as a single argument.
CHECKPOINT_DIR="model/model_final/model.ckpt-34865"

python polyvore/run_inference.py \
  --checkpoint_path="${CHECKPOINT_DIR}" \
  --json_file="data/label/test_no_dup.json" \
  --image_dir="data/images/" \
  --feature_file="data/features/test_features.pkl" \
  --rnn_type="lstm"

# # Extract features of Bi-LSTM without VSE
# CHECKPOINT_DIR="model/model_final/model_bi_no_emb.ckpt"
# python polyvore/run_inference.py \
#   --checkpoint_path="${CHECKPOINT_DIR}" \
#   --json_file="data/label/test_no_dup.json" \
#   --image_dir="data/images/" \
#   --feature_file="data/features/test_features_bi_no_emb.pkl" \
#   --rnn_type="lstm"


# # Extract features of VSE model without LSTM
# CHECKPOINT_DIR="model/model_final/model_emb.ckpt"
# python polyvore/run_inference_vse.py \
#   --checkpoint_path="${CHECKPOINT_DIR}" \
#   --json_file="data/label/test_no_dup.json" \
#   --image_dir="data/images/" \
#   --feature_file="data/features/test_features_emb.pkl"

# # Extract features of Siamese Network
# CHECKPOINT_DIR="model/model_final/model_siamese.ckpt"

# python polyvore/run_inference_siamese.py \
#   --checkpoint_path="${CHECKPOINT_DIR}" \
#   --json_file="data/label/test_no_dup.json" \
#   --image_dir="data/images/" \
#   --feature_file="data/features/test_features_siamese.pkl"


================================================
FILE: fill_in_blank.sh
================================================
#!/bin/bash
# Runs the fill-in-the-blank evaluation (polyvore/fill_in_blank.py) on
# data/label/fill_in_blank_test.json using the pre-extracted features in
# data/features/test_features.pkl; results go to fill_in_blank_result.pkl.
#
# NOTE: despite its name, CHECKPOINT_DIR holds the checkpoint path that is
# passed to --checkpoint_path. It is quoted at every use so that a path
# containing spaces or glob characters is passed through as a single argument.
CHECKPOINT_DIR="model/model_final/model.ckpt-34865"

python polyvore/fill_in_blank.py \
  --checkpoint_path="${CHECKPOINT_DIR}" \
  --json_file="data/label/fill_in_blank_test.json" \
  --feature_file="data/features/test_features.pkl" \
  --rnn_type="lstm" \
  --direction="2" \
  --result_file="fill_in_blank_result.pkl"

# # Fill in the blank Siamese Network
# CHECKPOINT_DIR="model/model_final/model_siamese.ckpt"

# python polyvore/fill_in_blank_siamese.py \
#   --checkpoint_path="${CHECKPOINT_DIR}" \
#   --json_file="data/label/fill_in_blank_test.json" \
#   --feature_file="data/features/test_features_siamese.pkl" \
#   --result_file="fill_in_blank_siamese_result.pkl"


================================================
FILE: outfit_generation.sh
================================================
#!/bin/bash
# Runs polyvore/set_generation.py for the queries in query.json, using the
# pre-extracted features in data/features/test_features.pkl; output is written
# under results/.
#
# NOTE: despite its name, CHECKPOINT_DIR holds the checkpoint path that is
# passed to --checkpoint_path. It is quoted at every use so that a path
# containing spaces or glob characters is passed through as a single argument.
CHECKPOINT_DIR="model/model_final/model.ckpt-34865"

# Run inference on images.
python polyvore/set_generation.py \
  --checkpoint_path="${CHECKPOINT_DIR}" \
  --image_dir="data/images/test_no_dup/" \
  --feature_file="data/features/test_features.pkl" \
  --query_file="query.json" \
  --word_dict_file="data/final_word_dict.txt" \
  --result_dir="results/"
  

================================================
FILE: polyvore/configuration.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Bi-LSTM Polyvore model and training configurations."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


class ModelConfig(object):
  """Holds the model hyperparameters and the names of the input features."""

  def __init__(self):
    """Assigns the default value of every model hyperparameter."""
    # ---- Input data -------------------------------------------------------
    # Pattern matching the sharded TFRecord files of SequenceExample protos.
    # It has to be set for the training and evaluation modes.
    self.input_file_pattern = None

    # How the stored images are encoded: "jpeg" or "png".
    self.image_format = "jpeg"

    # Rough count of values held by one input shard; it is used to make sure
    # that shards are mixed well enough during training.
    self.values_per_input_shard = 135
    # Lower bound on the number of shards kept in the input queue.
    self.input_queue_capacity_factor = 2
    # How many threads prefetch SequenceExample protos.
    self.num_input_reader_threads = 1

    # ---- SequenceExample field names --------------------------------------
    # Context feature that stores the set id.
    self.set_id_name = "set_id"

    # Feature lists that store the images and the captions.
    self.image_feature_name = "images"
    self.image_index_name = "image_index"
    self.caption_feature_name = "caption_ids"

    # ---- Vocabulary -------------------------------------------------------
    # Count of distinct vocabulary words, plus one for <UNK>.
    # The default deliberately exceeds the expected vocabulary size so that
    # differences between tokenizer versions used in preprocessing are
    # tolerated. A value above the real vocabulary size is harmless; a value
    # below it results in an error.
    self.vocab_size = 2757

    # ---- Batching and preprocessing ---------------------------------------
    # How many threads preprocess images.
    self.num_preprocess_threads = 1

    # Number of examples per batch.
    self.batch_size = 10

    # ---- Image network ----------------------------------------------------
    # Inception v3 checkpoint file used to initialize the Inception
    # variables. It has to be set the first time training is started.
    self.inception_checkpoint_file = None

    # Size of the images fed to Inception v3.
    self.image_height = 299
    self.image_width = 299

    # ---- Sequence model ---------------------------------------------------
    # Scale with which the model variables are initialized.
    self.initializer_scale = 0.08

    # Dimensionality of the LSTM input (embedding_size) and of the LSTM output
    # (num_lstm_units). embedding_size doubles as the size of the
    # visual-semantic joint embedding space.
    self.embedding_size = 512
    self.num_lstm_units = 512

    # Dropout keep probability for the LSTM variables; applied if below 1.0.
    self.lstm_dropout_keep_prob = 0.7

    # Upper bound on the number of images in one fashion set.
    self.number_set_images = 8

    # ---- Losses -----------------------------------------------------------
    # Margin of the embedding loss.
    self.emb_margin = 0.2

    # Weights that balance the individual losses.
    self.emb_loss_factor = 1.0  # Visual-semantic embedding (VSE) loss.
    self.f_rnn_loss_factor = 1.0  # Forward LSTM loss.
    # Backward LSTM loss. It might deserve a lower weight because predicting
    # backward is harder than predicting forward in this scenario.
    self.b_rnn_loss_factor = 1.0

    # Kind of recurrent network: "lstm", "gru" or "rnn".
    self.rnn_type = "lstm"


class TrainingConfig(object):
  """Holds the hyperparameters that control training."""

  def __init__(self):
    """Assigns the default value of every training hyperparameter."""
    # Size of one epoch, counted in training examples.
    self.num_examples_per_epoch = 17316

    # Name of the optimizer used to train the model.
    self.optimizer = "SGD"

    # Learning rate used in the initial phase of training.
    # NOTE(review): the original comment here was truncated ("by the FLAGS in
    # train.py"); presumably the FLAGS in train.py can override this value --
    # confirm against train.py.
    self.initial_learning_rate = 0.2

    # Learning-rate decay settings.
    self.learning_rate_decay_factor = 0.5
    self.num_epochs_per_decay = 2.0

    # Gradients are clipped to this value unless it is None.
    self.clip_gradients = 5.0

    # Number of model checkpoints that are kept.
    self.max_checkpoints_to_keep = 10


================================================
FILE: polyvore/fashion_compatibility.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Predict the fashion compatibility of a given image sequence."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import json

import tensorflow as tf
import numpy as np
import pickle as pkl
from sklearn import metrics

import configuration
import polyvore_model_bi as polyvore_model

# Object through which the values of the flags defined below are read.
FLAGS = tf.flags.FLAGS

# Command-line flags of the compatibility prediction script.
tf.flags.DEFINE_string("checkpoint_path", "",
                       "Model checkpoint file or directory containing a "
                       "model checkpoint file.")
tf.flags.DEFINE_string("label_file", "", "Txt file containing test outfits.")
tf.flags.DEFINE_string("feature_file", "", "Files containing image features")
tf.flags.DEFINE_string("rnn_type", "", "Type of RNN.")
tf.flags.DEFINE_string("result_file", "", "File to store the results.")
# Adjacent string literals are concatenated as-is, so the first literal ends
# with a space to keep the help text from reading "forward only;-1: ...".
tf.flags.DEFINE_integer("direction", 2, "2: bidirectional; 1: forward only; "
                        "-1: backward only.")


def run_compatibility_inference(sess, image_seqs, test_feat,
                                num_lstm_units, model):
  """Scores one outfit with the forward and/or the backward RNN.

  FLAGS.direction selects the passes: the forward pass runs unless it is -1,
  the backward pass runs unless it is 1.

  Args:
    sess: Session used to run the named RNN tensors and model.lstm_xent_loss.
    image_seqs: Row indices into test_feat, one per outfit item, in outfit
      order. It is concatenated with a list, so it is expected to be a list.
    test_feat: 2-D feature matrix. The index of its last row is appended to
      the targets of each pass as the final "next item".
    num_lstm_units: Size of the RNN state (doubled when FLAGS.rnn_type is
      "lstm").
    model: Object exposing the lstm_xent_loss tensor.

  Returns:
    [f_score, b_score]: for each pass that ran, the negated mean of
    lstm_xent_loss; 0 for a pass that was skipped.
  """
  item_embs = test_feat[image_seqs, :]
  item_count = len(image_seqs)
  state_width = (2 * num_lstm_units if FLAGS.rnn_type == "lstm"
                 else num_lstm_units)
  initial_state = np.zeros([1, state_width])

  def _unroll(order, state_fetch, logits_fetch, input_name, state_name):
    """Feeds the items at positions `order` step by step; returns all logits."""

    def _step(position, state):
      step_input = np.reshape(item_embs[position], [1, -1])
      return sess.run(fetches=[state_fetch, logits_fetch],
                      feed_dict={input_name: step_input,
                                 state_name: state})

    # The first step starts from the all-zeros state.
    state, logits = _step(order[0], initial_state)
    collected = [logits]
    # Every later step continues from the state of the previous one.
    for position in order[1:]:
      state, logits = _step(position, state)
      collected.append(logits)
    return collected

  def _score(collected, next_indices):
    """Returns the negated mean loss of the collected logits.

    Unlike training, where the loss is computed within each mini batch, the
    loss here is computed against the whole test set. This is pretty slow;
    a better method might exist.
    """
    similarity = np.squeeze(np.dot(np.asarray(collected),
                                   np.transpose(test_feat)))
    losses = sess.run(model.lstm_xent_loss,
                      feed_dict={"lstm/pred_feed:0": similarity,
                                 "lstm/next_index_feed:0": next_indices})
    return - np.mean(losses)

  f_score = 0
  b_score = 0

  if FLAGS.direction != -1:
    # Forward RNN: first item to last item.
    forward_logits = _unroll(list(range(item_count)),
                             "lstm/f_state:0", "f_logits/f_logits/BiasAdd:0",
                             "lstm/f_input_feed:0", "lstm/f_state_feed:0")
    f_score = _score(forward_logits,
                     image_seqs[1:] + [test_feat.shape[0]-1])

  if FLAGS.direction != 1:
    # Backward RNN: last item to first item.
    backward_logits = _unroll(list(range(item_count - 1, -1, -1)),
                              "lstm/b_state:0", "b_logits/b_logits/BiasAdd:0",
                              "lstm/b_input_feed:0", "lstm/b_state_feed:0")
    b_score = _score(backward_logits,
                     image_seqs[-2::-1] + [test_feat.shape[0]-1])

  return [f_score, b_score]


def main(_):
  """Scores every outfit of the label file and reports the compatibility AUC.

  Each non-blank line of FLAGS.label_file is read as a label followed by the
  ids of the outfit's images, separated by whitespace. The per-outfit forward
  and backward scores and the labels are pickled to FLAGS.result_file.

  Raises:
    ValueError: If an outfit references an image id that is missing from
      FLAGS.feature_file.
  """
  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model_config = configuration.ModelConfig()
    model_config.rnn_type = FLAGS.rnn_type
    model = polyvore_model.PolyvoreModel(model_config, mode="inference")
    model.build()
    saver = tf.train.Saver()

    # Load pre-computed image features.
    # NOTE(review): unpickling can execute arbitrary code; only point
    # --feature_file at files from a trusted source.
    with open(FLAGS.feature_file, "rb") as f:
      test_data = pkl.load(f)
    # On Python 3 dict.keys() is a view that cannot be indexed, so copy the
    # ids into a list.
    test_ids = list(test_data.keys())
    # Id -> row lookup table; replaces a linear list.index() scan per image.
    test_index = dict((test_id, i) for i, test_id in enumerate(test_ids))
    test_feat = np.zeros((len(test_ids) + 1,
                          len(test_data[test_ids[0]]["image_rnn_feat"])))
    # test_feat has one more zero vector as the representation of END of
    # RNN prediction.
    for i, test_id in enumerate(test_ids):
      # Image feature in the RNN space.
      test_feat[i] = test_data[test_id]["image_rnn_feat"]

    g.finalize()
    with tf.Session() as sess:
      saver.restore(sess, FLAGS.checkpoint_path)
      all_f_scores = []
      all_b_scores = []
      all_scores = []
      all_labels = []
      # Read the label file inside a context manager so the handle is closed.
      with open(FLAGS.label_file) as f:
        testset = f.read().splitlines()
      for k, test_outfit in enumerate(testset, 1):
        if k % 100 == 0:
          print("Finish %d outfits." % k)
        fields = test_outfit.split()
        if not fields:
          # Blank line: there is no outfit to score.
          continue
        try:
          image_seqs = [test_index[test_image] for test_image in fields[1:]]
        except KeyError as missing:
          # Keep the exception type that list.index() used to raise.
          raise ValueError("Image %s listed in %s has no features in %s." %
                           (missing, FLAGS.label_file, FLAGS.feature_file))

        [f_score, b_score] = run_compatibility_inference(sess, image_seqs,
                      test_feat, model_config.num_lstm_units, model)

        all_f_scores.append(f_score)
        all_b_scores.append(b_score)
        all_scores.append(f_score + b_score)
        # Parse the whole first field rather than only its first character.
        all_labels.append(int(fields[0]))

      # calculate AUC and AP
      fpr, tpr, thresholds = metrics.roc_curve(all_labels,
                                               all_scores,
                                               pos_label=1)
      print("Compatibility AUC: %f for %d outfits" %
              (metrics.auc(fpr, tpr), len(all_labels)))

      with open(FLAGS.result_file, "wb") as f:
        pkl.dump({"all_labels": all_labels, "all_f_scores": all_f_scores,
                  "all_b_scores": all_b_scores}, f)

      
# Script entry point: hand control to TensorFlow's app runner.
# NOTE(review): tf.app.run() is expected to parse the flags and call main().
if __name__ == "__main__":
  tf.app.run()


================================================
FILE: polyvore/fill_in_blank.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Fill in blank evaluation."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json

import tensorflow as tf
import numpy as np
import pickle as pkl

import configuration
import polyvore_model_bi as polyvore_model

# Object through which the values of the flags defined below are read.
FLAGS = tf.flags.FLAGS

# Command-line flags of the fill-in-the-blank evaluation script.
tf.flags.DEFINE_string("checkpoint_path", "",
                       "Model checkpoint file or directory containing a "
                       "model checkpoint file.")
tf.flags.DEFINE_string("json_file", "",
                       "Json file containing questions and answers.")
tf.flags.DEFINE_string("feature_file", "", "pkl files containing the features")
tf.flags.DEFINE_string("rnn_type", "lstm", "Type of RNN.")
tf.flags.DEFINE_string("result_file", "", "File to store the results.")
# Adjacent string literals are concatenated as-is, so the first literal ends
# with a space to keep the help text from reading "forward only;-1: ...".
tf.flags.DEFINE_integer("direction", 2, "2: bidirectional; 1: forward only; "
                        "-1: backward only; 0: Average pooling no RNN.")

def run_question_inference(sess, question, test_ids, test_feat,
                           test_rnn_feat, num_lstm_units):
  """Answers one fill-in-the-blank question.

  Args:
    sess: Session used to run the named RNN state and logits tensors.
    question: Dict with "question" (ids of the given items, in outfit order),
      "answers" (ids of the candidate items) and "blank_position" (1-based
      position of the blank within the complete outfit).
    test_ids: Item ids; the id at position i belongs to row i of test_feat
      and of test_rnn_feat. Any iterable is accepted.
    test_feat: 2-D array of the features that are average-pooled when
      FLAGS.direction is 0.
    test_rnn_feat: 2-D array of the features that are fed to the RNN.
    num_lstm_units: Size of the RNN state (doubled when FLAGS.rnn_type is
      "lstm").

  Returns:
    (score, predicted_answer): the score of every candidate (shape
    [1, number of answers]) and the candidate indices sorted from best to
    worst. ([], []) is returned when the question has to be skipped: an id is
    unknown, the question or the answers are empty, or the blank position
    cannot be handled with the requested direction.

  Raises:
    ValueError: If the blank position is outside 1..len(question) + 1 while
      an RNN direction is requested.
  """
  # list.index() is needed below, so accept any iterable of ids (for example
  # a Python 3 dict view) by copying it into a list.
  if not isinstance(test_ids, list):
    test_ids = list(test_ids)

  # Only an unknown id (ValueError from list.index) makes a question invalid.
  # Any other exception is a real error and must not be swallowed.
  try:
    question_ids = [test_ids.index(q) for q in question["question"]]
    answer_ids = [test_ids.index(a) for a in question["answers"]]
  except ValueError:
    return [], []
  if not question_ids or not answer_ids:
    # Nothing to condition on, or nothing to choose from.
    return [], []

  blank_posi = question["blank_position"]

  if FLAGS.direction == 0:
    # Only use VSE: average pooling of the VSE embeddings of the given items,
    # compared with every candidate by cosine similarity.
    question_emb = np.reshape(np.mean(test_feat[question_ids], 0), [1,-1])
    q_emb = question_emb / np.linalg.norm(question_emb, axis=1)[:, np.newaxis]
    a_emb = (test_feat[answer_ids] /
               np.linalg.norm(test_feat[answer_ids], axis=1)[:, np.newaxis])
    vse_score = (np.dot(q_emb, np.transpose(a_emb)) + 1) / 2 # scale to [0,1]
    predicted_answer = np.argsort(-vse_score)[0]
    return vse_score, predicted_answer

  num_given = len(question_ids)
  if not 1 <= blank_posi <= num_given + 1:
    raise ValueError("blank_position %r is outside 1..%d." %
                     (blank_posi, num_given + 1))

  if FLAGS.rnn_type == "lstm":
    # LSTM has two states.
    zero_state = np.zeros([1, 2 * num_lstm_units])
  else:
    zero_state = np.zeros([1, num_lstm_units])

  def _answer_softmax(state_fetch, logits_fetch, input_name, state_name,
                      item_ids):
    """Runs the RNN over item_ids; returns a softmax over the candidates."""
    # The first step starts from the all-zeros state; item_ids is never empty
    # because of the checks above.
    lstm_state = zero_state
    lstm_output = None
    for item_id in item_ids:
      input_feed = np.reshape(test_rnn_feat[item_id], [1,-1])
      [lstm_state, lstm_output] = sess.run(
          fetches=[state_fetch, logits_fetch],
          feed_dict={input_name: input_feed,
                     state_name: lstm_state})
    # Search in answers.
    unnormalized = np.exp(np.dot(lstm_output,
                                 np.transpose(test_rnn_feat[answer_ids])))
    return unnormalized / np.sum(unnormalized)

  def _forward(item_ids):
    return _answer_softmax("lstm/f_state:0", "f_logits/f_logits/BiasAdd:0",
                           "lstm/f_input_feed:0", "lstm/f_state_feed:0",
                           item_ids)

  def _backward(item_ids):
    return _answer_softmax("lstm/b_state:0", "b_logits/b_logits/BiasAdd:0",
                           "lstm/b_input_feed:0", "lstm/b_state_feed:0",
                           item_ids)

  if blank_posi == num_given + 1:
    # Blank is the last item: only the forward RNN can predict it.
    if FLAGS.direction == -1:
      return [], []
    rnn_score = _forward(question_ids)

  elif blank_posi == 1:
    # Blank is the first item: only the backward RNN can predict it.
    if FLAGS.direction == 1:
      return [], []
    rnn_score = _backward(question_ids[::-1])

  else:
    # Blank is in the middle. The forward RNN reads the items before the
    # blank, the backward RNN reads the items after it from last to first.
    items_before = question_ids[:blank_posi - 1]
    items_after = question_ids[blank_posi - 1:][::-1]
    if FLAGS.direction == 2:
      rnn_score = _forward(items_before) + _backward(items_after)
      rnn_score /= 2
    elif FLAGS.direction == 1:
      rnn_score = _forward(items_before)
    else:
      rnn_score = _backward(items_after)

  predicted_answer = np.argsort(-rnn_score)[0]
  return rnn_score, predicted_answer
  

def main(_):
  """Runs the fill-in-the-blank evaluation and stores the results.

  Loads the pre-computed features from FLAGS.feature_file, answers every
  question of FLAGS.json_file, prints the top-1 to top-4 accuracy over the
  questions that could be answered, and pickles the set ids, predictions and
  scores to FLAGS.result_file.
  """
  top_k = 4 # Print the top_k accuracy.
  true_pred = np.zeros(top_k)
  # Load pre-computed image features.
  # NOTE(review): unpickling can execute arbitrary code; only point
  # --feature_file at files from a trusted source.
  with open(FLAGS.feature_file, "rb") as f:
    test_data = pkl.load(f)
  # On Python 3 dict.keys() is a view that supports neither indexing nor
  # .index(), so copy the ids into a list.
  test_ids = list(test_data.keys())
  test_feat = np.zeros((len(test_ids),
                        len(test_data[test_ids[0]]["image_feat"])))
  test_rnn_feat = np.zeros((len(test_ids),
                            len(test_data[test_ids[0]]["image_rnn_feat"])))
  for i, test_id in enumerate(test_ids):
    # Image feature in visual-semantic embedding space.
    test_feat[i] = test_data[test_id]["image_feat"]
    # Image feature in the RNN space.
    test_rnn_feat[i] = test_data[test_id]["image_rnn_feat"]

  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model_config = configuration.ModelConfig()
    model_config.rnn_type = FLAGS.rnn_type
    model = polyvore_model.PolyvoreModel(model_config, mode="inference")
    model.build()
    saver = tf.train.Saver()

    g.finalize()
    with tf.Session() as sess:
      saver.restore(sess, FLAGS.checkpoint_path)
      # Read the questions inside a context manager so the handle is closed.
      with open(FLAGS.json_file) as f:
        questions = json.load(f)

      all_pred = []
      set_ids = []
      all_scores = []
      for question in questions:
        score, pred = run_question_inference(sess, question, test_ids,
                                             test_feat, test_rnn_feat,
                                             model_config.num_lstm_units)
        # A skipped question yields an empty list. Test the length instead of
        # comparing with []: comparing a NumPy array with [] is an elementwise
        # comparison of mismatched shapes, which newer NumPy rejects.
        if len(pred) > 0:
          all_pred.append(pred)
          all_scores.append(score)
          set_ids.append(question["question"][0].split("_")[0])
          # 0 is the correct answer, iterate over top_k.
          for i in range(top_k):
            if 0 in pred[:i+1]:
              true_pred[i] += 1

      # Print all top-k accuracy.
      num_valid = len(all_pred)
      for i in range(top_k):
        print("Top %d Accuracy: " % (i + 1))
        print("%d correct answers in %d valid questions." %
                  (true_pred[i], num_valid))
        # Without any valid question the accuracy is undefined; report nan
        # without dividing by zero.
        accuracy = true_pred[i] / num_valid if num_valid else float("nan")
        print("Accuracy: %f" % accuracy)

      # The builtin `object` is used as dtype; the `np.object` alias no longer
      # exists in newer NumPy releases.
      s = np.empty((len(all_scores),), dtype=object)
      for i in range(len(all_scores)):
          s[i] = all_scores[i]

      with open(FLAGS.result_file, "wb") as f:
        pkl.dump({"set_ids": set_ids, "pred": all_pred, "score": s}, f)

# Script entry point: hand control to TensorFlow's app runner.
# NOTE(review): tf.app.run() is expected to parse the flags and call main().
if __name__ == "__main__":
  tf.app.run()


================================================
FILE: polyvore/fill_in_blank_siamese.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Fill in blank evaluation."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json

import tensorflow as tf
import numpy as np
import pickle as pkl

import configuration
import polyvore_model_siamese as polyvore_model

# Object through which the values of the flags defined below are read.
FLAGS = tf.flags.FLAGS

# Location of the trained model.
tf.flags.DEFINE_string(
    "checkpoint_path", "",
    "Model checkpoint file or directory containing a model checkpoint file.")

# Evaluation inputs.
tf.flags.DEFINE_string(
    "json_file", "", "Json file containing questions and answers.")
tf.flags.DEFINE_string(
    "feature_file", "", "pkl files containing the features")

# Evaluation output.
tf.flags.DEFINE_string(
    "result_file", "", "File to store the results.")

def run_question_inference(sess, question, test_ids, test_feat):
  """Answers one fill-in-the-blank question by embedding similarity.

  The embeddings of the given items are average-pooled and compared with the
  embedding of every candidate by cosine similarity.

  Args:
    sess: Unused; kept so that callers do not have to change.
    question: Dict with "question" (ids of the given items) and "answers"
      (ids of the candidate items).
    test_ids: Item ids; the id at position i belongs to row i of test_feat.
      Any iterable is accepted.
    test_feat: 2-D array with one embedding per row.

  Returns:
    (score, predicted_answer): the similarity of every candidate scaled to
    [0, 1] (shape [1, number of answers]) and the candidate indices sorted
    from best to worst. ([], []) is returned when an id is unknown or when
    the question or the answers are empty.
  """
  # list.index() is needed below, so accept any iterable of ids (for example
  # a Python 3 dict view) by copying it into a list.
  if not isinstance(test_ids, list):
    test_ids = list(test_ids)

  # Only an unknown id (ValueError from list.index) makes a question invalid.
  # Any other exception is a real error and must not be swallowed.
  try:
    question_ids = [test_ids.index(q) for q in question["question"]]
    answer_ids = [test_ids.index(a) for a in question["answers"]]
  except ValueError:
    return [], []
  if not question_ids or not answer_ids:
    # Nothing to pool, or nothing to choose from.
    return [], []

  # Average pooling of the VSE embeddings
  question_emb = np.reshape(np.mean(test_feat[question_ids], 0), [1,-1])
  q_emb = question_emb / np.linalg.norm(question_emb, axis=1)[:, np.newaxis]
  a_emb = (test_feat[answer_ids] /
             np.linalg.norm(test_feat[answer_ids], axis=1)[:, np.newaxis])
  score = (np.dot(q_emb, np.transpose(a_emb)) + 1) / 2 # scale to [0,1]

  predicted_answer = np.argsort(-score)[0]
  return score, predicted_answer
  

def main(_):
  """Runs the fill-in-the-blank evaluation of the Siamese model.

  Loads the pre-computed features from FLAGS.feature_file, answers every
  question of FLAGS.json_file, prints the top-1 to top-4 accuracy over the
  questions that could be answered, and pickles the set ids, predictions and
  scores to FLAGS.result_file.
  """
  top_k = 4 # Print the top_k accuracy.
  true_pred = np.zeros(top_k)
  # Load pre-computed image features.
  # NOTE(review): unpickling can execute arbitrary code; only point
  # --feature_file at files from a trusted source.
  with open(FLAGS.feature_file, "rb") as f:
    test_data = pkl.load(f)
  # On Python 3 dict.keys() is a view that supports neither indexing nor
  # .index(), so copy the ids into a list.
  test_ids = list(test_data.keys())
  test_feat = np.zeros((len(test_ids),
                        len(test_data[test_ids[0]]["image_feat"])))
  for i, test_id in enumerate(test_ids):
    # Image feature in visual-semantic embedding space.
    test_feat[i] = test_data[test_id]["image_feat"]

  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model_config = configuration.ModelConfig()
    model = polyvore_model.PolyvoreModel(model_config, mode="inference")
    model.build()
    saver = tf.train.Saver()

    g.finalize()
    with tf.Session() as sess:
      saver.restore(sess, FLAGS.checkpoint_path)
      # Read the questions inside a context manager so the handle is closed.
      with open(FLAGS.json_file) as f:
        questions = json.load(f)

      all_pred = []
      set_ids = []
      all_scores = []
      for question in questions:
        score, pred = run_question_inference(sess, question, test_ids,
                                             test_feat)
        # A skipped question yields an empty list. Test the length instead of
        # comparing with []: comparing a NumPy array with [] is an elementwise
        # comparison of mismatched shapes, which newer NumPy rejects.
        if len(pred) > 0:
          all_pred.append(pred)
          all_scores.append(score)
          set_ids.append(question["question"][0].split("_")[0])
          # 0 is the correct answer, iterate over top_k.
          for i in range(top_k):
            if 0 in pred[:i+1]:
              true_pred[i] += 1

      # Print all top-k accuracy.
      num_valid = len(all_pred)
      for i in range(top_k):
        print("Top %d Accuracy: " % (i + 1))
        print("%d correct answers in %d valid questions." %
                  (true_pred[i], num_valid))
        # Without any valid question the accuracy is undefined; report nan
        # without dividing by zero.
        accuracy = true_pred[i] / num_valid if num_valid else float("nan")
        print("Accuracy: %f" % accuracy)

      # The builtin `object` is used as dtype; the `np.object` alias no longer
      # exists in newer NumPy releases.
      s = np.empty((len(all_scores),), dtype=object)
      for i in range(len(all_scores)):
          s[i] = all_scores[i]

      with open(FLAGS.result_file, "wb") as f:
        pkl.dump({"set_ids": set_ids, "pred": all_pred, "score": s}, f)

# Script entry point: hand control to TensorFlow's app runner.
# NOTE(review): tf.app.run() is expected to parse the flags and call main().
if __name__ == "__main__":
  tf.app.run()


================================================
FILE: polyvore/ops/__init__.py
================================================


================================================
FILE: polyvore/ops/image_embedding.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Image embedding ops."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import tensorflow as tf

from tensorflow.contrib.slim.python.slim.nets.inception_v3 import inception_v3_base

# Module-level shorthand for tf.contrib.slim.
slim = tf.contrib.slim


def inception_v3(images,
                 trainable=True,
                 is_training=True,
                 weight_decay=0.00004,
                 stddev=0.1,
                 dropout_keep_prob=0.8,
                 use_batch_norm=True,
                 batch_norm_params=None,
                 add_summaries=True,
                 scope="InceptionV3"):
  """Builds an Inception V3 subgraph for image embeddings.

  Args:
    images: A float32 Tensor of shape [batch, height, width, channels].
    trainable: Whether the inception submodel should be trainable or not.
    is_training: Boolean indicating training mode or not.
    weight_decay: Coefficient for weight regularization.
    stddev: The standard deviation of the truncated normal weight initializer.
    dropout_keep_prob: Dropout keep probability.
    use_batch_norm: Whether to use batch normalization. When False, the
      convolutions are built without any normalizer and batch_norm_params is
      ignored.
    batch_norm_params: Parameters for batch normalization. See
      tf.contrib.layers.batch_norm for details. Defaults are used when this is
      None or empty.
    add_summaries: Whether to add activation summaries.
    scope: Optional Variable scope.

  Returns:
    The image embedding Tensor: the output of the Inception V3 base network
    after global average pooling, dropout and flattening (one row per image).
  """
  # Only consider the inception model to be in training mode if it's trainable.
  is_inception_model_training = trainable and is_training

  if use_batch_norm:
    normalizer_fn = slim.batch_norm
    # Default parameters for batch normalization.
    if not batch_norm_params:
      batch_norm_params = {
          "is_training": is_inception_model_training,
          "trainable": trainable,
          # Decay for the moving averages.
          "decay": 0.9997,
          # Epsilon to prevent 0s in variance.
          "epsilon": 0.001,
          # Collection containing the moving mean and moving variance.
          "variables_collections": {
              "beta": None,
              "gamma": None,
              "moving_mean": ["moving_vars"],
              "moving_variance": ["moving_vars"],
          }
      }
  else:
    # Without this, slim.batch_norm would still be applied (with slim's own
    # defaults) even though the caller asked for no batch normalization.
    normalizer_fn = None
    batch_norm_params = None

  if trainable:
    weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
  else:
    weights_regularizer = None

  with tf.variable_scope(scope, "InceptionV3", [images]) as scope:
    with slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
        weights_regularizer=weights_regularizer,
        trainable=trainable):
      with slim.arg_scope(
          [slim.conv2d],
          weights_initializer=tf.truncated_normal_initializer(stddev=stddev),
          activation_fn=tf.nn.relu,
          normalizer_fn=normalizer_fn,
          normalizer_params=batch_norm_params):
        net, end_points = inception_v3_base(images, scope=scope)
        with tf.variable_scope("logits"):
          # Pool over the full spatial extent of the final feature map.
          shape = net.get_shape()
          net = slim.avg_pool2d(net, shape[1:3], padding="VALID", scope="pool")
          net = slim.dropout(
              net,
              keep_prob=dropout_keep_prob,
              is_training=is_inception_model_training,
              scope="dropout")
          net = slim.flatten(net, scope="flatten")

  # Add summaries.
  if add_summaries:
    for v in end_points.values():
      tf.contrib.layers.summaries.summarize_activation(v)

  return net


================================================
FILE: polyvore/ops/image_embedding_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Tests for tensorflow_models.im2txt.ops.image_embedding."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import tensorflow as tf

from polyvore.ops import image_embedding


class InceptionV3Test(tf.test.TestCase):
  """Graph-structure checks for image_embedding.inception_v3.

  Each test builds the subgraph for one (trainable, is_training) combination
  and verifies the embedding shape, the per-scope parameter counts and the
  sizes of the standard graph collections.
  """

  def setUp(self):
    super(InceptionV3Test, self).setUp()

    # A batch of four 299x299 RGB images fed through a placeholder.
    self._batch_size = 4
    self._images = tf.placeholder(tf.float32, [self._batch_size, 299, 299, 3])

  def _countInceptionParameters(self):
    """Counts the number of parameters in the inception model at top scope."""
    totals = {}
    for variable in tf.all_variables():
      tokens = variable.op.name.split("/")
      if tokens[0] != "InceptionV3":
        continue
      key = "InceptionV3/" + tokens[1]
      size = variable.get_shape().num_elements()
      assert size
      totals[key] = totals.get(key, 0) + size
    return totals

  def _verifyParameterCounts(self):
    """Verifies the number of parameters in the inception model."""
    actual = self._countInceptionParameters()
    expected = {
        "InceptionV3/Conv2d_1a_3x3": 960,
        "InceptionV3/Conv2d_2a_3x3": 9312,
        "InceptionV3/Conv2d_2b_3x3": 18624,
        "InceptionV3/Conv2d_3b_1x1": 5360,
        "InceptionV3/Conv2d_4a_3x3": 138816,
        "InceptionV3/Mixed_5b": 256368,
        "InceptionV3/Mixed_5c": 277968,
        "InceptionV3/Mixed_5d": 285648,
        "InceptionV3/Mixed_6a": 1153920,
        "InceptionV3/Mixed_6b": 1298944,
        "InceptionV3/Mixed_6c": 1692736,
        "InceptionV3/Mixed_6d": 1692736,
        "InceptionV3/Mixed_6e": 2143872,
        "InceptionV3/Mixed_7a": 1699584,
        "InceptionV3/Mixed_7b": 5047872,
        "InceptionV3/Mixed_7c": 6080064,
    }
    self.assertDictEqual(expected, actual)

  def _assertCollectionSize(self, expected_size, collection):
    """Fails unless the graph collection holds exactly expected_size items."""
    actual_size = len(tf.get_collection(collection))
    if actual_size == expected_size:
      return
    self.fail("Found %d items in collection %s (expected %d)." %
              (actual_size, collection, expected_size))

  def _buildAndCheck(self, trainable, is_training, num_trainable,
                     num_update_ops, num_regularizers):
    """Builds the subgraph for one mode and checks shape and collections."""
    embeddings = image_embedding.inception_v3(
        self._images, trainable=trainable, is_training=is_training)
    self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list())

    self._verifyParameterCounts()
    expectations = [
        (376, tf.GraphKeys.VARIABLES),
        (num_trainable, tf.GraphKeys.TRAINABLE_VARIABLES),
        (num_update_ops, tf.GraphKeys.UPDATE_OPS),
        (num_regularizers, tf.GraphKeys.REGULARIZATION_LOSSES),
        (0, tf.GraphKeys.LOSSES),
        (23, tf.GraphKeys.SUMMARIES),
    ]
    for expected_size, collection in expectations:
      self._assertCollectionSize(expected_size, collection)

  def testTrainableTrueIsTrainingTrue(self):
    self._buildAndCheck(trainable=True, is_training=True,
                        num_trainable=188, num_update_ops=188,
                        num_regularizers=94)

  def testTrainableTrueIsTrainingFalse(self):
    self._buildAndCheck(trainable=True, is_training=False,
                        num_trainable=188, num_update_ops=0,
                        num_regularizers=94)

  def testTrainableFalseIsTrainingTrue(self):
    self._buildAndCheck(trainable=False, is_training=True,
                        num_trainable=0, num_update_ops=0,
                        num_regularizers=0)

  def testTrainableFalseIsTrainingFalse(self):
    self._buildAndCheck(trainable=False, is_training=False,
                        num_trainable=0, num_update_ops=0,
                        num_regularizers=0)


# Run the test cases in this file through tf.test.main() when executed directly.
if __name__ == "__main__":
  tf.test.main()


================================================
FILE: polyvore/ops/image_processing.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Helper functions for image preprocessing."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import tensorflow as tf


def distort_image(image):
  """Applies the training-time random distortion to a single image.

  The only distortion used is a random left/right flip; colors are untouched.

  Args:
    image: A float32 Tensor of shape [height, width, 3] with values in [0, 1).

  Returns:
    A float32 Tensor of shape [height, width, 3] with values in [0, 1]; the
    input, possibly mirrored horizontally.
  """
  with tf.name_scope("flip_horizontal", values=[image]):
    flipped = tf.image.random_flip_left_right(image)
  return flipped


def process_image(encoded_image,
                  is_training,
                  height,
                  width,
                  resize_height=299,
                  resize_width=299,
                  image_format="jpeg",
                  image_idx=0):
  """Decodes one encoded image, resizes/crops it and rescales it to [-1, 1].

  Args:
    encoded_image: String Tensor containing the image.
    is_training: Boolean; whether preprocessing for training or eval. In
      training the image is randomly cropped and randomly distorted.
    height: Height of the output image.
    width: Width of the output image.
    resize_height: If > 0, resize height before crop to final dimensions.
    resize_width: If > 0, resize width before crop to final dimensions.
    image_format: "jpeg" or "png".
    image_idx: image index of the image in an outfit; used to name the image
      summaries.
  Returns:
    A float32 Tensor of shape [height, width, 3] with values in [-1, 1].

  Raises:
    ValueError: If image_format is invalid.
  """
  def log_stage(stage, tensor):
    # Records an image summary tagged "<stage>/<image_idx>". The summary op
    # takes a batch of images, so a leading dimension of size 1 is added.
    tf.image_summary(stage + "/" + str(image_idx), tf.expand_dims(tensor, 0))

  # Decode into a 3-channel image, then convert to float32.
  with tf.name_scope("decode", values=[encoded_image]):
    if image_format not in ("jpeg", "png"):
      raise ValueError("Invalid image format: %s" % image_format)
    if image_format == "jpeg":
      decoder = tf.image.decode_jpeg
    else:
      decoder = tf.image.decode_png
    pixels = decoder(encoded_image, channels=3)
  pixels = tf.image.convert_image_dtype(pixels, dtype=tf.float32)
  log_stage("original_image", pixels)

  # Optional bilinear resize; both resize dimensions must be enabled together.
  assert (resize_height > 0) == (resize_width > 0)
  if resize_height:
    pixels = tf.image.resize_images(pixels,
                                    size=[resize_height, resize_width],
                                    method=tf.image.ResizeMethod.BILINEAR)

  # Bring the image to its final [height, width] size. When height and width
  # equal the resize dimensions (as in the Polyvore model) this crops nothing.
  if is_training:
    pixels = tf.random_crop(pixels, [height, width, 3])
  else:
    pixels = tf.image.resize_image_with_crop_or_pad(pixels, height, width)
  log_stage("resized_image", pixels)

  # Random distortion is a training-only step.
  if is_training:
    pixels = distort_image(pixels)
  log_stage("final_image", pixels)

  # Map [0, 1] onto [-1, 1]: subtract 0.5, then double.
  return tf.mul(tf.sub(pixels, 0.5), 2.0)


================================================
FILE: polyvore/ops/inputs.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Input ops."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import tensorflow as tf


def parse_sequence_example(serialized, set_id, image_feature,
                           image_index, caption_feature, number_set_images):
  """Parses a tensorflow.SequenceExample into a set of images and caption.

  Args:
    serialized: A scalar string Tensor; a single serialized SequenceExample.
    set_id: Name of SequenceExample context feature containing the id of
      the outfit.
    image_feature: Name prefix of the SequenceExample context features that
      hold the image data; image i is read from "<image_feature>/<i>".
    image_index: Name of SequenceExample feature list containing the index of
      the item in the outfit.
    caption_feature: Name of SequenceExample feature list containing integer
      captions.
    number_set_images: Number of images in an outfit.
  Returns:
    set_id: Set id of the outfit.
    encoded_images: A list of number_set_images string Tensors, one encoded
      image each. A missing image slot is parsed as the empty string.
    image_ids: Image ids of the items in the outfit.
    captions: A dense int64 Tensor converted from the sparse caption feature.
    likes: Number of likes of the outfit (0 when absent). Hard coded name,
      not used in our model.
  """
  image_keys = [image_feature + '/' + str(i) for i in range(number_set_images)]

  # Context (per-outfit) features: the outfit id, its like count and one
  # string slot per image position.
  context_spec = {
      set_id: tf.FixedLenFeature([], dtype=tf.string),
      'likes': tf.FixedLenFeature([], dtype=tf.int64, default_value=0),
  }
  for key in image_keys:
    context_spec[key] = tf.FixedLenFeature([], dtype=tf.string,
                                           default_value='')

  # Sequence (per-item) features: item indices and variable-length captions.
  sequence_spec = {
      image_index: tf.FixedLenSequenceFeature([], dtype=tf.int64),
      caption_feature: tf.VarLenFeature(dtype=tf.int64),
  }

  context, sequence = tf.parse_single_sequence_example(
      serialized,
      context_features=context_spec,
      sequence_features=sequence_spec)

  outfit_id = context[set_id]
  likes = context['likes']
  encoded_images = [context[key] for key in image_keys]

  # Captions arrive sparse; densify them.
  captions = tf.sparse_tensor_to_dense(sequence[caption_feature])
  image_ids = sequence[image_index]

  return outfit_id, encoded_images, image_ids, captions, likes


def prefetch_input_data(reader,
                        file_pattern,
                        is_training,
                        batch_size,
                        values_per_shard,
                        input_queue_capacity_factor=16,
                        num_reader_threads=1,
                        shard_queue_name="filename_queue",
                        value_queue_name="input_queue"):
  """Prefetches string values from disk into an input queue.

  In training, values go through a shuffling queue whose minimum fill level is
  values_per_shard * input_queue_capacity_factor; a larger factor mixes
  examples from different shards better at the cost of more memory. In eval,
  files are read in order into a FIFO queue.

  Args:
    reader: Instance of tf.ReaderBase.
    file_pattern: Comma-separated list of file patterns (e.g.
        /tmp/train_data-?????-of-00100).
    is_training: Boolean; whether prefetching for training or eval.
    batch_size: Model batch size used to determine queue capacity.
    values_per_shard: Approximate number of values per shard.
    input_queue_capacity_factor: Minimum number of values to keep in the queue
      in multiples of values_per_shard. See comments above.
    num_reader_threads: Number of reader threads to fill the queue.
    shard_queue_name: Name for the shards filename queue.
    value_queue_name: Name for the values input queue.

  Returns:
    A Queue containing prefetched string values.
  """
  # Expand every comma-separated pattern into the files it matches.
  matched_files = [
      path
      for pattern in file_pattern.split(",")
      for path in tf.gfile.Glob(pattern)
  ]
  if matched_files:
    tf.logging.info("Prefetching values from %d files matching %s",
                    len(matched_files), file_pattern)
  else:
    tf.logging.fatal("Found no input files matching %s", file_pattern)

  if not is_training:
    # Eval: deterministic file order and a plain FIFO queue.
    filename_queue = tf.train.string_input_producer(
        matched_files, shuffle=False, capacity=1, name=shard_queue_name)
    capacity = values_per_shard + 3 * batch_size
    values_queue = tf.FIFOQueue(
        capacity=capacity, dtypes=[tf.string], name="fifo_" + value_queue_name)
  else:
    # Training: shuffled file order and a shuffling queue that always keeps
    # at least min_queue_examples values buffered.
    filename_queue = tf.train.string_input_producer(
        matched_files, shuffle=True, capacity=16, name=shard_queue_name)
    min_queue_examples = values_per_shard * input_queue_capacity_factor
    capacity = min_queue_examples + 100 * batch_size
    values_queue = tf.RandomShuffleQueue(
        capacity=capacity,
        min_after_dequeue=min_queue_examples,
        dtypes=[tf.string],
        name="random_" + value_queue_name)

  # One read + enqueue op pair per reader thread.
  enqueue_ops = [
      values_queue.enqueue([reader.read(filename_queue)[1]])
      for _ in range(num_reader_threads)
  ]
  runner = tf.train.queue_runner.QueueRunner(values_queue, enqueue_ops)
  tf.train.queue_runner.add_queue_runner(runner)

  # Track how full the values queue is.
  tf.scalar_summary(
      "queue/%s/fraction_of_%d_full" % (values_queue.name, capacity),
      tf.cast(values_queue.size(), tf.float32) * (1. / capacity))

  return values_queue


def batch_with_dynamic_pad(images_and_captions,
                           batch_size,
                           queue_capacity,
                           add_summaries=True):
  """Batches outfits (images and captions) with dynamic padding.

  For every outfit this builds three int32 0/1 indicator tensors before
  batching:
    * an image indicator of ones with one entry per item in the outfit,
    * a loss indicator of ones with one entry per item in the outfit,
    * a caption indicator that is 1 wherever a caption word id is non-zero.
  The per-outfit images are stacked into one tensor, and everything is batched
  with tf.train.batch_join using dynamic_pad=True, so shorter sequences are
  padded up to the longest one in the batch and the indicators act as masks
  separating real entries from padding.

  Args:
    images_and_captions: A list of [set_id, images, image_ids, captions, likes]
      entries, one per preprocessing thread.
    batch_size: Batch size.
    queue_capacity: Queue capacity.
    add_summaries: If true, add summaries of the batch min/max/mean of the
      per-outfit mask sums plus one (tagged "caption_length/...").

  Returns:
    A tuple (set_ids, images, image_ids, mask, loss_mask, captions, cap_mask,
    likes) of batched and padded tensors.
  """
  per_thread_tensors = []
  for set_id, images, image_ids, captions, likes in images_and_captions:
    num_items = tf.shape(image_ids)[0]
    # Subtracting 0 leaves the length unchanged: every item is an input step.
    input_length = tf.sub(num_items, 0) # change 1 to 0

    # 1 where a caption holds a real (non-zero) word id, 0 elsewhere.
    is_word = tf.not_equal(captions, tf.zeros_like(captions))
    cap_indicator = tf.cast(is_word, tf.int32)

    indicator = tf.ones(tf.expand_dims(input_length, 0), dtype=tf.int32)
    loss_indicator = tf.ones(tf.expand_dims(num_items, 0), dtype=tf.int32)
    stacked_images = tf.pack(images)

    per_thread_tensors.append([set_id, stacked_images, indicator,
                               loss_indicator, image_ids, captions,
                               cap_indicator, likes])

  batched = tf.train.batch_join(per_thread_tensors,
                                batch_size=batch_size,
                                capacity=queue_capacity,
                                dynamic_pad=True,
                                name="batch_and_pad")
  (set_ids, images, mask, loss_mask, image_ids,
   captions, cap_mask, likes) = batched

  if add_summaries:
    lengths = tf.add(tf.reduce_sum(mask, 1), 1)
    tf.scalar_summary("caption_length/batch_min", tf.reduce_min(lengths))
    tf.scalar_summary("caption_length/batch_max", tf.reduce_max(lengths))
    tf.scalar_summary("caption_length/batch_mean", tf.reduce_mean(lengths))

  return (set_ids, images, image_ids, mask, loss_mask, captions, cap_mask, likes)


================================================
FILE: polyvore/polyvore_model_bi.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""
Polyvore model used in ACM MM"17 paper
"Learning Fashion Compatibility with Bidirectional LSTMs"
Link: https://arxiv.org/pdf/1707.05691.pdf
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np                                  
import tensorflow as tf

from ops import image_embedding
from ops import image_processing
from ops import inputs as input_ops

class PolyvoreModel(object):
  """ Model for fashion set on Polyvore dataset.
  """

  def __init__(self, config, mode, train_inception=False):
    """Stores the configuration and declares every attribute the builders fill.

    Args:
      config: Object containing configuration parameters.
      mode: "train", "eval" or "inference".
      train_inception: Whether the inception submodel variables are trainable.
    """
    assert mode in ("train", "eval", "inference")
    self.config = config
    self.mode = mode
    self.train_inception = train_inception

    # Reader for the input data.
    self.reader = tf.TFRecordReader()

    # All variables are initialized from a uniform distribution over
    # [-initializer_scale, initializer_scale] (as in the "Show and Tell"
    # paper).
    scale = self.config.initializer_scale
    self.initializer = tf.random_uniform_initializer(minval=-scale,
                                                     maxval=scale)

    # --- Inputs ---
    # Images: float32 Tensor,
    # [batch_size, num_images, height, width, channels]; num_images is the
    # number of images in one outfit (default 8).
    self.images = None
    # Forward RNN input / target sequences: int32, [batch_size, padded_length].
    self.f_input_seqs = self.f_target_seqs = None
    # Backward RNN input / target sequences: int32, [batch_size, padded_length].
    self.b_input_seqs = self.b_target_seqs = None
    # int32 0/1 Tensor, [batch_size, padded_length].
    self.input_mask = None
    # Caption word ids: int32, [batch_size, num_images, padded_length].
    self.cap_seqs = None
    # Caption mask: int32 0/1 Tensor.
    self.cap_mask = None

    # --- Embeddings ---
    # Bag-of-words caption embeddings: float32,
    # [batch_size, num_images, embedding_size].
    self.seq_embeddings = None
    # Image embeddings in the joint visual-semantic space: float32,
    # [batch_size, num_images, embedding_size].
    self.image_embeddings = None
    # Image embeddings in the RNN output/prediction space.
    self.rnn_image_embeddings = None
    # Word embedding map.
    self.embedding_map = None

    # --- Losses ---
    # float32 scalar Tensor; the total loss for the trainer to optimize.
    self.total_loss = None
    # Forward and backward RNN losses: float32, [batch_size * padded_length].
    self.forward_losses = self.backward_losses = None
    # RNN loss, forward + backward.
    self.lstm_losses = None
    # Loss mask for lstm loss.
    self.loss_mask = None
    # Visual Semantic Embedding loss: float32, [batch_size * padded_length].
    self.emb_losses = None
    # float32 Tensor, [batch_size * padded_length].
    self.target_weights = None

    # --- Inception bookkeeping ---
    # Collection of variables from the inception submodel.
    self.inception_variables = []
    # Function to restore the inception submodel from checkpoint.
    self.init_fn = None

    # Global step Tensor.
    self.global_step = None

    # Some output for debugging purposes.
    self.target_embeddings = self.input_embeddings = None
    self.set_ids = None
    self.f_lstm_state = self.b_lstm_state = None
    self.lstm_output = None
    self.lstm_xent_loss = None


  def is_training(self):
    """Returns true if the model is built for training mode."""
    return self.mode == "train"

  def process_image(self, encoded_image, thread_id=0, image_idx=0):
    """Decodes and preprocesses one encoded image using the model's config.

    Thin wrapper around image_processing.process_image that supplies the
    output size and image format from self.config and the train/eval switch
    from the model mode.

    Args:
      encoded_image: A scalar string Tensor; the encoded image.
      thread_id: Preprocessing thread id. Accepted for interface
        compatibility; not used in our model.
      image_idx: Index of the image in an outfit. Only used for summaries.
    Returns:
      A float32 Tensor of shape [height, width, 3]; the processed image.
    """
    cfg = self.config
    return image_processing.process_image(
        encoded_image,
        is_training=self.is_training(),
        height=cfg.image_height,
        width=cfg.image_width,
        image_format=cfg.image_format,
        image_idx=image_idx)

  def build_inputs(self):
    """Input prefetching, preprocessing and batching.

    In "inference" mode the image, caption ids and input mask are fed through
    placeholders named "image_feed", "input_feed" and "input_mask". In the
    other modes serialized SequenceExamples are prefetched from the configured
    input files, parsed, preprocessed and batched with dynamic padding.

    Outputs:
      self.images
      self.input_mask
      self.loss_mask
      self.cap_seqs
      self.cap_mask
      self.set_ids
    """
    cfg = self.config

    if self.mode == "inference":
      # A single encoded image, preprocessed like the training inputs.
      raw_image = tf.placeholder(dtype=tf.string, shape=[], name="image_feed")
      processed_image = self.process_image(raw_image)

      caption_feed = tf.placeholder(dtype=tf.int64,
                                    shape=[None],
                                    name="input_feed")

      # Insert the extra dimension the rest of the graph expects.
      image_seqs = tf.expand_dims(processed_image, 0)
      cap_seqs = tf.expand_dims(caption_feed, 1)

      # The input mask is fed directly with a fixed [1, 8] shape.
      input_mask = tf.placeholder(dtype=tf.int64,
                                  shape=[1, 8],
                                  name="input_mask")
      # Caption mask, loss mask and set ids do not exist in inference mode.
      cap_mask = loss_mask = set_ids = None

    else:
      # Prefetch serialized SequenceExample protos.
      input_queue = input_ops.prefetch_input_data(
          self.reader,
          cfg.input_file_pattern,
          is_training=self.is_training(),
          batch_size=cfg.batch_size,
          values_per_shard=cfg.values_per_input_shard,
          input_queue_capacity_factor=cfg.input_queue_capacity_factor,
          num_reader_threads=cfg.num_input_reader_threads)

      # One dequeue/parse/preprocess pipeline per preprocessing thread. The
      # parsed likes are carried along but not used by the model.
      per_thread_inputs = []
      for _ in range(cfg.num_preprocess_threads):
        serialized = input_queue.dequeue()
        parsed = input_ops.parse_sequence_example(
            serialized,
            set_id=cfg.set_id_name,
            image_feature=cfg.image_feature_name,
            image_index=cfg.image_index_name,
            caption_feature=cfg.caption_feature_name,
            number_set_images=cfg.number_set_images)
        set_id, encoded_images, image_ids, captions, likes = parsed

        images = [self.process_image(encoded_images[idx], image_idx=idx)
                  for idx in range(cfg.number_set_images)]

        per_thread_inputs.append([set_id, images, image_ids, captions, likes])

      # Batch inputs.
      queue_capacity = 5 * cfg.num_preprocess_threads * cfg.batch_size
      batched = input_ops.batch_with_dynamic_pad(
          per_thread_inputs,
          batch_size=cfg.batch_size,
          queue_capacity=queue_capacity)
      (set_ids, image_seqs, image_ids, input_mask,
       loss_mask, cap_seqs, cap_mask, likes) = batched

    self.images = image_seqs
    self.input_mask = input_mask
    self.loss_mask = loss_mask
    self.cap_seqs = cap_seqs
    self.cap_mask = cap_mask
    self.set_ids = set_ids

  def build_image_embeddings(self):
    """Builds the image model subgraph and generates image embeddings
      in visual semantic joint space and RNN prediction space.

    All images of all outfits are flattened into one batch, run through
    Inception V3, and the resulting features are projected by two separate
    bias-free linear layers: one into the joint visual-semantic space and one
    into the RNN prediction space. Both projections are reshaped back to
    [batch_size, num_images, embedding_size].

    Inputs:
      self.images

    Outputs:
      self.image_embeddings
      self.rnn_image_embeddings
      self.inception_variables
    """

    # Collapse the leading dimensions so every image is its own batch entry:
    # [..., height, width, 3] -> [-1, height, width, 3]. The third dimension is
    # the image width (it was previously hard-wired to image_height, which is
    # only correct for square images).
    images = tf.reshape(self.images, [-1,
                                 self.config.image_height,
                                 self.config.image_width,
                                 3])

    inception_output = image_embedding.inception_v3(
        images,
        trainable=self.train_inception,
        is_training=self.is_training())
    self.inception_variables = tf.get_collection(
        tf.GraphKeys.VARIABLES, scope="InceptionV3")

    # Map inception output into embedding space.
    with tf.variable_scope("image_embedding") as scope:
      image_embeddings = tf.contrib.layers.fully_connected(
          inputs=inception_output,
          num_outputs=self.config.embedding_size,
          activation_fn=None,
          weights_initializer=self.initializer,
          biases_initializer=None,
          scope=scope)

    # Separate projection of the same features for the RNN prediction space.
    with tf.variable_scope("rnn_image_embedding") as scope:
      rnn_image_embeddings = tf.contrib.layers.fully_connected(
          inputs=inception_output,
          num_outputs=self.config.embedding_size,
          activation_fn=None,
          weights_initializer=self.initializer,
          biases_initializer=None,
          scope=scope)

    # Save the embedding size in the graph.
    tf.constant(self.config.embedding_size, name="embedding_size")

    # Restore the per-outfit grouping: [batch_size, num_images, embedding_size].
    self.image_embeddings = tf.reshape(image_embeddings,
                                       [tf.shape(self.images)[0],
                                        -1,
                                        self.config.embedding_size])

    self.rnn_image_embeddings = tf.reshape(rnn_image_embeddings,
                                           [tf.shape(self.images)[0],
                                            -1,
                                            self.config.embedding_size])

  def build_seq_embeddings(self):
    """Looks up caption word embeddings and pools them into one vector per caption.

    Inputs:
      self.cap_seqs
      self.cap_mask (training and eval only)

    Outputs:
      self.seq_embeddings
      self.embedding_map
    """
    with tf.variable_scope("seq_embedding"), tf.device("/cpu:0"):
      word_table = tf.get_variable(
          name="map",
          shape=[self.config.vocab_size, self.config.embedding_size],
          initializer=self.initializer)
      pooled = tf.nn.embedding_lookup(word_table, self.cap_seqs)

      # Bag-of-words pooling: multiplying the mask (with an extra axis
      # inserted at position 2) by the word embeddings sums the embeddings of
      # the unmasked words; the extra axis is squeezed away afterwards.
      # In inference mode the raw lookup result is kept as is.
      if self.mode != "inference":
        word_weights = tf.expand_dims(self.cap_mask, 2)
        word_weights = tf.cast(word_weights, tf.float32)
        pooled = tf.batch_matmul(word_weights, pooled)
        pooled = tf.squeeze(pooled, [2])

    self.embedding_map = word_table
    self.seq_embeddings = pooled

  def build_model(self):
    """Builds the model.

    In "train"/"eval" mode this adds the contrastive loss of the joint
    image/caption embedding space and the forward and backward RNN losses.
    In "inference" mode it builds single-step forward and backward RNN cells
    that are driven through placeholders.

      The original code is written with Tensorflow r0.10
      for Tensorflow > r1.0, many functions can be simplified.
      For example Tensors support slicing now, so no need to use tf.slice()

    Inputs:
      self.image_embeddings
      self.seq_embeddings
      self.rnn_image_embeddings (training and eval only)
      self.cap_mask (training and eval only)
      self.input_mask (training and eval only)
      self.loss_mask (training and eval only)

    Outputs (inference only):
      self.lstm_xent_loss
      self.f_lstm_state, self.b_lstm_state (only when rnn_type is "lstm")

    Outputs (training and eval only):
      self.loss_mask, self.input_mask (overwritten with the padded and
        flattened versions computed here)
      self.target_embeddings, self.input_embeddings ((forward, backward))
      self.total_loss
      self.emb_losses
      self.lstm_losses
      self.target_weights
    """
    norm_image_embeddings = tf.nn.l2_normalize(self.image_embeddings, 2,
                                               name="norm_image_embeddings")
    norm_seq_embeddings = tf.nn.l2_normalize(self.seq_embeddings, 2)
    
    # Pad the second dimension of the caption embeddings up to
    # number_set_images.
    norm_seq_embeddings = (
        tf.pad(norm_seq_embeddings, [[0, 0],
               [0, self.config.number_set_images - tf.shape(norm_seq_embeddings)[1]],
               [0, 0]], name="norm_seq_embeddings"))
    
    if self.mode == "inference":
      pass
    else:
      # Compute losses for joint embedding.
      # Only look at the captions that have length >= 2.
      emb_loss_mask = tf.greater(tf.reduce_sum(self.cap_mask, 2), 1)
      # Pad the mask to the maximum number of images per set.
      emb_loss_mask = tf.pad(emb_loss_mask,
          [[0,0],
           [0, self.config.number_set_images - tf.shape(emb_loss_mask)[1]]])
      
      # Select the valid image-caption pair.
      emb_loss_mask = tf.reshape(emb_loss_mask, [-1])
      norm_image_embeddings = tf.reshape(norm_image_embeddings,
          [self.config.number_set_images * self.config.batch_size,
           self.config.embedding_size])
      norm_image_embeddings = tf.boolean_mask(norm_image_embeddings,
                                                emb_loss_mask)
      norm_seq_embeddings = tf.reshape(norm_seq_embeddings,
                [self.config.number_set_images * self.config.batch_size,
                 self.config.embedding_size])

      norm_seq_embeddings = tf.boolean_mask(norm_seq_embeddings, emb_loss_mask)

      # The following defines contrastive loss in the joint space.   
      # Reference: https://github.com/ryankiros/visual-semantic-embedding/blob/master/model.py#L39
      # scores[i, j] is the similarity of caption i and image j; the diagonal
      # holds the matching pairs.
      scores = tf.matmul(norm_seq_embeddings, norm_image_embeddings,
                         transpose_a=False, transpose_b=True, name="scores")
      
      diagonal = tf.expand_dims(tf.diag_part(scores), 1)
      cost_s = tf.maximum(0.0, self.config.emb_margin - diagonal + scores)
      cost_im = tf.maximum(0.0,
          self.config.emb_margin - tf.transpose(diagonal) + scores)
      # Remove the diagonal so matching pairs do not contribute to the cost.
      cost_s = cost_s - tf.diag(tf.diag_part(cost_s))
      cost_im = cost_im - tf.diag(tf.diag_part(cost_im))
      
      # Normalize by the squared number of valid pairs.
      # NOTE(review): this divides by zero if no pair is valid in the batch.
      emb_batch_loss = tf.reduce_sum(cost_s) + tf.reduce_sum(cost_im)
      emb_batch_loss = (emb_batch_loss /
              tf.cast(tf.shape(norm_seq_embeddings)[0], tf.float32) ** 2)

      if self.config.emb_loss_factor > 0.0:
        tf.contrib.losses.add_loss(emb_batch_loss * self.config.emb_loss_factor)
      
    # Compute image LSTM loss.
    # Start with one direction.
    # The cell variables keep the "lstm" name for every rnn_type.
    tf.logging.info("Rnn_type: %s" % self.config.rnn_type)
    if self.config.rnn_type == "lstm":
      tf.logging.info("----- RNN Type: LSTM ------")
      # Forward LSTM.
      f_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(
        num_units=self.config.num_lstm_units, state_is_tuple=True)
      # Backward LSTM.
      b_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(
        num_units=self.config.num_lstm_units, state_is_tuple=True)
    elif self.config.rnn_type == "gru":
      tf.logging.info("----- RNN Type: GRU ------")
      # Forward GRU.
      f_lstm_cell = tf.nn.rnn_cell.GRUCell(num_units=self.config.num_lstm_units)
      # Backward GRU.
      b_lstm_cell = tf.nn.rnn_cell.GRUCell(num_units=self.config.num_lstm_units)
    else:
      # Any other rnn_type value falls back to a basic RNN cell.
      tf.logging.info("----- RNN Type: RNN ------")
      # Forward RNN.
      f_lstm_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=self.config.num_lstm_units)
      # Backward RNN.
      b_lstm_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=self.config.num_lstm_units)
   
    # Dropout on cell inputs and outputs is applied in training mode only.
    if self.mode == "train":
      f_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
          f_lstm_cell,
          input_keep_prob=self.config.lstm_dropout_keep_prob,
          output_keep_prob=self.config.lstm_dropout_keep_prob)
      b_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
          b_lstm_cell,
          input_keep_prob=self.config.lstm_dropout_keep_prob,
          output_keep_prob=self.config.lstm_dropout_keep_prob)

    with tf.variable_scope("lstm", initializer=self.initializer) as lstm_scope:
      if self.mode == "inference":
        # Inference for Bi-LSTM.
        # Placeholders for externally computed scores and target indices;
        # the cross entropy below is evaluated on whatever is fed into them.
        pred_feed = tf.placeholder(dtype=tf.float32,
                                   shape=[None, None],
                                   name="pred_feed")
        next_index_feed = tf.placeholder(dtype=tf.int64,
                                   shape=[None],
                                   name="next_index_feed")
        
        self.lstm_xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                                    logits=pred_feed,
                                    labels=next_index_feed,
                                    name="lstm_xent")

                    
        if self.config.rnn_type == "lstm":
          # In inference mode, use concatenated states for convenient feeding
          # and fetching.
          # Forward
          # Placeholder for feeding a batch of concatenated states.
          f_state_feed = tf.placeholder(dtype=tf.float32,
                                    shape=[None, sum(f_lstm_cell.state_size)],
                                    name="f_state_feed")
          f_input_feed = tf.placeholder(dtype=tf.float32,
                                    shape=[None, self.config.embedding_size],
                                    name="f_input_feed")
          # Backward:
          # Placeholder for feeding a batch of concatenated states.
          b_state_feed = tf.placeholder(dtype=tf.float32,
                                    shape=[None, sum(b_lstm_cell.state_size)],
                                    name="b_state_feed")
          b_input_feed = tf.placeholder(dtype=tf.float32,
                                    shape=[None, self.config.embedding_size],
                                    name="b_input_feed")
                                        
          # Split the fed state into the two halves of the LSTM state tuple.
          f_state_tuple = tf.split(1, 2, f_state_feed)
          # Run a single LSTM step.
          with tf.variable_scope("FW"):
            f_lstm_outputs, f_state_tuple = f_lstm_cell(
                                              inputs=f_input_feed,
                                              state=f_state_tuple)
          # Concatenate the resulting state.
          self.f_lstm_state = tf.concat(1, f_state_tuple, name="f_state")

          b_state_tuple = tf.split(1, 2, b_state_feed)

          # Run a single LSTM step.
          with tf.variable_scope("BW"):
            b_lstm_outputs, b_state_tuple = b_lstm_cell(
                                              inputs=b_input_feed,
                                              state=b_state_tuple)
          # Concatenate the resulting state.
          self.b_lstm_state = tf.concat(1, b_state_tuple, name="b_state")
          
        else:
          # For non-LSTM RNN models, no tuple is used.
          # Forward
          # Placeholder for feeding a batch of concatenated states.
          f_state_feed = tf.placeholder(dtype=tf.float32,
                                    shape=[None, f_lstm_cell.state_size],
                                    name="f_state_feed")
          f_input_feed = tf.placeholder(dtype=tf.float32,
                                    shape=[None, self.config.embedding_size],
                                    name="f_input_feed")
          # Backward:
          # Placeholder for feeding a batch of concatenated states.
          b_state_feed = tf.placeholder(dtype=tf.float32,
                                    shape=[None, b_lstm_cell.state_size],
                                    name="b_state_feed")
          b_input_feed = tf.placeholder(dtype=tf.float32,
                                    shape=[None, self.config.embedding_size],
                                    name="b_input_feed")
          # Run a single RNN step.
          with tf.variable_scope("FW"):
            f_lstm_outputs, f_state_tuple = f_lstm_cell(
                                              inputs=f_input_feed,
                                              state=f_state_feed)
          # The new state is only exposed as a named op ("f_state"); unlike
          # the LSTM branch it is not stored on self.
          f_state_tuple = tf.identity(f_state_tuple, name="f_state")
            
          with tf.variable_scope("BW"):
            b_lstm_outputs, b_state_tuple = b_lstm_cell(
                                              inputs=b_input_feed,
                                              state=b_state_feed)
          # Same as above for the backward state ("b_state").
          b_state_tuple = tf.identity(b_state_tuple, name="b_state")
            
        lstm_outputs = (f_lstm_outputs, b_lstm_outputs)
        sequence_length = None
      else:
        # Run the batch of sequence embeddings through the LSTM.
        sequence_length = tf.reduce_sum(self.input_mask, 1)
        lstm_outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw=f_lstm_cell,
                                            cell_bw=b_lstm_cell,
                                            inputs=self.rnn_image_embeddings,
                                            initial_state_fw=None,
                                            initial_state_bw=None,
                                            sequence_length=sequence_length,
                                            dtype=tf.float32,
                                            scope=lstm_scope)

    # Stack batches vertically.
    f_lstm_outputs = tf.reshape(lstm_outputs[0], [-1, f_lstm_cell.output_size])
    if self.mode == "inference":
      b_lstm_outputs = lstm_outputs[1]
    else:
      # Reverse the valid part of each backward output sequence so that it
      # lines up with the reversed embeddings used as backward targets below.
      b_lstm_outputs = tf.reverse_sequence(lstm_outputs[1],
                                           seq_lengths=sequence_length,
                                           seq_dim=1,
                                           batch_dim=0)
    
    b_lstm_outputs = tf.reshape(b_lstm_outputs, [-1, b_lstm_cell.output_size])
    # Project the RNN outputs back into the image embedding space.
    with tf.variable_scope("f_logits") as logits_scope:
      f_input_embeddings = tf.contrib.layers.fully_connected(
          inputs=f_lstm_outputs,
          num_outputs=self.config.embedding_size,
          activation_fn=None,
          weights_initializer=self.initializer,
          scope=logits_scope)
         
    with tf.variable_scope("b_logits") as logits_scope:
      b_input_embeddings = tf.contrib.layers.fully_connected(
          inputs=b_lstm_outputs,
          num_outputs=self.config.embedding_size,
          activation_fn=None,
          weights_initializer=self.initializer,
          scope=logits_scope)
    
    if self.mode == "inference":
      pass
    else:
      # Padding input_mask to match dimension.
      # It is padded one column wider than the image sequence, then the first
      # column is dropped so that it lines up with the shifted targets below.
      input_mask = tf.pad(self.input_mask,
        [[0,0],
        [0, self.config.number_set_images + 1 - tf.shape(self.input_mask)[1]]])
      input_mask = tf.to_float(
          tf.reshape(tf.slice(input_mask, [0,1], [-1, -1]), [-1,1]))
      loss_mask = tf.pad(self.loss_mask,
        [[0,0],
         [0, self.config.number_set_images - tf.shape(self.loss_mask)[1]]])
      loss_mask = tf.reshape(tf.to_float(loss_mask),
                    [self.config.number_set_images * self.config.batch_size,1])
      
      # Forward rnn.
      # Targets are the image embeddings shifted left by one position (the
      # sequence is padded with one zero step at the end, then the first step
      # is sliced off).
      f_target_embeddings = tf.slice(tf.pad(self.rnn_image_embeddings,
              [[0,0], [0,1], [0,0]]), [0,1,0], [-1,-1,-1])
      f_target_embeddings = tf.reshape(f_target_embeddings,
              [self.config.number_set_images * self.config.batch_size,
               self.config.embedding_size])
      f_target_embeddings = tf.mul(f_target_embeddings,
                                        input_mask,
                                        name="target_embeddings")
      
      # Softmax loss over all items in this minibatch.
      # Every kept target in the batch is a candidate; the correct one for
      # row i is column i, hence the tf.range labels.
      loss_mask = tf.squeeze(loss_mask)
      f_input_embeddings = tf.boolean_mask(f_input_embeddings,
                                           tf.cast(loss_mask, tf.bool))
      f_target_embeddings = tf.boolean_mask(f_target_embeddings,
                                            tf.cast(loss_mask, tf.bool))
      
      f_lstm_scores = tf.matmul(f_input_embeddings,
                                f_target_embeddings,
                                transpose_a=False,
                                transpose_b=True)
      f_lstm_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=f_lstm_scores,
                        labels=tf.range(tf.shape(f_lstm_scores)[0]))
      f_lstm_loss = tf.div(tf.reduce_sum(f_lstm_loss),
                           tf.reduce_sum(loss_mask),
                           name="f_lstm_loss")
      
      # Backward rnn.
      # It would be better to write a function that calculates lstm_loss from
      # loss_mask, inputs, and targets, so the code can be reused; for now
      # the forward code is copied and pasted to get the backward loss.
      reverse_embeddings = tf.reverse_sequence(self.rnn_image_embeddings,
                                               seq_lengths=sequence_length,
                                               seq_dim=1,
                                               batch_dim=0)
      b_target_embeddings = tf.slice(tf.pad(reverse_embeddings,
                                            [[0,0], [0,1], [0,0]]),
                                     [0,1,0], [-1,-1,-1])
      b_target_embeddings = tf.reshape(b_target_embeddings,
            [self.config.number_set_images * self.config.batch_size,
             self.config.embedding_size])
      # NOTE(review): this reuses the op name "target_embeddings" from the
      # forward branch; presumably TensorFlow uniquifies the second one --
      # confirm before fetching either op by name.
      b_target_embeddings = tf.mul(b_target_embeddings,
                                        input_mask,
                                        name="target_embeddings")
      
      # Softmax loss over all items in this minibatch
      b_input_embeddings = tf.boolean_mask(b_input_embeddings,
                                           tf.cast(loss_mask, tf.bool))
      b_target_embeddings = tf.boolean_mask(b_target_embeddings,
                                            tf.cast(loss_mask, tf.bool))
      
      b_lstm_scores = tf.matmul(b_input_embeddings,
                                b_target_embeddings,
                                transpose_a=False,
                                transpose_b=True)
      b_lstm_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
              logits=b_lstm_scores,
              labels=tf.range(tf.shape(b_lstm_scores)[0]))
      b_lstm_loss = tf.div(tf.reduce_sum(b_lstm_loss),
                           tf.reduce_sum(loss_mask),
                           name="b_lstm_loss")
      
      # A loss whose factor is not positive is left out of the total loss.
      if self.config.f_rnn_loss_factor > 0:
        tf.contrib.losses.add_loss(f_lstm_loss * self.config.f_rnn_loss_factor)
      if self.config.b_rnn_loss_factor > 0:
        tf.contrib.losses.add_loss(b_lstm_loss * self.config.b_rnn_loss_factor)
     
      # Merge all losses and stats.
      total_loss = tf.contrib.losses.get_total_loss()
      
      # Add summaries.
      tf.scalar_summary("emb_batch_loss", emb_batch_loss)
      tf.scalar_summary("f_lstm_loss", f_lstm_loss)
      tf.scalar_summary("b_lstm_loss", b_lstm_loss)
      tf.scalar_summary("lstm_loss",
            (f_lstm_loss * self.config.f_rnn_loss_factor +
             b_lstm_loss * self.config.b_rnn_loss_factor))
      tf.scalar_summary("total_loss", total_loss)
      for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)
      
      # 1.0 for every image/caption pair that took part in the embedding loss.
      weights = tf.to_float(tf.reshape(emb_loss_mask, [-1]))
    
      self.loss_mask = loss_mask
      self.input_mask = input_mask
      self.target_embeddings = (f_target_embeddings, b_target_embeddings)
      self.input_embeddings = (f_input_embeddings, b_input_embeddings)
      self.total_loss = total_loss
      self.emb_losses = emb_batch_loss  # Used in evaluation.
      self.lstm_losses = (f_lstm_loss * self.config.f_rnn_loss_factor +
             b_lstm_loss * self.config.b_rnn_loss_factor) # Used in evaluation.
      self.target_weights = weights  # Used in evaluation.
      
  def setup_inception_initializer(self):
    """Installs self.init_fn, which loads the Inception weights from a checkpoint.

    Nothing is set up in "inference" mode.
    """
    if self.mode == "inference":
      return

    # The saver covers the Inception variables only.
    inception_saver = tf.train.Saver(self.inception_variables)

    def _restore_inception(sess):
      checkpoint_path = self.config.inception_checkpoint_file
      tf.logging.info("Restoring Inception variables from checkpoint %s" %
                      checkpoint_path)
      inception_saver.restore(sess, self.config.inception_checkpoint_file)

    self.init_fn = _restore_inception

  def setup_global_step(self):
    """Creates the non-trainable "global_step" variable and keeps it on self."""
    # Registered both as the global step and as a regular graph variable.
    step_collections = [tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.VARIABLES]
    self.global_step = tf.Variable(
        initial_value=0,
        name="global_step",
        trainable=False,
        collections=step_collections)

  def build(self):
    """Assembles the whole graph by running every construction stage in order."""
    stages = (
        self.build_inputs,
        self.build_image_embeddings,
        self.build_seq_embeddings,
        self.build_model,
        self.setup_inception_initializer,
        self.setup_global_step,
    )
    for run_stage in stages:
      run_stage()


================================================
FILE: polyvore/polyvore_model_siamese.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Siamese Network for compatibility modeling.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np                                  
import tensorflow as tf
import scipy.io as sio
from scipy.linalg import block_diag

from ops import image_embedding
from ops import image_processing
from ops import inputs as input_ops

class PolyvoreModel(object):
  """Siamese network model for fashion sets on the Polyvore dataset.

  Images are embedded with Inception V3 followed by a linear projection. The
  training loss rewards high similarity between images of the same set and
  penalizes similarity above a margin between images of different sets.
  """

  def __init__(self, config, mode, train_inception=False):
    """Basic setup.

    Args:
      config: Object containing configuration parameters.
      mode: "train", "eval" or "inference".
      train_inception: Whether the inception submodel variables are trainable.
    """
    assert mode in ["train", "eval", "inference"]
    self.config = config
    self.mode = mode
    self.train_inception = train_inception

    # Reader for the input data.
    self.reader = tf.TFRecordReader()

    # To match the "Show and Tell" paper we initialize all variables with a
    # random uniform initializer.
    self.initializer = tf.random_uniform_initializer(
        minval=-self.config.initializer_scale,
        maxval=self.config.initializer_scale)

    # A float32 Tensor with shape [batch_size, num_images, height, width, channels].
    self.images = None

    # An int32 0/1 Tensor with shape [batch_size, padded_length].
    self.input_mask = None

    # A float32 Tensor with shape [batch_size, num_images, embedding_size].
    self.image_embeddings = None

    # A float32 scalar Tensor; the total loss for the trainer to optimize.
    self.total_loss = None

    # Collection of variables from the inception submodel.
    self.inception_variables = []

    # Function to restore the inception submodel from checkpoint.
    self.init_fn = None

    # Global step Tensor.
    self.global_step = None

  def is_training(self):
    """Returns true if the model is built for training mode."""
    return self.mode == "train"

  def process_image(self, encoded_image, thread_id=0, image_idx=0):
    """Decodes and processes an image string.

    Args:
      encoded_image: A scalar string Tensor; the encoded image.
      thread_id: Preprocessing thread id. Accepted for interface
        compatibility; it is not forwarded to the image processing op.
      image_idx: Forwarded unchanged to image_processing.process_image
        (presumably the position of the image inside its set -- confirm
        against that helper).

    Returns:
      A float32 Tensor of shape [height, width, 3]; the processed image.
    """
    return image_processing.process_image(encoded_image,
                                          is_training=self.is_training(),
                                          height=self.config.image_height,
                                          width=self.config.image_width,
                                          image_format=self.config.image_format,
                                          image_idx=image_idx)

  def build_inputs(self):
    """Input prefetching, preprocessing and batching.

    Outputs:
      self.images
      self.input_mask
    """
    if self.mode == "inference":
      # In inference mode, images and inputs are fed via placeholders.

      image_feed = tf.placeholder(dtype=tf.string, shape=[], name="image_feed")
      # Decode and preprocess the single fed image.
      image_feed = self.process_image(image_feed)

      # Insert the batch dimension.
      image_seqs = tf.expand_dims(image_feed, 0)

      # No target sequences in inference mode; the mask is fed directly.
      # NOTE(review): the shape is hard-coded to [1, 8] -- presumably one set
      # of 8 images; confirm it matches config.number_set_images.
      input_mask = tf.placeholder(dtype=tf.int64,
                                  shape=[1,8],  # batch_size
                                  name="input_mask")
    else:
      # Prefetch serialized SequenceExample protos.
      input_queue = input_ops.prefetch_input_data(
          self.reader,
          self.config.input_file_pattern,
          is_training=self.is_training(),
          batch_size=self.config.batch_size,
          values_per_shard=self.config.values_per_input_shard,
          input_queue_capacity_factor=self.config.input_queue_capacity_factor,
          num_reader_threads=self.config.num_input_reader_threads)

      # Image processing and random distortion. Split across multiple threads
      # with each thread applying a slightly different distortion.
      # assert self.config.num_preprocess_threads % 2 == 0
      images_and_captions = []
      for thread_id in range(self.config.num_preprocess_threads):
        serialized_sequence_example = input_queue.dequeue()
        set_id, encoded_images, image_ids, captions, likes = (
            input_ops.parse_sequence_example(
            serialized_sequence_example,
            set_id =self.config.set_id_name,
            image_feature=self.config.image_feature_name,
            image_index=self.config.image_index_name,
            caption_feature=self.config.caption_feature_name,
            number_set_images=self.config.number_set_images))

        images = []
        for i in range(self.config.number_set_images):
          images.append(self.process_image(encoded_images[i],image_idx=i))

        images_and_captions.append([set_id, images, image_ids, captions, likes])

      # Batch inputs.
      queue_capacity = (5 * self.config.num_preprocess_threads *
                        self.config.batch_size)
      # Only image_seqs and input_mask are used by this model; the remaining
      # outputs are unpacked because the batching op returns all of them.
      (set_ids, image_seqs, image_ids, input_mask,
       loss_mask, cap_seqs, cap_mask, likes) = (
       input_ops.batch_with_dynamic_pad(images_and_captions,
                                           batch_size=self.config.batch_size,
                                           queue_capacity=queue_capacity))
    self.images = image_seqs
    self.input_mask = input_mask


  def build_image_embeddings(self):
    """Builds the image model subgraph and generates image embeddings.

    Inputs:
      self.images

    Outputs:
      self.inception_variables
      self.image_embeddings
    """

    # Flatten the leading dimensions of self.images into one batch of single
    # images. The spatial dimensions are [height, width]; the width must come
    # from config.image_width (not image_height) so that non-square inputs
    # are reshaped correctly.
    images = tf.reshape(self.images, [-1,
                                 self.config.image_height,
                                 self.config.image_width,
                                 3])

    inception_output = image_embedding.inception_v3(
        images,
        trainable=self.train_inception,
        is_training=self.is_training())
    self.inception_variables = tf.get_collection(
        tf.GraphKeys.VARIABLES, scope="InceptionV3")

    # Map inception output into embedding space.
    with tf.variable_scope("image_embedding") as scope:
      image_embeddings = tf.contrib.layers.fully_connected(
          inputs=inception_output,
          num_outputs=self.config.embedding_size,
          activation_fn=None,
          weights_initializer=self.initializer,
          biases_initializer=None,
          scope=scope)

    # Save the embedding size in the graph.
    tf.constant(self.config.embedding_size, name="embedding_size")

    # Restore the leading dimension of self.images; the middle dimension is
    # inferred.
    self.image_embeddings = tf.reshape(image_embeddings,
                                       [tf.shape(self.images)[0],
                                        -1,
                                        self.config.embedding_size])


  @staticmethod
  def _same_set_mask(num_images, batch_size):
    """Returns the 0/1 mask that marks image pairs belonging to the same set.

    Args:
      num_images: Number of images in one set.
      batch_size: Number of sets in one batch.

    Returns:
      A float numpy array of shape
      [num_images * batch_size, num_images * batch_size] that is block
      diagonal with batch_size blocks of ones, each num_images x num_images.
    """
    block = np.ones((num_images, num_images))
    return block_diag(*([block] * batch_size))

  def build_model(self):
    """Builds the model.

    Inputs:
      self.image_embeddings

    Outputs:
      self.total_loss (training and eval only)
    """
    norm_image_embeddings = tf.nn.l2_normalize(self.image_embeddings, 2,
                                               name="norm_image_embeddings")

    if self.mode == "inference":
      # Inference only needs the normalized embeddings created above.
      return

    # Select the valid siamese pairs: one block of ones per set in the batch.
    # The mask is sized from config.batch_size so that it always matches the
    # shape of the score matrix below (it used to be hard-coded to 10 sets).
    same_set_mask = self._same_set_mask(self.config.number_set_images,
                                        self.config.batch_size)

    norm_image_embeddings = tf.reshape(norm_image_embeddings,
            [self.config.number_set_images * self.config.batch_size,
             self.config.embedding_size])

    # Pairwise similarity between all images in the batch.
    scores = tf.matmul(norm_image_embeddings, norm_image_embeddings,
                       transpose_a=False, transpose_b=True, name="scores")

    # Mean similarity over the same-set pairs (including each image with
    # itself, since the diagonal is part of the mask).
    posi_scores = (tf.reduce_sum(tf.mul(scores, same_set_mask)) /
                   np.sum(same_set_mask))

    different_set_mask = 1.0 - same_set_mask
    margin = 0.8  # Margin in the Siamese network.
    # Pairs from different sets are only penalized above the margin.
    nega_scores = tf.maximum(tf.mul(scores, different_set_mask) - margin, 0.0)
    nega_scores = tf.reduce_sum(nega_scores) / np.sum(different_set_mask)

    emb_batch_loss = tf.sub(nega_scores, posi_scores, name="emb_batch_loss")
    tf.contrib.losses.add_loss(emb_batch_loss)

    # Merge all losses and stats.
    total_loss = tf.contrib.losses.get_total_loss()

    # Add summaries.
    tf.scalar_summary("emb_batch_loss", emb_batch_loss)
    tf.scalar_summary("total_loss", total_loss)
    for var in tf.trainable_variables():
      tf.histogram_summary(var.op.name, var)
    self.total_loss = total_loss

  def setup_inception_initializer(self):
    """Sets up the function to restore inception variables from checkpoint."""
    if self.mode != "inference":
      # Restore inception variables only.
      saver = tf.train.Saver(self.inception_variables)

      def restore_fn(sess):
        tf.logging.info("Restoring Inception variables from checkpoint file %s",
                        self.config.inception_checkpoint_file)
        saver.restore(sess, self.config.inception_checkpoint_file)

      self.init_fn = restore_fn

  def setup_global_step(self):
    """Sets up the global step Tensor."""
    global_step = tf.Variable(
        initial_value=0,
        name="global_step",
        trainable=False,
        collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.VARIABLES])

    self.global_step = global_step

  def build(self):
    """Creates all ops for training and evaluation."""
    self.build_inputs()
    self.build_image_embeddings()
    self.build_model()
    self.setup_inception_initializer()
    self.setup_global_step()


================================================
FILE: polyvore/polyvore_model_vse.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""
Polyvore model used in ACM MM"17 paper
"Learning Fashion Compatibility with Bidirectional LSTMs"
Link: https://arxiv.org/pdf/1707.05691.pdf
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np                                  
import tensorflow as tf

from ops import image_embedding
from ops import image_processing
from ops import inputs as input_ops

class PolyvoreModel(object):
  """ Model for fashion set on Polyvore dataset.
  """

  def __init__(self, config, mode, train_inception=False):
    """Basic setup.

    Args:
      config: Object containing configuration parameters.
      mode: "train", "eval" or "inference".
      train_inception: Whether the inception submodel variables are trainable.
    """
    assert mode in ["train", "eval", "inference"]
    self.config = config
    self.mode = mode
    self.train_inception = train_inception

    # Reader for the input data.
    self.reader = tf.TFRecordReader()

    # To match the "Show and Tell" paper we initialize all variables with a
    # random uniform initializer.
    self.initializer = tf.random_uniform_initializer(
        minval=-self.config.initializer_scale,
        maxval=self.config.initializer_scale)

    # A float32 Tensor with shape
    # [batch_size, num_images, height, width, channels].
    # num_images is the number of images in one outfit, default is 8.
    self.images = None

    # Forward RNN input and target sequences.
    # An int32 Tensor with shape [batch_size, padded_length].
    self.f_input_seqs = None
    # An int32 Tensor with shape [batch_size, padded_length].
    self.f_target_seqs = None
    
    # Backward RNN input and target sequences.
    # An int32 Tensor with shape [batch_size, padded_length].
    self.b_input_seqs = None
    # An int32 Tensor with shape [batch_size, padded_length].
    self.b_target_seqs = None
    
    # An int32 0/1 Tensor with shape [batch_size, padded_length].
    self.input_mask = None
  
    # Image caption sequence and masks.
    # An int32 Tensor with shape [batch_size, num_images, padded_length].
    self.cap_seqs = None
    # An int32 0/1 Tensor with shape [batch_size, padded_length].
    self.cap_mask = None

    # Caption sequence embeddings, we use simple bag of word model.
    # A float32 Tensor with shape [batch_size, num_images, embedding_size].
    self.seq_embeddings = None

    # Image embeddings in the joint visual-semantic space
    # A float32 Tensor with shape [batch_size, num_images, embedding_size].
    self.image_embeddings = None

    # Image embeddings in the RNN output/prediction space.
    self.rnn_image_embeddings = None
    
    # Word embedding map.
    self.embedding_map = None

    # A float32 scalar Tensor; the total loss for the trainer to optimize.
    self.total_loss = None

    # Forward and backward RNN loss.
    # A float32 Tensor with shape [batch_size * padded_length].
    self.forward_losses = None
    # A float32 Tensor with shape [batch_size * padded_length].
    self.backward_losses = None
    # RNN loss, forward + backward.
    self.lstm_losses = None
    
    # Loss mask for lstm loss.
    self.loss_mask = None

    # Visual Semantic Embedding loss.
    # A float32 Tensor with shape [batch_size * padded_length].
    self.emb_losses = None
    
    # A float32 Tensor with shape [batch_size * padded_length].
    self.target_weights = None

    # Collection of variables from the inception submodel.
    self.inception_variables = []

    # Function to restore the inception submodel from checkpoint.
    self.init_fn = None

    # Global step Tensor.
    self.global_step = None
    

  def is_training(self):
    """Returns true if the model is built for training mode."""
    return self.mode == "train"

  def process_image(self, encoded_image, thread_id=0, image_idx=0):
    """Decodes and processes an image string.

    Args:
      encoded_image: A scalar string Tensor; the encoded image.
      thread_id: Preprocessing thread id used to select the ordering of color
        distortions. Not used in our model.
      image_idx: Index of the image in an outfit. Only used for summaries.
    Returns:
      A float32 Tensor of shape [height, width, 3]; the processed image.
    """
    return image_processing.process_image(encoded_image,
                                          is_training=self.is_training(),
                                          height=self.config.image_height,
                                          width=self.config.image_width,
                                          image_format=self.config.image_format,
                                          image_idx=image_idx)

  def build_inputs(self):
    """Creates the input tensors of the model.

    In "inference" mode the inputs are placeholders named "image_feed",
    "input_feed" and "input_mask". In the other modes the inputs come from the
    prefetch queue, are parsed and preprocessed per thread, and are batched.

    Outputs:
      self.images
      self.input_mask
      self.loss_mask
      self.cap_seqs
      self.cap_mask
      self.set_ids
    """
    cfg = self.config

    if self.mode != "inference":
      # Prefetch serialized SequenceExample protos.
      example_queue = input_ops.prefetch_input_data(
          self.reader,
          cfg.input_file_pattern,
          is_training=self.is_training(),
          batch_size=cfg.batch_size,
          values_per_shard=cfg.values_per_input_shard,
          input_queue_capacity_factor=cfg.input_queue_capacity_factor,
          num_reader_threads=cfg.num_input_reader_threads)

      # One parsed-and-preprocessed example per preprocessing thread.
      # The parsed `likes` value is carried along but not used by this model.
      per_thread_examples = []
      for _ in range(cfg.num_preprocess_threads):
        serialized = example_queue.dequeue()
        set_id, encoded_images, image_ids, captions, likes = (
            input_ops.parse_sequence_example(
                serialized,
                set_id=cfg.set_id_name,
                image_feature=cfg.image_feature_name,
                image_index=cfg.image_index_name,
                caption_feature=cfg.caption_feature_name,
                number_set_images=cfg.number_set_images))

        decoded = [self.process_image(encoded_images[idx], image_idx=idx)
                   for idx in range(cfg.number_set_images)]
        per_thread_examples.append(
            [set_id, decoded, image_ids, captions, likes])

      # Batch inputs.
      queue_capacity = (5 * cfg.num_preprocess_threads * cfg.batch_size)
      batched = input_ops.batch_with_dynamic_pad(
          per_thread_examples,
          batch_size=cfg.batch_size,
          queue_capacity=queue_capacity)
      # The batched image ids and likes are not stored on the model.
      (set_ids, image_seqs, _, input_mask,
       loss_mask, cap_seqs, cap_mask, _) = batched

    else:
      # A single encoded image is fed as a scalar string and preprocessed.
      raw_image = tf.placeholder(dtype=tf.string, shape=[], name="image_feed")
      processed_image = self.process_image(raw_image)

      word_ids = tf.placeholder(dtype=tf.int64,
                                shape=[None],
                                name="input_feed")

      # Add a leading batch dimension to the image and a trailing dimension
      # to the word ids.
      image_seqs = tf.expand_dims(processed_image, 0)
      cap_seqs = tf.expand_dims(word_ids, 1)

      input_mask = tf.placeholder(dtype=tf.int64,
                                  shape=[1, 8],
                                  name="input_mask")
      # These have no counterpart in inference mode.
      cap_mask = None
      loss_mask = None
      set_ids = None

    self.images = image_seqs
    self.input_mask = input_mask
    self.loss_mask = loss_mask
    self.cap_seqs = cap_seqs
    self.cap_mask = cap_mask
    self.set_ids = set_ids

  def build_image_embeddings(self):
    """Builds the image model subgraph and generates image embeddings
      in the visual semantic joint space.

    Inputs:
      self.images

    Outputs:
      self.inception_variables
      self.image_embeddings
    """

    # Flatten all leading dimensions so Inception receives a 4-D batch of
    # [height, width, 3] images. The width comes from config.image_width (the
    # same value process_image() uses), so non-square inputs reshape correctly.
    images = tf.reshape(self.images, [-1,
                                      self.config.image_height,
                                      self.config.image_width,
                                      3])

    inception_output = image_embedding.inception_v3(
        images,
        trainable=self.train_inception,
        is_training=self.is_training())
    # Remember the Inception variables so they can be restored separately
    # from a pre-trained checkpoint.
    self.inception_variables = tf.get_collection(
        tf.GraphKeys.VARIABLES, scope="InceptionV3")

    # Map inception output into embedding space (linear layer, no bias).
    with tf.variable_scope("image_embedding") as scope:
      image_embeddings = tf.contrib.layers.fully_connected(
          inputs=inception_output,
          num_outputs=self.config.embedding_size,
          activation_fn=None,
          weights_initializer=self.initializer,
          biases_initializer=None,
          scope=scope)

    # Save the embedding size in the graph.
    tf.constant(self.config.embedding_size, name="embedding_size")
    # Restore the leading batch dimension of self.images:
    # [batch, images_per_entry, embedding_size].
    self.image_embeddings = tf.reshape(image_embeddings,
                                       [tf.shape(self.images)[0],
                                        -1,
                                        self.config.embedding_size])

  def build_seq_embeddings(self):
    """Builds the caption (word sequence) embeddings.

    Inputs:
      self.cap_seqs
      self.cap_mask (not read in inference mode)

    Outputs:
      self.seq_embeddings
      self.embedding_map
    """
    with tf.variable_scope("seq_embedding"), tf.device("/cpu:0"):
      word_table = tf.get_variable(
          name="map",
          shape=[self.config.vocab_size, self.config.embedding_size],
          initializer=self.initializer)
      pooled = tf.nn.embedding_lookup(word_table, self.cap_seqs)

      if self.mode != "inference":
        # Bag-of-words pooling: a mask-weighted sum of the word embeddings of
        # each caption. No division by the word count happens here.
        word_weights = tf.cast(tf.expand_dims(self.cap_mask, 2), tf.float32)
        pooled = tf.squeeze(tf.batch_matmul(word_weights, pooled), [2])

    self.embedding_map = word_table
    self.seq_embeddings = pooled

  def build_model(self):
    """Builds the model: L2-normalized embeddings and the VSE loss.

      The original code is written with Tensorflow r0.10
      for Tensorflow > r1.0, many functions can be simplified.
      For example Tensors support slicing now, so no need to use tf.slice()

    Inputs:
      self.image_embeddings
      self.seq_embeddings
      self.cap_mask (train/eval only)

    Outputs (train/eval only):
      self.total_loss
      self.emb_losses
    """
    norm_image_embeddings = tf.nn.l2_normalize(self.image_embeddings, 2,
                                               name="norm_image_embeddings")
    norm_seq_embeddings = tf.nn.l2_normalize(self.seq_embeddings, 2)

    # Pad the caption embeddings along axis 1 up to number_set_images.
    norm_seq_embeddings = (
        tf.pad(norm_seq_embeddings, [[0, 0],
               [0, self.config.number_set_images - tf.shape(norm_seq_embeddings)[1]],
               [0, 0]], name="norm_seq_embeddings"))

    if self.mode == "inference":
      # No loss is built for inference; only the named tensors above exist.
      return

    # Compute losses for joint embedding.
    # Only look at the captions that have length >= 2.
    emb_loss_mask = tf.greater(tf.reduce_sum(self.cap_mask, 2), 1)
    # Image mask is padded it to max length.
    emb_loss_mask = tf.pad(emb_loss_mask,
        [[0,0],
         [0, self.config.number_set_images - tf.shape(emb_loss_mask)[1]]])

    # Select the valid image-caption pair.
    emb_loss_mask = tf.reshape(emb_loss_mask, [-1])
    norm_image_embeddings = tf.reshape(norm_image_embeddings,
        [self.config.number_set_images * self.config.batch_size,
         self.config.embedding_size])
    norm_image_embeddings = tf.boolean_mask(norm_image_embeddings,
                                            emb_loss_mask)
    norm_seq_embeddings = tf.reshape(norm_seq_embeddings,
        [self.config.number_set_images * self.config.batch_size,
         self.config.embedding_size])

    norm_seq_embeddings = tf.boolean_mask(norm_seq_embeddings, emb_loss_mask)

    # The following defines contrastive loss in the joint space.
    # Reference: https://github.com/ryankiros/visual-semantic-embedding/blob/master/model.py#L39
    # scores[i, j] is the similarity of caption i and image j; the diagonal
    # holds the matching pairs.
    scores = tf.matmul(norm_seq_embeddings, norm_image_embeddings,
                       transpose_a=False, transpose_b=True, name="scores")

    diagonal = tf.expand_dims(tf.diag_part(scores), 1)
    cost_s = tf.maximum(0.0, self.config.emb_margin - diagonal + scores)
    cost_im = tf.maximum(0.0,
        self.config.emb_margin - tf.transpose(diagonal) + scores)
    # Zero out the diagonal so matching pairs do not contribute to the cost.
    cost_s = cost_s - tf.diag(tf.diag_part(cost_s))
    cost_im = cost_im - tf.diag(tf.diag_part(cost_im))

    emb_batch_loss = tf.reduce_sum(cost_s) + tf.reduce_sum(cost_im)
    # Normalize by the squared number of valid pairs.
    emb_batch_loss = (emb_batch_loss /
            tf.cast(tf.shape(norm_seq_embeddings)[0], tf.float32) ** 2)

    tf.contrib.losses.add_loss(emb_batch_loss * self.config.emb_loss_factor)

    total_loss = tf.contrib.losses.get_total_loss()

    # Add summaries.
    tf.scalar_summary("emb_batch_loss", emb_batch_loss)
    tf.scalar_summary("total_loss", total_loss)
    for var in tf.trainable_variables():
      tf.histogram_summary(var.op.name, var)

    # self.loss_mask and self.input_mask are already set by build_inputs();
    # they must not be reassigned here from names this method never defines.
    self.total_loss = total_loss
    self.emb_losses = emb_batch_loss  # Used in evaluation.
      
  def setup_inception_initializer(self):
    """Sets up the function to restore inception variables from checkpoint."""
    if self.mode != "inference":
      # Restore inception variables only.
      saver = tf.train.Saver(self.inception_variables)

      def restore_fn(sess):
        tf.logging.info("Restoring Inception variables from checkpoint %s" %
                        self.config.inception_checkpoint_file)
        saver.restore(sess, self.config.inception_checkpoint_file)

      self.init_fn = restore_fn

  def setup_global_step(self):
    """Sets up the global step Tensor."""
    global_step = tf.Variable(
        initial_value=0,
        name="global_step",
        trainable=False,
        collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.VARIABLES])

    self.global_step = global_step

  def build(self):
    """Runs each graph-construction stage once, in the fixed order below."""
    stages = ("build_inputs",
              "build_image_embeddings",
              "build_seq_embeddings",
              "build_model",
              "setup_inception_initializer",
              "setup_global_step")
    for stage in stages:
      getattr(self, stage)()


================================================
FILE: polyvore/run_inference.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Run the inference of Bi-LSTM model given input images."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import json

import tensorflow as tf
import pickle as pkl
import numpy as np
import configuration
import polyvore_model_bi as polyvore_model

# Command-line flags of the Bi-LSTM feature extraction script.
FLAGS = tf.flags.FLAGS

# Checkpoint handed to saver.restore() in main().
tf.flags.DEFINE_string("checkpoint_path", "",
                       "Model checkpoint file or directory containing a "
                       "model checkpoint file.")
# JSON list of outfits; main() reads "set_id" and "items"[*]["index"] from it.
tf.flags.DEFINE_string("json_file", "data/label/test-no-dup.json",
                       "Json file containing the inference data.")
# Images are read from <image_dir>/<set_id>/<index>.jpg.
tf.flags.DEFINE_string("image_dir", "data/images",
                       "Directory containing images.")
# Despite the help text, this is the path of the pickle FILE that main()
# writes; if it already exists, main() exits without doing anything.
tf.flags.DEFINE_string("feature_file", "data/features/test_features.pkl",
                       "Directory to save the features")
# Copied onto the model config (model_config.rnn_type) before the graph is
# built.
tf.flags.DEFINE_string("rnn_type", "", "Type of RNN.")


def main(_):
  """Extracts image features for every item of every outfit in the JSON file.

  Builds the model in inference mode, restores the checkpoint, runs each
  image through the graph and pickles a dict that maps "<set_id>_<index>" to
  {"image_feat": ..., "image_rnn_feat": ...} into FLAGS.feature_file.
  Returns immediately if the feature file already exists.

  Args:
    _: Unused positional argument supplied by tf.app.run().
  """
  if os.path.isfile(FLAGS.feature_file):
    print("Feature file already exist.")
    return
  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model_config = configuration.ModelConfig()
    model_config.rnn_type = FLAGS.rnn_type
    model = polyvore_model.PolyvoreModel(model_config, mode="inference")
    model.build()
    saver = tf.train.Saver()

  g.finalize()

  # Save image ids and features in a dictionary.
  test_features = dict()

  # The session is closed when the block exits, even on error.
  with tf.Session(graph=g) as sess:
    saver.restore(sess, FLAGS.checkpoint_path)
    with open(FLAGS.json_file) as json_f:
      test_json = json.load(json_f)

    for k, image_set in enumerate(test_json, 1):
      set_id = image_set["set_id"]
      print(str(k) + " : " + set_id)
      for image in image_set["items"]:
        filename = os.path.join(FLAGS.image_dir, set_id,
                                str(image["index"]) + ".jpg")
        # NOTE(review): the image bytes are read with mode "r"; confirm the
        # installed tf.gfile returns raw bytes for this mode.
        with tf.gfile.GFile(filename, "r") as f:
          image_feed = f.read()

        [feat, rnn_feat] = sess.run([model.image_embeddings,
                                     model.rnn_image_embeddings],
                                    feed_dict={"image_feed:0": image_feed})

        image_name = set_id + "_" + str(image["index"])
        test_features[image_name] = dict()
        test_features[image_name]["image_feat"] = np.squeeze(feat)
        test_features[image_name]["image_rnn_feat"] = np.squeeze(rnn_feat)

  with open(FLAGS.feature_file, "wb") as f:
    pkl.dump(test_features, f)


if __name__ == "__main__":
  tf.app.run()


================================================
FILE: polyvore/run_inference_siamese.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Run the inference of Siamese Network given input images."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import json

import tensorflow as tf
import pickle as pkl
import numpy as np
import configuration
import polyvore_model_siamese as polyvore_model

# Command-line flags of the Siamese feature extraction script.
FLAGS = tf.flags.FLAGS

# Checkpoint handed to saver.restore() in main().
tf.flags.DEFINE_string("checkpoint_path", "",
                       "Model checkpoint file or directory containing a "
                       "model checkpoint file.")
# JSON list of outfits; main() reads "set_id" and "items"[*]["index"] from it.
tf.flags.DEFINE_string("json_file", "data/label/test-no-dup.json",
                       "Json file containing the inference data.")
# Images are read from <image_dir>/<set_id>/<index>.jpg.
tf.flags.DEFINE_string("image_dir", "data/images",
                       "Directory containing images.")
# Despite the help text, this is the path of the pickle FILE that main()
# writes; if it already exists, main() exits without doing anything.
tf.flags.DEFINE_string("feature_file",
                       "data/features/test_features_siamese.pkl",
                       "Directory to save the features")


def main(_):
  """Extracts Siamese image features for every item of every outfit.

  Builds the model in inference mode, restores the checkpoint, runs each
  image through the graph and pickles a dict that maps "<set_id>_<index>" to
  {"image_feat": ...} into FLAGS.feature_file. Returns immediately if the
  feature file already exists.

  Args:
    _: Unused positional argument supplied by tf.app.run().
  """
  if os.path.isfile(FLAGS.feature_file):
    print("Feature file already exist.")
    return
  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model_config = configuration.ModelConfig()
    model = polyvore_model.PolyvoreModel(model_config, mode="inference")
    model.build()
    saver = tf.train.Saver()

  g.finalize()

  # Save image ids and features in a dictionary.
  test_features = dict()

  # The session is closed when the block exits, even on error.
  with tf.Session(graph=g) as sess:
    saver.restore(sess, FLAGS.checkpoint_path)
    with open(FLAGS.json_file) as json_f:
      test_json = json.load(json_f)

    for k, image_set in enumerate(test_json, 1):
      set_id = image_set["set_id"]
      print(str(k) + " : " + set_id)
      for image in image_set["items"]:
        filename = os.path.join(FLAGS.image_dir, set_id,
                                str(image["index"]) + ".jpg")
        # NOTE(review): the image bytes are read with mode "r"; confirm the
        # installed tf.gfile returns raw bytes for this mode.
        with tf.gfile.GFile(filename, "r") as f:
          image_feed = f.read()

        [feat] = sess.run([model.image_embeddings],
                          feed_dict={"image_feed:0": image_feed})

        image_name = set_id + "_" + str(image["index"])
        test_features[image_name] = dict()
        test_features[image_name]["image_feat"] = np.squeeze(feat)

  with open(FLAGS.feature_file, "wb") as f:
    pkl.dump(test_features, f)


if __name__ == "__main__":
  tf.app.run()


================================================
FILE: polyvore/run_inference_vse.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Run the inference of the VSE model given input images."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import json

import tensorflow as tf
import pickle as pkl
import numpy as np
import configuration
import polyvore_model_vse as polyvore_model

# Command-line flags of the VSE feature extraction script.
FLAGS = tf.flags.FLAGS

# Checkpoint handed to saver.restore() in main().
tf.flags.DEFINE_string("checkpoint_path", "",
                       "Model checkpoint file or directory containing a "
                       "model checkpoint file.")
# JSON list of outfits; main() reads "set_id" and "items"[*]["index"] from it.
tf.flags.DEFINE_string("json_file", "data/label/test-no-dup.json",
                       "Json file containing the inference data.")
# Images are read from <image_dir>/<set_id>/<index>.jpg.
tf.flags.DEFINE_string("image_dir", "data/images",
                       "Directory containing images.")
# Despite the help text, this is the path of the pickle FILE that main()
# writes; if it already exists, main() exits without doing anything.
# NOTE(review): this default is the same path as the default of
# run_inference_siamese.py, so with defaults whichever script runs second
# finds the file and skips extraction -- confirm callers always override it.
tf.flags.DEFINE_string("feature_file",
                       "data/features/test_features_siamese.pkl",
                       "Directory to save the features")


def main(_):
  """Extracts VSE image features for every item of every outfit.

  Builds the model in inference mode, restores the checkpoint, runs each
  image through the graph and pickles a dict that maps "<set_id>_<index>" to
  {"image_feat": ...} into FLAGS.feature_file. Returns immediately if the
  feature file already exists.

  Args:
    _: Unused positional argument supplied by tf.app.run().
  """
  if os.path.isfile(FLAGS.feature_file):
    print("Feature file already exist.")
    return
  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model_config = configuration.ModelConfig()
    model = polyvore_model.PolyvoreModel(model_config, mode="inference")
    model.build()
    saver = tf.train.Saver()

  g.finalize()

  # Save image ids and features in a dictionary.
  test_features = dict()

  # The session is closed when the block exits, even on error.
  with tf.Session(graph=g) as sess:
    saver.restore(sess, FLAGS.checkpoint_path)
    with open(FLAGS.json_file) as json_f:
      test_json = json.load(json_f)

    for k, image_set in enumerate(test_json, 1):
      set_id = image_set["set_id"]
      print(str(k) + " : " + set_id)
      for image in image_set["items"]:
        filename = os.path.join(FLAGS.image_dir, set_id,
                                str(image["index"]) + ".jpg")
        # NOTE(review): the image bytes are read with mode "r"; confirm the
        # installed tf.gfile returns raw bytes for this mode.
        with tf.gfile.GFile(filename, "r") as f:
          image_feed = f.read()

        [feat] = sess.run([model.image_embeddings],
                          feed_dict={"image_feed:0": image_feed})

        image_name = set_id + "_" + str(image["index"])
        test_features[image_name] = dict()
        test_features[image_name]["image_feat"] = np.squeeze(feat)

  with open(FLAGS.feature_file, "wb") as f:
    pkl.dump(test_features, f)


if __name__ == "__main__":
  tf.app.run()


================================================
FILE: polyvore/set_generation.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Given multimodal queries, complete the outfit with bi-LSTM and VSE model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import math
import os

import pickle as pkl
import tensorflow as tf
import numpy as np
import configuration
import polyvore_model_bi as polyvore_model


# Command-line flags of the outfit generation script.
FLAGS = tf.flags.FLAGS

# Checkpoint handed to saver.restore() in main().
tf.flags.DEFINE_string("checkpoint_path", "",
                       "Model checkpoint file or directory containing a "
                       "model checkpoint file.")
tf.flags.DEFINE_string("image_dir", "", "Directory containing images.")
# Pickle file with the per-image features, loaded in main().
tf.flags.DEFINE_string("feature_file", "", "File which contains the features.")
tf.flags.DEFINE_string("word_dict_file", "", "File containing word list.")

tf.flags.DEFINE_string("query_file", "",
                       "A json file containing the query to generate outfit.")
tf.flags.DEFINE_string("result_dir", "results",
                       "Directory to save the results.")
# Weight of the word vector relative to the image embedding in nn_search().
# The help text is two adjacent literals; the trailing space keeps the
# sentences from running together ("input.Larger").
tf.flags.DEFINE_float("balance_factor", 2.0,
        "Trade off between image and text input. "
        "Larger balance_factor encourages higher correlation with text query")


def norm_row(a):
  """L2 normalize each row of a given set.

  Args:
    a: Array-like. With two or more dimensions, the norm is taken along
      axis 1 (each row is scaled to unit length for a 2-D input); otherwise
      the whole input is treated as one vector.

  Returns:
    The input divided by the corresponding L2 norm(s). Zero vectors are not
    special-cased, so they produce NaNs through the division.
  """
  # Branch on dimensionality explicitly instead of relying on a bare
  # `except:` around the axis=1 norm, which would also hide unrelated errors.
  if np.ndim(a) >= 2:
    return a / np.linalg.norm(a, axis=1)[:, np.newaxis]
  return a / np.linalg.norm(a)

def rnn_one_step(sess, input_feed, lstm_state, direction='f'):
  """Advances the forward or backward LSTM by a single step.

  Args:
    sess: Session-like object whose run(fetches=..., feed_dict=...) is called.
    input_feed: Value fed to the LSTM input tensor.
    lstm_state: Value fed to the LSTM state tensor.
    direction: 'f' selects the forward LSTM; any other value selects the
      backward LSTM.

  Returns:
    A (new_state, output) pair as fetched from the graph.
  """
  if direction == 'f':
    # Forward
    state_fetch = 'lstm/f_state:0'
    output_fetch = 'f_logits/f_logits/BiasAdd:0'
    input_key = 'lstm/f_input_feed:0'
    state_key = 'lstm/f_state_feed:0'
  else:
    # Backward
    state_fetch = 'lstm/b_state:0'
    output_fetch = 'b_logits/b_logits/BiasAdd:0'
    input_key = 'lstm/b_input_feed:0'
    state_key = 'lstm/b_state_feed:0'

  new_state, output = sess.run(
      fetches=[state_fetch, output_fetch],
      feed_dict={input_key: input_feed, state_key: lstm_state})
  return new_state, output


def run_forward_rnn(sess, test_idx, test_feat, num_lstm_units):
  """Greedily extends a query with the forward RNN.

  Args:
    sess: Session passed through to rnn_one_step.
    test_idx: Row indices into test_feat forming the query, fed in order.
    test_feat: 2-D feature matrix; its last row acts as the end-of-set entry.
    num_lstm_units: LSTM size; the state fed to the graph is [1, 2 * units].

  Returns:
    List of the row indices generated after the query (the query itself is
    not included).
  """
  generated = []
  state = np.zeros([1, 2 * num_lstm_units])
  # Feed the query items, starting from an all-zero state.
  for query_idx in test_idx:
    state, output = rnn_one_step(
        sess, np.reshape(test_feat[query_idx], [1, -1]), state, direction='f')

  # Maximum length of the outfit is set to 10.
  for _ in range(10):
    # Softmax of the RNN output against every candidate row.
    probs = np.exp(np.dot(output, np.transpose(test_feat)))
    probs /= np.sum(probs)

    best = np.argsort(-probs)[0][0]
    # 0.00001 is used as a probability threshold to stop the generation.
    # i.e, if the prob of end-of-set is larger than 0.00001, then stop.
    if best == test_feat.shape[0] - 1 or probs[0][-1] > 0.00001:
      break
    state, output = rnn_one_step(
        sess, np.reshape(test_feat[best], [1, -1]), state, direction='f')
    generated.append(best)

  return generated


def run_backward_rnn(sess, test_idx, test_feat, num_lstm_units):
  """Greedily extends a query with the backward RNN.

  Args:
    sess: Session passed through to rnn_one_step.
    test_idx: Row indices into test_feat forming the query; they are fed in
      reverse order.
    test_feat: 2-D feature matrix; its last row acts as the end-of-set entry.
    num_lstm_units: LSTM size; the state fed to the graph is [1, 2 * units].

  Returns:
    List of the generated row indices in generation order (the query itself
    is not included).
  """
  generated = []
  state = np.zeros([1, 2 * num_lstm_units])
  # Feed the query items last-to-first, starting from an all-zero state.
  for query_idx in reversed(test_idx):
    state, output = rnn_one_step(
        sess, np.reshape(test_feat[query_idx], [1, -1]), state, direction='b')

  # At most 10 items are generated.
  for _ in range(10):
    # Softmax of the RNN output against every candidate row.
    probs = np.exp(np.dot(output, np.transpose(test_feat)))
    probs /= np.sum(probs)
    best = np.argsort(-probs)[0][0]
    # 0.00001 is used as a probability threshold to stop the generation.
    # i.e, if the prob of end-of-set is larger than 0.00001, then stop.
    if best == test_feat.shape[0] - 1 or probs[0][-1] > 0.00001:
      break
    state, output = rnn_one_step(
        sess, np.reshape(test_feat[best], [1, -1]), state, direction='b')
    generated.append(best)

  return generated


def run_fill_rnn(sess, start_id, end_id, num_blank, test_feat, num_lstm_units):
  """Fills num_blank positions between a start item and an end item.

  A forward rollout from start_id and a backward rollout from end_id each
  produce one RNN output per blank; the two outputs for a position are added
  and the best-scoring row of test_feat is chosen for it.

  Args:
    sess: Session passed through to rnn_one_step.
    start_id: Row index of the item before the blanks.
    end_id: Row index of the item after the blanks.
    num_blank: Number of positions to fill.
    test_feat: 2-D feature matrix of all candidate items.
    num_lstm_units: LSTM size; the state fed to the graph is [1, 2 * units].

  Returns:
    [start_id] + the chosen row indices + [end_id].
  """
  if num_blank == 0:
    return [start_id, end_id]

  def _rollout(seed_id, direction):
    """Collects the RNN output seen at each blank during a greedy rollout."""
    state = np.zeros([1, 2 * num_lstm_units])
    state, output = rnn_one_step(
        sess, np.reshape(test_feat[seed_id], [1, -1]), state,
        direction=direction)
    collected = []
    for _ in range(num_blank):
      collected.append(output[0])
      probs = np.exp(np.dot(output, np.transpose(test_feat)))
      probs /= np.sum(probs)
      best = np.argsort(-probs)[0][0]
      state, output = rnn_one_step(
          sess, np.reshape(test_feat[best], [1, -1]), state,
          direction=direction)
    return collected

  f_outputs = _rollout(start_id, 'f')
  # The backward rollout walks from the end, so flip it to position order.
  b_outputs = _rollout(end_id, 'b')[::-1]

  combined = np.asarray(f_outputs) + np.asarray(b_outputs)
  score = np.exp(np.dot(combined, np.transpose(test_feat)))
  score /= np.sum(score, axis=1)[:, np.newaxis]
  blank_ids = np.argmax(score, axis=1)
  return [start_id] + list(blank_ids) + [end_id]


def run_set_inference(sess, set_name, test_ids, test_feat, num_lstm_units):
  """Generates a complete outfit from one or more query images.

  Args:
    sess: Session passed through to the RNN helpers.
    set_name: List of query image names; each must occur in test_ids.
    test_ids: List of image names, aligned with the rows of test_feat.
    test_feat: 2-D feature matrix of all candidate items.
    num_lstm_units: LSTM size, passed through to the RNN helpers.

  Returns:
    List of row indices of the generated outfit (backward items reversed,
    then the query/filled items, then the forward items), or None when a
    query name is not present in test_ids.
  """
  test_idx = []
  for name in set_name:
    try:
      test_idx.append(test_ids.index(name))
    except ValueError:
      # list.index raises ValueError for a missing name; anything else is a
      # genuine error and is allowed to propagate.
      print('not found')
      return

  # dynamic search
  # run the whole bi-LSTM on the first item
  first_f_set = run_forward_rnn(sess, test_idx[:1], test_feat, num_lstm_units)
  first_b_set = run_backward_rnn(sess, test_idx[:1], test_feat, num_lstm_units)

  # Position of the first query item inside first_set.
  # NOTE(review): first_b_set is used in generation order here, while the
  # final result reverses the backward items -- confirm this is intended.
  first_posi = len(first_b_set)
  first_set = first_b_set + test_idx[:1] + first_f_set

  if len(set_name) >= 2:
    current_set = norm_row(test_feat[first_set, :])
    all_position = [first_posi]
    for test_id in test_idx[1:]:
      # gradually adding items into it
      # finding nn of the next item
      insert_posi = np.argmax(
          np.dot(norm_row(test_feat[test_id, :]), np.transpose(current_set)))
      all_position.append(insert_posi)

    # run bi LSTM to fill items between first item and this item
    start_posi = np.min(all_position)
    end_posi = np.max(all_position)

    # When every query item maps to the same position the gap would be -1;
    # clamp to 0 so run_fill_rnn simply returns the two endpoints.
    num_blank = max(0, end_posi - start_posi - 1)
    sets = run_fill_rnn(sess, test_idx[0], test_idx[1],
                        num_blank, test_feat, num_lstm_units)

  else:
    # run bi LSTM again
    sets = test_idx
  f_set = run_forward_rnn(sess, sets, test_feat, num_lstm_units)
  b_set = run_backward_rnn(sess, sets, test_feat, num_lstm_units)

  return b_set[::-1] + sets + f_set


def nn_search(i, test_emb, word_vec):
  """Finds the row of test_emb closest to item i shifted by the text query.

  The query vector is test_emb[i] + FLAGS.balance_factor * word_vec. Every row
  of test_emb is scored against it with a dot product.

  Args:
    i: Row index into test_emb of the item being re-ranked.
    test_emb: Matrix of item embeddings, one item per row.
    word_vec: Embedding of the text query, added to the item embedding.

  Returns:
    The index (np.argmax) of the highest-scoring row of test_emb.
  """
  # Combine the image embedding with the weighted text embedding.
  query = test_emb[i] + FLAGS.balance_factor * word_vec
  similarity = np.dot(test_emb, np.transpose(query))
  return np.argmax(similarity)


def main(_):
  """Generates an outfit for the first query in FLAGS.query_file.

  Restores the model from FLAGS.checkpoint_path, loads the pickled features in
  FLAGS.feature_file, and runs run_set_inference() on the query's image ids.
  When the query has a non-empty text part, every predicted item that is not
  one of the query images is replaced by the result of nn_search(). The
  resulting images are copied from FLAGS.image_dir into FLAGS.result_dir.

  Args:
    _: Unused.

  Returns:
    None. Returns early if a query image id is not present in the feature
    file.
  """
  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model_config = configuration.ModelConfig()
    model = polyvore_model.PolyvoreModel(model_config, mode="inference")
    model.build()
    saver = tf.train.Saver()

    g.finalize()
    with tf.Session() as sess:
      saver.restore(sess, FLAGS.checkpoint_path)
      # NOTE(review): unpickling can execute arbitrary code; only load
      # feature files from a trusted source.
      with open(FLAGS.feature_file, "rb") as f:
        test_data = pkl.load(f)

      # Materialize the keys as a list: they are indexed (test_ids[0]) and
      # searched (test_ids.index) below, which a dict view does not support.
      test_ids = list(test_data.keys())
      # test_feat is allocated with one more row than there are items; the
      # extra row stays all-zero.
      test_feat = np.zeros((len(test_ids) + 1,
                            len(test_data[test_ids[0]]["image_rnn_feat"])))
      test_emb = np.zeros((len(test_ids),
                           len(test_data[test_ids[0]]["image_feat"])))

      for i, test_id in enumerate(test_ids):
        # Image feature in the RNN space.
        test_feat[i] = test_data[test_id]["image_rnn_feat"]
        # Image feature in the joint embedding space.
        test_emb[i] = test_data[test_id]["image_feat"]

      test_emb = norm_row(test_emb)

      # Load queries from the JSON file (closing the handle afterwards).
      with open(FLAGS.query_file) as query_f:
        queries = json.load(query_f)
      # Get the word embedding.
      [word_emb] = sess.run([model.embedding_map])

      # Read the vocabulary: the first whitespace-separated token of each
      # line is the word itself.
      with open(FLAGS.word_dict_file) as dict_f:
        words = dict_f.read().splitlines()
      for i, w in enumerate(words):
        words[i] = w.split()[0]

      # Calculate the embedding of the word query
      # only run the first query for demo
      for q in queries[:1]:
        set_name = q['image_query']
        print(set_name)
        # Run Bi-LSTM model using the image query.
        rnn_sets = run_set_inference(sess, set_name, test_ids,
                                     test_feat, model_config.num_lstm_units)
        print(rnn_sets)
        if rnn_sets is None:
          # run_set_inference appears to return None after reporting a
          # missing image id; without this guard the loops below would fail
          # with a TypeError when the text query is empty.
          return

        # Reranking the LSTM prediction with similarity with the text query
        word_query = str(q['text_query'])
        print(word_query)
        if word_query != "":
          # Get the indices of images.
          test_idx = []
          for name in set_name:
            try:
              test_idx.append(test_ids.index(name))
            except ValueError:
              # list.index raises ValueError for an unknown id.
              print('not found')
              return

          # Calculate the word embedding. Rows of word_emb are addressed with
          # the vocabulary line index plus one.
          query_tokens = set(word_query.split())
          word_query = [k + 1 for k in range(len(words))
                        if words[k] in query_tokens]
          print(word_query)
          query_emb = norm_row(np.sum(word_emb[word_query], axis=0))
          for i, j in enumerate(rnn_sets):
            # Keep the user-supplied query images; re-rank everything else.
            if j not in test_idx:
              rnn_sets[i] = nn_search(j, test_emb, query_emb)
          print(rnn_sets)

        # write images
        image_set = []
        for i in rnn_sets:
          image_set.append(test_ids[i])

        # write results
        # os.system('mkdir %s/%s' % (FLAGS.result_dir, 'emb_final'))
        # for i, image in enumerate(image_set):
        #   name = image.split('_')
        #   os.system('cp %s/%s/%s.jpg %s/%s/%d_%s.jpg' % (FLAGS.image_dir,
        #       name[0], name[1], FLAGS.result_dir, 'emb_final', i, image))

        # NOTE(review): paths are interpolated unquoted into a shell command;
        # directories or ids containing spaces or shell metacharacters will
        # break the copy.
        for i, image in enumerate(image_set):
          # Image ids are split on '_' into a sub-directory and a file stem.
          name = image.split('_')
          os.system('cp %s/%s/%s.jpg %s/%d_%s.jpg' % (FLAGS.image_dir,
              name[0], name[1], FLAGS.result_dir, i, image))

# Script entry point: only when executed directly, hand control to
# tf.app.run().
if __name__ == "__main__":
  tf.app.run()


================================================
FILE: polyvore/train.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Train the model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

import configuration
import polyvore_model_bi as polyvore_model

# Shared handle through which the flags defined below are read.
FLAGS = tf.app.flags.FLAGS

# Command-line flags for the training script. main() asserts that
# input_file_pattern and train_dir are non-empty.
tf.flags.DEFINE_string("input_file_pattern", "",
                       "File pattern of sharded TFRecord input files.")
tf.flags.DEFINE_string("inception_checkpoint_file", "",
                       "Path to a pretrained inception_v3 model.")
tf.flags.DEFINE_string("train_dir", "",
                       "Directory for saving and loading model checkpoints.")
tf.flags.DEFINE_boolean("train_inception", False,
                        "Whether to train inception submodel variables.")
tf.flags.DEFINE_integer("number_of_steps", 1000000, "Number of training steps.")
tf.flags.DEFINE_integer("log_every_n_steps", 1,
                        "Frequency at which loss and global step are logged.")

# Log at INFO level so messages emitted via tf.logging.info are shown.
tf.logging.set_verbosity(tf.logging.INFO)


def main(unused_argv):
  """Builds the training graph and runs the slim training loop.

  Reads the model and training settings from the configuration module,
  overrides the input pattern and Inception checkpoint from the command-line
  flags, creates FLAGS.train_dir if needed, and trains for
  FLAGS.number_of_steps steps.

  Args:
    unused_argv: Unused.

  Raises:
    AssertionError: If --input_file_pattern or --train_dir is empty.
  """
  assert FLAGS.input_file_pattern, "--input_file_pattern is required"
  assert FLAGS.train_dir, "--train_dir is required"

  # Model settings: defaults plus the values supplied on the command line.
  model_config = configuration.ModelConfig()
  model_config.input_file_pattern = FLAGS.input_file_pattern
  model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file

  training_config = configuration.TrainingConfig()

  # Make sure the checkpoint directory exists before training starts.
  output_dir = FLAGS.train_dir
  if not tf.gfile.IsDirectory(output_dir):
    tf.logging.info("Creating training directory: %s", output_dir)
    tf.gfile.MakeDirs(output_dir)

  graph = tf.Graph()
  with graph.as_default():
    # Model first, then the optimizer pieces that hang off its loss.
    model = polyvore_model.PolyvoreModel(
        model_config, mode="train", train_inception=FLAGS.train_inception)
    model.build()
    base_lr = tf.constant(training_config.initial_learning_rate)

    # Exponential staircase decay is enabled only for a positive factor.
    decay_fn = None
    if training_config.learning_rate_decay_factor > 0:
      batches_per_epoch = (training_config.num_examples_per_epoch /
                           model_config.batch_size)
      steps_per_decay = int(batches_per_epoch *
                            training_config.num_epochs_per_decay)

      def _staircase_decay(lr, step):
        return tf.train.exponential_decay(
            lr,
            step,
            decay_steps=steps_per_decay,
            decay_rate=training_config.learning_rate_decay_factor,
            staircase=True)

      decay_fn = _staircase_decay

    # Training op: loss minimization with gradient clipping and optional decay.
    train_op = tf.contrib.layers.optimize_loss(
        loss=model.total_loss,
        global_step=model.global_step,
        learning_rate=base_lr,
        optimizer=training_config.optimizer,
        clip_gradients=training_config.clip_gradients,
        learning_rate_decay_fn=decay_fn)

    # Saver that bounds how many checkpoints are kept.
    checkpoint_saver = tf.train.Saver(
        max_to_keep=training_config.max_checkpoints_to_keep)

  # Hand everything to the slim training loop.
  tf.contrib.slim.learning.train(
      train_op,
      output_dir,
      log_every_n_steps=FLAGS.log_every_n_steps,
      graph=graph,
      global_step=model.global_step,
      number_of_steps=FLAGS.number_of_steps,
      init_fn=model.init_fn,
      saver=checkpoint_saver)


# Script entry point: only when executed directly, hand control to
# tf.app.run().
if __name__ == "__main__":
  tf.app.run()


================================================
FILE: polyvore/train_siamese.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Train the Siamese Network."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

import configuration
import polyvore_model_siamese as polyvore_model

# Shared handle through which the flags defined below are read.
FLAGS = tf.app.flags.FLAGS

# Command-line flags for the Siamese training script. main() asserts that
# input_file_pattern and train_dir are non-empty.
tf.flags.DEFINE_string("input_file_pattern", "",
                       "File pattern of sharded TFRecord input files.")
tf.flags.DEFINE_string("inception_checkpoint_file", "",
                       "Path to a pretrained inception_v3 model.")
tf.flags.DEFINE_string("train_dir", "",
                       "Directory for saving and loading model checkpoints.")
tf.flags.DEFINE_boolean("train_inception", False,
                        "Whether to train inception submodel variables.")
tf.flags.DEFINE_integer("number_of_steps", 1000000, "Number of training steps.")
tf.flags.DEFINE_integer("log_every_n_steps", 1,
                        "Frequency at which loss and global step are logged.")

# Overrides TrainingConfig.initial_learning_rate in main().
tf.flags.DEFINE_float("learning_rate", 0.2, "Initial learning rate.")

# NOTE(review): rnn_type is not read by main() in this file; it may be
# consumed elsewhere through the shared FLAGS object -- confirm.
tf.flags.DEFINE_string("rnn_type", "",
                       "Types of rnn, lstm, gru or basic rnn.")


# Log at INFO level so messages emitted via tf.logging.info are shown.
tf.logging.set_verbosity(tf.logging.INFO)


def main(unused_argv):
  """Builds the Siamese training graph and runs the slim training loop.

  Model and training settings come from the configuration module; the input
  file pattern, the Inception checkpoint and the initial learning rate are
  overridden from the command-line flags. FLAGS.train_dir is created when it
  does not exist yet.

  Args:
    unused_argv: Unused.

  Raises:
    AssertionError: If --input_file_pattern or --train_dir is empty.
  """
  assert FLAGS.input_file_pattern, "--input_file_pattern is required"
  assert FLAGS.train_dir, "--train_dir is required"

  model_config = configuration.ModelConfig()
  model_config.input_file_pattern = FLAGS.input_file_pattern
  model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file

  # The Siamese network takes its initial learning rate from the flag.
  training_config = configuration.TrainingConfig()
  training_config.initial_learning_rate = FLAGS.learning_rate

  # Ensure the checkpoint directory is present.
  checkpoint_dir = FLAGS.train_dir
  if not tf.gfile.IsDirectory(checkpoint_dir):
    tf.logging.info("Creating training directory: %s", checkpoint_dir)
    tf.gfile.MakeDirs(checkpoint_dir)

  train_graph = tf.Graph()
  with train_graph.as_default():
    siamese_model = polyvore_model.PolyvoreModel(
        model_config, mode="train", train_inception=FLAGS.train_inception)
    siamese_model.build()

    # Learning rate: a constant, optionally wrapped in staircase decay.
    initial_lr = tf.constant(training_config.initial_learning_rate)
    lr_schedule = None
    if training_config.learning_rate_decay_factor > 0:
      batches_per_epoch = (training_config.num_examples_per_epoch /
                           model_config.batch_size)
      steps_between_decays = int(batches_per_epoch *
                                 training_config.num_epochs_per_decay)

      def _decayed_rate(rate, step_tensor):
        return tf.train.exponential_decay(
            rate,
            step_tensor,
            decay_steps=steps_between_decays,
            decay_rate=training_config.learning_rate_decay_factor,
            staircase=True)

      lr_schedule = _decayed_rate

    # Optimizer op over the model's total loss.
    train_op = tf.contrib.layers.optimize_loss(
        loss=siamese_model.total_loss,
        global_step=siamese_model.global_step,
        learning_rate=initial_lr,
        optimizer=training_config.optimizer,
        clip_gradients=training_config.clip_gradients,
        learning_rate_decay_fn=lr_schedule)

    # Checkpoint saver limited to max_checkpoints_to_keep files.
    ckpt_saver = tf.train.Saver(
        max_to_keep=training_config.max_checkpoints_to_keep)

  # Start the training loop.
  tf.contrib.slim.learning.train(
      train_op,
      checkpoint_dir,
      log_every_n_steps=FLAGS.log_every_n_steps,
      graph=train_graph,
      global_step=siamese_model.global_step,
      number_of_steps=FLAGS.number_of_steps,
      init_fn=siamese_model.init_fn,
      saver=ckpt_saver)


# Script entry point: only when executed directly, hand control to
# tf.app.run().
if __name__ == "__main__":
  tf.app.run()


================================================
FILE: predict_compatibility.sh
================================================
#!/bin/bash
# Runs polyvore/fashion_compatibility.py on the test features with the
# trained model checkpoint.

# NOTE: despite its name, this holds the checkpoint path that is passed as
# --checkpoint_path, not a directory.
CHECKPOINT_DIR="model/model_final/model.ckpt-34865"

# The expansion is quoted so a path containing spaces stays a single argument.
python polyvore/fashion_compatibility.py \
  --checkpoint_path="${CHECKPOINT_DIR}" \
  --label_file="data/label/fashion_compatibility_prediction.txt" \
  --feature_file="data/features/test_features.pkl" \
  --rnn_type="lstm" \
  --direction="2" \
  --result_file="fashion_compatibility.pkl"


================================================
FILE: query.json
================================================
[
    {
        "image_query": [
            "131138376_1",
            "131138376_3"
        ],
        "text_query": "blue"
    }
]


================================================
FILE: results/README.md
================================================
The generated outfit goes here.


================================================
FILE: train.sh
================================================
#!/bin/bash

# Inception v3 checkpoint file.
INCEPTION_CHECKPOINT="model/inception_v3.ckpt"

# Directory to save the model. No trailing slash: "/train" is appended below,
# and a trailing slash here would produce "model/bi_lstm//train".
MODEL_DIR="model/bi_lstm"

# Run the training code.
python polyvore/train.py \
  --input_file_pattern="data/tf_records/train-no-dup-?????-of-00128" \
  --inception_checkpoint_file="${INCEPTION_CHECKPOINT}" \
  --train_dir="${MODEL_DIR}/train" \
  --train_inception=true \
  --number_of_steps=100000


# # Training Siamese Network
# # Directory to save the model (no trailing slash, see above).
# MODEL_DIR="model/siamese"

# # Run the training code.
# python polyvore/train_siamese.py \
#   --input_file_pattern="data/tf_records/train-no-dup-?????-of-00128" \
#   --inception_checkpoint_file="${INCEPTION_CHECKPOINT}" \
#   --train_dir="${MODEL_DIR}/train" \
#   --train_inception=true \
#   --number_of_steps=100000