Repository: xthan/polyvore
Branch: master
Commit: dd9e6cc450a6
Files: 32
Total size: 196.8 KB
Directory structure:
gitextract_bx3trk6u/
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── data/
│ ├── build_polyvore_data.py
│ ├── features/
│ │ └── README.md
│ └── final_word_dict.txt
├── extract_feature.sh
├── fill_in_blank.sh
├── outfit_generation.sh
├── polyvore/
│ ├── configuration.py
│ ├── fashion_compatibility.py
│ ├── fill_in_blank.py
│ ├── fill_in_blank_siamese.py
│ ├── ops/
│ │ ├── __init__.py
│ │ ├── image_embedding.py
│ │ ├── image_embedding_test.py
│ │ ├── image_processing.py
│ │ └── inputs.py
│ ├── polyvore_model_bi.py
│ ├── polyvore_model_siamese.py
│ ├── polyvore_model_vse.py
│ ├── run_inference.py
│ ├── run_inference_siamese.py
│ ├── run_inference_vse.py
│ ├── set_generation.py
│ ├── train.py
│ └── train_siamese.py
├── predict_compatibility.sh
├── query.json
├── results/
│ └── README.md
└── train.sh
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Compiled source #
###################
*.com
*.class
*.dll
*.exe
*.o
*.so
*.pyc
*.mat
*.png
*.jpg
# Packages #
############
# it's better to unpack these files and commit the raw source
# git has its own built in compression methods
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip
*~
.gitlab
.github
data/label/*
data/tf_records/*
model/*
# Logs and databases #
######################
*.log
*.sql
*.sqlite
*.out
# OS generated files #
######################
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
================================================
FILE: Dockerfile
================================================
# GPU-enabled TensorFlow 0.11 / Python 2.7 image for the Polyvore Bi-LSTM code.
FROM nvidia/cuda:8.0-cudnn5-devel

# Pick up some TF dependencies.
# ca-certificates is requested explicitly because the pip bootstrap script
# below is downloaded over HTTPS and recommended packages are not installed.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        libfreetype6-dev \
        libpng12-dev \
        libzmq3-dev \
        pkg-config \
        python \
        python-dev \
        rsync \
        software-properties-common \
        unzip \
        && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# The generic get-pip.py only supports current Python 3 releases; this image
# ships Python 2.7, so the version-specific bootstrap script is required.
# -f makes curl fail on HTTP errors instead of saving an error page to disk.
RUN curl -fsSLO https://bootstrap.pypa.io/pip/2.7/get-pip.py && \
    python get-pip.py && \
    rm get-pip.py

RUN pip --no-cache-dir install \
        ipykernel \
        jupyter \
        matplotlib \
        numpy \
        scipy \
        scikit-learn \
        && \
    python -m ipykernel.kernelspec

# Version substituted into the wheel URL in the block below.
ENV TENSORFLOW_VERSION=0.11.0

# --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #
# These lines will be edited automatically by parameterized_docker_build.sh. #
# COPY _PIP_FILE_ /
# RUN pip --no-cache-dir install /_PIP_FILE_
# RUN rm -f /_PIP_FILE_
# Install TensorFlow GPU version.
RUN pip --no-cache-dir install \
http://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-${TENSORFLOW_VERSION}-cp27-none-linux_x86_64.whl
# --- ~ DO NOT EDIT OR DELETE BETWEEN THE LINES --- #

# TensorBoard
EXPOSE 6006
# IPython
EXPOSE 8888

WORKDIR "/root"

CMD ["/bin/bash"]
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2017 Xintong Han
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
## Bi-LSTM model for learning fashion compatibility.
Code for ACM MM'17 paper "Learning Fashion Compatibility with Bidirectional LSTMs" [[paper]](https://arxiv.org/pdf/1707.05691.pdf).
Parts of the code are from an older version of Tensorflow's im2txt repo [GitHub](https://github.com/tensorflow/models/blob/master/research/im2txt).
The corresponding dataset can be found on [GitHub](https://github.com/xthan/polyvore-dataset) or [Google Drive](https://drive.google.com/drive/folders/0B4Eo9mft9jwoVDNEWlhEbUNUSE0?resourcekey=0-vQg9TMSLKnmPCuuWwl5Ebw&usp=sharing).
### Contact
Author: Xintong Han
Contact: xintong@umd.edu
### Polyvore.com
[Polyvore.com](https://www.polyvore.com/outfits/search.sets?date=day&item_count.from=4&item_count.to=10) is a popular fashion website where users can create and upload outfit data. Here is an [example](https://www.polyvore.com/striped_blazer/set?id=227166819).
### Required Packages
* **TensorFlow** ~~0.10.0~~ 0.11 ([instructions](https://www.tensorflow.org/install/))
* **NumPy** ([instructions](http://www.scipy.org/install.html))
* **scikit-learn**
I actually used a version between r0.10 and r0.11 (the one used by the first commit of TensorFlow's im2txt), so you might need to install r0.11 and modify some functions to run the code. Newer versions of TensorFlow prevent me from doing inference with my old code and from restoring my models trained with this version. However, I have a commit that supports training with TensorFlow 1.0 or greater: [dd1e03e](https://github.com/xthan/polyvore/tree/dd1e03e27fab12ef0051dd2a8ba7a61caaded499). I will create a new repo supporting TensorFlow version >= 1.0.
#### Recommended Setup
* [**docker-ce**](https://docs.docker.com/install/linux/docker-ce/ubuntu/)
* [**nvidia-docker**](https://github.com/NVIDIA/nvidia-docker)
* build TensorFlow image
execute the command below at this repository root:
```sh
docker build -t tensorflow:0.11 .
```
* run container
```sh
docker run -it \
--runtime=nvidia \
-p 8888:8888 \
-p 6006:6006 \
-v $CURRENT:/root/workdir \
tensorflow:0.11
```
### Prepare the Training Data
Download the dataset and put it in the ./data folder:
0. Decompress polyvore.tar.gz into ./data/label/
1. Decompress polyvore-images.tar.gz to ./data/, so all outfit image folders are in ./data/images/
2. Run the following commands to generate TFRecords in ./data/tf_records/:
```
python data/build_polyvore_data.py
```
### Download the Inception v3 Checkpoint
This model requires a pretrained *Inception v3* checkpoint file to initialize the network.
This checkpoint file is provided by the
[TensorFlow-Slim image classification library](https://github.com/tensorflow/models/tree/master/research/slim#tensorflow-slim-image-classification-library)
which provides a suite of pre-trained image classification models. You can read
more about the models provided by the library
[here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models).
Run the following commands to download the *Inception v3* checkpoint.
```shell
# Save the Inception v3 checkpoint in model folder.
wget "http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz"
tar -xvf "inception_v3_2016_08_28.tar.gz" -C ${INCEPTION_DIR}
rm "inception_v3_2016_08_28.tar.gz"
```
### Training
```shell
./train.sh
```
The models will be saved in model/bi_lstm
### Inference
#### Trained model
Download the trained models from the final_model folder on [Google Drive](https://drive.google.com/drive/folders/0B4Eo9mft9jwoVDNEWlhEbUNUSE0) and put it in ./model/final_model/model.ckpt-34865.
#### Extract features of test data
To perform all three tasks described in the paper, we first need to extract the features of the test images:
```
./extract_feature.sh
```
And the image features will be in data/features/test_features.pkl.
You can also perform end-to-end inference by modifying the corresponding code. For example, input a sequence of images and output a compatibility score.
#### Fashion fill-in-the-blank
```
./fill_in_blank.sh
```
Note that we further optimized some design choices in the released model. It can achieve 73.5% accuracy, which is higher than the number reported in our paper.
#### Compatibility prediction
```
./predict_compatibility.sh
```
Unlike the training process, where the loss is calculated within each mini-batch, during testing we compute the loss against the whole test set. This is pretty slow; a better method could probably be used (e.g., using the distance between the LSTM-predicted representation and the target image embedding).
#### Outfit generation
```
./outfit_generation.sh
```
It generates an outfit given the image/text query in query.json, and saves the results in the results dir. For demo purposes, the query.json only contains one example:
where green boxes indicate the image query, and the text query is "blue".
#### Some notes
We found that a late fusion of different single models (Bi-LSTM w/o VSE + VSE + Siamese) can achieve superior results on all tasks. These models are also available in the same folder on [Google Drive](https://drive.google.com/drive/folders/0B4Eo9mft9jwoVDNEWlhEbUNUSE0).
### Todo list
- [x] Add multiple choice inference code.
- [x] Add compatibility prediction inference code.
- [x] Add image outfit generation code. Very similar to compatibility prediction, you can try to do it yourself if in a hurry.
- [x] Release trained models.
- [x] Release Siamese/VSE models.
- [ ] Polish the code.
### Citation
If this code or the Polyvore dataset helps your research, please cite our paper:
@inproceedings{han2017learning,
author = {Han, Xintong and Wu, Zuxuan and Jiang, Yu-Gang and Davis, Larry S},
title = {Learning Fashion Compatibility with Bidirectional LSTMs},
booktitle = {ACM Multimedia},
year = {2017},
}
================================================
FILE: data/build_polyvore_data.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Prepare Polyvore outfit data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from datetime import datetime
import json
import os
import random
import sys
import threading
import numpy as np
import tensorflow as tf
# Command-line flags. Paths are relative, so the defaults only resolve when the
# script is launched from the directory that contains data/.

# JSON label files, one per data split.
tf.app.flags.DEFINE_string('train_label', 'data/label/train_no_dup.json',
'Training label file')
tf.app.flags.DEFINE_string('test_label', 'data/label/test_no_dup.json',
'Testing label file')
tf.app.flags.DEFINE_string('valid_label','data/label/valid_no_dup.json',
'Validation label file')
# Where the generated TFRecord shards are written.
tf.app.flags.DEFINE_string('output_directory', 'data/tf_records/',
'Output data directory')
# Images are looked up as <image_dir>/<set_id>/<index>.jpg.
tf.app.flags.DEFINE_string('image_dir', 'data/images/',
'Directory of image patches')
# Text file whose first whitespace-separated token on each line is a word.
tf.app.flags.DEFINE_string('word_dict_file', 'data/final_word_dict.txt',
'File containing the word dictionary.')
# main() asserts that every shard count below is divisible by num_threads.
tf.app.flags.DEFINE_integer('train_shards', 128,
'Number of shards in training TFRecord files.')
tf.app.flags.DEFINE_integer('test_shards', 16,
'Number of shards in test TFRecord files.')
tf.app.flags.DEFINE_integer('valid_shards', 8,
'Number of shards in validation TFRecord files.')
tf.app.flags.DEFINE_integer('num_threads', 8,
'Number of threads to preprocess the images.')
# Parsed flag values, read throughout this module.
FLAGS = tf.flags.FLAGS
class Vocabulary(object):
  """Maps word strings to integer ids, with a fallback id for unseen words."""

  def __init__(self, vocab, unk_id):
    """Stores the lookup table and the fallback id.

    Args:
      vocab: A dictionary of word to word_id.
      unk_id: Id of the special 'unknown' word.
    """
    self._unk_id = unk_id
    self._vocab = vocab

  def word_to_id(self, word):
    """Returns the integer id of a word string.

    Words missing from the vocabulary are reported on stdout and mapped to
    the unknown id.
    """
    if word not in self._vocab:
      print('unknow: ' + word)
      return self._unk_id
    return self._vocab[word]
def _is_png(filename):
"""Determine if a file contains a PNG format image.
Args:
filename: string, path of the image file.
Returns:
boolean indicating if the image is a PNG.
"""
return '.png' in filename
def _int64_feature(value):
  """Wraps an int, or a list of ints, as an int64 Feature for an Example proto."""
  # A bare scalar is promoted to a one-element list; lists pass through as-is.
  values = value if isinstance(value, list) else [value]
  int64_list = tf.train.Int64List(value=values)
  return tf.train.Feature(int64_list=int64_list)
def _float_feature(value):
  """Wraps a float, or a list of floats, as a float Feature for an Example proto."""
  # A bare scalar is promoted to a one-element list; lists pass through as-is.
  values = value if isinstance(value, list) else [value]
  float_list = tf.train.FloatList(value=values)
  return tf.train.Feature(float_list=float_list)
def _bytes_feature(value):
  """Wraps a single value as a one-element bytes Feature for an Example proto."""
  # The value is passed through str() before being stored.
  bytes_list = tf.train.BytesList(value=[str(value)])
  return tf.train.Feature(bytes_list=bytes_list)
def _int64_feature_list(values):
  """Builds an int64 FeatureList for a SequenceExample, one Feature per value."""
  features = []
  for item in values:
    features.append(_int64_feature(item))
  return tf.train.FeatureList(feature=features)
def _int64_list_feature_list(values):
  """Builds a FeatureList for a SequenceExample from a sequence of int64 lists.

  Each element of `values` becomes one int64 Feature.
  """
  features = []
  for int_list in values:
    features.append(_int64_feature(int_list))
  return tf.train.FeatureList(feature=features)
def _bytes_feature_list(values):
  """Builds a bytes FeatureList for a SequenceExample, one Feature per value."""
  features = []
  for item in values:
    features.append(_bytes_feature(item))
  return tf.train.FeatureList(feature=features)
def _float_feature_list(values):
  """Builds a float FeatureList for a SequenceExample, one Feature per value."""
  features = []
  for item in values:
    features.append(_float_feature(item))
  return tf.train.FeatureList(feature=features)
def _to_sequence_example(set_info, decoder, vocab):
  """Builds a SequenceExample proto for an outfit.

  Args:
    set_info: dict describing one outfit. The keys read here are 'set_id',
      'set_url', 'likes', 'views' and 'items'; every item must provide
      'index' and 'name'.
    decoder: object exposing decode_jpeg(encoded_bytes). It is used only to
      check that each image file holds decodable JPEG data.
    vocab: object exposing word_to_id(word).

  Returns:
    A tf.train.SequenceExample, or None when the outfit has no items or when
    one of its images cannot be decoded.
  """
  set_id = set_info['set_id']
  image_data = []
  image_ids = []
  caption_data = []
  caption_ids = []
  for image_info in set_info['items']:
    filename = os.path.join(FLAGS.image_dir, set_id,
                            str(image_info['index']) + '.jpg')
    # JPEG data is binary, so the file must not be read in text mode.
    with open(filename, 'rb') as f:
      encoded_image = f.read()
    try:
      # Decoding is a validity check only; the decoded pixels are discarded.
      decoder.decode_jpeg(encoded_image)
    except (tf.errors.InvalidArgumentError, AssertionError):
      print("Skipping file with invalid JPEG data: %s" % filename)
      return None
    image_data.append(encoded_image)
    image_ids.append(image_info['index'])
    caption = image_info['name'].encode('utf-8')
    caption_data.append(caption)
    # Word ids are stored shifted by one.
    # NOTE(review): presumably this reserves 0 for padding; confirm against
    # the code that reads these records.
    caption_id = [vocab.word_to_id(word) + 1 for word in caption.split()]
    caption_ids.append(caption_id)

  # Without at least one image there is nothing to repeat into the fixed
  # image slots below, so the outfit is rejected rather than crashing.
  if not image_data:
    print("Skipping set without any items: %s" % set_id)
    return None

  feature = {}
  # Only keep 8 images, if outfit has less than 8 items, repeat the last one.
  for index in range(8):
    if index >= len(image_data):
      feature['images/' + str(index)] = _bytes_feature(image_data[-1])
    else:
      feature['images/' + str(index)] = _bytes_feature(image_data[index])
  feature["set_id"] = _bytes_feature(set_id)
  feature["set_url"] = _bytes_feature(set_info['set_url'])
  # Likes and Views are not used in our model, but we put it into TFRecords.
  feature["likes"] = _int64_feature(set_info['likes'])
  feature["views"] = _int64_feature(set_info['views'])
  context = tf.train.Features(feature=feature)
  feature_lists = tf.train.FeatureLists(feature_list={
      "caption": _bytes_feature_list(caption_data),
      "caption_ids": _int64_list_feature_list(caption_ids),
      "image_index": _int64_feature_list(image_ids)
  })
  sequence_example = tf.train.SequenceExample(
      context=context, feature_lists=feature_lists)
  return sequence_example
class ImageCoder(object):
  """Bundles a TensorFlow session with small image encode/decode graphs."""

  def __init__(self):
    # One Session is shared by every coding call made through this object.
    self._sess = tf.Session()

    # PNG -> JPEG: decode the fed PNG bytes to 3 channels, then re-encode
    # them as an RGB JPEG at quality 100.
    self._png_data = tf.placeholder(dtype=tf.string)
    decoded_png = tf.image.decode_png(self._png_data, channels=3)
    self._png_to_jpeg = tf.image.encode_jpeg(
        decoded_png, format='rgb', quality=100)

    # JPEG -> array: decode the fed JPEG bytes to 3 channels.
    self._decode_jpeg_data = tf.placeholder(dtype=tf.string)
    self._decode_jpeg = tf.image.decode_jpeg(
        self._decode_jpeg_data, channels=3)

  def png_to_jpeg(self, image_data):
    """Runs the PNG-to-JPEG conversion on the given encoded PNG data."""
    feeds = {self._png_data: image_data}
    return self._sess.run(self._png_to_jpeg, feed_dict=feeds)

  def decode_jpeg(self, image_data):
    """Decodes encoded JPEG data and checks the result is 3-D with 3 channels.

    Raises:
      AssertionError: if the decoded image is not rank 3 or its last
        dimension is not 3.
    """
    feeds = {self._decode_jpeg_data: image_data}
    decoded = self._sess.run(self._decode_jpeg, feed_dict=feeds)
    assert len(decoded.shape) == 3
    assert decoded.shape[2] == 3
    return decoded
def _process_image_files_batch(coder, thread_index, ranges, name,
                               all_sets, vocab, num_shards):
  """Processes and saves list of images as TFRecord in 1 thread.

  Args:
    coder: object passed to _to_sequence_example as its JPEG decoder.
    thread_index: integer, index of this thread's entry in `ranges`.
    ranges: list of [start, end] index pairs into `all_sets`, one per thread.
    name: string, prefix used for the output shard file names.
    all_sets: list of outfit dicts to serialize.
    vocab: vocabulary object passed to _to_sequence_example.
    num_shards: integer, total number of shards across all threads; must be
      divisible by len(ranges).
  """
  # Each thread produces N shards where N = int(num_shards / num_threads).
  # For instance, if num_shards = 128, and the num_threads = 2, then the first
  # thread would produce shards [0, 64).
  num_threads = len(ranges)
  assert not num_shards % num_threads
  num_shards_per_batch = int(num_shards / num_threads)

  # Split this thread's index range evenly across its shards.
  shard_ranges = np.linspace(ranges[thread_index][0],
                             ranges[thread_index][1],
                             num_shards_per_batch + 1).astype(int)
  num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0]

  counter = 0
  for s in range(num_shards_per_batch):
    # Generate a sharded version of the file name, e.g. 'train-00002-of-00010'
    shard = thread_index * num_shards_per_batch + s
    output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards)
    output_file = os.path.join(FLAGS.output_directory, output_filename)
    writer = tf.python_io.TFRecordWriter(output_file)

    shard_counter = 0
    files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int)
    try:
      for i in files_in_shard:
        sequence_example = _to_sequence_example(all_sets[i], coder, vocab)
        # A falsy result means the outfit was rejected; skip it.
        if not sequence_example:
          print('fail for set: ' + all_sets[i]['set_id'])
          continue
        writer.write(sequence_example.SerializeToString())
        shard_counter += 1
        counter += 1

        if not counter % 100:
          print('%s [thread %d]: Processed %d of %d images in thread batch.' %
                (datetime.now(), thread_index, counter, num_files_in_thread))
          sys.stdout.flush()
    finally:
      # Close the shard even if serialization raised part-way through.
      writer.close()

    print('%s [thread %d]: Wrote %d images to %s' %
          (datetime.now(), thread_index, shard_counter, output_file))
    sys.stdout.flush()

  # Report the number of shards written, as the message says, rather than the
  # number of input sets assigned to this thread.
  print('%s [thread %d]: Wrote %d images to %d shards.' %
        (datetime.now(), thread_index, counter, num_shards_per_batch))
  sys.stdout.flush()
def _process_image_files(name, all_sets, vocab, num_shards):
  """Process and save list of images as TFRecord of Example protos.

  The outfits are split into FLAGS.num_threads contiguous index ranges, and
  one worker thread per range runs _process_image_files_batch.

  Args:
    name: string, prefix used for the output shard file names.
    all_sets: list of outfit dicts to serialize.
    vocab: vocabulary object forwarded to the workers.
    num_shards: integer, total number of shards to write for this data set.
  """
  # Break all images into batches with a [ranges[i][0], ranges[i][1]].
  # The builtin int is used as the dtype: the np.int alias has been removed
  # from NumPy, and the worker function already uses astype(int).
  spacing = np.linspace(0, len(all_sets), FLAGS.num_threads + 1).astype(int)
  ranges = []
  for i in range(len(spacing) - 1):
    ranges.append([spacing[i], spacing[i+1]])

  # Launch a thread for each batch.
  print('Launching %d threads for spacings: %s' % (FLAGS.num_threads, ranges))
  sys.stdout.flush()

  # Create a mechanism for monitoring when all threads are finished.
  coord = tf.train.Coordinator()

  # Create a generic TensorFlow-based utility for converting all image codings.
  # The same coder instance is handed to every worker thread.
  coder = ImageCoder()

  threads = []
  for thread_index in range(len(ranges)):
    args = (coder, thread_index, ranges, name, all_sets, vocab, num_shards)
    t = threading.Thread(target=_process_image_files_batch, args=args)
    t.start()
    threads.append(t)

  # Wait for all the threads to terminate.
  coord.join(threads)
  print('%s: Finished writing all %d fashion sets in data set.' %
        (datetime.now(), len(all_sets)))
  sys.stdout.flush()
def _create_vocab(filename):
  """Creates the vocabulary of word to word_id.

  Each non-blank line of the file contributes its first whitespace-separated
  token as a word. Ids are assigned in order of appearance starting at 0, and
  the id for unknown words is the number of words read.

  Args:
    filename: string, path of the word dictionary file.

  Returns:
    A Vocabulary wrapping the word-to-id mapping.
  """
  # Read inside a context manager so the file handle is always closed.
  with open(filename) as f:
    word_counts = f.read().splitlines()
  # Blank lines have no first token to take, so they are skipped.
  reverse_vocab = [x.split()[0] for x in word_counts if x.split()]
  unk_id = len(reverse_vocab)
  vocab_dict = dict([(x, y) for (y, x) in enumerate(reverse_vocab)])
  vocab = Vocabulary(vocab_dict, unk_id)
  return vocab
def _find_image_files(labels_file, name):
"""Build a list of all images files and labels in the data set.
"""
# Read image ids
all_sets = json.load(open(labels_file))
# Shuffle the ordering of all image files in order to guarantee
# random ordering of the images with respect to label in the
# saved TFRecord files. Make the randomization repeatable.
shuffled_index = range(len(all_sets))
random.seed(12345)
random.shuffle(shuffled_index)
all_sets = [all_sets[i] for i in shuffled_index]
print('Found %d fashion sets.' % (len(all_sets)))
return all_sets
def _process_dataset(name, label_file, vocab, num_shards):
  """Loads one data split from its label file and writes it out as TFRecords.

  Args:
    name: string, unique identifier specifying the data set.
    label_file: string, path to the labels file.
    vocab: vocabulary object forwarded to the TFRecord writer.
    num_shards: integer number of shards for this data set.
  """
  print(label_file)
  _process_image_files(
      name, _find_image_files(label_file, name), vocab, num_shards)
def main(unused_argv):
  """Builds the validation, test and training TFRecord files.

  Args:
    unused_argv: arguments supplied by tf.app.run(); ignored.
  """
  # Each thread writes an equal number of shards, so every shard count must be
  # a multiple of the thread count.
  assert not FLAGS.train_shards % FLAGS.num_threads, (
      'Please make the FLAGS.num_threads commensurate with FLAGS.train_shards')
  assert not FLAGS.test_shards % FLAGS.num_threads, (
      'Please make the FLAGS.num_threads commensurate with '
      'FLAGS.test_shards')
  assert not FLAGS.valid_shards % FLAGS.num_threads, (
      'Please make the FLAGS.num_threads commensurate with '
      'FLAGS.valid_shards')
  print('Saving results to %s' % FLAGS.output_directory)

  # The output directory may not exist yet (the repository's .gitignore
  # excludes data/tf_records/*), so create it before any shard is opened.
  if not os.path.isdir(FLAGS.output_directory):
    os.makedirs(FLAGS.output_directory)

  vocab = _create_vocab(FLAGS.word_dict_file)

  # Run it!
  _process_dataset('valid-no-dup', FLAGS.valid_label, vocab, FLAGS.valid_shards)
  _process_dataset('test-no-dup', FLAGS.test_label, vocab, FLAGS.test_shards)
  _process_dataset('train-no-dup', FLAGS.train_label, vocab, FLAGS.train_shards)


if __name__ == '__main__':
  tf.app.run()
================================================
FILE: data/features/README.md
================================================
Extracted image features go here.
================================================
FILE: data/final_word_dict.txt
================================================
black 9909
leather 8516
bag 6350
women's 5810
top 4504
jeans 4133
dress 4100
gold 4031
white 3837
earrings 3619
iphone 3613
sunglasses 3382
necklace 3381
skirt 3254
boots 3142
suede 3004
jacket 2922
case 2871
denim 2763
ring 2703
mini 2622
yoins 2563
high 2535
blue 2533
clutch 2497
plus 2465
bracelet 2418
skinny 2164
coat 2127
shoulder 2125
sandals 2122
long 2112
set 2106
women 2106
lace 2069
red 2014
new 1996
print 1986
pink 1961
sleeve 1954
ankle 1949
silver 1894
pre-owned 1877
lipstick 1861
shorts 1850
topshop 1818
sweater 1788
size 1749
faux 1711
vintage 1699
shoes 1693
rose 1689
pumps 1651
de 1642
michael 1634
crop 1634
color 1598
eye 1554
watch 1541
shirt 1538
round 1527
backpack 1526
diamond 1506
brown 1499
tote 1477
velvet 1474
floral 1454
neck 1382
lip 1372
saint 1356
laurent 1328
cropped 1299
stud 1282
wool 1278
blouse 1277
sheinside 1264
shein 1260
t-shirt 1209
chanel 1199
small 1191
gucci 1187
crossbody 1185
fashion 1184
kate 1170
short 1156
strap 1156
star 1154
classic 1154
sneakers 1153
womens 1151
heel 1137
cover 1101
toe 1075
kors 1075
hat 1066
nail 1061
grey 1051
chain 1044
platform 1043
boho 1040
alexander 1040
pants 1035
flower 1033
pearl 1018
hair 1009
dolce 1007
crystal 996
metal 995
metallic 994
cotton 988
silk 983
design 979
green 974
love 969
marc 969
valentino 951
tank 951
ripped 935
york 927
striped 921
collection 919
yellow 918
converse 914
gabbana 912
printed 911
embellished 910
mascara 900
heart 896
knit 894
double 885
spade 876
waist 875
fur 867
h&m 862
choker 846
large 841
bow 830
eau 828
medium 826
light 819
pu 818
flat 811
lace-up 811
boohoo 811
matte 807
jewelry 805
embroidered 801
heels 791
style 790
tee 788
pendant 772
patent 769
taylor 766
miu 762
wrap 761
casual 759
zip 756
beauty 747
slim 738
collar 737
charlotte 736
distressed 733
nars 725
satchel 724
christian 722
scarf 720
blazer 719
givenchy 717
sleeveless 709
sandal 707
drop 702
jean 699
makeup 699
frame 696
island 696
cuff 696
front 696
river 691
boot 683
pencil 682
bobbi 664
forever 663
jacobs 660
liquid 647
cream 646
la 644
back 641
look 641
tassel 640
shadow 633
stripe 632
cashmere 629
pleated 629
boyfriend 626
miss 625
louboutin 624
trousers 624
dior 620
oversized 619
zipper 614
moto 614
sterling 612
satin 607
sweatshirt 607
eyeshadow 605
nude 604
palette 604
jumper 604
cross 602
chuck 601
stella 599
le 594
pump 592
button 589
cat 588
biker 587
burberry 586
one 583
rossi 583
london 573
chunky 572
fringe 567
stretch 566
dark 564
plaid 562
powder 560
solid 557
cut 555
belt 553
parfum 551
midi 548
wang 547
gianvito 547
canvas 547
cardigan 546
pocket 544
adidas 542
handbag 536
hem 533
retro 532
beanie 531
tie 531
ladies 529
men's 528
body 528
oz 521
mcqueen 518
studded 516
wide 516
box 515
14k 515
loose 515
fit 513
gold-tone 512
bangle 507
polish 506
vans 505
trainers 504
mccartney 504
block 503
mac 499
low 498
nails 496
stone 494
selfridge 490
navy 489
nike 485
detail 482
summer 481
booties 479
wallet 476
pointed 475
flats 474
glitter 473
super 472
mango 467
gloss 466
quilted 466
blush 463
chloé 460
square 457
buckle 454
ray-ban 448
open 447
x 447
pack 444
bags 439
petite 438
ribbed 438
leggings 437
leg 436
colour 436
flap 435
beach 434
soft 434
jimmy 429
skater 428
chiffon 428
cami 427
wash 423
stiletto 422
hot 421
rouge 420
v 417
steel 414
turtleneck 414
choo 410
clear 408
natural 405
rag 404
bone 403
orange 402
rise 402
oz. 400
pattern 400
russe 399
preowned 396
rings 395
bucket 393
waisted 392
mid 392
zara 391
eyeliner 386
crepe 384
rhinestone 384
brush 384
mesh 383
beige 383
cosmetics 379
knitted 377
bomber 377
giuseppe 375
clothing 374
charm 370
zanotti 370
drawstring 369
wedge 368
tory 368
pure 368
olivia 366
moschino 366
multi 365
glasses 364
accessories 362
band 361
burch 360
couture 359
acne 359
chic 358
maison 358
18k 358
vest 358
layered 356
jersey 355
logo 355
knee 354
trim 350
statement 349
golden 348
balmain 348
paris 346
phone 346
beaded 345
lapel 343
acetate 342
strappy 341
aviator 340
stainless 339
cap 337
sneaker 337
spray 336
steve 336
maxi 335
crochet 333
madden 333
fedora 331
shoe 331
sporty 329
side 329
triangle 327
earring 325
pom 324
edition 321
fringed 321
lauren 320
rebecca 318
fendi 318
wedding 315
eyes 315
evening 314
victoria 314
textured 314
studs 314
liner 313
circle 313
foundation 312
girl 312
rockstud 311
monki 311
sheer 310
unisex 310
face 309
party 307
elastic 307
bootie 307
v-neck 306
waterproof 305
pullover 305
sleeves 304
handbags 303
prada 303
alice 302
dot 302
designer 301
hooded 299
limited 299
moon 298
burgundy 297
hoop 297
studios 295
contrast 294
j.crew 292
pockets 291
authentic 291
purple 290
plated 289
feather 288
sexy 288
straw 286
lens 284
straight 283
bra 281
bling 281
candy 281
stylish 280
brand 280
men 279
ear 278
preppy 278
wool-blend 276
leaf 276
prom 275
dorothy 274
day 274
marni 273
sole 273
hoodie 271
quartz 271
handle 271
perkins 271
pin 271
check 271
secret 270
margiela 268
purse 266
art 265
asos 264
outerwear 263
flared 262
woven 261
balenciaga 260
oscar 258
big 257
full 256
clip 256
balm 256
originals 255
gray 254
hand 253
swarovski 253
envelope 253
lash 252
gel 252
lim 252
goop 252
messenger 251
leopard 251
geometric 249
smith 248
christmas 247
daisy 246
coral 246
pro 244
trench 244
tom 244
khaki 243
a-line 243
sequin 243
phillip 243
heeled 242
yves 240
store 240
isabel 240
sun 238
minkoff 238
cutout 237
gift 237
camel 237
rock 237
j 237
ml 236
row 235
lacquer 235
klein 233
travel 232
hollow 232
formal 232
renta 232
urban 231
belted 231
jane 230
air 229
topic 228
tan 228
tone 227
chicnova 227
mirror 227
peep 225
two 225
line 224
combat 224
single 224
amazon.com 224
monogram 223
cable 223
guess 223
pant 222
bodycon 222
ford 221
chicwish 221
marant 220
coffee 219
ruffle 219
dr. 218
proenza 218
schouler 217
colors 217
leather-look 217
loafers 216
slip 216
mirrored 216
notebook 215
chloe 215
beckham 215
flowers 215
school 214
hi 213
6s 213
calf 213
accessorize 213
winter 212
cute 212
headband 212
blend 211
baker 211
skull 211
plain 211
armani 210
basic 208
pastel 207
sweet 207
mid-rise 207
jacquard 207
dial 207
court 206
dsquared2 206
chelsea 205
mint 205
halter 205
online 205
crew 204
embroidery 204
embossed 203
martens 203
apple 203
toilette 202
butterfly 202
baseball 202
patch 201
gown 201
von 201
free 201
arrow 200
flare 200
victoria's 199
asymmetric 198
olympia 197
ombre 197
glass 197
lips 196
breasted 195
jet 195
lanvin 195
superstar 195
saffiano 194
linda 194
cotton-blend 193
socks 192
rubber 192
american 191
graphic 191
ralph 191
floppy 190
volume 190
spring 190
key 189
letter 189
cape 189
felt 189
pave 187
bar 187
artificial 186
peach 186
polka 185
calvin 185
ruffled 185
boutique 184
galaxy 184
luxe 184
skin 184
panel 184
cat-eye 184
simple 184
nyx 184
bralet 183
ox 183
kit 183
punk 183
paul 183
length 181
finish 181
street 181
james 181
perfect 180
snake 180
dresses 180
fall 180
tights 180
patchwork 180
aquazzura 179
vince 179
pouch 179
studio 178
elizabeth 177
ultra 177
modern 176
m 176
bead 176
frayed 175
6/6s 175
onyx 175
shine 175
joseph 175
ball 174
lime 174
table 174
original 173
elegant 173
maybelline 172
versace 172
city 172
saddle 172
west 171
moda 171
round-frame 171
ted 171
diane 171
crown 170
infinity 170
max 170
life 169
ballet 168
aeropostale 168
home 167
braided 167
brim 167
butter 167
farrow 167
intense 167
washed 166
bright 166
bikini 166
tall 166
shop 166
grunge 165
australia 165
effect 165
cocktail 165
noir 164
oversize 163
tattoo 162
gold-plated 162
extreme 162
ivory 162
swing 161
tulle 160
50ml 160
true 160
mixed 160
diamonds 160
inspired 159
ice 159
house 159
water 159
exclusive 159
premium 158
glow 158
wine 157
turquoise 157
bracelets 157
bold 157
shimmer 157
neon 156
lily 156
vegan 156
half 155
girls 154
tweed 154
pieces 154
trio 154
mens 153
pointy 153
headphones 153
paige 152
rivet 152
gladiator 151
signature 151
le3no 151
shell 150
bib 150
antigona 149
pretty 149
closure 149
make 149
wear 149
peplum 149
linen 149
amazon 148
enamel 148
garden 148
end 148
wood 148
lock 147
textured-leather 147
duo 147
made 147
plastic 147
lady 146
trendy 146
genuine 146
furstenberg 145
co. 145
gloves 145
pen 145
kimono 145
old 145
3/4 145
split 144
spike 144
sizes 143
sapphire 143
lipsy 143
optical 143
choies 143
rip 143
real 142
patent-leather 142
msgm 142
90s 142
stretchy 141
kenneth 141
roll 140
bell 140
silver-tone 140
wayfarer 140
best 140
ariana 140
marble 140
lashes 140
mary 140
grande 140
military 139
mom 139
fine 139
edge 139
long-wear 138
karl 138
crime 138
polo 138
transparent 138
ugg 137
tilbury 137
coco 137
jewellery 137
oval 137
goth 137
wall 137
baby 136
jamie 136
destroyed 136
shape 135
magnetic 134
lambskin 134
garavani 134
sport 134
bamboo 134
ct. 134
resin 134
john 133
sea 133
david 133
cz 133
spf 133
spaghetti 133
warehouse 133
jennifer 133
bohemian 132
edgy 132
stila 131
compact 131
camera 131
tribal 131
ruby 131
little 131
5s 131
professional 131
french 131
calfskin 131
engagement 130
bronze 130
handmade 130
inch 130
fake 130
acrylic 130
hipster 129
palm 129
w/ 129
shearling 129
nylon 129
paper 128
high-top 128
bridal 128
lucluc 128
glam 128
bear 128
queen 128
wild 128
stars 127
wide-leg 127
current/elliott 127
triple 127
betsey 127
johnson 127
deep 127
night 126
off-the-shoulder 126
designs 126
laura 126
high-rise 126
tunic 126
berry 126
power 126
lane 125
false 125
perfume 125
hard 125
lagerfeld 125
stick 125
point 124
essie 124
multicolor 124
lenses 124
banana 124
ea 123
hermes 123
apricot 123
boy 122
pale 122
slit 122
decay 122
luxury 121
aldo 121
madewell 121
see 120
antique 120
nine 120
tree 120
kylie 120
society 120
army 120
scoop 119
cut-out 119
lo 119
cord 119
slouchy 119
oasis 119
nly 119
oxford 119
knot 119
bottle 118
pinterest 118
rib 118
casadei 117
fox 117
grace 117
kiss 117
adjustable 117
stripes 117
chair 116
gradient 116
3d 116
plant 116
vera 115
layer 115
louis 115
strapless 115
tumblr 115
kim 115
tartan 115
clinique 114
official 114
lands 114
bendel 114
roberto 114
ferragamo 114
cartier 114
lauder 114
legging 114
maurices 113
tops 113
gavriel 113
cold 113
coin 113
cool 113
rainbow 112
smooth 112
fresh 112
alexis 112
wildfox 112
two-tone 112
l 111
blossom 111
salvatore 111
teardrop 111
vero 111
olive 111
puma 111
lamp 111
tibi 111
stand 111
work 111
sparkle 111
three 111
music 110
freshwater 110
decor 110
bouquet 110
card 110
brooch 110
cosmetic 110
smashbox 110
mara 110
shopper 109
kenzo 109
sophie 109
women’s 109
lancome 109
henri 109
karen 109
pull 108
ballerina 108
seconds 108
hollister 108
5/5s 108
vase 108
velvetine 108
fragrance 107
bustier 107
mansur 107
micro 107
flip 107
t-strap 107
camuto 107
pandora 107
head 107
around 107
flannel 107
chevron 106
zirconia 106
carven 106
boxy 106
parker 106
nudes 106
gemstone 106
mineral 106
cherry 105
tube 105
les 105
audacious 105
leather-trimmed 105
high-waisted 105
fitted 105
cluster 105
ceramic 104
sand 104
scott 104
twill 104
festival 104
cross-body 104
cuffed 104
bird 104
python 104
cc 104
drew 104
inc 104
cavalli 104
silk-blend 104
brass 104
sophia 104
lined 104
pillow 103
allure 103
wig 103
jessica 103
martin 103
gypsy 102
n 102
jeggings 102
t.w. 102
merino 102
bleach 102
stuart 102
midnight 102
book 102
bralette 101
dangle 101
perforated 101
joni 101
chocolate 101
warm 101
mix 101
time 100
jay 99
faye 99
people 99
twist 99
cubic 99
checked 99
throw 99
asymmetrical 99
smokey 99
fabric 98
brogues 98
emilio 98
beautiful 98
united 98
skate 98
draped 98
piece 98
accent 98
avenue 97
animal 97
céline 97
carat 97
reversible 97
bardot 97
sale 97
ribbon 97
sky 97
royal 96
loafer 96
slip-on 96
hippie 96
stack 96
club 96
low-rise 96
cheap 95
brow 95
floral-print 95
celine 95
deluxe 95
vuitton 95
shades 94
happy 94
cole 94
necklaces 94
ii 94
paint 94
aztec 94
athletic 94
thong 94
mankind 94
apparel 94
drape 94
raw 94
shawl 93
100mm 93
mulberry 93
weitzman 93
kendall 93
mcq 93
dream 93
shift 93
jeffrey 93
rolled 93
chine 92
stay 92
5sos 92
beads 92
sports 92
bcbgmaxazria 92
campbell 92
eyewear 92
cashmere-blend 92
fossil 92
gem 92
xl 92
giorgio 92
pierre 92
fly 92
mark 92
abercrombie 92
eyeglasses 92
watches 91
espadrille 91
tiffany 91
fitch 91
silicone 91
kendra 91
cult 91
guerlain 91
pop 91
pucci 91
zimmermann 91
5c 90
direction 90
acid 90
philosophy 90
extra 90
fleece 90
suedette 90
thick 90
mono 90
ideas 90
cheek 90
jour 90
sans 89
terry 89
holiday 89
webster 89
link 89
pins 89
witchery 89
allurez 89
pyramid 89
essential 89
cushion 89
oliver 89
vogue 89
thigh 88
lightweight 88
roses 88
woolen 88
gorgeous 88
mother 88
sweat 88
turtle 88
jil 87
snapback 87
chronograph 87
autumn 87
sander 87
halo 87
opi 87
brushes 87
jewel 87
us 87
site 87
shiny 87
topaz 87
park 87
tapered 87
iconic 87
custom 87
uniqlo 87
dkny 87
faced 87
souci 86
lana 86
faceted 86
mm 86
holder 86
magic 86
billabong 86
crystal-embellished 86
lord 86
snakeskin 86
tommy 86
hilfiger 86
over-the-knee 85
outdoor 85
culottes 85
rug 85
gothic 85
hole 85
wire 85
tiny 85
caviar 85
target 85
modcloth 85
champagne 85
iro 84
cup 84
peep-toe 84
rental 84
bandeau 84
vernis 84
piercing 84
helmut 84
pineapple 84
keds 84
pleat 84
ribkoff 84
naked 84
clip-on 84
vanessa 84
padded 83
nile 83
bittar 83
ag 83
agate 83
mohair 83
vinyl 83
hardy 83
timberland 83
runway 83
bella 83
coach 83
tattoos 83
anne 83
caged 82
burton 82
trend 82
tailored 82
core 82
painted 82
convertible 82
crystals 82
forever21 82
slippers 82
bradley 82
tropical 81
alex 81
tassels 81
republic 81
lucy 81
funny 81
estee 81
heritage 81
hobo 81
hydrating 81
hairstyles 81
mink 81
eugenia 81
bottega 81
sac 81
ponte 80
lang 80
teal 80
alloy 80
insert 80
tea 80
ink 80
parka 80
sugar 80
poppy 80
veneta 80
backless 79
perry 79
leaves 79
nappa 79
des 79
edp 79
playsuit 79
thin 79
120mm 79
stacking 79
straight-leg 79
loeffler 78
patterned 78
colorful 78
vivienne 78
mock 78
nina 78
black/white 78
nose 78
kelly 78
trouser 78
photo 78
note 78
randall 78
gap 78
candle 77
levi's 77
selma 77
trends 77
sicily 77
mask 77
scallop 77
buttons 77
goldtone 77
longline 77
tshirt 77
cotton-jersey 77
chandelier 77
honey 77
jumpsuit 77
shirts 76
details 76
slim-fit 76
100ml 76
get 76
angel 76
pur 76
waterfall 76
bodysuit 76
westwood 76
anna 76
stitch 75
across 75
organic 75
unique 75
mercier 75
go 75
disney 75
straps 75
emerald 75
batwing 75
hudson 75
irregular 75
rihanna 75
religion 75
bowknot 75
clean 75
glamorous 75
berricle 75
bath 74
elephant 74
capri 74
co 74
highlighter 74
gg 74
filigree 74
jaeger 74
monsoon 74
camo 74
contour 74
zizzi 74
deborah 74
hayden 74
monochrome 74
nearly 74
muscle 74
peoples 74
monday 74
market 74
st. 74
dip 74
molly 74
30ml 73
like 73
mug 73
m·a·c 73
sequined 73
ruched 73
doll 73
lasting 73
theory 73
owl 73
arrangement 73
sam 73
sleeved 73
buttoned 73
slingback 73
fashionable 72
bubble 72
pressed 72
splatter 72
corduroy 72
shaped 72
ivy 72
disc 72
good 72
dye 72
stackable 71
furla 71
bb 71
ca 71
bennett 71
horn 71
barbara 71
oil 71
macbook 71
ipad 71
elie 71
blonde 70
houndstooth 70
beats 70
pcs 70
anya 70
tuxedo 70
juicy 70
sergio 70
foldover 70
crescent 70
regular 70
low-top 70
goddess 70
heather 70
plunge 70
amber 70
lulu 70
nubuck 70
panama 70
sequins 69
g 69
scuba 69
greek 69
run 69
slouch 69
10k 69
snow 69
easy 69
lucky 69
stones 69
princess 69
snap 69
lilly 69
bangles 69
diorshow 69
company 68
concealer 68
Étoile 68
lotion 68
turn 68
cultured 68
swimsuit 68
donna 68
lux 68
brushed 68
pearls 68
pusheen 68
post 68
falabella 68
ny 68
duster 68
stitching 68
melissa 68
strand 68
eos 68
cell 68
18-karat 68
pilot 67
laser 67
succulent 67
illesteva 67
pavé 67
quay 67
wolf 67
mickey 67
office 67
mist 67
roksanda 67
roshe 66
assorted 66
hammered 66
shredded 66
amethyst 66
wedges 66
sonia 66
scalloped 66
b 66
lippmann 66
yurman 66
clubmaster 66
mermaid 66
d'orsay 66
duffle 66
italian 66
bridesmaid 66
teen 66
etro 66
five 66
raglan 66
harlow 66
south 66
hearts 66
ballerinas 66
paisley 66
opal 65
away 65
rare 65
crocodile 65
soap 65
touch 65
platinum 65
instant 65
cargo 65
rope 65
copper 65
keychain 65
deco 65
nyc 65
di 65
fancy 65
pier 64
instagram 64
bandana 64
geo 64
luggage 64
spliced 64
flag 64
ct 64
knitwear 64
k.i.s.s.i.n.g 64
temporary 64
hood 64
monster 64
reading 64
peace 64
steampunk 64
rimmel 64
jeanne 64
addict 64
snowflake 64
simpson 64
coast 64
boss 64
pot 64
saab 64
rich 63
singlet 63
diamante 63
kisses 63
rachel 63
smart 63
fold 63
citizens 63
hindmarch 63
w 63
wallpaper 63
cm 63
rebel 63
digital 63
humanity 63
redvalentino 63
bandage 63
motorcycle 63
tennis 63
leopard-print 63
clips 63
laptop 63
factory 63
opening 62
frames 62
union 61
wave 61
ashley 61
stan 61
basket 61
yeezy 61
blackfive 61
missoni 61
western 61
double-breasted 61
harry 61
a.l.c. 61
peter 61
solitaire 61
vila 61
antonio 61
floor 61
collarless 61
bronzer 61
running 61
rain 61
mason 60
plate 60
emma 60
zipped 60
edie 60
charcoal 60
toms 60
polished 60
lee 60
oxfords 60
pair 60
beret 60
garnet 60
typography 60
arden 60
d 60
knotted 60
hardware 60
4s 60
rolex 60
states 60
indie 60
abstract 60
michel 60
nicholas 60
aqua 60
rick 59
frill 59
no. 59
quote 59
versatile 59
cases 59
goose 59
et 59
coconut 59
blahnik 59
four 59
manolo 59
jumbo 59
mustard 59
rolling 59
decorative 59
cambridge 59
topman 59
wooden 58
faded 58
letters 58
puffer 58
m&co 58
samsung 58
mm6 58
hidden 58
walker 58
blanket 58
tortoise 58
l.k. 58
lemon 58
edelman 58
delpozo 58
semi 58
jack 58
passport 58
dome 58
boat 58
shopping 58
chains 58
pencils 58
sensational 57
rectangle 57
kurt 57
eagle 57
luminous 57
ankle-strap 57
applique 57
halloween 57
pebbled 57
birkin 57
uk 57
specs 57
plum 57
virgin 57
relaxed 57
espadrilles 57
quotes 57
plus/6/5/5s/5c 57
season 57
zoe 56
4/4s 56
skool 56
derek 56
clarins 56
andrew 56
precision 56
lipgloss 56
rochas 56
i'm 56
vacation 56
base 56
anchor 56
primer 56
poncho 56
usa 56
k 56
flatform 56
polarized 56
fluffy 56
rosie 56
soho 56
edt 56
diesel 56
bleached 56
celebrity 56
native 56
wristlet 55
clock 55
cuffs 55
scrunchie 55
marie 55
date 55
leo 55
geiger 55
eyelashes 55
series 55
quad 55
deer 55
forest 55
cartoon 55
faux-leather 55
lolita 55
minaudiere 55
sofa 55
ethnic 55
14kt 54
chino 54
zippers 54
chambray 54
gigi 54
taupe 54
jackets 54
express 54
millen 54
camisole 54
space 54
doublju 54
charles 54
varsity 54
corset 54
owens 54
text 54
schutz 54
levis 54
splicing 54
cage 54
kane 54
rips 54
need 54
muse 54
sk8-hi 54
rocket 54
organza 53
crewneck 53
amy 53
birger 53
finger 53
delicate 53
beverly 53
hills 53
flash 53
dune 53
mcm 53
vermeil 53
bun 53
dots 53
jade 53
neoprene 53
monica 53
belle 53
peony 53
weave 53
fluid 53
flowy 53
crisscross 53
marmont 53
calypso 53
imports 53
rabbit 53
locket 52
rykiel 52
costume 52
maria 52
colorblock 52
search 52
legendary 52
infinite 52
bunny 52
teaspoon 52
adult 52
vita 52
21+ 52
dionysus 52
curly 52
perla 52
flora 52
dahlia 52
pvc 52
violet 52
photos 51
brunello 51
bailey 51
robinson 51
curl 51
wing 51
neo 51
sydney 51
carved 51
jordan 51
grained 51
rivets 51
supply 51
hats 51
mouret 51
baublebar 51
xs 51
dre 51
point-toe 51
cucinelli 51
padlock 51
black/gold 51
bobby 51
fleur 51
woman 51
orchid 51
poplin 51
roland 51
lizzie 51
diana 51
lewis 50
equipment 50
label 50
tiered 50
moonstone 50
moisturizing 50
stretch-jersey 50
slim-leg 50
smoky 50
audrey 50
raffia 50
p 50
natasha 50
sunset 50
rhodium 50
rupert 50
ysl 50
ilia 50
leigh 50
cara 50
mouse 50
rosa 50
junior 50
van 50
ann 50
sweetheart 50
ippolita 50
intarsia 50
illamasqua 50
lorac 50
gilet 49
amazing 49
jules 49
international 49
harrods 49
sanderson 49
always 49
planter 49
longwear 49
whistles 49
fallon 49
malene 49
friendship 49
structured 49
38mm 49
ever 49
twisted 49
backpacks 49
cotton-poplin 49
synthetic 49
fan 49
giambattista 48
kensington 48
barrel 48
pulitzer 48
petal 48
unicorn 48
metro 48
first 48
braid 48
funnel 48
lavender 48
girly 48
plus/7/6 48
lipcolor 48
croc 48
bui 48
pointed-toe 48
dual 48
indigo 48
l'absolu 48
fishnet 48
ounce 48
vetements 48
graham 48
effy 48
gifts 48
rhea 48
ally 48
operandi 48
tips 48
norman 48
marilyn 48
curved 48
sleek 48
valli 48
tinted 48
tarte 48
various 48
18ct 47
aspinal 47
margot 47
lisa 47
wrist 47
trimmed 47
spiral 47
maroon 47
atelier 47
bo 47
opaque 47
products 47
sunday 47
holland 47
brocade 47
cozy 47
shower 47
hulme 47
suit 47
matthew 47
leisure 47
loop 47
minimal 47
brooks 47
wireless 47
chinese 47
plants 47
dance 47
messy 47
translucent 47
kirkwood 47
force 47
cactus 47
creamy 47
radiant 47
branch 47
waistcoat 46
world 46
sarah 46
humble 46
almond 46
tint 46
80s 46
kevyn 46
rustic 46
blade 46
scarves 46
1/2 46
bed 46
bee 46
georgia 46
fun 46
rx 46
vertical 46
eva 46
refill 46
goldschmied 46
thing 46
grid 46
low-tops 46
tortoiseshell 46
temple 46
scotch 46
bronzing 46
aucoin 46
hi-top 46
williamson 46
adriano 46
tag 46
sephora 46
comb 46
lola 46
watercolor 46
manon 45
jar 45
aeo 45
ancient 45
colours 45
icon 45
engraved 45
pajama 45
facial 45
potter 45
a5 45
comme 45
romance 45
faith 45
buckled 45
dreamcatcher 45
pigalle 45
iris 45
money 45
boys 45
peekaboo 45
sailor 45
clasp 45
christopher 45
elle 45
rucksack 45
silk-satin 45
detachable 45
essentials 45
rim 45
chan 44
totes 44
wrapped 44
simons 44
rb3025 44
performance 44
mule 44
monogramme 44
bruno 44
alien 44
cocoon 44
press 44
harris 44
eddie 44
cloud 44
ricci 44
narrow 44
larger 44
buy 44
baroque 44
curve 44
frye 44
tight 44
shaping 44
classics 44
off-shoulder 44
verdugo 44
matt 44
tutorial 44
rhinestones 44
swag 44
hermès 44
o 44
tod's 44
glossy 44
great 44
90's 44
desk 44
radiance 44
lam 44
crossover 44
arm 44
dusty 43
sock 43
slimming 43
ears 43
ella 43
ultimate 43
hour 43
justin 43
soda 43
towel 43
hunter 43
miller 43
clutches 43
dr 43
spitfire 43
nerd 43
electric 43
paolo 43
evan 43
kitty 43
perspex 43
charms 43
latest 43
sign 43
nautical 43
care 43
bvlgari 43
crème 43
camouflage 43
jonathan 43
friends 43
disco 43
crocheted 43
lion 43
lamb 43
tiger 43
pony 43
baptiste 43
elyse 43
lights 43
lapis 43
moi 43
trapeze 43
toast 43
lacoste 43
styles 43
cutoff 43
velvetines 42
nature 42
web 42
varnish 42
bottoms 42
seven 42
ocean 42
mules 42
alexa 42
curling 42
criss 42
anastasia 42
palazzo 42
lizard 42
personalized 42
strawberry 42
baked 42
brick 42
continental 42
basics 42
simulated 42
classy 42
amazon.co.uk 42
los 42
55mm 42
gym 42
canada 42
l.a. 42
wonderland 42
printing 42
phase 42
zac 42
single-breasted 42
better 42
stacked 42
lovers 42
giant 42
arizona 42
jelly 42
closed 42
keyhole 42
. 41
two-piece 41
18kt 41
pochette 41
moisture 41
seam 41
comfort 41
valentine 41
ceremony 41
clic 41
velour 41
twin 41
baguette 41
fruit 41
colored 41
high-waist 41
skort 41
y 41
paradise 41
champion 41
milly 41
cedar 41
neutral 41
barth 41
bareminerals 41
belly 41
leonard 41
embellishment 41
ctw 41
iron 41
penny 41
briefcase 41
bad 41
plush 41
sunscreen 41
hip 41
duffel 40
blair 40
vibrant 40
erickson 40
thierry 40
fuchsia 40
appliqué 40
envy 40
broken 40
dolce&gabbana 40
cameo 40
coats 40
lengthening 40
openwork 40
brothers 40
michelle 40
glittered 40
cloth 40
ipod 40
pompom 40
brooklyn 40
snapmade.com 40
barneys 40
quality 40
lucite 40
sling 40
italy 40
cabochon 40
supreme 40
notes 40
shoedazzle 40
romper 39
tech 39
dyed 39
bonded 39
alexandre 39
product 39
simone 39
femme 39
flops 39
magazine 39
eyelet 39
katrantzou 39
fl 39
agent 39
victorian 39
gunmetal 39
decoration 39
seamed 39
miranda 39
polka-dot 39
available 39
pizza 39
amanda 39
wings 39
lorenzo 39
part 39
manicure 39
yang 39
dylan 39
gorjana 39
timeless 39
brian 39
raey 39
15ml 39
barely 39
dog 39
pour 39
sieraden 39
edited 39
overall 39
105mm 39
cady 39
appliquéd 39
movado 39
10mm 39
chiara 38
vivier 38
paneled 38
faux-fur 38
daniel 38
grand 38
charming 38
sofia 38
room 38
lovely 38
r13 38
gowns 38
splendid 38
knuckle 38
king 38
bezel 38
nixon 38
gauze 38
peacock 38
carolina 38
angeles 38
lizzy 38
marco 38
buckles 38
eyebrow 38
neckline 38
pom-pom 38
j.w.anderson 38
sylvie 38
kitten 38
knee-high 38
nano 38
stem 38
katy 38
wellington 38
romantic 38
polyvore 38
50s 38
make-up 38
mademoiselle 38
rails 38
wharf 38
shaggy 38
pan 38
gazelle 38
36mm 38
tulip 37
spectrum 37
bottom 37
cobalt 37
passion 37
year 37
starbucks 37
iantorno 37
narciso 37
eyelash 37
live 37
3x1 37
stretch-cotton 37
8mm 37
off-white 37
se 37
/jean 37
eve 37
luna 37
wars 37
show 37
folding 37
steven 37
silvertone 37
mia 37
fabulous 37
shadows 37
temperley 37
anouk 37
roman 37
racerback 37
lilac 37
laundry 37
dainty 37
hinge 37
frost 37
jeweled 37
40mm 37
supra 37
view 37
hanging 36
one-shoulder 36
graffiti 36
self 36
alpaca 36
cognac 36
ps1 36
slide 36
turn-down 36
u 36
borgo 36
nicole 36
tower 36
lighting 36
joy 36
skirts 36
keyring 36
flock 36
du 36
emoji 36
plexi 36
ash 36
henna 36
mod 36
edit 36
harper 36
100% 36
cleansing 36
creme 36
lotus 36
georgette 36
inches 36
mid-length 36
kat 36
ready 36
demi 36
safari 36
moss 36
bing 36
mother-of-pearl 36
shade 36
hamilton 36
skyline 36
roger 36
complete 36
pierced 36
jogger 36
mytheresa.com 36
bieber 36
laque 36
daily 36
milk 36
homme 36
everyday 36
simmons 36
silk-chiffon 36
smythson 36
tab 36
peacoat 35
era 35
judith 35
bouclé 35
sheath 35
match 35
pamela 35
spiked 35
holographic 35
m&s 35
joe 35
babies 35
carvela 35
silky 35
sharon 35
rotita 35
monarch 35
fling 35
en 35
chico's 35
nordstrom 35
marciano 35
string 35
girlfriend 35
bloom 35
twenty 35
eight 35
ava 35
s4 35
bite 35
zippered 35
brit 35
gentle 35
citrine 35
barrette 35
heavy 35
step 35
coated 35
journal 35
dry 35
mat 35
simply 35
extensions 35
emporio 35
wardrobe 35
illuminating 35
violeta 35
plein 35
brogue 35
tools 35
sticker 35
bare 35
balance 35
accessory 35
irene 35
styling 35
cable-knit 35
linea 35
looks 35
wide-brim 35
don't 35
pigment 34
minnie 34
shahida 34
pumpkin 34
special 34
jegging 34
resistant 34
weekend 34
mineralize 34
thicken 34
selected 34
coachella 34
fern 34
color-block 34
dannijo 34
fair 34
future 34
rodriguez 34
qupid 34
nero 34
robert 34
alice+olivia 34
underwear 34
luke 34
follies 34
meyer 34
heat 34
ani 34
dining 34
parides 34
young 34
betty 34
24k 34
voyage 34
loewe 34
jungle 34
automatic 34
robyn 34
rita 34
h 34
puff 34
baume 34
bowler 34
holly 34
japanese 34
jennings 34
hermès 34
jansport 34
patches 34
raf 34
kill 34
lancôme 34
grain 34
kohl 33
incase 33
books 33
gallery 33
lost 33
foil 33
cosmic 33
zero 33
broderie 33
brief 33
tabitha 33
cube 33
cuba 33
take 33
sheet 33
sheepskin 33
1/4 33
luu 33
precious 33
briefs 33
thomas 33
pots 33
hourglass 33
bees 33
zebra 33
use 33
nirvana 33
allen 33
wink 33
catcher 33
man 33
splash 33
cynthia 33
miniskirt 33
cyber 33
magnolia 33
louise 33
things 33
neuwirth 33
tied 33
lattice 33
sparkling 33
bebe 33
slogan 33
ribbed-knit 33
skagen 33
bay 33
amelie 33
shark 33
atwood 33
tip 33
superdry 33
barn 33
vanilla 33
slub 33
master 32
panelled 32
bianca 32
babe 32
wet 32
vampire 32
broad 32
nwt 32
morning 32
spikes 32
illuminator 32
bridge 32
larsson 32
zodiac 32
wreath 32
alaïa 32
novica 32
slipper 32
play 32
fire 32
hello 32
hoops 32
juniors 32
leiber 32
lingerie 32
block-heel 32
rio 32
clover 32
kingdom 32
blank 32
pr 32
martini 32
brilliant 32
footwear 32
veau 32
neiman 32
brightening 32
1980s 32
willow 32
chantecaille 32
credit 32
etched 32
stylo 32
marcus 32
right 32
fenty 32
edward 32
anita 32
self-tie 32
maya 32
brows 32
aerin 32
umbrella 32
lurex 32
birman 32
chest 32
road 32
viva 32
birthday 32
inspirational 32
yoga 31
analog 31
derby 31
illusion 31
stretch-crepe 31
falke 31
headpiece 31
wavy 31
high-low 31
north 31
marchesa 31
watermelon 31
batman 31
ferragni 31
stilettos 31
barbour 31
liberty 31
spirit 31
motif 31
jasmine 31
ae 31
iridescent 31
results 31
hope 31
wool-crepe 31
medusa 31
sandy 31
yoni 31
stocking 31
lasry 31
romy 31
national 31
mr 31
eiffel 31
datejust 31
icing 31
awesome 31
75ml 31
terre 31
re/done 31
glitz 31
pear 31
way 31
chestnut 31
papier 31
erin 31
t-bar 31
viparo 31
ray 31
lara 31
morganite 31
rocha 31
haider 31
poison 31
lariat 31
push 31
l'oreal 31
business 31
utility 31
smoke 31
jones 31
high-heel 31
track 31
marl 31
please 31
desert 31
lattori 31
bean 31
a.p.c. 31
bidermann 31
velours 31
aluminum 31
thread 31
premiere 31
crossbar 30
menswear 30
connection 30
flawless 30
e 30
sigma 30
high-heeled 30
patrick 30
polyester 30
collections 30
gold/black 30
stretch-knit 30
ace 30
let 30
1990s 30
famous 30
bordeaux 30
cicihot 30
petits 30
monroe 30
dakota 30
lookbook 30
model 30
1970s 30
stain 30
checkered 30
polly 30
mandala 30
racer 30
crosby 30
veil 30
posh 30
message 30
skeleton 30
cowl 30
lazy 30
valentines 30
sutton 30
college 30
bermuda 30
reiss 30
flex 30
east 30
tees 30
diorific 30
karan 30
word 30
carpet 30
fujifilm 30
cut-off 30
arms 30
sparkly 30
mila 30
================================================
FILE: extract_feature.sh
================================================
#!/bin/bash
# Runs polyvore/run_inference.py over data/label/test_no_dup.json and writes
# the extracted features to data/features/test_features.pkl.
# Every path below is relative, so invoke this script from the repository root.

# NOTE: despite its name, this variable holds the checkpoint path that is
# passed as --checkpoint_path, not a directory.
CHECKPOINT_DIR="model/model_final/model.ckpt-34865"

# The expansion is quoted so that a checkpoint path containing spaces or glob
# characters reaches Python as one single argument.
python polyvore/run_inference.py \
  --checkpoint_path="${CHECKPOINT_DIR}" \
  --json_file="data/label/test_no_dup.json" \
  --image_dir="data/images/" \
  --feature_file="data/features/test_features.pkl" \
  --rnn_type="lstm"

# The alternatives below are disabled; uncomment one block to use it.

# # Extract features of Bi-LSTM without VSE
# CHECKPOINT_DIR="model/model_final/model_bi_no_emb.ckpt"
# python polyvore/run_inference.py \
#   --checkpoint_path="${CHECKPOINT_DIR}" \
#   --json_file="data/label/test_no_dup.json" \
#   --image_dir="data/images/" \
#   --feature_file="data/features/test_features_bi_no_emb.pkl" \
#   --rnn_type="lstm"

# # Extract features of VSE model without LSTM
# CHECKPOINT_DIR="model/model_final/model_emb.ckpt"
# python polyvore/run_inference_vse.py \
#   --checkpoint_path="${CHECKPOINT_DIR}" \
#   --json_file="data/label/test_no_dup.json" \
#   --image_dir="data/images/" \
#   --feature_file="data/features/test_features_emb.pkl"

# # Extract features of Siamese Network
# CHECKPOINT_DIR="model/model_final/model_siamese.ckpt"
# python polyvore/run_inference_siamese.py \
#   --checkpoint_path="${CHECKPOINT_DIR}" \
#   --json_file="data/label/test_no_dup.json" \
#   --image_dir="data/images/" \
#   --feature_file="data/features/test_features_siamese.pkl"
================================================
FILE: fill_in_blank.sh
================================================
#!/bin/bash
# Runs polyvore/fill_in_blank.py on data/label/fill_in_blank_test.json using
# the features in data/features/test_features.pkl, and writes the outcome to
# fill_in_blank_result.pkl.
# Every path below is relative, so invoke this script from the repository root.

# NOTE: despite its name, this variable holds the checkpoint path that is
# passed as --checkpoint_path, not a directory.
CHECKPOINT_DIR="model/model_final/model.ckpt-34865"

# The expansion is quoted so that a checkpoint path containing spaces or glob
# characters reaches Python as one single argument.
python polyvore/fill_in_blank.py \
  --checkpoint_path="${CHECKPOINT_DIR}" \
  --json_file="data/label/fill_in_blank_test.json" \
  --feature_file="data/features/test_features.pkl" \
  --rnn_type="lstm" \
  --direction="2" \
  --result_file="fill_in_blank_result.pkl"

# The alternative below is disabled; uncomment it to use it.

# # Fill in the blank Siamese Network
# CHECKPOINT_DIR="model/model_final/model_siamese.ckpt"
# python polyvore/fill_in_blank_siamese.py \
#   --checkpoint_path="${CHECKPOINT_DIR}" \
#   --json_file="data/label/fill_in_blank_test.json" \
#   --feature_file="data/features/test_features_siamese.pkl" \
#   --result_file="fill_in_blank_siamese_result.pkl"
================================================
FILE: outfit_generation.sh
================================================
#!/bin/bash
# Runs polyvore/set_generation.py for the queries listed in query.json and
# places its output under results/.
# Every path below is relative, so invoke this script from the repository root.

# NOTE: despite its name, this variable holds the checkpoint path that is
# passed as --checkpoint_path, not a directory.
CHECKPOINT_DIR="model/model_final/model.ckpt-34865"

# Run inference on images.
# The expansion is quoted so that a checkpoint path containing spaces or glob
# characters reaches Python as one single argument.
python polyvore/set_generation.py \
  --checkpoint_path="${CHECKPOINT_DIR}" \
  --image_dir="data/images/test_no_dup/" \
  --feature_file="data/features/test_features.pkl" \
  --query_file="query.json" \
  --word_dict_file="data/final_word_dict.txt" \
  --result_dir="results/"
================================================
FILE: polyvore/configuration.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bi-LSTM Polyvore model and training configurations."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
class ModelConfig(object):
  """Holds the model hyperparameters and input-pipeline settings."""

  def __init__(self):
    """Assigns the default value of every model hyperparameter."""
    # ---- Input data -------------------------------------------------------
    # Glob matching the sharded TFRecord files of SequenceExample protos.
    # Has to be set for the training and evaluation modes.
    self.input_file_pattern = None

    # Encoding of the stored images: "jpeg" or "png".
    self.image_format = "jpeg"

    # Rough number of values in each input shard; used so that examples from
    # different shards are mixed sufficiently during training.
    self.values_per_input_shard = 135
    # Lower bound on the number of shards held in the input queue.
    self.input_queue_capacity_factor = 2
    # Thread count for prefetching SequenceExample protos.
    self.num_input_reader_threads = 1

    # Key of the SequenceExample context feature that stores the set id.
    self.set_id_name = "set_id"
    # Keys of the SequenceExample feature lists holding images and captions.
    self.image_feature_name = "images"
    self.image_index_name = "image_index"
    self.caption_feature_name = "caption_ids"

    # Vocabulary size: number of distinct words plus one extra entry
    # (presumably the unknown-word token; the original note is truncated).
    # It is deliberately larger than the expected vocabulary so that different
    # tokenizer versions used in preprocessing still fit. A value that is too
    # large is harmless, one that is too small results in an error.
    self.vocab_size = 2757

    # Thread count for image preprocessing.
    self.num_preprocess_threads = 1

    # Batch size.
    self.batch_size = 10

    # ---- Image encoder ----------------------------------------------------
    # Inception v3 checkpoint used to initialise the Inception variables.
    # Required the first time training is started.
    self.inception_checkpoint_file = None

    # Height and width of the images fed to Inception v3.
    self.image_height = 299
    self.image_width = 299

    # ---- Sequence model ---------------------------------------------------
    # Scale used when initialising the model variables.
    self.initializer_scale = 0.08

    # Input and output dimensionality of the LSTM, in that order.
    # embedding_size doubles as the size of the visual-semantic joint space.
    self.embedding_size = 512
    self.num_lstm_units = 512

    # Dropout keep probability for the LSTM variables (applied when < 1.0).
    self.lstm_dropout_keep_prob = 0.7

    # Maximum number of images in one fashion set.
    self.number_set_images = 8

    # ---- Losses -----------------------------------------------------------
    # Margin of the embedding loss.
    self.emb_margin = 0.2

    # Weights balancing the individual losses.
    self.emb_loss_factor = 1.0  # VSE loss
    self.f_rnn_loss_factor = 1.0  # Forward LSTM
    # Backward LSTM. A lower weight might suit it, since predicting backward
    # is harder than predicting forward in this scenario.
    self.b_rnn_loss_factor = 1.0

    # Kind of recurrent cell: "lstm", "gru" or "rnn".
    self.rnn_type = "lstm"
class TrainingConfig(object):
  """Holds the hyperparameters that control training."""

  def __init__(self):
    """Assigns the default value of every training hyperparameter."""
    # Size of one epoch, counted in training examples.
    self.num_examples_per_epoch = 17316

    # Name of the optimizer used to train the model.
    self.optimizer = "SGD"

    # Learning-rate schedule: starting rate, multiplicative decay factor and
    # the number of epochs between two decays.
    # NOTE(review): the original comment hints that the FLAGS in train.py can
    # override the initial rate -- confirm against train.py.
    self.initial_learning_rate = 0.2
    self.learning_rate_decay_factor = 0.5
    self.num_epochs_per_decay = 2.0

    # Gradients are clipped to this value unless it is None.
    self.clip_gradients = 5.0

    # Number of model checkpoints retained on disk.
    self.max_checkpoints_to_keep = 10
================================================
FILE: polyvore/fashion_compatibility.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Predict the fashion compatibility of a given image sequence."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import json
import tensorflow as tf
import numpy as np
import pickle as pkl
from sklearn import metrics
import configuration
import polyvore_model_bi as polyvore_model
FLAGS = tf.flags.FLAGS

# Command-line flags of the compatibility-prediction script.
tf.flags.DEFINE_string("checkpoint_path", "",
                       "Model checkpoint file or directory containing a "
                       "model checkpoint file.")
tf.flags.DEFINE_string("label_file", "", "Txt file containing test outfits.")
tf.flags.DEFINE_string("feature_file", "", "Files containing image features")
tf.flags.DEFINE_string("rnn_type", "", "Type of RNN.")
tf.flags.DEFINE_string("result_file", "", "File to store the results.")
# The help text is assembled from adjacent string literals, so the first piece
# ends with a space to keep "forward only;" and "-1" from running together.
tf.flags.DEFINE_integer("direction", 2, "2: bidirectional; 1: forward only; "
                        "-1: backward only.")
def _run_rnn_steps(sess, prefix, embeddings, initial_state):
  """Feeds `embeddings` through one RNN direction, one image per step.

  Args:
    sess: Session used to run the inference graph.
    prefix: "f" to address the forward RNN tensors, "b" for the backward ones.
    embeddings: Image embeddings in the order in which they are fed.
    initial_state: State fed at the first step.

  Returns:
    A list holding the logits-layer output fetched at every step.
  """
  fetches = ["lstm/%s_state:0" % prefix,
             "%s_logits/%s_logits/BiasAdd:0" % (prefix, prefix)]
  input_name = "lstm/%s_input_feed:0" % prefix
  state_name = "lstm/%s_state_feed:0" % prefix
  state = initial_state
  outputs = []
  for embedding in embeddings:
    # The state fetched at one step is fed back in at the next one.
    state, output = sess.run(
        fetches=fetches,
        feed_dict={input_name: np.reshape(embedding, [1, -1]),
                   state_name: state})
    outputs.append(output)
  return outputs


def _sequence_score(sess, model, outputs, test_feat_t, next_indices):
  """Returns the negated mean of model.lstm_xent_loss for one direction.

  Different from the training process where the loss is calculated in each
  mini batch, the predictions are scored against the whole test set here.
  This is pretty slow, maybe a better method could be used.
  """
  # NOTE(review): np.squeeze drops every unit axis, so a one-image outfit
  # yields a 1-D array -- confirm that "lstm/pred_feed:0" accepts that shape.
  predictions = np.squeeze(np.dot(np.asarray(outputs), test_feat_t))
  loss = sess.run(model.lstm_xent_loss,
                  feed_dict={"lstm/pred_feed:0": predictions,
                             "lstm/next_index_feed:0": next_indices})
  return -np.mean(loss)


def run_compatibility_inference(sess, image_seqs, test_feat,
                                num_lstm_units, model):
  """Scores one outfit with the forward and/or backward RNN.

  Args:
    sess: Session holding the restored inference graph.
    image_seqs: Row indices into `test_feat` of the outfit's images, in order.
    test_feat: Feature matrix; its last row is used as the prediction target
      that follows the final image of a sequence.
    num_lstm_units: Number of RNN units, used to size the zero state.
    model: Model object exposing `lstm_xent_loss`.

  Returns:
    [f_score, b_score]: the negated mean loss of each direction. A direction
    disabled through FLAGS.direction keeps the score 0.

  Raises:
    ValueError: If `image_seqs` is empty.
  """
  if len(image_seqs) == 0:
    raise ValueError("image_seqs must contain at least one image index.")

  emb_seqs = test_feat[image_seqs, :]
  # The LSTM state is twice as wide as the state of the other RNN types.
  if FLAGS.rnn_type == "lstm":
    zero_state = np.zeros([1, 2 * num_lstm_units])
  else:
    zero_state = np.zeros([1, num_lstm_units])

  end_index = test_feat.shape[0] - 1
  test_feat_t = np.transpose(test_feat)

  f_score = 0
  b_score = 0

  if FLAGS.direction != -1:
    # Forward RNN: every image predicts its successor, the last one the end.
    outputs = _run_rnn_steps(sess, "f", emb_seqs, zero_state)
    f_score = _sequence_score(sess, model, outputs, test_feat_t,
                              list(image_seqs[1:]) + [end_index])

  if FLAGS.direction != 1:
    # Backward RNN: same procedure on the reversed sequence.
    outputs = _run_rnn_steps(sess, "b", emb_seqs[::-1], zero_state)
    b_score = _sequence_score(sess, model, outputs, test_feat_t,
                              list(image_seqs[-2::-1]) + [end_index])

  return [f_score, b_score]
def main(_):
  """Scores every outfit of FLAGS.label_file and reports the AUC.

  Each non-blank line of the label file is read as a label followed by the
  ids of the outfit's images. The labels and the per-direction scores are
  pickled to FLAGS.result_file.
  """
  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model_config = configuration.ModelConfig()
    model_config.rnn_type = FLAGS.rnn_type
    model = polyvore_model.PolyvoreModel(model_config, mode="inference")
    model.build()
    saver = tf.train.Saver()
  g.finalize()

  # Load pre-computed image features.
  # NOTE(review): pickle can execute arbitrary code while loading; only use
  # feature files from a trusted source.
  with open(FLAGS.feature_file, "rb") as f:
    test_data = pkl.load(f)
  # Materialise the keys: a Python 3 dict view cannot be indexed.
  test_ids = list(test_data.keys())
  # test_feat has one more zero vector as the representation of END of
  # RNN prediction.
  test_feat = np.zeros((len(test_ids) + 1,
                        len(test_data[test_ids[0]]["image_rnn_feat"])))
  for i, test_id in enumerate(test_ids):
    # Image feature in the RNN space.
    test_feat[i] = test_data[test_id]["image_rnn_feat"]
  # Constant-time id -> row lookup instead of a list scan for every image.
  id_to_row = {test_id: i for i, test_id in enumerate(test_ids)}

  with open(FLAGS.label_file) as f:
    testset = f.read().splitlines()

  all_f_scores = []
  all_b_scores = []
  all_scores = []
  all_labels = []

  # Bind the session to the graph built above instead of relying on whichever
  # graph happens to be the default one.
  with tf.Session(graph=g) as sess:
    saver.restore(sess, FLAGS.checkpoint_path)

    for k, test_outfit in enumerate(testset, 1):
      if k % 100 == 0:
        print("Finish %d outfits." % k)
      tokens = test_outfit.split()
      if not tokens:
        # Blank line: nothing to score.
        continue
      image_seqs = [id_to_row[image_id] for image_id in tokens[1:]]
      [f_score, b_score] = run_compatibility_inference(
          sess, image_seqs, test_feat, model_config.num_lstm_units, model)
      all_f_scores.append(f_score)
      all_b_scores.append(b_score)
      all_scores.append(f_score + b_score)
      # The label is the first whitespace-separated token of the line.
      all_labels.append(int(tokens[0]))

  # Calculate the AUC of the combined score.
  fpr, tpr, _ = metrics.roc_curve(all_labels, all_scores, pos_label=1)
  print("Compatibility AUC: %f for %d outfits" %
        (metrics.auc(fpr, tpr), len(all_labels)))

  with open(FLAGS.result_file, "wb") as f:
    pkl.dump({"all_labels": all_labels, "all_f_scores": all_f_scores,
              "all_b_scores": all_b_scores}, f)
# Script entry point: only runs when the file is executed directly.
# tf.app.run() presumably parses the flags and calls main(_) -- confirm
# against the TensorFlow documentation.
if __name__ == "__main__":
tf.app.run()
================================================
FILE: polyvore/fill_in_blank.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Fill in blank evaluation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
import tensorflow as tf
import numpy as np
import pickle as pkl
import configuration
import polyvore_model_bi as polyvore_model
FLAGS = tf.flags.FLAGS

# Command-line flags of the fill-in-the-blank evaluation script.
tf.flags.DEFINE_string("checkpoint_path", "",
                       "Model checkpoint file or directory containing a "
                       "model checkpoint file.")
tf.flags.DEFINE_string("json_file", "",
                       "Json file containing questions and answers.")
tf.flags.DEFINE_string("feature_file", "", "pkl files containing the features")
tf.flags.DEFINE_string("rnn_type", "lstm", "Type of RNN.")
tf.flags.DEFINE_string("result_file", "", "File to store the results.")
# The help text is assembled from adjacent string literals, so the first piece
# ends with a space to keep "forward only;" and "-1" from running together.
tf.flags.DEFINE_integer("direction", 2, "2: bidirectional; 1: forward only; "
                        "-1: backward only; 0: Average pooling no RNN.")
def _last_rnn_output(sess, prefix, feats, zero_state):
  """Runs one RNN direction over `feats` and returns the final output.

  Args:
    sess: Session used to run the inference graph.
    prefix: "f" to address the forward RNN tensors, "b" for the backward ones.
    feats: RNN-space features of the items, in the order they are fed.
    zero_state: State fed at the first step.

  Returns:
    The logits-layer output fetched at the last step.

  Raises:
    IndexError: If `feats` is empty.
  """
  if len(feats) == 0:
    raise IndexError("cannot run the RNN on an empty item sequence")
  fetches = ["lstm/%s_state:0" % prefix,
             "%s_logits/%s_logits/BiasAdd:0" % (prefix, prefix)]
  input_name = "lstm/%s_input_feed:0" % prefix
  state_name = "lstm/%s_state_feed:0" % prefix
  state = zero_state
  output = None
  for feat in feats:
    # The state fetched at one step is fed back in at the next one.
    state, output = sess.run(
        fetches=fetches,
        feed_dict={input_name: np.reshape(feat, [1, -1]),
                   state_name: state})
  return output


def _answer_softmax(rnn_output, answer_feats):
  """Softmax, over the candidate answers only, of their match with the output."""
  scores = np.exp(np.dot(rnn_output, np.transpose(answer_feats)))
  return scores / np.sum(scores)


def run_question_inference(sess, question, test_ids, test_feat,
                           test_rnn_feat, num_lstm_units):
  """Ranks the candidate answers of one fill-in-the-blank question.

  Args:
    sess: Session holding the restored inference graph.
    question: Dict with "question" (ids of the given items), "answers" (ids of
      the candidates) and "blank_position" (1-based position of the blank).
    test_ids: List of item ids; the position of an id is its feature row.
    test_feat: Features in the visual-semantic embedding space, one row per id.
    test_rnn_feat: Features in the RNN space, one row per id.
    num_lstm_units: Number of RNN units, used to size the zero state.

  Returns:
    (score, predicted_answer): the candidate scores and the candidate indices
    sorted from best to worst. ([], []) is returned when an id is missing
    from `test_ids`, or when the blank sits at an end of the outfit that the
    single direction selected by FLAGS.direction cannot predict.

  Raises:
    IndexError: If "blank_position" lies outside the outfit.
  """
  # Only a missing id marks the question as invalid; any other error (for
  # instance a `test_ids` object without `index`) is left to propagate.
  try:
    question_ids = [test_ids.index(q) for q in question["question"]]
    answer_ids = [test_ids.index(a) for a in question["answers"]]
  except ValueError:
    return [], []

  blank_posi = question["blank_position"]

  if FLAGS.direction == 0:
    # Only use VSE: cosine similarity between the average-pooled question
    # embedding and each candidate.
    question_emb = np.reshape(np.mean(test_feat[question_ids], 0), [1, -1])
    q_emb = question_emb / np.linalg.norm(question_emb, axis=1)[:, np.newaxis]
    answer_emb = test_feat[answer_ids]
    a_emb = answer_emb / np.linalg.norm(answer_emb, axis=1)[:, np.newaxis]
    vse_score = (np.dot(q_emb, np.transpose(a_emb)) + 1) / 2  # scale to [0,1]
    return vse_score, np.argsort(-vse_score)[0]

  if FLAGS.rnn_type == "lstm":
    # LSTM has two states.
    zero_state = np.zeros([1, 2 * num_lstm_units])
  else:
    zero_state = np.zeros([1, num_lstm_units])

  num_items = len(question_ids)
  answer_feats = test_rnn_feat[answer_ids]

  if blank_posi == num_items + 1:
    # Blank is the last item: only the forward RNN can predict it.
    if FLAGS.direction == -1:
      return [], []
    output = _last_rnn_output(sess, "f", test_rnn_feat[question_ids],
                              zero_state)
    rnn_score = _answer_softmax(output, answer_feats)
  elif blank_posi == 1:
    # Blank is the first item: only the backward RNN can predict it.
    if FLAGS.direction == 1:
      return [], []
    output = _last_rnn_output(sess, "b", test_rnn_feat[question_ids[::-1]],
                              zero_state)
    rnn_score = _answer_softmax(output, answer_feats)
  else:
    # Blank is in the middle.
    if not 1 < blank_posi <= num_items:
      raise IndexError("blank_position %r is outside the outfit" % blank_posi)
    f_softmax = None
    b_softmax = None
    if FLAGS.direction != -1:
      # Forward pass over the items that precede the blank.
      output = _last_rnn_output(
          sess, "f", test_rnn_feat[question_ids[:blank_posi - 1]], zero_state)
      f_softmax = _answer_softmax(output, answer_feats)
    if FLAGS.direction != 1:
      # Backward pass over the items that follow the blank, last item first.
      output = _last_rnn_output(
          sess, "b", test_rnn_feat[question_ids[blank_posi - 1:][::-1]],
          zero_state)
      b_softmax = _answer_softmax(output, answer_feats)
    if FLAGS.direction == 2:
      rnn_score = (f_softmax + b_softmax) / 2
    elif FLAGS.direction == 1:
      rnn_score = f_softmax
    else:
      rnn_score = b_softmax

  predicted_answer = np.argsort(-rnn_score)[0]
  return rnn_score, predicted_answer
def main(_):
  """Runs the fill-in-the-blank evaluation and stores the predictions.

  Prints the top-1 to top-4 accuracy over the questions that could be
  answered and pickles the set ids, predictions and scores to
  FLAGS.result_file.
  """
  top_k = 4  # Print the top_k accuracy.
  true_pred = np.zeros(top_k)

  # Load pre-computed image features.
  # NOTE(review): pickle can execute arbitrary code while loading; only use
  # feature files from a trusted source.
  with open(FLAGS.feature_file, "rb") as f:
    test_data = pkl.load(f)
  # Materialise the keys: a Python 3 dict view supports neither indexing nor
  # the .index() lookup used by run_question_inference.
  test_ids = list(test_data.keys())
  test_feat = np.zeros((len(test_ids),
                        len(test_data[test_ids[0]]["image_feat"])))
  test_rnn_feat = np.zeros((len(test_ids),
                            len(test_data[test_ids[0]]["image_rnn_feat"])))
  for i, test_id in enumerate(test_ids):
    # Image feature in visual-semantic embedding space.
    test_feat[i] = test_data[test_id]["image_feat"]
    # Image feature in the RNN space.
    test_rnn_feat[i] = test_data[test_id]["image_rnn_feat"]

  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model_config = configuration.ModelConfig()
    model_config.rnn_type = FLAGS.rnn_type
    model = polyvore_model.PolyvoreModel(model_config, mode="inference")
    model.build()
    saver = tf.train.Saver()
  g.finalize()

  with open(FLAGS.json_file) as f:
    questions = json.load(f)

  all_pred = []
  set_ids = []
  all_scores = []

  # Bind the session to the graph built above instead of relying on whichever
  # graph happens to be the default one.
  with tf.Session(graph=g) as sess:
    saver.restore(sess, FLAGS.checkpoint_path)
    for question in questions:
      score, pred = run_question_inference(sess, question, test_ids,
                                           test_feat, test_rnn_feat,
                                           model_config.num_lstm_units)
      # An empty prediction marks a question that could not be answered.
      # len() is used because comparing an ndarray with [] is an error on
      # recent NumPy releases.
      if len(pred) == 0:
        continue
      all_pred.append(pred)
      all_scores.append(score)
      set_ids.append(question["question"][0].split("_")[0])
      # 0 is the correct answer, iterate over top_k.
      for i in range(top_k):
        if 0 in pred[:i+1]:
          true_pred[i] += 1

  # Print all top-k accuracy.
  num_valid = len(all_pred)
  for i in range(top_k):
    print("Top %d Accuracy: " % (i + 1))
    print("%d correct answers in %d valid questions." %
          (true_pred[i], num_valid))
    # Without any valid question the accuracy is undefined.
    accuracy = true_pred[i] / num_valid if num_valid else float("nan")
    print("Accuracy: %f" % accuracy)

  # Object array so that score arrays of different shapes can be stored
  # together; the builtin `object` replaces the removed `np.object` alias.
  s = np.empty((len(all_scores),), dtype=object)
  for i in range(len(all_scores)):
    s[i] = all_scores[i]

  with open(FLAGS.result_file, "wb") as f:
    pkl.dump({"set_ids": set_ids, "pred": all_pred, "score": s}, f)
# Script entry point: only runs when the file is executed directly.
# tf.app.run() presumably parses the flags and calls main(_) -- confirm
# against the TensorFlow documentation.
if __name__ == "__main__":
tf.app.run()
================================================
FILE: polyvore/fill_in_blank_siamese.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Fill in blank evaluation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
import tensorflow as tf
import numpy as np
import pickle as pkl
import configuration
import polyvore_model_siamese as polyvore_model
# Handle through which the flag values defined below are read.
FLAGS = tf.flags.FLAGS
# Checkpoint that main() restores into the session.
tf.flags.DEFINE_string("checkpoint_path", "",
"Model checkpoint file or directory containing a "
"model checkpoint file.")
# Questions to evaluate; main() loads this file with json.load.
tf.flags.DEFINE_string("json_file", "",
"Json file containing questions and answers.")
# Pickled features; main() reads the "image_feat" entry of every item.
tf.flags.DEFINE_string("feature_file", "", "pkl files containing the features")
# Destination of the pickled set ids, predictions and scores.
tf.flags.DEFINE_string("result_file", "", "File to store the results.")
def run_question_inference(sess, question, test_ids, test_feat):
  """Ranks the candidate answers of one question by embedding similarity.

  The embeddings of the given items are average-pooled and compared with each
  candidate through cosine similarity, rescaled from [-1, 1] to [0, 1].

  Args:
    sess: Unused; kept so the signature matches the callers.
    question: Dict with "question" (ids of the given items) and "answers"
      (ids of the candidates).
    test_ids: List of item ids; the position of an id is its feature row.
    test_feat: Feature matrix with one row per id.

  Returns:
    (score, predicted_answer): `score` has shape [1, number of candidates];
    `predicted_answer` lists the candidate indices from best to worst.
    ([], []) is returned when an id is missing from `test_ids`.
  """
  # Only a missing id marks the question as invalid; any other error (for
  # instance a `test_ids` object without `index`) is left to propagate.
  try:
    question_ids = [test_ids.index(q) for q in question["question"]]
    answer_ids = [test_ids.index(a) for a in question["answers"]]
  except ValueError:
    return [], []

  # Average pooling of the question embeddings, then L2 normalisation.
  question_emb = np.reshape(np.mean(test_feat[question_ids], 0), [1, -1])
  q_emb = question_emb / np.linalg.norm(question_emb, axis=1)[:, np.newaxis]
  answer_emb = test_feat[answer_ids]
  a_emb = answer_emb / np.linalg.norm(answer_emb, axis=1)[:, np.newaxis]

  score = (np.dot(q_emb, np.transpose(a_emb)) + 1) / 2  # scale to [0,1]
  predicted_answer = np.argsort(-score)[0]
  return score, predicted_answer
def main(_):
  """Runs the fill-in-the-blank evaluation with the Siamese features.

  Prints the top-1 to top-4 accuracy over the questions that could be
  answered and pickles the set ids, predictions and scores to
  FLAGS.result_file.
  """
  top_k = 4  # Print the top_k accuracy.
  true_pred = np.zeros(top_k)

  # Load pre-computed image features.
  # NOTE(review): pickle can execute arbitrary code while loading; only use
  # feature files from a trusted source.
  with open(FLAGS.feature_file, "rb") as f:
    test_data = pkl.load(f)
  # Materialise the keys: a Python 3 dict view supports neither indexing nor
  # the .index() lookup used by run_question_inference.
  test_ids = list(test_data.keys())
  test_feat = np.zeros((len(test_ids),
                        len(test_data[test_ids[0]]["image_feat"])))
  for i, test_id in enumerate(test_ids):
    # Image feature in visual-semantic embedding space.
    test_feat[i] = test_data[test_id]["image_feat"]

  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model_config = configuration.ModelConfig()
    model = polyvore_model.PolyvoreModel(model_config, mode="inference")
    model.build()
    saver = tf.train.Saver()
  g.finalize()

  with open(FLAGS.json_file) as f:
    questions = json.load(f)

  all_pred = []
  set_ids = []
  all_scores = []

  # Bind the session to the graph built above instead of relying on whichever
  # graph happens to be the default one.
  with tf.Session(graph=g) as sess:
    saver.restore(sess, FLAGS.checkpoint_path)
    for question in questions:
      score, pred = run_question_inference(sess, question, test_ids,
                                           test_feat)
      # An empty prediction marks a question that could not be answered.
      # len() is used because comparing an ndarray with [] is an error on
      # recent NumPy releases.
      if len(pred) == 0:
        continue
      all_pred.append(pred)
      all_scores.append(score)
      set_ids.append(question["question"][0].split("_")[0])
      # 0 is the correct answer, iterate over top_k.
      for i in range(top_k):
        if 0 in pred[:i+1]:
          true_pred[i] += 1

  # Print all top-k accuracy.
  num_valid = len(all_pred)
  for i in range(top_k):
    print("Top %d Accuracy: " % (i + 1))
    print("%d correct answers in %d valid questions." %
          (true_pred[i], num_valid))
    # Without any valid question the accuracy is undefined.
    accuracy = true_pred[i] / num_valid if num_valid else float("nan")
    print("Accuracy: %f" % accuracy)

  # Object array so that the score arrays are stored as separate elements;
  # the builtin `object` replaces the removed `np.object` alias.
  s = np.empty((len(all_scores),), dtype=object)
  for i in range(len(all_scores)):
    s[i] = all_scores[i]

  with open(FLAGS.result_file, "wb") as f:
    pkl.dump({"set_ids": set_ids, "pred": all_pred, "score": s}, f)
# Script entry point: only runs when the file is executed directly.
# tf.app.run() presumably parses the flags and calls main(_) -- confirm
# against the TensorFlow documentation.
if __name__ == "__main__":
tf.app.run()
================================================
FILE: polyvore/ops/__init__.py
================================================
================================================
FILE: polyvore/ops/image_embedding.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Image embedding ops."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib.slim.python.slim.nets.inception_v3 import inception_v3_base
# Short alias for the TF-Slim API; inception_v3 below builds its layers and
# argument scopes through it.
slim = tf.contrib.slim
def inception_v3(images,
                 trainable=True,
                 is_training=True,
                 weight_decay=0.00004,
                 stddev=0.1,
                 dropout_keep_prob=0.8,
                 use_batch_norm=True,
                 batch_norm_params=None,
                 add_summaries=True,
                 scope="InceptionV3"):
  """Creates the Inception V3 sub-graph used to embed images.

  Args:
    images: A float32 Tensor of shape [batch, height, width, channels].
    trainable: Whether the Inception variables may be trained.
    is_training: Whether the graph is built for training.
    weight_decay: Coefficient of the L2 weight regularizer (only applied when
      `trainable` is true).
    stddev: Standard deviation of the truncated-normal weight initializer.
    dropout_keep_prob: Keep probability of the dropout layer.
    use_batch_norm: Whether batch-normalization parameters are supplied to
      the convolutions.
    batch_norm_params: Batch-normalization parameters; defaults are used when
      empty. See tf.contrib.layers.batch_norm for details.
    add_summaries: Whether to add a summary for every activation.
    scope: Optional variable scope.

  Returns:
    The network output after average pooling, dropout and flattening. (The
    dictionary of end points is only used for the summaries.)
  """
  # The Inception model only counts as "in training" when it is trainable.
  inception_is_training = trainable and is_training

  if not use_batch_norm:
    batch_norm_params = None
  elif not batch_norm_params:
    # Default parameters for batch normalization.
    batch_norm_params = {
        "is_training": inception_is_training,
        "trainable": trainable,
        # Decay for the moving averages.
        "decay": 0.9997,
        # Epsilon to prevent 0s in variance.
        "epsilon": 0.001,
        # Collection containing the moving mean and moving variance.
        "variables_collections": {
            "beta": None,
            "gamma": None,
            "moving_mean": ["moving_vars"],
            "moving_variance": ["moving_vars"],
        }
    }

  # Frozen weights are not regularized.
  weights_regularizer = (
      tf.contrib.layers.l2_regularizer(weight_decay) if trainable else None)

  with tf.variable_scope(scope, "InceptionV3", [images]) as inception_scope:
    # Outer scope: defaults shared by convolutions and fully connected layers.
    # Inner scope: convolution-only defaults.
    with slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
        weights_regularizer=weights_regularizer,
        trainable=trainable), slim.arg_scope(
            [slim.conv2d],
            weights_initializer=tf.truncated_normal_initializer(stddev=stddev),
            activation_fn=tf.nn.relu,
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params):
      net, end_points = inception_v3_base(images, scope=inception_scope)
      with tf.variable_scope("logits"):
        # Pool over the full spatial extent of the final feature map.
        spatial_dims = net.get_shape()[1:3]
        net = slim.avg_pool2d(net, spatial_dims, padding="VALID", scope="pool")
        net = slim.dropout(
            net,
            keep_prob=dropout_keep_prob,
            is_training=inception_is_training,
            scope="dropout")
        net = slim.flatten(net, scope="flatten")

  # Add summaries.
  if add_summaries:
    for activation in end_points.values():
      tf.contrib.layers.summaries.summarize_activation(activation)

  return net
================================================
FILE: polyvore/ops/image_embedding_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow_models.im2txt.ops.image_embedding."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from polyvore.ops import image_embedding
class InceptionV3Test(tf.test.TestCase):
  """Checks the graph built by image_embedding.inception_v3."""

  def setUp(self):
    super(InceptionV3Test, self).setUp()
    self._batch_size = 4
    # Placeholder for a batch of 299x299 images with 3 channels.
    self._images = tf.placeholder(tf.float32, [self._batch_size, 299, 299, 3])

  def _countInceptionParameters(self):
    """Counts the number of parameters in the inception model at top scope."""
    counter = {}
    for variable in tf.all_variables():
      name_tokens = variable.op.name.split("/")
      if name_tokens[0] != "InceptionV3":
        continue
      # Group the counts by the first scope level below "InceptionV3".
      name = "InceptionV3/" + name_tokens[1]
      num_params = variable.get_shape().num_elements()
      assert num_params
      counter[name] = counter.get(name, 0) + num_params
    return counter

  def _verifyParameterCounts(self):
    """Verifies the number of parameters in the inception model."""
    expected_param_counts = {
        "InceptionV3/Conv2d_1a_3x3": 960,
        "InceptionV3/Conv2d_2a_3x3": 9312,
        "InceptionV3/Conv2d_2b_3x3": 18624,
        "InceptionV3/Conv2d_3b_1x1": 5360,
        "InceptionV3/Conv2d_4a_3x3": 138816,
        "InceptionV3/Mixed_5b": 256368,
        "InceptionV3/Mixed_5c": 277968,
        "InceptionV3/Mixed_5d": 285648,
        "InceptionV3/Mixed_6a": 1153920,
        "InceptionV3/Mixed_6b": 1298944,
        "InceptionV3/Mixed_6c": 1692736,
        "InceptionV3/Mixed_6d": 1692736,
        "InceptionV3/Mixed_6e": 2143872,
        "InceptionV3/Mixed_7a": 1699584,
        "InceptionV3/Mixed_7b": 5047872,
        "InceptionV3/Mixed_7c": 6080064,
    }
    self.assertDictEqual(expected_param_counts,
                         self._countInceptionParameters())

  def _assertCollectionSize(self, expected_size, collection):
    """Fails unless `collection` holds exactly `expected_size` items."""
    actual_size = len(tf.get_collection(collection))
    if expected_size != actual_size:
      self.fail("Found %d items in collection %s (expected %d)." %
                (actual_size, collection, expected_size))

  def _checkEmbeddingGraph(self, trainable, is_training, num_trainable,
                           num_update_ops, num_regularization_losses):
    """Builds the embedding graph and checks its shape and collections.

    The variable, loss and summary counts are the same for every mode; only
    the three counts passed in depend on `trainable` and `is_training`.
    """
    embeddings = image_embedding.inception_v3(
        self._images, trainable=trainable, is_training=is_training)
    self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list())
    self._verifyParameterCounts()
    self._assertCollectionSize(376, tf.GraphKeys.VARIABLES)
    self._assertCollectionSize(num_trainable, tf.GraphKeys.TRAINABLE_VARIABLES)
    self._assertCollectionSize(num_update_ops, tf.GraphKeys.UPDATE_OPS)
    self._assertCollectionSize(num_regularization_losses,
                               tf.GraphKeys.REGULARIZATION_LOSSES)
    self._assertCollectionSize(0, tf.GraphKeys.LOSSES)
    self._assertCollectionSize(23, tf.GraphKeys.SUMMARIES)

  def testTrainableTrueIsTrainingTrue(self):
    self._checkEmbeddingGraph(trainable=True, is_training=True,
                              num_trainable=188, num_update_ops=188,
                              num_regularization_losses=94)

  def testTrainableTrueIsTrainingFalse(self):
    self._checkEmbeddingGraph(trainable=True, is_training=False,
                              num_trainable=188, num_update_ops=0,
                              num_regularization_losses=94)

  def testTrainableFalseIsTrainingTrue(self):
    self._checkEmbeddingGraph(trainable=False, is_training=True,
                              num_trainable=0, num_update_ops=0,
                              num_regularization_losses=0)

  def testTrainableFalseIsTrainingFalse(self):
    self._checkEmbeddingGraph(trainable=False, is_training=False,
                              num_trainable=0, num_update_ops=0,
                              num_regularization_losses=0)
# Invokes the TensorFlow test runner when the file is executed directly.
if __name__ == "__main__":
tf.test.main()
================================================
FILE: polyvore/ops/image_processing.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper functions for image preprocessing."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
def distort_image(image):
  """Applies the random training-time distortion to one image.

  The only distortion is a random left/right flip; colours are left alone.

  Args:
    image: A float32 Tensor of shape [height, width, 3] with values in [0, 1).

  Returns:
    distorted_image: A float32 Tensor of shape [height, width, 3] with values in
      [0, 1].
  """
  with tf.name_scope("flip_horizontal", values=[image]):
    flipped = tf.image.random_flip_left_right(image)
  return flipped
def process_image(encoded_image,
                  is_training,
                  height,
                  width,
                  resize_height=299,
                  resize_width=299,
                  image_format="jpeg",
                  image_idx=0):
  """Decodes one image, resizes it and, for training, distorts it randomly.

  Args:
    encoded_image: String Tensor containing the image.
    is_training: Boolean; whether preprocessing for training or eval.
    height: Height of the output image.
    width: Width of the output image.
    resize_height: If > 0, resize height before crop to final dimensions.
    resize_width: If > 0, resize width before crop to final dimensions.
    image_format: "jpeg" or "png".
    image_idx: Index of the image within its outfit; appended to the names
      of the image summaries.

  Returns:
    A float32 Tensor of shape [height, width, 3] with values in [-1, 1].

  Raises:
    ValueError: If image_format is invalid.
  """
  summary_suffix = str(image_idx)

  def log_image(tag, tensor):
    # The image summary is given the image with a leading batch axis added.
    # A summary is written on every call.
    tf.image_summary(tag, tf.expand_dims(tensor, 0))

  # Decode into a Tensor of shape [?, ?, 3]; the conversion below turns it
  # into float32 with values in [0, 1).
  with tf.name_scope("decode", values=[encoded_image]):
    if image_format == "jpeg":
      decode = tf.image.decode_jpeg
    elif image_format == "png":
      decode = tf.image.decode_png
    else:
      raise ValueError("Invalid image format: %s" % image_format)
    image = decode(encoded_image, channels=3)
  image = tf.image.convert_image_dtype(image, dtype=tf.float32)
  log_image("original_image/" + summary_suffix, image)

  # Optional resize; both target dimensions have to be enabled together.
  assert (resize_height > 0) == (resize_width > 0)
  if resize_height:
    image = tf.image.resize_images(image,
                                   size=[resize_height, resize_width],
                                   method=tf.image.ResizeMethod.BILINEAR)

  # Bring the image to its final dimensions: random crop for training,
  # central crop or pad otherwise. With height == resize_height and
  # width == resize_width, as in the Polyvore model, nothing is cropped.
  if is_training:
    image = tf.random_crop(image, [height, width, 3])
  else:
    image = tf.image.resize_image_with_crop_or_pad(image, height, width)
  log_image("resized_image/" + summary_suffix, image)

  # Random distortion, training only.
  if is_training:
    image = distort_image(image)
  log_image("final_image/" + summary_suffix, image)

  # Map the value range from [0, 1] to [-1, 1].
  centered = tf.sub(image, 0.5)
  return tf.mul(centered, 2.0)
================================================
FILE: polyvore/ops/inputs.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Input ops."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
def parse_sequence_example(serialized, set_id, image_feature,
                           image_index, caption_feature, number_set_images):
  """Parse one serialized SequenceExample describing an outfit.

  Args:
    serialized: A scalar string Tensor; a single serialized SequenceExample.
    set_id: Name of the context feature holding the outfit id.
    image_feature: Prefix of the context features holding the encoded images;
      the i-th image is read from "<image_feature>/<i>".
    image_index: Name of the feature list holding the index of each item in
      the outfit.
    caption_feature: Name of the feature list holding the integer captions.
    number_set_images: Number of image slots to read per outfit.

  Returns:
    set_id: The outfit id (string Tensor).
    encoded_images: A Python list of number_set_images string Tensors, one per
      image slot; a slot missing from the example yields the empty string.
    image_ids: int64 Tensor with the item indices of the outfit.
    captions: The int64 captions, densified from their sparse representation.
    likes: Number of likes of the outfit (int64, 0 when absent). The feature
      name is hard coded and the value is not used by the model.
  """
  # Context keys of the individual images: "<image_feature>/0", "/1", ...
  image_keys = [image_feature + '/' + str(slot)
                for slot in range(number_set_images)]

  context_spec = {
      set_id: tf.FixedLenFeature([], dtype=tf.string),
      'likes': tf.FixedLenFeature([], dtype=tf.int64, default_value=0),
  }
  for key in image_keys:
    # Outfits may have fewer items than slots; default those to ''.
    context_spec[key] = tf.FixedLenFeature([],
                                           dtype=tf.string,
                                           default_value='')

  sequence_spec = {
      image_index: tf.FixedLenSequenceFeature([], dtype=tf.int64),
      caption_feature: tf.VarLenFeature(dtype=tf.int64),
  }

  context, sequence = tf.parse_single_sequence_example(
      serialized,
      context_features=context_spec,
      sequence_features=sequence_spec)

  outfit_id = context[set_id]
  likes = context['likes']
  encoded_images = [context[key] for key in image_keys]
  # Captions are variable length, so they arrive sparse; pad them to dense.
  captions = tf.sparse_tensor_to_dense(sequence[caption_feature])
  image_ids = sequence[image_index]
  return outfit_id, encoded_images, image_ids, captions, likes
def prefetch_input_data(reader,
                        file_pattern,
                        is_training,
                        batch_size,
                        values_per_shard,
                        input_queue_capacity_factor=16,
                        num_reader_threads=1,
                        shard_queue_name="filename_queue",
                        value_queue_name="input_queue"):
  """Read serialized records from disk into a prefetch queue.

  For training a RandomShuffleQueue is used, and its size matters: a larger
  queue mixes examples from different shards better. At least
  values_per_shard * input_queue_capacity_factor values are kept in it, so
  input_queue_capacity_factor trades mixing quality against memory usage.
  For evaluation a FIFOQueue is used and the files are read in order.

  Args:
    reader: Instance of tf.ReaderBase.
    file_pattern: Comma-separated list of file patterns (e.g.
      /tmp/train_data-?????-of-00100).
    is_training: Boolean; whether prefetching for training or eval.
    batch_size: Model batch size used to determine queue capacity.
    values_per_shard: Approximate number of values per shard.
    input_queue_capacity_factor: Minimum number of values to keep in the queue
      in multiples of values_per_shard. See comments above.
    num_reader_threads: Number of reader threads to fill the queue.
    shard_queue_name: Name for the shards filename queue.
    value_queue_name: Name for the values input queue.

  Returns:
    A Queue containing prefetched string values.
  """
  # Expand every comma-separated pattern into concrete file names.
  matched_files = []
  for glob_pattern in file_pattern.split(","):
    matched_files += tf.gfile.Glob(glob_pattern)

  if matched_files:
    tf.logging.info("Prefetching values from %d files matching %s",
                    len(matched_files), file_pattern)
  else:
    tf.logging.fatal("Found no input files matching %s", file_pattern)

  if is_training:
    # Shuffle both the shard order and the records themselves.
    filename_queue = tf.train.string_input_producer(
        matched_files, shuffle=True, capacity=16, name=shard_queue_name)
    min_after_dequeue = values_per_shard * input_queue_capacity_factor
    capacity = min_after_dequeue + 100 * batch_size
    values_queue = tf.RandomShuffleQueue(
        capacity=capacity,
        min_after_dequeue=min_after_dequeue,
        dtypes=[tf.string],
        name="random_" + value_queue_name)
  else:
    # Deterministic, in-order reading for evaluation.
    filename_queue = tf.train.string_input_producer(
        matched_files, shuffle=False, capacity=1, name=shard_queue_name)
    capacity = values_per_shard + 3 * batch_size
    values_queue = tf.FIFOQueue(
        capacity=capacity, dtypes=[tf.string], name="fifo_" + value_queue_name)

  # One read + enqueue op per reader thread.
  enqueue_ops = []
  for _ in range(num_reader_threads):
    record = reader.read(filename_queue)[1]
    enqueue_ops.append(values_queue.enqueue([record]))
  runner = tf.train.queue_runner.QueueRunner(values_queue, enqueue_ops)
  tf.train.queue_runner.add_queue_runner(runner)

  # Track how full the queue is.
  summary_tag = "queue/%s/fraction_of_%d_full" % (values_queue.name, capacity)
  tf.scalar_summary(
      summary_tag,
      tf.cast(values_queue.size(), tf.float32) * (1. / capacity))
  return values_queue
def batch_with_dynamic_pad(images_and_captions,
                           batch_size,
                           queue_capacity,
                           add_summaries=True):
  """Batch per-outfit tensors, padding variable-length ones per batch.

  For every outfit three 0/1 indicators are built before batching:
    * an all-ones vector with one entry per item in the outfit (the input
      mask),
    * a second all-ones vector of the same length (the loss mask),
    * a caption mask that is 1 wherever the caption id is non-zero.
  tf.train.batch_join(dynamic_pad=True) then pads every variable-length
  tensor, so padded positions of the masks end up as 0.

  Example of a padded mask for a batch of three outfits with 4, 3 and 2
  items:
    [
      [ 1 1 1 1 ],
      [ 1 1 1 0 ],
      [ 1 1 0 0 ],
    ]

  Args:
    images_and_captions: A list of
      (set_id, images, image_ids, captions, likes) entries, one per
      preprocessing thread; images is a list of image Tensors.
    batch_size: Batch size.
    queue_capacity: Queue capacity.
    add_summaries: If true, add sequence length summaries.

  Returns:
    A tuple (set_ids, images, image_ids, mask, loss_mask, captions, cap_mask,
    likes) of batched and padded Tensors.
  """
  per_thread_tensors = []
  for set_id, images, image_ids, captions, likes in images_and_captions:
    num_items = tf.shape(image_ids)[0]
    # The whole sequence is used as input (the offset was changed from 1
    # to 0).
    input_length = tf.sub(num_items, 0)
    # 1 where a caption word is present, 0 for zero-valued (padding) ids.
    cap_indicator = tf.cast(
        tf.not_equal(captions, tf.zeros_like(captions)), tf.int32)
    indicator = tf.ones(tf.expand_dims(input_length, 0), dtype=tf.int32)
    loss_indicator = tf.ones(tf.expand_dims(num_items, 0), dtype=tf.int32)
    # Stack the list of images of one outfit into a single Tensor.
    stacked_images = tf.pack(images)
    per_thread_tensors.append([set_id, stacked_images, indicator,
                               loss_indicator, image_ids, captions,
                               cap_indicator, likes])

  batched = tf.train.batch_join(per_thread_tensors,
                                batch_size=batch_size,
                                capacity=queue_capacity,
                                dynamic_pad=True,
                                name="batch_and_pad")
  (set_ids, images, mask, loss_mask, image_ids,
   captions, cap_mask, likes) = batched

  if add_summaries:
    lengths = tf.add(tf.reduce_sum(mask, 1), 1)
    tf.scalar_summary("caption_length/batch_min", tf.reduce_min(lengths))
    tf.scalar_summary("caption_length/batch_max", tf.reduce_max(lengths))
    tf.scalar_summary("caption_length/batch_mean", tf.reduce_mean(lengths))

  # Note: the return order differs from the enqueue order (image_ids moves
  # in front of the masks).
  return (set_ids, images, image_ids, mask, loss_mask, captions, cap_mask,
          likes)
================================================
FILE: polyvore/polyvore_model_bi.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Polyvore model used in ACM MM"17 paper
"Learning Fashion Compatibility with Bidirectional LSTMs"
Link: https://arxiv.org/pdf/1707.05691.pdf
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from ops import image_embedding
from ops import image_processing
from ops import inputs as input_ops
class PolyvoreModel(object):
""" Model for fashion set on Polyvore dataset.
"""
def __init__(self, config, mode, train_inception=False):
  """Store the configuration and declare every attribute filled in by build().

  Args:
    config: Object containing configuration parameters.
    mode: "train", "eval" or "inference".
    train_inception: Whether the inception submodel variables are trainable.
  """
  assert mode in ["train", "eval", "inference"]
  self.config = config
  self.mode = mode
  self.train_inception = train_inception

  # Reader for the TFRecord input files.
  self.reader = tf.TFRecordReader()

  # As in the "Show and Tell" paper, all variables are initialized from a
  # symmetric uniform distribution.
  scale = self.config.initializer_scale
  self.initializer = tf.random_uniform_initializer(minval=-scale,
                                                   maxval=scale)

  # ---- Inputs ------------------------------------------------------------
  # float32 Tensor, [batch_size, num_images, height, width, channels], where
  # num_images is the number of images in one outfit (default is 8).
  self.images = None
  # Forward / backward RNN input and target sequences; int32 Tensors with
  # shape [batch_size, padded_length].
  self.f_input_seqs = self.f_target_seqs = None
  self.b_input_seqs = self.b_target_seqs = None
  # int32 0/1 Tensor with shape [batch_size, padded_length].
  self.input_mask = None
  # Caption word ids, int32 [batch_size, num_images, padded_length], and the
  # matching 0/1 mask of non-padding words.
  self.cap_seqs = None
  self.cap_mask = None

  # ---- Embeddings --------------------------------------------------------
  # Bag-of-words caption embeddings, float32
  # [batch_size, num_images, embedding_size].
  self.seq_embeddings = None
  # Image embeddings in the joint visual-semantic space, float32
  # [batch_size, num_images, embedding_size].
  self.image_embeddings = None
  # Image embeddings in the RNN output/prediction space.
  self.rnn_image_embeddings = None
  # Word embedding map.
  self.embedding_map = None

  # ---- Losses ------------------------------------------------------------
  # float32 scalar Tensor; the total loss for the trainer to optimize.
  self.total_loss = None
  # Forward and backward RNN losses, float32 [batch_size * padded_length].
  self.forward_losses = self.backward_losses = None
  # RNN loss, forward + backward.
  self.lstm_losses = None
  # Loss mask for the RNN loss.
  self.loss_mask = None
  # Visual-semantic embedding loss and its weights, float32
  # [batch_size * padded_length].
  self.emb_losses = None
  self.target_weights = None

  # ---- Bookkeeping -------------------------------------------------------
  # Collection of variables from the inception submodel.
  self.inception_variables = []
  # Function to restore the inception submodel from checkpoint.
  self.init_fn = None
  # Global step Tensor.
  self.global_step = None

  # ---- Debugging outputs -------------------------------------------------
  self.target_embeddings = self.input_embeddings = None
  self.set_ids = None
  self.f_lstm_state = self.b_lstm_state = None
  self.lstm_output = None
  self.lstm_xent_loss = None
def is_training(self):
  """Tell whether this model was constructed with mode "train"."""
  mode = self.mode
  return mode == "train"
def process_image(self, encoded_image, thread_id=0, image_idx=0):
  """Decode and preprocess one encoded image with this model's settings.

  Thin wrapper around image_processing.process_image that supplies the
  output size and image format from the config and the training flag from
  the model mode.

  Args:
    encoded_image: A scalar string Tensor; the encoded image.
    thread_id: Preprocessing thread id. Accepted for interface
      compatibility; it is not forwarded and has no effect.
    image_idx: Index of the image in an outfit. Only used for summaries.

  Returns:
    A float32 Tensor of shape [height, width, 3]; the processed image.
  """
  cfg = self.config
  return image_processing.process_image(
      encoded_image,
      is_training=self.is_training(),
      height=cfg.image_height,
      width=cfg.image_width,
      image_format=cfg.image_format,
      image_idx=image_idx)
def build_inputs(self):
  """Create the input tensors: placeholders for inference, queues otherwise.

  Outputs:
    self.images
    self.input_mask
    self.loss_mask
    self.cap_seqs
    self.cap_mask
    self.set_ids
  """
  cfg = self.config

  if self.mode == "inference":
    # Everything is fed through placeholders at inference time.
    image_feed = tf.placeholder(dtype=tf.string, shape=[], name="image_feed")
    processed_image = self.process_image(image_feed)
    input_feed = tf.placeholder(dtype=tf.int64,
                                shape=[None],  # batch_size
                                name="input_feed")

    # Insert the batch dimensions.
    image_seqs = tf.expand_dims(processed_image, 0)
    cap_seqs = tf.expand_dims(input_feed, 1)

    input_mask = tf.placeholder(dtype=tf.int64,
                                shape=[1, 8],  # batch_size
                                name="input_mask")
    # No caption mask, loss mask or set ids in inference mode.
    cap_mask = loss_mask = set_ids = None
  else:
    # Queue of serialized SequenceExample protos read from disk.
    input_queue = input_ops.prefetch_input_data(
        self.reader,
        cfg.input_file_pattern,
        is_training=self.is_training(),
        batch_size=cfg.batch_size,
        values_per_shard=cfg.values_per_input_shard,
        input_queue_capacity_factor=cfg.input_queue_capacity_factor,
        num_reader_threads=cfg.num_input_reader_threads)

    # One parse + preprocess pipeline per preprocessing thread. The Polyvore
    # model only uses a single thread, and likes are not used.
    images_and_captions = []
    for _ in range(cfg.num_preprocess_threads):
      serialized_sequence_example = input_queue.dequeue()
      parsed = input_ops.parse_sequence_example(
          serialized_sequence_example,
          set_id=cfg.set_id_name,
          image_feature=cfg.image_feature_name,
          image_index=cfg.image_index_name,
          caption_feature=cfg.caption_feature_name,
          number_set_images=cfg.number_set_images)
      set_id, encoded_images, image_ids, captions, likes = parsed
      images = [self.process_image(encoded_images[slot], image_idx=slot)
                for slot in range(cfg.number_set_images)]
      images_and_captions.append(
          [set_id, images, image_ids, captions, likes])

    # Batch inputs.
    queue_capacity = 5 * cfg.num_preprocess_threads * cfg.batch_size
    batched = input_ops.batch_with_dynamic_pad(
        images_and_captions,
        batch_size=cfg.batch_size,
        queue_capacity=queue_capacity)
    (set_ids, image_seqs, image_ids, input_mask,
     loss_mask, cap_seqs, cap_mask, likes) = batched

  self.images = image_seqs
  self.input_mask = input_mask
  self.loss_mask = loss_mask
  self.cap_seqs = cap_seqs
  self.cap_mask = cap_mask
  self.set_ids = set_ids
def build_image_embeddings(self):
  """Builds the image model subgraph and generates image embeddings
  in visual semantic joint space and RNN prediction space.

  All images of all outfits in the batch are flattened into one 4D batch,
  run through Inception v3, and projected by two separate bias-free linear
  layers: one into the joint visual-semantic space and one into the RNN
  prediction space. Both results are reshaped back to one row of embeddings
  per outfit.

  Inputs:
    self.images

  Outputs:
    self.image_embeddings
    self.rnn_image_embeddings
    self.inception_variables
  """
  # Collapse the 5D [batch, num_images, height, width, 3] tensor into a 4D
  # image batch. The third dimension is the image *width*; using the height
  # twice is only correct for square images.
  images = tf.reshape(self.images, [-1,
                                    self.config.image_height,
                                    self.config.image_width,
                                    3])
  inception_output = image_embedding.inception_v3(
      images,
      trainable=self.train_inception,
      is_training=self.is_training())
  # Remember the Inception variables so they can be restored from the
  # pretrained checkpoint separately.
  self.inception_variables = tf.get_collection(
      tf.GraphKeys.VARIABLES, scope="InceptionV3")

  # Map inception output into the joint visual-semantic embedding space.
  with tf.variable_scope("image_embedding") as scope:
    image_embeddings = tf.contrib.layers.fully_connected(
        inputs=inception_output,
        num_outputs=self.config.embedding_size,
        activation_fn=None,
        weights_initializer=self.initializer,
        biases_initializer=None,
        scope=scope)

  # Map inception output into the RNN prediction space.
  with tf.variable_scope("rnn_image_embedding") as scope:
    rnn_image_embeddings = tf.contrib.layers.fully_connected(
        inputs=inception_output,
        num_outputs=self.config.embedding_size,
        activation_fn=None,
        weights_initializer=self.initializer,
        biases_initializer=None,
        scope=scope)

  # Save the embedding size in the graph.
  tf.constant(self.config.embedding_size, name="embedding_size")

  # Restore the per-outfit grouping: [batch, num_images, embedding_size].
  self.image_embeddings = tf.reshape(image_embeddings,
                                     [tf.shape(self.images)[0],
                                      -1,
                                      self.config.embedding_size])
  self.rnn_image_embeddings = tf.reshape(rnn_image_embeddings,
                                         [tf.shape(self.images)[0],
                                          -1,
                                          self.config.embedding_size])
def build_seq_embeddings(self):
  """Look up word embeddings for the captions and pool them per caption.

  Inputs:
    self.cap_seqs
    self.cap_mask (not used in inference mode)

  Outputs:
    self.seq_embeddings
    self.embedding_map
  """
  cfg = self.config
  with tf.variable_scope("seq_embedding"), tf.device("/cpu:0"):
    word_table = tf.get_variable(
        name="map",
        shape=[cfg.vocab_size, cfg.embedding_size],
        initializer=self.initializer)
    pooled = tf.nn.embedding_lookup(word_table, self.cap_seqs)

    # Bag of words: multiplying the 0/1 caption mask with the word
    # embeddings pools the embeddings of the real (non-padding) words of
    # each caption; the singleton axis left by the matmul is squeezed away.
    if self.mode != "inference":
      word_mask = tf.cast(tf.expand_dims(self.cap_mask, 2), tf.float32)
      pooled = tf.squeeze(tf.batch_matmul(word_mask, pooled), [2])

  self.embedding_map = word_table
  self.seq_embeddings = pooled
def build_model(self):
"""Builds the model.
Three parts are assembled on top of the image and caption embeddings:
1. A contrastive visual-semantic embedding loss between the L2-normalized
caption and image embeddings (skipped in inference mode).
2. A forward and a backward recurrent cell (LSTM, GRU or basic RNN,
selected by config.rnn_type). In inference mode each cell is run for a
single step from placeholders; otherwise both are run over
self.rnn_image_embeddings with tf.nn.bidirectional_dynamic_rnn.
3. One softmax loss per direction that scores every projected RNN output
against the "next item" embeddings of all items kept by the loss mask in
the minibatch (skipped in inference mode).
Inputs:
self.image_embeddings, self.rnn_image_embeddings, self.seq_embeddings,
self.cap_mask, self.input_mask, self.loss_mask
Outputs (train / eval):
self.total_loss, self.emb_losses, self.lstm_losses, self.target_weights,
self.loss_mask, self.input_mask, self.target_embeddings,
self.input_embeddings
Outputs (inference):
self.lstm_xent_loss, and for rnn_type "lstm" also self.f_lstm_state and
self.b_lstm_state.
The original code is written with Tensorflow r0.10
for Tensorflow > r1.0, many functions can be simplified.
For example Tensors support slicing now, so no need to use tf.slice()
"""
# L2-normalize both embeddings along the embedding axis.
norm_image_embeddings = tf.nn.l2_normalize(self.image_embeddings, 2,
name="norm_image_embeddings")
norm_seq_embeddings = tf.nn.l2_normalize(self.seq_embeddings, 2)
# Pad the caption embeddings along axis 1 up to number_set_images entries.
norm_seq_embeddings = (
tf.pad(norm_seq_embeddings, [[0, 0],
[0, self.config.number_set_images - tf.shape(norm_seq_embeddings)[1]],
[0, 0]], name="norm_seq_embeddings"))
if self.mode == "inference":
pass
else:
# Compute losses for joint embedding.
# Only look at the captions that have length >= 2.
emb_loss_mask = tf.greater(tf.reduce_sum(self.cap_mask, 2), 1)
# Image mask is padded to max length.
emb_loss_mask = tf.pad(emb_loss_mask,
[[0,0],
[0, self.config.number_set_images - tf.shape(emb_loss_mask)[1]]])
# Select the valid image-caption pair.
# Both embeddings are flattened to one row per (outfit, item) slot and the
# rows whose caption is too short are dropped with the same mask.
emb_loss_mask = tf.reshape(emb_loss_mask, [-1])
norm_image_embeddings = tf.reshape(norm_image_embeddings,
[self.config.number_set_images * self.config.batch_size,
self.config.embedding_size])
norm_image_embeddings = tf.boolean_mask(norm_image_embeddings,
emb_loss_mask)
norm_seq_embeddings = tf.reshape(norm_seq_embeddings,
[self.config.number_set_images * self.config.batch_size,
self.config.embedding_size])
norm_seq_embeddings = tf.boolean_mask(norm_seq_embeddings, emb_loss_mask)
# The following defines contrastive loss in the joint space.
# Reference: https://github.com/ryankiros/visual-semantic-embedding/blob/master/model.py#L39
# scores[i, j] is the similarity of caption i and image j; the diagonal
# holds the matching pairs.
scores = tf.matmul(norm_seq_embeddings, norm_image_embeddings,
transpose_a=False, transpose_b=True, name="scores")
diagonal = tf.expand_dims(tf.diag_part(scores), 1)
# Hinge costs of every score against the matching-pair score, once with
# the diagonal broadcast along rows and once along columns.
cost_s = tf.maximum(0.0, self.config.emb_margin - diagonal + scores)
cost_im = tf.maximum(0.0,
self.config.emb_margin - tf.transpose(diagonal) + scores)
# Remove the diagonal terms (a pair compared with itself).
cost_s = cost_s - tf.diag(tf.diag_part(cost_s))
cost_im = cost_im - tf.diag(tf.diag_part(cost_im))
emb_batch_loss = tf.reduce_sum(cost_s) + tf.reduce_sum(cost_im)
# Normalize by the squared number of valid caption-image pairs.
emb_batch_loss = (emb_batch_loss /
tf.cast(tf.shape(norm_seq_embeddings)[0], tf.float32) ** 2)
if self.config.emb_loss_factor > 0.0:
tf.contrib.losses.add_loss(emb_batch_loss * self.config.emb_loss_factor)
# Compute image LSTM loss.
# Start with one direction.
tf.logging.info("Rnn_type: %s" % self.config.rnn_type)
# Any rnn_type other than "lstm" or "gru" falls back to a basic RNN cell.
if self.config.rnn_type == "lstm":
tf.logging.info("----- RNN Type: LSTM ------")
# Forward LSTM.
f_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(
num_units=self.config.num_lstm_units, state_is_tuple=True)
# Backward LSTM.
b_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(
num_units=self.config.num_lstm_units, state_is_tuple=True)
elif self.config.rnn_type == "gru":
tf.logging.info("----- RNN Type: GRU ------")
# Forward GRU.
f_lstm_cell = tf.nn.rnn_cell.GRUCell(num_units=self.config.num_lstm_units)
# Backward GRU.
b_lstm_cell = tf.nn.rnn_cell.GRUCell(num_units=self.config.num_lstm_units)
else:
tf.logging.info("----- RNN Type: RNN ------")
# Forward RNN.
f_lstm_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=self.config.num_lstm_units)
# Backward RNN.
b_lstm_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=self.config.num_lstm_units)
# Dropout on cell inputs and outputs is only applied while training.
if self.mode == "train":
f_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
f_lstm_cell,
input_keep_prob=self.config.lstm_dropout_keep_prob,
output_keep_prob=self.config.lstm_dropout_keep_prob)
b_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
b_lstm_cell,
input_keep_prob=self.config.lstm_dropout_keep_prob,
output_keep_prob=self.config.lstm_dropout_keep_prob)
with tf.variable_scope("lstm", initializer=self.initializer) as lstm_scope:
if self.mode == "inference":
# Inference for Bi-LSTM.
# pred_feed / next_index_feed let a caller feed logits and the index of
# the expected item to obtain the cross-entropy of that prediction.
pred_feed = tf.placeholder(dtype=tf.float32,
shape=[None, None],
name="pred_feed")
next_index_feed = tf.placeholder(dtype=tf.int64,
shape=[None],
name="next_index_feed")
self.lstm_xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=pred_feed,
labels=next_index_feed,
name="lstm_xent")
if self.config.rnn_type == "lstm":
# In inference mode, use concatenated states for convenient feeding
# and fetching.
# Forward
# Placeholder for feeding a batch of concatenated states.
f_state_feed = tf.placeholder(dtype=tf.float32,
shape=[None, sum(f_lstm_cell.state_size)],
name="f_state_feed")
f_input_feed = tf.placeholder(dtype=tf.float32,
shape=[None, self.config.embedding_size],
name="f_input_feed")
# Backward:
# Placeholder for feeding a batch of concatenated states.
b_state_feed = tf.placeholder(dtype=tf.float32,
shape=[None, sum(b_lstm_cell.state_size)],
name="b_state_feed")
b_input_feed = tf.placeholder(dtype=tf.float32,
shape=[None, self.config.embedding_size],
name="b_input_feed")
# Split the concatenated state back into the two-part LSTM state tuple.
f_state_tuple = tf.split(1, 2, f_state_feed)
# Run a single LSTM step.
with tf.variable_scope("FW"):
f_lstm_outputs, f_state_tuple = f_lstm_cell(
inputs=f_input_feed,
state=f_state_tuple)
# Concatenate the resulting state.
self.f_lstm_state = tf.concat(1, f_state_tuple, name="f_state")
b_state_tuple = tf.split(1, 2, b_state_feed)
# Run a single LSTM step.
with tf.variable_scope("BW"):
b_lstm_outputs, b_state_tuple = b_lstm_cell(
inputs=b_input_feed,
state=b_state_tuple)
# Concatenate the resulting state.
self.b_lstm_state = tf.concat(1, b_state_tuple, name="b_state")
else:
# For non-LSTM RNN models, no tuple is used.
# NOTE(review): in this branch the new states are only exposed as graph
# tensors named "f_state" / "b_state"; self.f_lstm_state and
# self.b_lstm_state are not assigned here.
# Forward
# Placeholder for feeding a batch of concatenated states.
f_state_feed = tf.placeholder(dtype=tf.float32,
shape=[None, f_lstm_cell.state_size],
name="f_state_feed")
f_input_feed = tf.placeholder(dtype=tf.float32,
shape=[None, self.config.embedding_size],
name="f_input_feed")
# Backward:
# Placeholder for feeding a batch of concatenated states.
b_state_feed = tf.placeholder(dtype=tf.float32,
shape=[None, b_lstm_cell.state_size],
name="b_state_feed")
b_input_feed = tf.placeholder(dtype=tf.float32,
shape=[None, self.config.embedding_size],
name="b_input_feed")
# Run a single RNN step.
with tf.variable_scope("FW"):
f_lstm_outputs, f_state_tuple = f_lstm_cell(
inputs=f_input_feed,
state=f_state_feed)
f_state_tuple = tf.identity(f_state_tuple, name="f_state")
with tf.variable_scope("BW"):
b_lstm_outputs, b_state_tuple = b_lstm_cell(
inputs=b_input_feed,
state=b_state_feed)
b_state_tuple = tf.identity(b_state_tuple, name="b_state")
lstm_outputs = (f_lstm_outputs, b_lstm_outputs)
sequence_length = None
else:
# Run the batch of sequence embeddings through the LSTM.
# The number of real items per outfit is the row sum of the input mask.
sequence_length = tf.reduce_sum(self.input_mask, 1)
lstm_outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw=f_lstm_cell,
cell_bw=b_lstm_cell,
inputs=self.rnn_image_embeddings,
initial_state_fw=None,
initial_state_bw=None,
sequence_length=sequence_length,
dtype=tf.float32,
scope=lstm_scope)
# Stack batches vertically.
f_lstm_outputs = tf.reshape(lstm_outputs[0], [-1, f_lstm_cell.output_size])
if self.mode == "inference":
b_lstm_outputs = lstm_outputs[1]
else:
# Reverse the backward outputs within each sequence's valid length so that
# they line up with the reversed target embeddings built below.
b_lstm_outputs = tf.reverse_sequence(lstm_outputs[1],
seq_lengths=sequence_length,
seq_dim=1,
batch_dim=0)
b_lstm_outputs = tf.reshape(b_lstm_outputs, [-1, b_lstm_cell.output_size])
# Project the RNN outputs of each direction into the embedding space.
with tf.variable_scope("f_logits") as logits_scope:
f_input_embeddings = tf.contrib.layers.fully_connected(
inputs=f_lstm_outputs,
num_outputs=self.config.embedding_size,
activation_fn=None,
weights_initializer=self.initializer,
scope=logits_scope)
with tf.variable_scope("b_logits") as logits_scope:
b_input_embeddings = tf.contrib.layers.fully_connected(
inputs=b_lstm_outputs,
num_outputs=self.config.embedding_size,
activation_fn=None,
weights_initializer=self.initializer,
scope=logits_scope)
if self.mode == "inference":
pass
else:
# Padding input_mask to match dimension.
# The mask is padded to number_set_images + 1 columns and its first column
# is dropped, i.e. it is shifted left by one step like the targets below.
input_mask = tf.pad(self.input_mask,
[[0,0],
[0, self.config.number_set_images + 1 - tf.shape(self.input_mask)[1]]])
input_mask = tf.to_float(
tf.reshape(tf.slice(input_mask, [0,1], [-1, -1]), [-1,1]))
loss_mask = tf.pad(self.loss_mask,
[[0,0],
[0, self.config.number_set_images - tf.shape(self.loss_mask)[1]]])
loss_mask = tf.reshape(tf.to_float(loss_mask),
[self.config.number_set_images * self.config.batch_size,1])
# Forward rnn.
# Targets are the image embeddings shifted left by one step (a zero row is
# appended at the end), so position t holds the embedding of item t + 1.
f_target_embeddings = tf.slice(tf.pad(self.rnn_image_embeddings,
[[0,0], [0,1], [0,0]]), [0,1,0], [-1,-1,-1])
f_target_embeddings = tf.reshape(f_target_embeddings,
[self.config.number_set_images * self.config.batch_size,
self.config.embedding_size])
# Zero out targets that fall on padded positions.
f_target_embeddings = tf.mul(f_target_embeddings,
input_mask,
name="target_embeddings")
# Softmax loss over all items in this minibatch.
# Each remaining output row is scored against every remaining target row;
# the correct class of row i is target i, hence labels = range(n).
loss_mask = tf.squeeze(loss_mask)
f_input_embeddings = tf.boolean_mask(f_input_embeddings,
tf.cast(loss_mask, tf.bool))
f_target_embeddings = tf.boolean_mask(f_target_embeddings,
tf.cast(loss_mask, tf.bool))
f_lstm_scores = tf.matmul(f_input_embeddings,
f_target_embeddings,
transpose_a=False,
transpose_b=True)
f_lstm_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=f_lstm_scores,
labels=tf.range(tf.shape(f_lstm_scores)[0]))
f_lstm_loss = tf.div(tf.reduce_sum(f_lstm_loss),
tf.reduce_sum(loss_mask),
name="f_lstm_loss")
# Backward rnn.
# It would be better to write a function to calculate lstm_loss from
# loss_mask, inputs, and targets, so the code can be reused, for now
# just copy and paste the forward to get the backward loss.
reverse_embeddings = tf.reverse_sequence(self.rnn_image_embeddings,
seq_lengths=sequence_length,
seq_dim=1,
batch_dim=0)
b_target_embeddings = tf.slice(tf.pad(reverse_embeddings,
[[0,0], [0,1], [0,0]]),
[0,1,0], [-1,-1,-1])
b_target_embeddings = tf.reshape(b_target_embeddings,
[self.config.number_set_images * self.config.batch_size,
self.config.embedding_size])
# NOTE(review): this op requests the same name ("target_embeddings") as the
# forward one above.
b_target_embeddings = tf.mul(b_target_embeddings,
input_mask,
name="target_embeddings")
# Softmax loss over all items in this minibatch
b_input_embeddings = tf.boolean_mask(b_input_embeddings,
tf.cast(loss_mask, tf.bool))
b_target_embeddings = tf.boolean_mask(b_target_embeddings,
tf.cast(loss_mask, tf.bool))
b_lstm_scores = tf.matmul(b_input_embeddings,
b_target_embeddings,
transpose_a=False,
transpose_b=True)
b_lstm_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=b_lstm_scores,
labels=tf.range(tf.shape(b_lstm_scores)[0]))
b_lstm_loss = tf.div(tf.reduce_sum(b_lstm_loss),
tf.reduce_sum(loss_mask),
name="b_lstm_loss")
# A direction only contributes to the total loss when its factor is > 0.
if self.config.f_rnn_loss_factor > 0:
tf.contrib.losses.add_loss(f_lstm_loss * self.config.f_rnn_loss_factor)
if self.config.b_rnn_loss_factor > 0:
tf.contrib.losses.add_loss(b_lstm_loss * self.config.b_rnn_loss_factor)
# Merge all losses and stats.
total_loss = tf.contrib.losses.get_total_loss()
# Add summaries.
tf.scalar_summary("emb_batch_loss", emb_batch_loss)
tf.scalar_summary("f_lstm_loss", f_lstm_loss)
tf.scalar_summary("b_lstm_loss", b_lstm_loss)
tf.scalar_summary("lstm_loss",
(f_lstm_loss * self.config.f_rnn_loss_factor +
b_lstm_loss * self.config.b_rnn_loss_factor))
tf.scalar_summary("total_loss", total_loss)
for var in tf.trainable_variables():
tf.histogram_summary(var.op.name, var)
weights = tf.to_float(tf.reshape(emb_loss_mask, [-1]))
# Expose the tensors needed by the trainer and by evaluation.
self.loss_mask = loss_mask
self.input_mask = input_mask
self.target_embeddings = (f_target_embeddings, b_target_embeddings)
self.input_embeddings = (f_input_embeddings, b_input_embeddings)
self.total_loss = total_loss
self.emb_losses = emb_batch_loss # Used in evaluation.
self.lstm_losses = (f_lstm_loss * self.config.f_rnn_loss_factor +
b_lstm_loss * self.config.b_rnn_loss_factor) # Used in evaluation.
self.target_weights = weights # Used in evaluation.
def setup_inception_initializer(self):
  """Install self.init_fn, which restores the Inception variables.

  Nothing is installed in inference mode. Otherwise self.init_fn becomes a
  function taking a session and restoring only self.inception_variables from
  config.inception_checkpoint_file.
  """
  if self.mode == "inference":
    return

  # The saver is restricted to the Inception variables.
  inception_saver = tf.train.Saver(self.inception_variables)

  def restore_fn(sess):
    checkpoint = self.config.inception_checkpoint_file
    tf.logging.info("Restoring Inception variables from checkpoint %s" %
                    checkpoint)
    inception_saver.restore(sess, checkpoint)

  self.init_fn = restore_fn
def setup_global_step(self):
  """Create the non-trainable global step variable and store it on self."""
  # Register the variable both as the global step and as a regular variable.
  step_collections = [tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.VARIABLES]
  self.global_step = tf.Variable(
      initial_value=0,
      name="global_step",
      trainable=False,
      collections=step_collections)
def build(self):
  """Run every graph construction stage, in dependency order."""
  stages = (
      self.build_inputs,
      self.build_image_embeddings,
      self.build_seq_embeddings,
      self.build_model,
      self.setup_inception_initializer,
      self.setup_global_step,
  )
  for stage in stages:
    stage()
================================================
FILE: polyvore/polyvore_model_siamese.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Siamese Network for compatibility modeling/
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
import scipy.io as sio
from scipy.linalg import block_diag
from ops import image_embedding
from ops import image_processing
from ops import inputs as input_ops
class PolyvoreModel(object):
  """Siamese-network model for fashion sets on the Polyvore dataset.

  Images are embedded with Inception V3 followed by a linear projection. In
  "train" and "eval" modes a pairwise loss rewards high similarity between
  images of the same outfit and penalises similarity above a margin between
  images of different outfits. In "inference" mode only the embedding
  sub-graph is built.
  """

  def __init__(self, config, mode, train_inception=False):
    """Basic setup.

    Args:
      config: Object containing configuration parameters.
      mode: "train", "eval" or "inference".
      train_inception: Whether the inception submodel variables are trainable.
    """
    assert mode in ["train", "eval", "inference"]
    self.config = config
    self.mode = mode
    self.train_inception = train_inception

    # Reader for the input data.
    self.reader = tf.TFRecordReader()

    # To match the "Show and Tell" paper we initialize all variables with a
    # random uniform initializer.
    self.initializer = tf.random_uniform_initializer(
        minval=-self.config.initializer_scale,
        maxval=self.config.initializer_scale)

    # A float32 Tensor with shape
    # [batch_size, num_images, height, width, channels].
    self.images = None

    # An int32 0/1 Tensor with shape [batch_size, padded_length].
    self.input_mask = None

    # A float32 Tensor with shape [batch_size, num_images, embedding_size].
    self.image_embeddings = None

    # A float32 scalar Tensor; the total loss for the trainer to optimize.
    self.total_loss = None

    # Collection of variables from the inception submodel.
    self.inception_variables = []

    # Function to restore the inception submodel from checkpoint.
    self.init_fn = None

    # Global step Tensor.
    self.global_step = None

  def is_training(self):
    """Returns true if the model is built for training mode."""
    return self.mode == "train"

  def process_image(self, encoded_image, thread_id=0, image_idx=0):
    """Decodes and processes an image string.

    Args:
      encoded_image: A scalar string Tensor; the encoded image.
      thread_id: Preprocessing thread id. Accepted for interface
        compatibility; it is not forwarded to the image processing code.
      image_idx: Index of the image within its outfit; forwarded to
        image_processing.process_image.

    Returns:
      A float32 Tensor of shape [height, width, 3]; the processed image.
    """
    return image_processing.process_image(
        encoded_image,
        is_training=self.is_training(),
        height=self.config.image_height,
        width=self.config.image_width,
        image_format=self.config.image_format,
        image_idx=image_idx)

  def build_inputs(self):
    """Input prefetching, preprocessing and batching.

    Outputs:
      self.images
      self.input_mask
    """
    if self.mode == "inference":
      # In inference mode, images and inputs are fed via placeholders.
      image_feed = tf.placeholder(dtype=tf.string, shape=[], name="image_feed")

      # Decode and preprocess the single fed image.
      image_feed = self.process_image(image_feed)

      # Insert the batch dimension.
      image_seqs = tf.expand_dims(image_feed, 0)

      # The input mask is fed as a placeholder of fixed shape [1, 8].
      input_mask = tf.placeholder(dtype=tf.int64,
                                  shape=[1, 8],
                                  name="input_mask")
    else:
      # Prefetch serialized SequenceExample protos.
      input_queue = input_ops.prefetch_input_data(
          self.reader,
          self.config.input_file_pattern,
          is_training=self.is_training(),
          batch_size=self.config.batch_size,
          values_per_shard=self.config.values_per_input_shard,
          input_queue_capacity_factor=self.config.input_queue_capacity_factor,
          num_reader_threads=self.config.num_input_reader_threads)

      # One dequeue / parse / preprocess pipeline is built per preprocessing
      # thread.
      images_and_captions = []
      for thread_id in range(self.config.num_preprocess_threads):
        serialized_sequence_example = input_queue.dequeue()
        set_id, encoded_images, image_ids, captions, likes = (
            input_ops.parse_sequence_example(
                serialized_sequence_example,
                set_id=self.config.set_id_name,
                image_feature=self.config.image_feature_name,
                image_index=self.config.image_index_name,
                caption_feature=self.config.caption_feature_name,
                number_set_images=self.config.number_set_images))

        images = []
        for i in range(self.config.number_set_images):
          images.append(self.process_image(encoded_images[i], image_idx=i))

        images_and_captions.append(
            [set_id, images, image_ids, captions, likes])

      # Batch inputs.
      queue_capacity = (5 * self.config.num_preprocess_threads *
                        self.config.batch_size)

      # Only image_seqs and input_mask are used by this model; the remaining
      # outputs are unpacked because the batching op returns them together.
      (set_ids, image_seqs, image_ids, input_mask,
       loss_mask, cap_seqs, cap_mask, likes) = (
           input_ops.batch_with_dynamic_pad(images_and_captions,
                                            batch_size=self.config.batch_size,
                                            queue_capacity=queue_capacity))

    self.images = image_seqs
    self.input_mask = input_mask

  def build_image_embeddings(self):
    """Builds the image model subgraph and generates image embeddings.

    Inputs:
      self.images

    Outputs:
      self.image_embeddings
      self.inception_variables
    """
    # Flatten all leading dimensions so that Inception receives a 4-D batch
    # of [height, width, 3] images.
    images = tf.reshape(self.images, [-1,
                                      self.config.image_height,
                                      self.config.image_width,
                                      3])

    inception_output = image_embedding.inception_v3(
        images,
        trainable=self.train_inception,
        is_training=self.is_training())
    self.inception_variables = tf.get_collection(
        tf.GraphKeys.VARIABLES, scope="InceptionV3")

    # Map inception output into embedding space.
    with tf.variable_scope("image_embedding") as scope:
      image_embeddings = tf.contrib.layers.fully_connected(
          inputs=inception_output,
          num_outputs=self.config.embedding_size,
          activation_fn=None,
          weights_initializer=self.initializer,
          biases_initializer=None,
          scope=scope)

    # Save the embedding size in the graph.
    tf.constant(self.config.embedding_size, name="embedding_size")

    # Restore the leading batch dimension of self.images.
    self.image_embeddings = tf.reshape(image_embeddings,
                                       [tf.shape(self.images)[0],
                                        -1,
                                        self.config.embedding_size])

  @staticmethod
  def _same_set_mask(number_set_images, batch_size):
    """Returns a 0/1 matrix marking image pairs that belong to the same set.

    Args:
      number_set_images: Number of images per outfit.
      batch_size: Number of outfits per batch.

    Returns:
      A numpy array of shape [batch_size * number_set_images,
      batch_size * number_set_images] with ones in the batch_size diagonal
      blocks of size number_set_images and zeros elsewhere.
    """
    per_set = np.ones((number_set_images, number_set_images))
    return block_diag(*([per_set] * batch_size))

  def build_model(self):
    """Builds the model.

    Inputs:
      self.image_embeddings

    Outputs:
      self.total_loss (training and eval only)
    """
    norm_image_embeddings = tf.nn.l2_normalize(self.image_embeddings, 2,
                                               name="norm_image_embeddings")

    if self.mode != "inference":
      # Margin of the siamese loss applied to different-outfit pairs.
      margin = 0.8

      # Pairs inside one outfit are positives (this includes every image
      # paired with itself); all remaining pairs are negatives. The mask is
      # sized from the configured batch size instead of a fixed number of
      # replicated blocks.
      positive_mask = self._same_set_mask(self.config.number_set_images,
                                          self.config.batch_size)

      norm_image_embeddings = tf.reshape(
          norm_image_embeddings,
          [self.config.number_set_images * self.config.batch_size,
           self.config.embedding_size])

      # Similarity between every pair of images in the batch.
      scores = tf.matmul(norm_image_embeddings, norm_image_embeddings,
                         transpose_a=False, transpose_b=True, name="scores")

      # Mean similarity over the positive pairs.
      posi_scores = (tf.reduce_sum(tf.mul(scores, positive_mask)) /
                     np.sum(positive_mask))

      # Mean hinge penalty over the negative pairs. Positive positions are
      # zeroed by the mask and then clipped to 0 by the hinge.
      # NOTE: with batch_size == 1 there are no negative pairs and the
      # normaliser below is zero.
      negative_mask = 1.0 - positive_mask
      nega_scores = tf.maximum(tf.mul(scores, negative_mask) - margin, 0.0)
      nega_scores = tf.reduce_sum(nega_scores) / np.sum(negative_mask)

      emb_batch_loss = tf.sub(nega_scores, posi_scores, name="emb_batch_loss")
      tf.contrib.losses.add_loss(emb_batch_loss)

      # Merge all losses and stats.
      total_loss = tf.contrib.losses.get_total_loss()

      # Add summaries.
      tf.scalar_summary("emb_batch_loss", emb_batch_loss)
      tf.scalar_summary("total_loss", total_loss)
      for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)

      self.total_loss = total_loss

  def setup_inception_initializer(self):
    """Sets up the function to restore inception variables from checkpoint."""
    if self.mode != "inference":
      # Restore inception variables only.
      saver = tf.train.Saver(self.inception_variables)

      def restore_fn(sess):
        tf.logging.info("Restoring Inception variables from checkpoint file %s",
                        self.config.inception_checkpoint_file)
        saver.restore(sess, self.config.inception_checkpoint_file)

      self.init_fn = restore_fn

  def setup_global_step(self):
    """Sets up the global step Tensor."""
    global_step = tf.Variable(
        initial_value=0,
        name="global_step",
        trainable=False,
        collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.VARIABLES])

    self.global_step = global_step

  def build(self):
    """Creates all ops for training and evaluation."""
    self.build_inputs()
    self.build_image_embeddings()
    self.build_model()
    self.setup_inception_initializer()
    self.setup_global_step()
================================================
FILE: polyvore/polyvore_model_vse.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Polyvore model used in ACM MM"17 paper
"Learning Fashion Compatibility with Bidirectional LSTMs"
Link: https://arxiv.org/pdf/1707.05691.pdf
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from ops import image_embedding
from ops import image_processing
from ops import inputs as input_ops
class PolyvoreModel(object):
  """Visual-semantic embedding (VSE) model for fashion sets on Polyvore.

  Images are embedded with Inception V3 plus a linear projection, captions
  with a bag-of-words word embedding, and a contrastive loss aligns each
  image with its own caption in the joint space.
  """

  def __init__(self, config, mode, train_inception=False):
    """Basic setup.

    Args:
      config: Object containing configuration parameters.
      mode: "train", "eval" or "inference".
      train_inception: Whether the inception submodel variables are trainable.
    """
    assert mode in ["train", "eval", "inference"]
    self.config = config
    self.mode = mode
    self.train_inception = train_inception

    # Reader for the input data.
    self.reader = tf.TFRecordReader()

    # To match the "Show and Tell" paper we initialize all variables with a
    # random uniform initializer.
    self.initializer = tf.random_uniform_initializer(
        minval=-self.config.initializer_scale,
        maxval=self.config.initializer_scale)

    # A float32 Tensor with shape
    # [batch_size, num_images, height, width, channels].
    # num_images is the number of images in one outfit, default is 8.
    self.images = None

    # Forward RNN input and target sequences.
    # An int32 Tensor with shape [batch_size, padded_length].
    self.f_input_seqs = None
    # An int32 Tensor with shape [batch_size, padded_length].
    self.f_target_seqs = None

    # Backward RNN input and target sequences.
    # An int32 Tensor with shape [batch_size, padded_length].
    self.b_input_seqs = None
    # An int32 Tensor with shape [batch_size, padded_length].
    self.b_target_seqs = None

    # An int32 0/1 Tensor with shape [batch_size, padded_length].
    self.input_mask = None

    # Image caption sequence and masks.
    # An int32 Tensor with shape [batch_size, num_images, padded_length].
    self.cap_seqs = None
    # An int32 0/1 Tensor with shape [batch_size, padded_length].
    self.cap_mask = None

    # Set ids of the outfits in the batch; None in inference mode.
    self.set_ids = None

    # Caption sequence embeddings, we use simple bag of word model.
    # A float32 Tensor with shape [batch_size, num_images, embedding_size].
    self.seq_embeddings = None

    # Image embeddings in the joint visual-semantic space
    # A float32 Tensor with shape [batch_size, num_images, embedding_size].
    self.image_embeddings = None

    # Image embeddings in the RNN output/prediction space.
    self.rnn_image_embeddings = None

    # Word embedding map.
    self.embedding_map = None

    # A float32 scalar Tensor; the total loss for the trainer to optimize.
    self.total_loss = None

    # Forward and backward RNN loss.
    # A float32 Tensor with shape [batch_size * padded_length].
    self.forward_losses = None
    # A float32 Tensor with shape [batch_size * padded_length].
    self.backward_losses = None
    # RNN loss, forward + backward.
    self.lstm_losses = None

    # Loss mask for lstm loss.
    self.loss_mask = None

    # Visual Semantic Embedding loss.
    # A float32 Tensor with shape [batch_size * padded_length].
    self.emb_losses = None

    # A float32 Tensor with shape [batch_size * padded_length].
    self.target_weights = None

    # Collection of variables from the inception submodel.
    self.inception_variables = []

    # Function to restore the inception submodel from checkpoint.
    self.init_fn = None

    # Global step Tensor.
    self.global_step = None

  def is_training(self):
    """Returns true if the model is built for training mode."""
    return self.mode == "train"

  def process_image(self, encoded_image, thread_id=0, image_idx=0):
    """Decodes and processes an image string.

    Args:
      encoded_image: A scalar string Tensor; the encoded image.
      thread_id: Preprocessing thread id used to select the ordering of color
        distortions. Not used in our model.
      image_idx: Index of the image in an outfit. Only used for summaries.

    Returns:
      A float32 Tensor of shape [height, width, 3]; the processed image.
    """
    return image_processing.process_image(
        encoded_image,
        is_training=self.is_training(),
        height=self.config.image_height,
        width=self.config.image_width,
        image_format=self.config.image_format,
        image_idx=image_idx)

  def build_inputs(self):
    """Input prefetching, preprocessing and batching.

    Outputs:
      self.images
      self.input_mask
      self.loss_mask
      self.cap_seqs
      self.cap_mask
      self.set_ids
    """
    if self.mode == "inference":
      # In inference mode, images and inputs are fed via placeholders.
      image_feed = tf.placeholder(dtype=tf.string, shape=[], name="image_feed")

      # Decode and preprocess the single fed image.
      image_feed = self.process_image(image_feed)

      input_feed = tf.placeholder(dtype=tf.int64,
                                  shape=[None],
                                  name="input_feed")

      # Insert the batch dimension for the image and a second dimension for
      # the fed caption ids.
      image_seqs = tf.expand_dims(image_feed, 0)
      cap_seqs = tf.expand_dims(input_feed, 1)

      # The input mask is fed as a placeholder of fixed shape [1, 8]; there
      # are no caption mask, loss mask or set ids in inference mode.
      input_mask = tf.placeholder(dtype=tf.int64,
                                  shape=[1, 8],
                                  name="input_mask")
      cap_mask = None
      loss_mask = None
      set_ids = None
    else:
      # Prefetch serialized SequenceExample protos.
      input_queue = input_ops.prefetch_input_data(
          self.reader,
          self.config.input_file_pattern,
          is_training=self.is_training(),
          batch_size=self.config.batch_size,
          values_per_shard=self.config.values_per_input_shard,
          input_queue_capacity_factor=self.config.input_queue_capacity_factor,
          num_reader_threads=self.config.num_input_reader_threads)

      # Image processing and random distortion. Split across multiple threads
      # with each thread applying a slightly different distortion. But we only
      # use one thread in our Polyvore model. likes are not used.
      images_and_captions = []
      for thread_id in range(self.config.num_preprocess_threads):
        serialized_sequence_example = input_queue.dequeue()
        set_id, encoded_images, image_ids, captions, likes = (
            input_ops.parse_sequence_example(
                serialized_sequence_example,
                set_id=self.config.set_id_name,
                image_feature=self.config.image_feature_name,
                image_index=self.config.image_index_name,
                caption_feature=self.config.caption_feature_name,
                number_set_images=self.config.number_set_images))

        images = []
        for i in range(self.config.number_set_images):
          images.append(self.process_image(encoded_images[i], image_idx=i))

        images_and_captions.append(
            [set_id, images, image_ids, captions, likes])

      # Batch inputs.
      queue_capacity = (5 * self.config.num_preprocess_threads *
                        self.config.batch_size)

      (set_ids, image_seqs, image_ids, input_mask,
       loss_mask, cap_seqs, cap_mask, likes) = (
           input_ops.batch_with_dynamic_pad(images_and_captions,
                                            batch_size=self.config.batch_size,
                                            queue_capacity=queue_capacity))

    self.images = image_seqs
    self.input_mask = input_mask
    self.loss_mask = loss_mask
    self.cap_seqs = cap_seqs
    self.cap_mask = cap_mask
    self.set_ids = set_ids

  def build_image_embeddings(self):
    """Builds the image model subgraph and generates image embeddings.

    Inputs:
      self.images

    Outputs:
      self.image_embeddings
      self.inception_variables
    """
    # Flatten all leading dimensions so that Inception receives a 4-D batch
    # of [height, width, 3] images.
    images = tf.reshape(self.images, [-1,
                                      self.config.image_height,
                                      self.config.image_width,
                                      3])

    inception_output = image_embedding.inception_v3(
        images,
        trainable=self.train_inception,
        is_training=self.is_training())
    self.inception_variables = tf.get_collection(
        tf.GraphKeys.VARIABLES, scope="InceptionV3")

    # Map inception output into embedding space.
    with tf.variable_scope("image_embedding") as scope:
      image_embeddings = tf.contrib.layers.fully_connected(
          inputs=inception_output,
          num_outputs=self.config.embedding_size,
          activation_fn=None,
          weights_initializer=self.initializer,
          biases_initializer=None,
          scope=scope)

    # Save the embedding size in the graph.
    tf.constant(self.config.embedding_size, name="embedding_size")

    # Restore the leading batch dimension of self.images.
    self.image_embeddings = tf.reshape(image_embeddings,
                                       [tf.shape(self.images)[0],
                                        -1,
                                        self.config.embedding_size])

  def build_seq_embeddings(self):
    """Builds the caption (bag-of-words) embeddings.

    Inputs:
      self.cap_seqs
      self.cap_mask (training and eval only)

    Outputs:
      self.seq_embeddings
      self.embedding_map
    """
    with tf.variable_scope("seq_embedding"), tf.device("/cpu:0"):
      embedding_map = tf.get_variable(
          name="map",
          shape=[self.config.vocab_size, self.config.embedding_size],
          initializer=self.initializer)
      seq_embeddings = tf.nn.embedding_lookup(embedding_map, self.cap_seqs)

      # Pool the word embeddings of each caption (bag of words) by
      # multiplying with the caption mask; skipped in inference mode where no
      # caption mask exists.
      if self.mode != "inference":
        seq_embeddings = tf.batch_matmul(
            tf.cast(tf.expand_dims(self.cap_mask, 2), tf.float32),
            seq_embeddings)
        seq_embeddings = tf.squeeze(seq_embeddings, [2])

    self.embedding_map = embedding_map
    self.seq_embeddings = seq_embeddings

  def build_model(self):
    """Builds the model.

    The original code is written with Tensorflow r0.10
    for Tensorflow > r1.0, many functions can be simplified.
    For example Tensors support slicing now, so no need to use tf.slice()

    Inputs:
      self.image_embeddings
      self.seq_embeddings
      self.cap_mask (training and eval only)

    Outputs:
      self.total_loss (training and eval only)
      self.emb_losses (training and eval only)
      self.target_weights (training and eval only)
    """
    norm_image_embeddings = tf.nn.l2_normalize(self.image_embeddings, 2,
                                               name="norm_image_embeddings")
    norm_seq_embeddings = tf.nn.l2_normalize(self.seq_embeddings, 2)

    # Pad the caption embeddings along the image axis up to the fixed number
    # of images per set.
    seq_pad = (self.config.number_set_images -
               tf.shape(norm_seq_embeddings)[1])
    norm_seq_embeddings = tf.pad(norm_seq_embeddings,
                                 [[0, 0], [0, seq_pad], [0, 0]],
                                 name="norm_seq_embeddings")

    if self.mode != "inference":
      # Compute losses for joint embedding.
      # Only look at the captions that have length >= 2.
      emb_loss_mask = tf.greater(tf.reduce_sum(self.cap_mask, 2), 1)

      # Image mask is padded to max length.
      mask_pad = self.config.number_set_images - tf.shape(emb_loss_mask)[1]
      emb_loss_mask = tf.pad(emb_loss_mask, [[0, 0], [0, mask_pad]])

      # Select the valid image-caption pairs.
      emb_loss_mask = tf.reshape(emb_loss_mask, [-1])

      flat_shape = [self.config.number_set_images * self.config.batch_size,
                    self.config.embedding_size]
      norm_image_embeddings = tf.reshape(norm_image_embeddings, flat_shape)
      norm_image_embeddings = tf.boolean_mask(norm_image_embeddings,
                                              emb_loss_mask)
      norm_seq_embeddings = tf.reshape(norm_seq_embeddings, flat_shape)
      norm_seq_embeddings = tf.boolean_mask(norm_seq_embeddings, emb_loss_mask)

      # The following defines contrastive loss in the joint space.
      # Reference: https://github.com/ryankiros/visual-semantic-embedding/blob/master/model.py#L39
      scores = tf.matmul(norm_seq_embeddings, norm_image_embeddings,
                         transpose_a=False, transpose_b=True, name="scores")

      # The diagonal holds the score of each caption with its own image.
      diagonal = tf.expand_dims(tf.diag_part(scores), 1)
      cost_s = tf.maximum(0.0, self.config.emb_margin - diagonal + scores)
      cost_im = tf.maximum(
          0.0, self.config.emb_margin - tf.transpose(diagonal) + scores)

      # Matching pairs must not contribute: zero out the diagonals.
      cost_s = cost_s - tf.diag(tf.diag_part(cost_s))
      cost_im = cost_im - tf.diag(tf.diag_part(cost_im))

      emb_batch_loss = tf.reduce_sum(cost_s) + tf.reduce_sum(cost_im)
      emb_batch_loss = (
          emb_batch_loss /
          tf.cast(tf.shape(norm_seq_embeddings)[0], tf.float32) ** 2)

      tf.contrib.losses.add_loss(emb_batch_loss * self.config.emb_loss_factor)

      total_loss = tf.contrib.losses.get_total_loss()

      # Add summaries.
      tf.scalar_summary("emb_batch_loss", emb_batch_loss)
      tf.scalar_summary("total_loss", total_loss)
      for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)

      weights = tf.to_float(tf.reshape(emb_loss_mask, [-1]))

      # self.loss_mask and self.input_mask are already set by build_inputs();
      # there are no local tensors of those names in this method to assign.
      self.total_loss = total_loss
      self.emb_losses = emb_batch_loss  # Used in evaluation.
      self.target_weights = weights  # Used in evaluation.

  def setup_inception_initializer(self):
    """Sets up the function to restore inception variables from checkpoint."""
    if self.mode != "inference":
      # Restore inception variables only.
      saver = tf.train.Saver(self.inception_variables)

      def restore_fn(sess):
        tf.logging.info("Restoring Inception variables from checkpoint %s" %
                        self.config.inception_checkpoint_file)
        saver.restore(sess, self.config.inception_checkpoint_file)

      self.init_fn = restore_fn

  def setup_global_step(self):
    """Sets up the global step Tensor."""
    global_step = tf.Variable(
        initial_value=0,
        name="global_step",
        trainable=False,
        collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.VARIABLES])

    self.global_step = global_step

  def build(self):
    """Creates all ops for training and evaluation."""
    self.build_inputs()
    self.build_image_embeddings()
    self.build_seq_embeddings()
    self.build_model()
    self.setup_inception_initializer()
    self.setup_global_step()
================================================
FILE: polyvore/run_inference.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Run the inference of Bi-LSTM model given input images."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import json
import tensorflow as tf
import pickle as pkl
import numpy as np
import configuration
import polyvore_model_bi as polyvore_model
# Command-line flags of the Bi-LSTM feature extraction script.
FLAGS = tf.flags.FLAGS
# Checkpoint location; main() hands this value directly to saver.restore().
tf.flags.DEFINE_string("checkpoint_path", "",
"Model checkpoint file or directory containing a "
"model checkpoint file.")
# JSON list of outfits; main() reads "set_id" and "items" from every entry.
tf.flags.DEFINE_string("json_file", "data/label/test-no-dup.json",
"Json file containing the inference data.")
# Images are read from <image_dir>/<set_id>/<index>.jpg.
tf.flags.DEFINE_string("image_dir", "data/images",
"Directory containing images.")
# Despite the help text this is the path of the output pickle FILE; main()
# returns immediately when the file already exists.
tf.flags.DEFINE_string("feature_file", "data/features/test_features.pkl",
"Directory to save the features")
# Copied onto the model configuration (model_config.rnn_type) before the
# model is built.
tf.flags.DEFINE_string("rnn_type", "", "Type of RNN.")
def main(_):
  """Extracts Bi-LSTM image features for all test items and pickles them.

  Does nothing when FLAGS.feature_file already exists. Otherwise the
  inference graph is built, the checkpoint is restored, and for every item of
  every outfit in FLAGS.json_file the joint-space and RNN-space image
  embeddings are computed. The result is a dict keyed by
  "<set_id>_<index>" with entries "image_feat" and "image_rnn_feat", written
  to FLAGS.feature_file with pickle.
  """
  if os.path.isfile(FLAGS.feature_file):
    print("Feature file already exist.")
    return

  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model_config = configuration.ModelConfig()
    model_config.rnn_type = FLAGS.rnn_type
    model = polyvore_model.PolyvoreModel(model_config, mode="inference")
    model.build()
    saver = tf.train.Saver()
  g.finalize()

  # Save image ids and features in a dictionary.
  test_features = dict()

  # The session is closed when the block exits, even on errors.
  with tf.Session(graph=g) as sess:
    saver.restore(sess, FLAGS.checkpoint_path)

    with open(FLAGS.json_file) as json_f:
      test_json = json.load(json_f)

    for k, image_set in enumerate(test_json, 1):
      set_id = image_set["set_id"]
      print(str(k) + " : " + set_id)
      for image in image_set["items"]:
        filename = os.path.join(FLAGS.image_dir, set_id,
                                str(image["index"]) + ".jpg")
        # JPEG data is binary, so the file is opened in binary mode.
        with tf.gfile.GFile(filename, "rb") as f:
          image_feed = f.read()

        [feat, rnn_feat] = sess.run([model.image_embeddings,
                                     model.rnn_image_embeddings],
                                    feed_dict={"image_feed:0": image_feed})

        image_name = set_id + "_" + str(image["index"])
        test_features[image_name] = dict()
        test_features[image_name]["image_feat"] = np.squeeze(feat)
        test_features[image_name]["image_rnn_feat"] = np.squeeze(rnn_feat)

  with open(FLAGS.feature_file, "wb") as f:
    pkl.dump(test_features, f)


if __name__ == "__main__":
  tf.app.run()
================================================
FILE: polyvore/run_inference_siamese.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Run the inference of Siamese Network given input images."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import json
import tensorflow as tf
import pickle as pkl
import numpy as np
import configuration
import polyvore_model_siamese as polyvore_model
# Command-line flags of the Siamese feature extraction script.
FLAGS = tf.flags.FLAGS
# Checkpoint location; main() hands this value directly to saver.restore().
tf.flags.DEFINE_string("checkpoint_path", "",
"Model checkpoint file or directory containing a "
"model checkpoint file.")
# JSON list of outfits; main() reads "set_id" and "items" from every entry.
tf.flags.DEFINE_string("json_file", "data/label/test-no-dup.json",
"Json file containing the inference data.")
# Images are read from <image_dir>/<set_id>/<index>.jpg.
tf.flags.DEFINE_string("image_dir", "data/images",
"Directory containing images.")
# Despite the help text this is the path of the output pickle FILE; main()
# returns immediately when the file already exists.
tf.flags.DEFINE_string("feature_file",
"data/features/test_features_siamese.pkl",
"Directory to save the features")
def main(_):
  """Extracts Siamese image features for all test items and pickles them.

  Does nothing when FLAGS.feature_file already exists. Otherwise the
  inference graph is built, the checkpoint is restored, and for every item of
  every outfit in FLAGS.json_file the image embedding is computed. The result
  is a dict keyed by "<set_id>_<index>" with entry "image_feat", written to
  FLAGS.feature_file with pickle.
  """
  if os.path.isfile(FLAGS.feature_file):
    print("Feature file already exist.")
    return

  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model_config = configuration.ModelConfig()
    model = polyvore_model.PolyvoreModel(model_config, mode="inference")
    model.build()
    saver = tf.train.Saver()
  g.finalize()

  # Save image ids and features in a dictionary.
  test_features = dict()

  # The session is closed when the block exits, even on errors.
  with tf.Session(graph=g) as sess:
    saver.restore(sess, FLAGS.checkpoint_path)

    with open(FLAGS.json_file) as json_f:
      test_json = json.load(json_f)

    for k, image_set in enumerate(test_json, 1):
      set_id = image_set["set_id"]
      print(str(k) + " : " + set_id)
      for image in image_set["items"]:
        filename = os.path.join(FLAGS.image_dir, set_id,
                                str(image["index"]) + ".jpg")
        # JPEG data is binary, so the file is opened in binary mode.
        with tf.gfile.GFile(filename, "rb") as f:
          image_feed = f.read()

        [feat] = sess.run([model.image_embeddings],
                          feed_dict={"image_feed:0": image_feed})

        image_name = set_id + "_" + str(image["index"])
        test_features[image_name] = dict()
        test_features[image_name]["image_feat"] = np.squeeze(feat)

  with open(FLAGS.feature_file, "wb") as f:
    pkl.dump(test_features, f)


if __name__ == "__main__":
  tf.app.run()
================================================
FILE: polyvore/run_inference_vse.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Run the inference of Siamese Network given input images."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import json
import tensorflow as tf
import pickle as pkl
import numpy as np
import configuration
import polyvore_model_vse as polyvore_model
# Command-line flags of the VSE feature extraction script.
FLAGS = tf.flags.FLAGS
# Checkpoint location; main() hands this value directly to saver.restore().
tf.flags.DEFINE_string("checkpoint_path", "",
"Model checkpoint file or directory containing a "
"model checkpoint file.")
# JSON list of outfits; main() reads "set_id" and "items" from every entry.
tf.flags.DEFINE_string("json_file", "data/label/test-no-dup.json",
"Json file containing the inference data.")
# Images are read from <image_dir>/<set_id>/<index>.jpg.
tf.flags.DEFINE_string("image_dir", "data/images",
"Directory containing images.")
# Despite the help text this is the path of the output pickle FILE; main()
# returns immediately when the file already exists.
# NOTE(review): the default file name says "siamese" although this script
# imports polyvore_model_vse -- looks like a copy-paste leftover; confirm
# before relying on the default.
tf.flags.DEFINE_string("feature_file",
"data/features/test_features_siamese.pkl",
"Directory to save the features")
def main(_):
  """Extracts VSE image features for all test items and pickles them.

  Does nothing when FLAGS.feature_file already exists. Otherwise the
  inference graph is built, the checkpoint is restored, and for every item of
  every outfit in FLAGS.json_file the image embedding is computed. The result
  is a dict keyed by "<set_id>_<index>" with entry "image_feat", written to
  FLAGS.feature_file with pickle.
  """
  if os.path.isfile(FLAGS.feature_file):
    print("Feature file already exist.")
    return

  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model_config = configuration.ModelConfig()
    model = polyvore_model.PolyvoreModel(model_config, mode="inference")
    model.build()
    saver = tf.train.Saver()
  g.finalize()

  # Save image ids and features in a dictionary.
  test_features = dict()

  # The session is closed when the block exits, even on errors.
  with tf.Session(graph=g) as sess:
    saver.restore(sess, FLAGS.checkpoint_path)

    with open(FLAGS.json_file) as json_f:
      test_json = json.load(json_f)

    for k, image_set in enumerate(test_json, 1):
      set_id = image_set["set_id"]
      print(str(k) + " : " + set_id)
      for image in image_set["items"]:
        filename = os.path.join(FLAGS.image_dir, set_id,
                                str(image["index"]) + ".jpg")
        # JPEG data is binary, so the file is opened in binary mode.
        with tf.gfile.GFile(filename, "rb") as f:
          image_feed = f.read()

        [feat] = sess.run([model.image_embeddings],
                          feed_dict={"image_feed:0": image_feed})

        image_name = set_id + "_" + str(image["index"])
        test_features[image_name] = dict()
        test_features[image_name]["image_feat"] = np.squeeze(feat)

  with open(FLAGS.feature_file, "wb") as f:
    pkl.dump(test_features, f)


if __name__ == "__main__":
  tf.app.run()
================================================
FILE: polyvore/set_generation.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Given multimodal queries, complete the outfit wiht bi-LSTM and VSE model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
import math
import os
import pickle as pkl
import tensorflow as tf
import numpy as np
import configuration
import polyvore_model_bi as polyvore_model
# Command-line flags of the outfit generation script.
FLAGS = tf.flags.FLAGS

tf.flags.DEFINE_string("checkpoint_path", "",
                       "Model checkpoint file or directory containing a "
                       "model checkpoint file.")
tf.flags.DEFINE_string("image_dir", "", "Directory containing images.")
tf.flags.DEFINE_string("feature_file", "", "File which contains the features.")
tf.flags.DEFINE_string("word_dict_file", "", "File containing word list.")
tf.flags.DEFINE_string("query_file", "",
                       "A json file containing the query to generate outfit.")
tf.flags.DEFINE_string("result_dir", "results",
                       "Directory to save the results.")
# The two help fragments are concatenated, so the first one needs a trailing
# space to keep the sentences apart in the rendered help text.
tf.flags.DEFINE_float("balance_factor", 2.0,
                      "Trade off between image and text input. "
                      "Larger balance_factor encourages higher correlation "
                      "with text query")
def norm_row(a):
  """L2 normalize each row of a given set.

  Args:
    a: Array-like. With two or more dimensions every slice along axis 1 is
      divided by its L2 norm; with fewer dimensions the whole input is
      divided by its L2 norm.

  Returns:
    The normalized array, same shape as the input.
  """
  # Dispatch on the dimensionality explicitly instead of catching every
  # exception from the row-wise path, which would also hide unrelated errors.
  if np.ndim(a) >= 2:
    return a / np.linalg.norm(a, axis=1)[:, np.newaxis]
  return a / np.linalg.norm(a)
def rnn_one_step(sess, input_feed, lstm_state, direction='f'):
  """Run one step of the RNN.

  Args:
    sess: Object whose run(fetches=..., feed_dict=...) evaluates the graph.
    input_feed: Value fed to the RNN input tensor.
    lstm_state: Value fed to the RNN state tensor.
    direction: 'f' selects the forward RNN; any other value selects the
      backward RNN.

  Returns:
    A (lstm_state, lstm_output) tuple holding the two fetched values.
  """
  # (state fetch, logits fetch, input feed, state feed) per direction.
  tensor_names = {
      'f': ('lstm/f_state:0', 'f_logits/f_logits/BiasAdd:0',
            'lstm/f_input_feed:0', 'lstm/f_state_feed:0'),
      'b': ('lstm/b_state:0', 'b_logits/b_logits/BiasAdd:0',
            'lstm/b_input_feed:0', 'lstm/b_state_feed:0'),
  }
  which = 'f' if direction == 'f' else 'b'
  state_fetch, logits_fetch, input_name, state_name = tensor_names[which]

  new_state, logits = sess.run(
      fetches=[state_fetch, logits_fetch],
      feed_dict={input_name: input_feed, state_name: lstm_state})
  return new_state, logits
def run_forward_rnn(sess, test_idx, test_feat, num_lstm_units,
                    max_steps=10, stop_prob=0.00001):
  """Greedily generate the items that follow a query using the forward RNN.

  The query items are fed in order, starting from an all-zero state. After
  that, each step scores every row of `test_feat` against the RNN output and
  feeds the best-scoring row back in.

  Args:
    sess: session passed through to `rnn_one_step`.
    test_idx: sequence of row indices into `test_feat` forming the query.
    test_feat: 2-D feature matrix; its last row is treated as the end-of-set
      marker.
    num_lstm_units: the initial state has `2 * num_lstm_units` columns.
    max_steps: maximum number of items to generate (default 10).
    stop_prob: generation stops once the end-of-set probability exceeds this
      threshold (default 0.00001).

  Returns:
    List of generated row indices in generation order; empty for an empty
    query.
  """
  # Without at least one query item there is no RNN output to decode from.
  if len(test_idx) == 0:
    return []

  res_set = []
  lstm_state = np.zeros([1, 2 * num_lstm_units])
  for test_id in test_idx:
    input_feed = np.reshape(test_feat[test_id], [1, -1])
    # The first step runs with the all-zero initial state.
    lstm_state, lstm_output = rnn_one_step(
        sess, input_feed, lstm_state, direction='f')

  end_of_set = test_feat.shape[0] - 1
  for _ in range(max_steps):
    # Softmax over all candidate rows.
    curr_score = np.exp(np.dot(lstm_output, np.transpose(test_feat)))
    curr_score /= np.sum(curr_score)
    next_image = np.argsort(-curr_score)[0][0]
    # Stop when end-of-set is the best candidate or its probability exceeds
    # the threshold.
    if next_image == end_of_set or curr_score[0][-1] > stop_prob:
      break
    input_feed = np.reshape(test_feat[next_image], [1, -1])
    lstm_state, lstm_output = rnn_one_step(
        sess, input_feed, lstm_state, direction='f')
    res_set.append(next_image)
  return res_set
def run_backward_rnn(sess, test_idx, test_feat, num_lstm_units,
                     max_steps=10, stop_prob=0.00001):
  """Greedily generate the items that precede a query using the backward RNN.

  The query items are fed in reverse order, starting from an all-zero state.
  After that, each step scores every row of `test_feat` against the RNN
  output and feeds the best-scoring row back in.

  Args:
    sess: session passed through to `rnn_one_step`.
    test_idx: sequence of row indices into `test_feat` forming the query.
    test_feat: 2-D feature matrix; its last row is treated as the end-of-set
      marker.
    num_lstm_units: the initial state has `2 * num_lstm_units` columns.
    max_steps: maximum number of items to generate (default 10).
    stop_prob: generation stops once the end-of-set probability exceeds this
      threshold (default 0.00001).

  Returns:
    List of generated row indices in generation order (the first element is
    the item generated first); empty for an empty query.
  """
  # Without at least one query item there is no RNN output to decode from.
  if len(test_idx) == 0:
    return []

  res_set = []
  lstm_state = np.zeros([1, 2 * num_lstm_units])
  for test_id in reversed(test_idx):
    input_feed = np.reshape(test_feat[test_id], [1, -1])
    lstm_state, lstm_output = rnn_one_step(
        sess, input_feed, lstm_state, direction='b')

  end_of_set = test_feat.shape[0] - 1
  for _ in range(max_steps):
    # Softmax over all candidate rows.
    curr_score = np.exp(np.dot(lstm_output, np.transpose(test_feat)))
    curr_score /= np.sum(curr_score)
    next_image = np.argsort(-curr_score)[0][0]
    # Stop when end-of-set is the best candidate or its probability exceeds
    # the threshold.
    if next_image == end_of_set or curr_score[0][-1] > stop_prob:
      break
    input_feed = np.reshape(test_feat[next_image], [1, -1])
    lstm_state, lstm_output = rnn_one_step(
        sess, input_feed, lstm_state, direction='b')
    res_set.append(next_image)
  return res_set
def run_fill_rnn(sess, start_id, end_id, num_blank, test_feat, num_lstm_units):
  """Fill in `num_blank` items between a start item and an end item.

  A forward pass seeded with `start_id` and a backward pass seeded with
  `end_id` each produce one RNN output per blank. The two outputs for the
  same blank are summed and the best-scoring row of `test_feat` is chosen.

  Args:
    sess: session passed through to `rnn_one_step`.
    start_id: row index of the item before the blanks.
    end_id: row index of the item after the blanks.
    num_blank: number of items to insert; zero or negative means none.
    test_feat: 2-D feature matrix whose rows are the candidates.
    num_lstm_units: the initial state has `2 * num_lstm_units` columns.

  Returns:
    `[start_id] + <chosen row indices> + [end_id]`.
  """
  # A negative count would leave both output lists empty and make the final
  # np.dot fail on mismatched shapes, so treat it like "nothing to fill".
  if num_blank <= 0:
    return [start_id, end_id]

  f_outputs = _greedy_fill_outputs(
      sess, start_id, num_blank, test_feat, num_lstm_units, 'f')
  # The backward pass walks away from the end item, so reverse its outputs
  # to line them up with the forward ones blank by blank.
  b_outputs = _greedy_fill_outputs(
      sess, end_id, num_blank, test_feat, num_lstm_units, 'b')[::-1]

  outputs = np.asarray(f_outputs) + np.asarray(b_outputs)
  score = np.exp(np.dot(outputs, np.transpose(test_feat)))
  score /= np.sum(score, axis=1)[:, np.newaxis]
  blank_ids = np.argmax(score, axis=1)
  return [start_id] + list(blank_ids) + [end_id]


def _greedy_fill_outputs(sess, seed_id, num_blank, test_feat, num_lstm_units,
                         direction):
  """Collect `num_blank` RNN outputs from a greedy roll-out seeded with one item."""
  lstm_state = np.zeros([1, 2 * num_lstm_units])
  input_feed = np.reshape(test_feat[seed_id], [1, -1])
  lstm_state, lstm_output = rnn_one_step(
      sess, input_feed, lstm_state, direction=direction)
  outputs = []
  for _ in range(num_blank):
    outputs.append(lstm_output[0])
    # Feed the best-scoring candidate back in to obtain the next output.
    curr_score = np.exp(np.dot(lstm_output, np.transpose(test_feat)))
    curr_score /= np.sum(curr_score)
    next_image = np.argsort(-curr_score)[0][0]
    input_feed = np.reshape(test_feat[next_image], [1, -1])
    lstm_state, lstm_output = rnn_one_step(
        sess, input_feed, lstm_state, direction=direction)
  return outputs
def run_set_inference(sess, set_name, test_ids, test_feat, num_lstm_units):
  """Generate a full outfit (as row indices) from a list of query item names.

  Steps:
    1. Run the forward and backward RNN on the first query item alone to get
       a first draft of the set.
    2. With two or more query items, locate each remaining query item's
       nearest neighbour in that draft and use the spread of the positions as
       the number of blanks to fill. Only the first two query items are
       passed to `run_fill_rnn`.
    3. Extend the resulting core in both directions with the RNNs.

  Args:
    sess: session passed through to the RNN helpers.
    set_name: list of query item names.
    test_ids: list of all item names; the position of a name is its row
      index in `test_feat`.
    test_feat: 2-D feature matrix.
    num_lstm_units: passed through to the RNN helpers.

  Returns:
    List of row indices for the generated outfit, an empty list for an empty
    query, or None (after printing 'not found') when a query name is not in
    `test_ids`.
  """
  test_idx = []
  for name in set_name:
    try:
      test_idx.append(test_ids.index(name))
    except ValueError:
      print('not found')
      return
  if not test_idx:
    # Nothing to seed the RNNs with.
    return []

  # Dynamic search: run the whole bi-LSTM on the first item.
  first_f_set = run_forward_rnn(sess, test_idx[:1], test_feat, num_lstm_units)
  first_b_set = run_backward_rnn(sess, test_idx[:1], test_feat, num_lstm_units)
  first_posi = len(first_b_set)
  first_set = first_b_set + test_idx[:1] + first_f_set

  if len(set_name) >= 2:
    current_set = norm_row(test_feat[first_set, :])
    all_position = [first_posi]
    for test_id in test_idx[1:]:
      # Position of the draft item most similar to this query item.
      insert_posi = np.argmax(
          np.dot(norm_row(test_feat[test_id, :]), np.transpose(current_set)))
      all_position.append(insert_posi)

    # Fill the items between the outermost positions. When every position
    # coincides the difference is -1, so clamp it to "no blanks".
    start_posi = np.min(all_position)
    end_posi = np.max(all_position)
    num_blank = max(int(end_posi - start_posi) - 1, 0)
    sets = run_fill_rnn(sess, test_idx[0], test_idx[1],
                        num_blank, test_feat, num_lstm_units)
  else:
    # Single query item: the core is just the query.
    sets = test_idx

  # Run the bi-LSTM again to extend the core in both directions.
  f_set = run_forward_rnn(sess, sets, test_feat, num_lstm_units)
  b_set = run_backward_rnn(sess, sets, test_feat, num_lstm_units)
  # The backward result is in generation order, so reverse it to place it
  # before the core.
  return b_set[::-1] + sets + f_set
def nn_search(i, test_emb, word_vec):
  """Find the embedding row closest to item `i` shifted towards a word vector.

  Args:
    i: row index of the reference item in `test_emb`.
    test_emb: matrix of item embeddings, one row per item.
    word_vec: embedding of the text query; it is weighted by
      `FLAGS.balance_factor` before being added to the item embedding.

  Returns:
    Index of the row of `test_emb` with the largest dot product against the
    combined query vector.
  """
  combined_query = test_emb[i] + FLAGS.balance_factor * word_vec
  similarity = np.dot(test_emb, np.transpose(combined_query))
  return np.argmax(similarity)
def main(_):
  """Generate an outfit for the first query in --query_file.

  Restores the model from --checkpoint_path, loads the pre-computed image
  features from --feature_file and runs the bi-LSTM set inference on the
  image query. When the query has a non-empty text part, every generated
  (non-query) item is replaced by its nearest neighbour towards the text
  embedding. The images of the final set are copied into --result_dir.
  """
  import shutil  # Local import: only used to copy the result images.

  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model_config = configuration.ModelConfig()
    model = polyvore_model.PolyvoreModel(model_config, mode="inference")
    model.build()
    saver = tf.train.Saver()
  g.finalize()

  # Bind the session to `g` explicitly so it does not depend on which graph
  # is the default one at this point.
  with tf.Session(graph=g) as sess:
    saver.restore(sess, FLAGS.checkpoint_path)

    # NOTE(security): unpickling can execute arbitrary code; only load
    # feature files that come from a trusted source.
    with open(FLAGS.feature_file, "rb") as f:
      test_data = pkl.load(f)

    # Materialize the keys: a Python 3 dict view supports neither indexing
    # nor .index().
    test_ids = list(test_data.keys())
    first_item = test_data[test_ids[0]]
    # test_feat gets one extra all-zero row at the end; the RNN helpers treat
    # the last row as the end-of-set marker.
    test_feat = np.zeros((len(test_ids) + 1,
                          len(first_item["image_rnn_feat"])))
    test_emb = np.zeros((len(test_ids), len(first_item["image_feat"])))
    for i, test_id in enumerate(test_ids):
      # Image feature in the RNN space.
      test_feat[i] = test_data[test_id]["image_rnn_feat"]
      # Image feature in the joint embedding space.
      test_emb[i] = test_data[test_id]["image_feat"]
    test_emb = norm_row(test_emb)

    # Load queries from the JSON file.
    with open(FLAGS.query_file) as f:
      queries = json.load(f)

    # Get the word embedding.
    [word_emb] = sess.run([model.embedding_map])

    # Read the word list; the first token of each line is the word.
    with open(FLAGS.word_dict_file) as f:
      words = [line.split()[0] for line in f.read().splitlines()]

    if not os.path.isdir(FLAGS.result_dir):
      os.makedirs(FLAGS.result_dir)

    # Only the first query is run for the demo.
    for q in queries[:1]:
      set_name = q['image_query']
      print(set_name)

      # Run the bi-LSTM model using the image query.
      rnn_sets = run_set_inference(sess, set_name, test_ids,
                                   test_feat, model_config.num_lstm_units)
      print(rnn_sets)
      if rnn_sets is None:
        # A query item was not found; run_set_inference already reported it.
        return

      # Re-rank the LSTM prediction by similarity with the text query.
      word_query = str(q['text_query'])
      print(word_query)
      if word_query != "":
        # Row indices of the query images (all known to exist at this point).
        test_idx = [test_ids.index(name) for name in set_name]

        # Embedding rows are offset by one relative to the word list position.
        query_words = set(word_query.split())
        word_ids = [k + 1 for k, w in enumerate(words) if w in query_words]
        print(word_ids)
        if word_ids:
          query_emb = norm_row(np.sum(word_emb[word_ids], axis=0))
          for i, j in enumerate(rnn_sets):
            # Query items stay; only generated items are re-ranked.
            if j not in test_idx:
              rnn_sets[i] = nn_search(j, test_emb, query_emb)
        else:
          # An all-zero query embedding would normalize to NaN and collapse
          # every item onto index 0, so keep the RNN result unchanged.
          print('no query word found in the dictionary')

      print(rnn_sets)

      # Copy the images of the final set into the result directory.
      for i, idx in enumerate(rnn_sets):
        image = test_ids[idx]
        name = image.split('_')
        src = '%s/%s/%s.jpg' % (FLAGS.image_dir, name[0], name[1])
        dst = '%s/%d_%s.jpg' % (FLAGS.result_dir, i, image)
        try:
          # shutil.copy avoids passing file names through a shell.
          shutil.copy(src, dst)
        except (IOError, OSError) as err:
          # Best effort, like the previous `cp` call: report and continue.
          print('could not copy %s: %s' % (src, err))
# Script entry point: hand control to tf.app.run(), which is expected to parse
# the command-line flags and then call main().
if __name__ == "__main__":
tf.app.run()
================================================
FILE: polyvore/train.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Train the model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import configuration
import polyvore_model_bi as polyvore_model
# Parsed command-line flag values; read inside main().
FLAGS = tf.app.flags.FLAGS
# Training data location; main() asserts that this flag is non-empty.
tf.flags.DEFINE_string("input_file_pattern", "",
"File pattern of sharded TFRecord input files.")
# Copied onto the model config in main().
tf.flags.DEFINE_string("inception_checkpoint_file", "",
"Path to a pretrained inception_v3 model.")
# Checkpoint directory; main() asserts it is non-empty and creates it if missing.
tf.flags.DEFINE_string("train_dir", "",
"Directory for saving and loading model checkpoints.")
# Forwarded to PolyvoreModel as `train_inception`.
tf.flags.DEFINE_boolean("train_inception", False,
"Whether to train inception submodel variables.")
# Forwarded to tf.contrib.slim.learning.train as `number_of_steps`.
tf.flags.DEFINE_integer("number_of_steps", 1000000, "Number of training steps.")
# Forwarded to tf.contrib.slim.learning.train as `log_every_n_steps`.
tf.flags.DEFINE_integer("log_every_n_steps", 1,
"Frequency at which loss and global step are logged.")
# Set TensorFlow's logging threshold to INFO.
tf.logging.set_verbosity(tf.logging.INFO)
def main(unused_argv):
  """Build the polyvore_model_bi training graph from the flags and train it.

  Requires --input_file_pattern and --train_dir. The training directory is
  created when it does not exist yet.
  """
  assert FLAGS.input_file_pattern, "--input_file_pattern is required"
  assert FLAGS.train_dir, "--train_dir is required"

  # Model and training hyper-parameters.
  model_config = configuration.ModelConfig()
  model_config.input_file_pattern = FLAGS.input_file_pattern
  model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file
  training_config = configuration.TrainingConfig()

  # Make sure the output directory exists.
  output_dir = FLAGS.train_dir
  if not tf.gfile.IsDirectory(output_dir):
    tf.logging.info("Creating training directory: %s", output_dir)
    tf.gfile.MakeDirs(output_dir)

  graph = tf.Graph()
  with graph.as_default():
    model = polyvore_model.PolyvoreModel(
        model_config, mode="train", train_inception=FLAGS.train_inception)
    model.build()

    # Learning rate, with optional staircase exponential decay.
    base_lr = tf.constant(training_config.initial_learning_rate)
    decay_fn = None
    if training_config.learning_rate_decay_factor > 0:
      batches_per_epoch = (training_config.num_examples_per_epoch /
                           model_config.batch_size)
      steps_per_decay = int(batches_per_epoch *
                            training_config.num_epochs_per_decay)

      def decay_fn(learning_rate, global_step):
        return tf.train.exponential_decay(
            learning_rate,
            global_step,
            decay_steps=steps_per_decay,
            decay_rate=training_config.learning_rate_decay_factor,
            staircase=True)

    # Optimizer step with gradient clipping.
    train_op = tf.contrib.layers.optimize_loss(
        loss=model.total_loss,
        global_step=model.global_step,
        learning_rate=base_lr,
        optimizer=training_config.optimizer,
        clip_gradients=training_config.clip_gradients,
        learning_rate_decay_fn=decay_fn)

    # Saver used for writing and restoring checkpoints.
    checkpoint_saver = tf.train.Saver(
        max_to_keep=training_config.max_checkpoints_to_keep)

  # Run the training loop.
  tf.contrib.slim.learning.train(
      train_op,
      output_dir,
      log_every_n_steps=FLAGS.log_every_n_steps,
      graph=graph,
      global_step=model.global_step,
      number_of_steps=FLAGS.number_of_steps,
      init_fn=model.init_fn,
      saver=checkpoint_saver)
# Script entry point: hand control to tf.app.run(), which is expected to parse
# the command-line flags and then call main().
if __name__ == "__main__":
tf.app.run()
================================================
FILE: polyvore/train_siamese.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Train the Siamese Network."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import configuration
import polyvore_model_siamese as polyvore_model
# Parsed command-line flag values; read inside main().
FLAGS = tf.app.flags.FLAGS
# Training data location; main() asserts that this flag is non-empty.
tf.flags.DEFINE_string("input_file_pattern", "",
"File pattern of sharded TFRecord input files.")
# Copied onto the model config in main().
tf.flags.DEFINE_string("inception_checkpoint_file", "",
"Path to a pretrained inception_v3 model.")
# Checkpoint directory; main() asserts it is non-empty and creates it if missing.
tf.flags.DEFINE_string("train_dir", "",
"Directory for saving and loading model checkpoints.")
# Forwarded to PolyvoreModel as `train_inception`.
tf.flags.DEFINE_boolean("train_inception", False,
"Whether to train inception submodel variables.")
# Forwarded to tf.contrib.slim.learning.train as `number_of_steps`.
tf.flags.DEFINE_integer("number_of_steps", 1000000, "Number of training steps.")
# Forwarded to tf.contrib.slim.learning.train as `log_every_n_steps`.
tf.flags.DEFINE_integer("log_every_n_steps", 1,
"Frequency at which loss and global step are logged.")
# Overrides training_config.initial_learning_rate in main().
tf.flags.DEFINE_float("learning_rate", 0.2, "Initial learning rate.")
# NOTE(review): main() in this file never reads FLAGS.rnn_type; confirm whether
# this flag is still needed.
tf.flags.DEFINE_string("rnn_type", "",
"Types of rnn, lstm, gru or basic rnn.")
# Set TensorFlow's logging threshold to INFO.
tf.logging.set_verbosity(tf.logging.INFO)
def main(unused_argv):
  """Build the polyvore_model_siamese training graph from the flags and train it.

  Requires --input_file_pattern and --train_dir. The initial learning rate is
  taken from --learning_rate. The training directory is created when it does
  not exist yet.
  """
  assert FLAGS.input_file_pattern, "--input_file_pattern is required"
  assert FLAGS.train_dir, "--train_dir is required"

  # Model and training hyper-parameters.
  model_config = configuration.ModelConfig()
  model_config.input_file_pattern = FLAGS.input_file_pattern
  model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file
  training_config = configuration.TrainingConfig()
  # The learning rate may differ from the configured default.
  training_config.initial_learning_rate = FLAGS.learning_rate

  # Make sure the output directory exists.
  output_dir = FLAGS.train_dir
  if not tf.gfile.IsDirectory(output_dir):
    tf.logging.info("Creating training directory: %s", output_dir)
    tf.gfile.MakeDirs(output_dir)

  graph = tf.Graph()
  with graph.as_default():
    model = polyvore_model.PolyvoreModel(
        model_config, mode="train", train_inception=FLAGS.train_inception)
    model.build()

    # Learning rate, with optional staircase exponential decay.
    base_lr = tf.constant(training_config.initial_learning_rate)
    decay_fn = None
    if training_config.learning_rate_decay_factor > 0:
      batches_per_epoch = (training_config.num_examples_per_epoch /
                           model_config.batch_size)
      steps_per_decay = int(batches_per_epoch *
                            training_config.num_epochs_per_decay)

      def decay_fn(learning_rate, global_step):
        return tf.train.exponential_decay(
            learning_rate,
            global_step,
            decay_steps=steps_per_decay,
            decay_rate=training_config.learning_rate_decay_factor,
            staircase=True)

    # Optimizer step with gradient clipping.
    train_op = tf.contrib.layers.optimize_loss(
        loss=model.total_loss,
        global_step=model.global_step,
        learning_rate=base_lr,
        optimizer=training_config.optimizer,
        clip_gradients=training_config.clip_gradients,
        learning_rate_decay_fn=decay_fn)

    # Saver used for writing and restoring checkpoints.
    checkpoint_saver = tf.train.Saver(
        max_to_keep=training_config.max_checkpoints_to_keep)
    # checkpoint_saver = tf.train.Saver(keep_checkpoint_every_n_hours=0.1)

  # Run the training loop.
  tf.contrib.slim.learning.train(
      train_op,
      output_dir,
      log_every_n_steps=FLAGS.log_every_n_steps,
      graph=graph,
      global_step=model.global_step,
      number_of_steps=FLAGS.number_of_steps,
      init_fn=model.init_fn,
      saver=checkpoint_saver)
# Script entry point: hand control to tf.app.run(), which is expected to parse
# the command-line flags and then call main().
if __name__ == "__main__":
tf.app.run()
================================================
FILE: predict_compatibility.sh
================================================
#!/bin/bash
# Run polyvore/fashion_compatibility.py on the pre-computed test features and
# write the result to fashion_compatibility.pkl.

# Checkpoint passed to --checkpoint_path.
CHECKPOINT_DIR="model/model_final/model.ckpt-34865"

# The expansion is quoted so a path containing spaces stays a single argument.
python polyvore/fashion_compatibility.py \
  --checkpoint_path="${CHECKPOINT_DIR}" \
  --label_file="data/label/fashion_compatibility_prediction.txt" \
  --feature_file="data/features/test_features.pkl" \
  --rnn_type="lstm" \
  --direction="2" \
  --result_file="fashion_compatibility.pkl"
================================================
FILE: query.json
================================================
[
{
"image_query": [
"131138376_1",
"131138376_3"
],
"text_query": "blue"
}
]
================================================
FILE: results/README.md
================================================
The generated outfit goes here.
================================================
FILE: train.sh
================================================
#!/bin/bash
# Train the model defined in polyvore/train.py.

# Inception v3 checkpoint file.
INCEPTION_CHECKPOINT="model/inception_v3.ckpt"

# Directory to save the model. No trailing slash: "/train" is appended below,
# and a trailing slash here would yield "model/bi_lstm//train".
MODEL_DIR="model/bi_lstm"

# Run the training code.
python polyvore/train.py \
  --input_file_pattern="data/tf_records/train-no-dup-?????-of-00128" \
  --inception_checkpoint_file="${INCEPTION_CHECKPOINT}" \
  --train_dir="${MODEL_DIR}/train" \
  --train_inception=true \
  --number_of_steps=100000
# # Training Siamese Network
# # Directory to save the model.
# MODEL_DIR="model/siamese/"
# # Run the training code.
# python polyvore/train_siamese.py \
# --input_file_pattern="data/tf_records/train-no-dup-?????-of-00128" \
# --inception_checkpoint_file="${INCEPTION_CHECKPOINT}" \
# --train_dir="${MODEL_DIR}/train" \
# --train_inception=true \
# --number_of_steps=100000