Repository: xthan/polyvore Branch: master Commit: dd9e6cc450a6 Files: 32 Total size: 196.8 KB Directory structure: gitextract_bx3trk6u/ ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── data/ │ ├── build_polyvore_data.py │ ├── features/ │ │ └── README.md │ └── final_word_dict.txt ├── extract_feature.sh ├── fill_in_blank.sh ├── outfit_generation.sh ├── polyvore/ │ ├── configuration.py │ ├── fashion_compatibility.py │ ├── fill_in_blank.py │ ├── fill_in_blank_siamese.py │ ├── ops/ │ │ ├── __init__.py │ │ ├── image_embedding.py │ │ ├── image_embedding_test.py │ │ ├── image_processing.py │ │ └── inputs.py │ ├── polyvore_model_bi.py │ ├── polyvore_model_siamese.py │ ├── polyvore_model_vse.py │ ├── run_inference.py │ ├── run_inference_siamese.py │ ├── run_inference_vse.py │ ├── set_generation.py │ ├── train.py │ └── train_siamese.py ├── predict_compatibility.sh ├── query.json ├── results/ │ └── README.md └── train.sh ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # Compiled source # ################### *.com *.class *.dll *.exe *.o *.so *.pyc *.mat *.png *.jpg # Packages # ############ # it's better to unpack these files and commit the raw source # git has its own built in compression methods *.7z *.dmg *.gz *.iso *.jar *.rar *.tar *.zip *~ .gitlab .github data/label/* data/tf_records/* model/* # Logs and databases # ###################### *.log *.sql *.sqlite *.out # OS generated files # ###################### .DS_Store .DS_Store? 
._* .Spotlight-V100 .Trashes ehthumbs.db Thumbs.db
================================================
FILE: Dockerfile
================================================
# GPU image with Python 2.7 and TensorFlow 0.11.0, the version this repository
# targets (see "Required Packages" in README.md).
FROM nvidia/cuda:8.0-cudnn5-devel

# Pick up some TF dependencies.
# ca-certificates is listed explicitly because get-pip.py is fetched over
# HTTPS below and --no-install-recommends may otherwise leave it out.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        libfreetype6-dev \
        libpng12-dev \
        libzmq3-dev \
        pkg-config \
        python \
        python-dev \
        rsync \
        software-properties-common \
        unzip \
        && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Bootstrap pip for Python 2.7. The generic https://bootstrap.pypa.io/get-pip.py
# script no longer runs on Python 2.7, so the 2.7-specific script is used.
# -f makes curl fail on HTTP errors instead of saving an error page as a script.
RUN curl -fsSL -O https://bootstrap.pypa.io/pip/2.7/get-pip.py && \
    python get-pip.py && \
    rm get-pip.py

RUN pip --no-cache-dir install \
        ipykernel \
        jupyter \
        matplotlib \
        numpy \
        scipy \
        scikit-learn \
        && \
    python -m ipykernel.kernelspec

ENV TENSORFLOW_VERSION=0.11.0

# --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #
# These lines will be edited automatically by parameterized_docker_build.sh. #
# COPY _PIP_FILE_ /
# RUN pip --no-cache-dir install /_PIP_FILE_
# RUN rm -f /_PIP_FILE_

# Install TensorFlow GPU version.
RUN pip --no-cache-dir install \
    http://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-${TENSORFLOW_VERSION}-cp27-none-linux_x86_64.whl
# --- ~ DO NOT EDIT OR DELETE BETWEEN THE LINES --- #

# TensorBoard
EXPOSE 6006
# IPython
EXPOSE 8888

WORKDIR "/root"

CMD ["/bin/bash"]
================================================
FILE: LICENSE
================================================
Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity.
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright 2017 Xintong Han Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ ## Bi-LSTM model for learning fashion compatibility. Code for ACM MM'17 paper "Learning Fashion Compatibility with Bidirectional LSTMs" [[paper]](https://arxiv.org/pdf/1707.05691.pdf). Parts of the code are from an older version of Tensorflow's im2txt repo [GitHub](https://github.com/tensorflow/models/blob/master/research/im2txt). The corresponding dataset can be found on [GitHub](https://github.com/xthan/polyvore-dataset) or [Google Drive](https://drive.google.com/drive/folders/0B4Eo9mft9jwoVDNEWlhEbUNUSE0?resourcekey=0-vQg9TMSLKnmPCuuWwl5Ebw&usp=sharing). ### Contact Author: Xintong Han Contact: xintong@umd.edu ### Polyvore.com [Polyvore.com](https://www.polyvore.com/outfits/search.sets?date=day&item_count.from=4&item_count.to=10) is a popular fashion website, where users can create and upload outfit data. Here is an [example](https://www.polyvore.com/striped_blazer/set?id=227166819). ### Required Packages * **TensorFlow** ~~0.10.0~~ 0.11 ([instructions](https://www.tensorflow.org/install/)) * **NumPy** ([instructions](http://www.scipy.org/install.html)) * **scikit-learn** I actually used some version between r0.10 and r0.11 as the first commit of Tensorflow's im2txt, you might need to install r0.11 and modify some functions to run the code. 
Newer versions of Tensorflow prevent me from doing inference with my old code and restoring my models trained using this version. However, I have a commit that supports training using TensorFlow 1.0 or greater [dd1e03e](https://github.com/xthan/polyvore/tree/dd1e03e27fab12ef0051dd2a8ba7a61caaded499). I will create a new repo supporting TensorFlow version >= 1.0. #### Recommended Setup * [**docker-ce**](https://docs.docker.com/install/linux/docker-ce/ubuntu/) * [**nvidia-docker**](https://github.com/NVIDIA/nvidia-docker) * build TensorFlow image execute the below command at this repository root: ```sh docker build -t tensorflow:0.11 . ``` * run container ```sh docker run -it \ --runtime=nvidia \ -p 8888:8888 \ -p 6006:6006 \ -v $CURRENT:/root/workdir \ tensorflow:0.11 ``` ### Prepare the Training Data Download the dataset and put it in the ./data folder: 0. Decompress polyvore.tar.gz into ./data/label/ 1. Decompress plyvore-images.tar.gz to ./data/, so all outfit image folders are in ./data/images/ 2. Run the following commands to generate TFRecords in ./data/tf_records/: ``` python data/build_polyvore_data.py ``` ### Download the Inception v3 Checkpoint This model requires a pretrained *Inception v3* checkpoint file to initialize the network. This checkpoint file is provided by the [TensorFlow-Slim image classification library](https://github.com/tensorflow/models/tree/master/research/slim#tensorflow-slim-image-classification-library) which provides a suite of pre-trained image classification models. You can read more about the models provided by the library [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models). Run the following commands to download the *Inception v3* checkpoint. ```shell # Save the Inception v3 checkpoint in model folder. 
wget "http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz" tar -xvf "inception_v3_2016_08_28.tar.gz" -C ${INCEPTION_DIR} rm "inception_v3_2016_08_28.tar.gz" ``` ### Training ```shell ./train.sh ``` The models will be saved in model/bi_lstm ### Inference #### Trained model Download the trained models from the final_model folder on [Google Drive](https://drive.google.com/drive/folders/0B4Eo9mft9jwoVDNEWlhEbUNUSE0) and put it in ./model/final_model/model.ckpt-34865. #### Extract features of test data To do all three kinds of tasks mentioned in the paper, we first need to extract the features of the test images: ``` ./extract_feature.sh ``` And the image features will be in data/features/test_features.pkl. You can also perform end-to-end inference by modifying the corresponding code. For example, input a sequence of images and output a compatibility score. #### Fashion fill-in-the-blank ``` ./fill_in_blank.sh ``` Note that we further optimized some design choices in the released model. It can achieve 73.5% accuracy, which is higher than the number reported in our paper. #### Compatibility prediction ``` ./predict_compatibility.sh ``` Different from the training process where the loss is calculated in each mini batch, during testing, we get the loss against the whole test set. This is pretty slow, maybe a better method could be used (e.g., using distance between LSTM predicted representation and the target image embedding). #### Outfit generation ``` ./outfit_generation.sh ``` It generates an outfit given the image/text query in query.json, and saves the results in the results dir. For demo purposes, the query.json only contains one example: where green boxes indicate the image query, and the text query is "blue". #### Some notes We found that a late fusion of different single models (Bi-LSTM w/o VSE + VSE + Siamese) can achieve superior results on all tasks. 
These models are also available in the same folder on [Google Drive](https://drive.google.com/drive/folders/0B4Eo9mft9jwoVDNEWlhEbUNUSE0). ### Todo list - [x] Add multiple choice inference code. - [x] Add compatibility prediction inference code. - [x] Add image outfit generation code. Very similar to compatibility prediction, you can try to do it yourself if in a hurry. - [x] Release trained models. - [x] Release Siamese/VSE models. - [ ] Polish the code. ### Citation If this code or the Polyvore dataset helps your research, please cite our paper: @inproceedings{han2017learning, author = {Han, Xintong and Wu, Zuxuan and Jiang, Yu-Gang and Davis, Larry S}, title = {Learning Fashion Compatibility with Bidirectional LSTMs}, booktitle = {ACM Multimedia}, year = {2017}, } ================================================ FILE: data/build_polyvore_data.py ================================================ # Copyright 2017 Xintong Han. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Prepare Polyvore outfit data.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from datetime import datetime import json import os import random import sys import threading import numpy as np import tensorflow as tf tf.app.flags.DEFINE_string('train_label', 'data/label/train_no_dup.json', 'Training label file') tf.app.flags.DEFINE_string('test_label', 'data/label/test_no_dup.json', 'Testing label file') tf.app.flags.DEFINE_string('valid_label','data/label/valid_no_dup.json', 'Validation label file') tf.app.flags.DEFINE_string('output_directory', 'data/tf_records/', 'Output data directory') tf.app.flags.DEFINE_string('image_dir', 'data/images/', 'Directory of image patches') tf.app.flags.DEFINE_string('word_dict_file', 'data/final_word_dict.txt', 'File containing the word dictionary.') tf.app.flags.DEFINE_integer('train_shards', 128, 'Number of shards in training TFRecord files.') tf.app.flags.DEFINE_integer('test_shards', 16, 'Number of shards in test TFRecord files.') tf.app.flags.DEFINE_integer('valid_shards', 8, 'Number of shards in validation TFRecord files.') tf.app.flags.DEFINE_integer('num_threads', 8, 'Number of threads to preprocess the images.') FLAGS = tf.flags.FLAGS class Vocabulary(object): """Simple vocabulary wrapper.""" def __init__(self, vocab, unk_id): """Initializes the vocabulary. Args: vocab: A dictionary of word to word_id. unk_id: Id of the special 'unknown' word. """ self._vocab = vocab self._unk_id = unk_id def word_to_id(self, word): """Returns the integer id of a word string.""" if word in self._vocab: return self._vocab[word] else: print('unknow: ' + word) return self._unk_id def _is_png(filename): """Determine if a file contains a PNG format image. Args: filename: string, path of the image file. Returns: boolean indicating if the image is a PNG. 
""" return '.png' in filename def _int64_feature(value): """Wrapper for inserting int64 features into Example proto.""" if not isinstance(value, list): value = [value] return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) def _float_feature(value): """Wrapper for inserting float features into Example proto.""" if not isinstance(value, list): value = [value] return tf.train.Feature(float_list=tf.train.FloatList(value=value)) def _bytes_feature(value): """Wrapper for inserting bytes features into Example proto.""" return tf.train.Feature(bytes_list=tf.train.BytesList(value=[str(value)])) def _int64_feature_list(values): """Wrapper for inserting an int64 FeatureList into a SequenceExample proto.""" return tf.train.FeatureList(feature=[_int64_feature(v) for v in values]) def _int64_list_feature_list(values): """Wrapper for inserting an int64 list FeatureList into a SequenceExample proto.""" return tf.train.FeatureList(feature=[_int64_feature(v) for v in values]) def _bytes_feature_list(values): """Wrapper for inserting a bytes FeatureList into a SequenceExample proto.""" return tf.train.FeatureList(feature=[_bytes_feature(v) for v in values]) def _float_feature_list(values): """Wrapper for inserting a float FeatureList into a SequenceExample proto.""" return tf.train.FeatureList(feature=[_float_feature(v) for v in values]) def _to_sequence_example(set_info, decoder, vocab): """Builds a SequenceExample proto for an outfit. 
""" set_id = set_info['set_id'] image_data = [] image_ids = [] caption_data = [] caption_ids = [] for image_info in set_info['items']: filename = os.path.join(FLAGS.image_dir, set_id, str(image_info['index']) + '.jpg') with open(filename, "r") as f: encoded_image = f.read() try: decoded_image = decoder.decode_jpeg(encoded_image) except (tf.errors.InvalidArgumentError, AssertionError): print("Skipping file with invalid JPEG data: %s" % filename) return image_data.append(encoded_image) image_ids.append(image_info['index']) caption = image_info['name'].encode('utf-8') caption_data.append(caption) caption_id = [vocab.word_to_id(word) + 1 for word in caption.split()] caption_ids.append(caption_id) feature = {} # Only keep 8 images, if outfit has less than 8 items, repeat the last one. for index in range(8): if index >= len(image_data): feature['images/' + str(index)] = _bytes_feature(image_data[-1]) else: feature['images/' + str(index)] = _bytes_feature(image_data[index]) feature["set_id"] = _bytes_feature(set_id) feature["set_url"] = _bytes_feature(set_info['set_url']) # Likes and Views are not used in our model, but we put it into TFRecords. feature["likes"] = _int64_feature(set_info['likes']) feature["views"] = _int64_feature(set_info['views']) context = tf.train.Features(feature=feature) feature_lists = tf.train.FeatureLists(feature_list={ "caption": _bytes_feature_list(caption_data), "caption_ids": _int64_list_feature_list(caption_ids), "image_index": _int64_feature_list(image_ids) }) sequence_example = tf.train.SequenceExample( context=context, feature_lists=feature_lists) return sequence_example class ImageCoder(object): """Helper class that provides TensorFlow image coding utilities.""" def __init__(self): # Create a single Session to run all image coding calls. self._sess = tf.Session() # Initializes function that converts PNG to JPEG data. 
self._png_data = tf.placeholder(dtype=tf.string) image = tf.image.decode_png(self._png_data, channels=3) self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100) # Initializes function that decodes RGB JPEG data. self._decode_jpeg_data = tf.placeholder(dtype=tf.string) self._decode_jpeg = tf.image.decode_jpeg( self._decode_jpeg_data, channels=3) def png_to_jpeg(self, image_data): return self._sess.run(self._png_to_jpeg, feed_dict={self._png_data: image_data}) def decode_jpeg(self, image_data): image = self._sess.run(self._decode_jpeg, feed_dict={self._decode_jpeg_data: image_data}) assert len(image.shape) == 3 assert image.shape[2] == 3 return image def _process_image_files_batch(coder, thread_index, ranges, name, all_sets, vocab, num_shards): """Processes and saves list of images as TFRecord in 1 thread. """ # Each thread produces N shards where N = int(num_shards / num_threads). # For instance, if num_shards = 128, and the num_threads = 2, then the first # thread would produce shards [0, 64). num_threads = len(ranges) assert not num_shards % num_threads num_shards_per_batch = int(num_shards / num_threads) shard_ranges = np.linspace(ranges[thread_index][0], ranges[thread_index][1], num_shards_per_batch + 1).astype(int) num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0] counter = 0 for s in xrange(num_shards_per_batch): # Generate a sharded version of the file name, e.g. 
'train-00002-of-00010' shard = thread_index * num_shards_per_batch + s output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards) output_file = os.path.join(FLAGS.output_directory, output_filename) writer = tf.python_io.TFRecordWriter(output_file) shard_counter = 0 files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int) for i in files_in_shard: sequence_example = _to_sequence_example(all_sets[i], coder, vocab) if not sequence_example: print('fail for set: ' + all_sets[i]['set_id']) continue writer.write(sequence_example.SerializeToString()) shard_counter += 1 counter += 1 if not counter % 100: print('%s [thread %d]: Processed %d of %d images in thread batch.' % (datetime.now(), thread_index, counter, num_files_in_thread)) sys.stdout.flush() writer.close() print('%s [thread %d]: Wrote %d images to %s' % (datetime.now(), thread_index, shard_counter, output_file)) sys.stdout.flush() shard_counter = 0 print('%s [thread %d]: Wrote %d images to %d shards.' % (datetime.now(), thread_index, counter, num_files_in_thread)) sys.stdout.flush() def _process_image_files(name, all_sets, vocab, num_shards): """Process and save list of images as TFRecord of Example protos. """ # Break all images into batches with a [ranges[i][0], ranges[i][1]]. spacing = np.linspace(0, len(all_sets), FLAGS.num_threads + 1).astype(np.int) ranges = [] for i in xrange(len(spacing) - 1): ranges.append([spacing[i], spacing[i+1]]) # Launch a thread for each batch. print('Launching %d threads for spacings: %s' % (FLAGS.num_threads, ranges)) sys.stdout.flush() # Create a mechanism for monitoring when all threads are finished. coord = tf.train.Coordinator() # Create a generic TensorFlow-based utility for converting all image codings. 
coder = ImageCoder() threads = [] for thread_index in xrange(len(ranges)): args = (coder, thread_index, ranges, name, all_sets, vocab, num_shards) t = threading.Thread(target=_process_image_files_batch, args=args) t.start() threads.append(t) # Wait for all the threads to terminate. coord.join(threads) print('%s: Finished writing all %d fashion sets in data set.' % (datetime.now(), len(all_sets))) sys.stdout.flush() def _create_vocab(filename): """Creates the vocabulary of word to word_id. """ # Create the vocabulary dictionary. word_counts = open(filename).read().splitlines() reverse_vocab = [x.split()[0] for x in word_counts] unk_id = len(reverse_vocab) vocab_dict = dict([(x, y) for (y, x) in enumerate(reverse_vocab)]) vocab = Vocabulary(vocab_dict, unk_id) return vocab def _find_image_files(labels_file, name): """Build a list of all images files and labels in the data set. """ # Read image ids all_sets = json.load(open(labels_file)) # Shuffle the ordering of all image files in order to guarantee # random ordering of the images with respect to label in the # saved TFRecord files. Make the randomization repeatable. shuffled_index = range(len(all_sets)) random.seed(12345) random.shuffle(shuffled_index) all_sets = [all_sets[i] for i in shuffled_index] print('Found %d fashion sets.' % (len(all_sets))) return all_sets def _process_dataset(name, label_file, vocab, num_shards): """Process a complete data set and save it as a TFRecord. Args: name: string, unique identifier specifying the data set. directory: string, root path to the data set. num_shards: integer number of shards for this data set. labels_file: string, path to the labels file. 
""" print(label_file) all_sets = _find_image_files(label_file, name) _process_image_files(name, all_sets, vocab, num_shards) def main(unused_argv): assert not FLAGS.train_shards % FLAGS.num_threads, ( 'Please make the FLAGS.num_threads commensurate with FLAGS.train_shards') assert not FLAGS.test_shards % FLAGS.num_threads, ( 'Please make the FLAGS.num_threads commensurate with ' 'FLAGS.test_shards') assert not FLAGS.valid_shards % FLAGS.num_threads, ( 'Please make the FLAGS.num_threads commensurate with ' 'FLAGS.valid_shards') print('Saving results to %s' % FLAGS.output_directory) vocab = _create_vocab(FLAGS.word_dict_file) # Run it! _process_dataset('valid-no-dup', FLAGS.valid_label, vocab, FLAGS.valid_shards) _process_dataset('test-no-dup', FLAGS.test_label, vocab, FLAGS.test_shards) _process_dataset('train-no-dup', FLAGS.train_label, vocab, FLAGS.train_shards) if __name__ == '__main__': tf.app.run() ================================================ FILE: data/features/README.md ================================================ Extracted image features go here. 
================================================ FILE: data/final_word_dict.txt ================================================ black 9909 leather 8516 bag 6350 women's 5810 top 4504 jeans 4133 dress 4100 gold 4031 white 3837 earrings 3619 iphone 3613 sunglasses 3382 necklace 3381 skirt 3254 boots 3142 suede 3004 jacket 2922 case 2871 denim 2763 ring 2703 mini 2622 yoins 2563 high 2535 blue 2533 clutch 2497 plus 2465 bracelet 2418 skinny 2164 coat 2127 shoulder 2125 sandals 2122 long 2112 set 2106 women 2106 lace 2069 red 2014 new 1996 print 1986 pink 1961 sleeve 1954 ankle 1949 silver 1894 pre-owned 1877 lipstick 1861 shorts 1850 topshop 1818 sweater 1788 size 1749 faux 1711 vintage 1699 shoes 1693 rose 1689 pumps 1651 de 1642 michael 1634 crop 1634 color 1598 eye 1554 watch 1541 shirt 1538 round 1527 backpack 1526 diamond 1506 brown 1499 tote 1477 velvet 1474 floral 1454 neck 1382 lip 1372 saint 1356 laurent 1328 cropped 1299 stud 1282 wool 1278 blouse 1277 sheinside 1264 shein 1260 t-shirt 1209 chanel 1199 small 1191 gucci 1187 crossbody 1185 fashion 1184 kate 1170 short 1156 strap 1156 star 1154 classic 1154 sneakers 1153 womens 1151 heel 1137 cover 1101 toe 1075 kors 1075 hat 1066 nail 1061 grey 1051 chain 1044 platform 1043 boho 1040 alexander 1040 pants 1035 flower 1033 pearl 1018 hair 1009 dolce 1007 crystal 996 metal 995 metallic 994 cotton 988 silk 983 design 979 green 974 love 969 marc 969 valentino 951 tank 951 ripped 935 york 927 striped 921 collection 919 yellow 918 converse 914 gabbana 912 printed 911 embellished 910 mascara 900 heart 896 knit 894 double 885 spade 876 waist 875 fur 867 h&m 862 choker 846 large 841 bow 830 eau 828 medium 826 light 819 pu 818 flat 811 lace-up 811 boohoo 811 matte 807 jewelry 805 embroidered 801 heels 791 style 790 tee 788 pendant 772 patent 769 taylor 766 miu 762 wrap 761 casual 759 zip 756 beauty 747 slim 738 collar 737 charlotte 736 distressed 733 nars 725 satchel 724 christian 722 scarf 720 blazer 719 givenchy 717 
sleeveless 709 sandal 707 drop 702 jean 699 makeup 699 frame 696 island 696 cuff 696 front 696 river 691 boot 683 pencil 682 bobbi 664 forever 663 jacobs 660 liquid 647 cream 646 la 644 back 641 look 641 tassel 640 shadow 633 stripe 632 cashmere 629 pleated 629 boyfriend 626 miss 625 louboutin 624 trousers 624 dior 620 oversized 619 zipper 614 moto 614 sterling 612 satin 607 sweatshirt 607 eyeshadow 605 nude 604 palette 604 jumper 604 cross 602 chuck 601 stella 599 le 594 pump 592 button 589 cat 588 biker 587 burberry 586 one 583 rossi 583 london 573 chunky 572 fringe 567 stretch 566 dark 564 plaid 562 powder 560 solid 557 cut 555 belt 553 parfum 551 midi 548 wang 547 gianvito 547 canvas 547 cardigan 546 pocket 544 adidas 542 handbag 536 hem 533 retro 532 beanie 531 tie 531 ladies 529 men's 528 body 528 oz 521 mcqueen 518 studded 516 wide 516 box 515 14k 515 loose 515 fit 513 gold-tone 512 bangle 507 polish 506 vans 505 trainers 504 mccartney 504 block 503 mac 499 low 498 nails 496 stone 494 selfridge 490 navy 489 nike 485 detail 482 summer 481 booties 479 wallet 476 pointed 475 flats 474 glitter 473 super 472 mango 467 gloss 466 quilted 466 blush 463 chloé 460 square 457 buckle 454 ray-ban 448 open 447 x 447 pack 444 bags 439 petite 438 ribbed 438 leggings 437 leg 436 colour 436 flap 435 beach 434 soft 434 jimmy 429 skater 428 chiffon 428 cami 427 wash 423 stiletto 422 hot 421 rouge 420 v 417 steel 414 turtleneck 414 choo 410 clear 408 natural 405 rag 404 bone 403 orange 402 rise 402 oz. 
400 pattern 400 russe 399 preowned 396 rings 395 bucket 393 waisted 392 mid 392 zara 391 eyeliner 386 crepe 384 rhinestone 384 brush 384 mesh 383 beige 383 cosmetics 379 knitted 377 bomber 377 giuseppe 375 clothing 374 charm 370 zanotti 370 drawstring 369 wedge 368 tory 368 pure 368 olivia 366 moschino 366 multi 365 glasses 364 accessories 362 band 361 burch 360 couture 359 acne 359 chic 358 maison 358 18k 358 vest 358 layered 356 jersey 355 logo 355 knee 354 trim 350 statement 349 golden 348 balmain 348 paris 346 phone 346 beaded 345 lapel 343 acetate 342 strappy 341 aviator 340 stainless 339 cap 337 sneaker 337 spray 336 steve 336 maxi 335 crochet 333 madden 333 fedora 331 shoe 331 sporty 329 side 329 triangle 327 earring 325 pom 324 edition 321 fringed 321 lauren 320 rebecca 318 fendi 318 wedding 315 eyes 315 evening 314 victoria 314 textured 314 studs 314 liner 313 circle 313 foundation 312 girl 312 rockstud 311 monki 311 sheer 310 unisex 310 face 309 party 307 elastic 307 bootie 307 v-neck 306 waterproof 305 pullover 305 sleeves 304 handbags 303 prada 303 alice 302 dot 302 designer 301 hooded 299 limited 299 moon 298 burgundy 297 hoop 297 studios 295 contrast 294 j.crew 292 pockets 291 authentic 291 purple 290 plated 289 feather 288 sexy 288 straw 286 lens 284 straight 283 bra 281 bling 281 candy 281 stylish 280 brand 280 men 279 ear 278 preppy 278 wool-blend 276 leaf 276 prom 275 dorothy 274 day 274 marni 273 sole 273 hoodie 271 quartz 271 handle 271 perkins 271 pin 271 check 271 secret 270 margiela 268 purse 266 art 265 asos 264 outerwear 263 flared 262 woven 261 balenciaga 260 oscar 258 big 257 full 256 clip 256 balm 256 originals 255 gray 254 hand 253 swarovski 253 envelope 253 lash 252 gel 252 lim 252 goop 252 messenger 251 leopard 251 geometric 249 smith 248 christmas 247 daisy 246 coral 246 pro 244 trench 244 tom 244 khaki 243 a-line 243 sequin 243 phillip 243 heeled 242 yves 240 store 240 isabel 240 sun 238 minkoff 238 cutout 237 gift 237 camel 237 
rock 237 j 237 ml 236 row 235 lacquer 235 klein 233 travel 232 hollow 232 formal 232 renta 232 urban 231 belted 231 jane 230 air 229 topic 228 tan 228 tone 227 chicnova 227 mirror 227 peep 225 two 225 line 224 combat 224 single 224 amazon.com 224 monogram 223 cable 223 guess 223 pant 222 bodycon 222 ford 221 chicwish 221 marant 220 coffee 219 ruffle 219 dr. 218 proenza 218 schouler 217 colors 217 leather-look 217 loafers 216 slip 216 mirrored 216 notebook 215 chloe 215 beckham 215 flowers 215 school 214 hi 213 6s 213 calf 213 accessorize 213 winter 212 cute 212 headband 212 blend 211 baker 211 skull 211 plain 211 armani 210 basic 208 pastel 207 sweet 207 mid-rise 207 jacquard 207 dial 207 court 206 dsquared2 206 chelsea 205 mint 205 halter 205 online 205 crew 204 embroidery 204 embossed 203 martens 203 apple 203 toilette 202 butterfly 202 baseball 202 patch 201 gown 201 von 201 free 201 arrow 200 flare 200 victoria's 199 asymmetric 198 olympia 197 ombre 197 glass 197 lips 196 breasted 195 jet 195 lanvin 195 superstar 195 saffiano 194 linda 194 cotton-blend 193 socks 192 rubber 192 american 191 graphic 191 ralph 191 floppy 190 volume 190 spring 190 key 189 letter 189 cape 189 felt 189 pave 187 bar 187 artificial 186 peach 186 polka 185 calvin 185 ruffled 185 boutique 184 galaxy 184 luxe 184 skin 184 panel 184 cat-eye 184 simple 184 nyx 184 bralet 183 ox 183 kit 183 punk 183 paul 183 length 181 finish 181 street 181 james 181 perfect 180 snake 180 dresses 180 fall 180 tights 180 patchwork 180 aquazzura 179 vince 179 pouch 179 studio 178 elizabeth 177 ultra 177 modern 176 m 176 bead 176 frayed 175 6/6s 175 onyx 175 shine 175 joseph 175 ball 174 lime 174 table 174 original 173 elegant 173 maybelline 172 versace 172 city 172 saddle 172 west 171 moda 171 round-frame 171 ted 171 diane 171 crown 170 infinity 170 max 170 life 169 ballet 168 aeropostale 168 home 167 braided 167 brim 167 butter 167 farrow 167 intense 167 washed 166 bright 166 bikini 166 tall 166 shop 166 
grunge 165 australia 165 effect 165 cocktail 165 noir 164 oversize 163 tattoo 162 gold-plated 162 extreme 162 ivory 162 swing 161 tulle 160 50ml 160 true 160 mixed 160 diamonds 160 inspired 159 ice 159 house 159 water 159 exclusive 159 premium 158 glow 158 wine 157 turquoise 157 bracelets 157 bold 157 shimmer 157 neon 156 lily 156 vegan 156 half 155 girls 154 tweed 154 pieces 154 trio 154 mens 153 pointy 153 headphones 153 paige 152 rivet 152 gladiator 151 signature 151 le3no 151 shell 150 bib 150 antigona 149 pretty 149 closure 149 make 149 wear 149 peplum 149 linen 149 amazon 148 enamel 148 garden 148 end 148 wood 148 lock 147 textured-leather 147 duo 147 made 147 plastic 147 lady 146 trendy 146 genuine 146 furstenberg 145 co. 145 gloves 145 pen 145 kimono 145 old 145 3/4 145 split 144 spike 144 sizes 143 sapphire 143 lipsy 143 optical 143 choies 143 rip 143 real 142 patent-leather 142 msgm 142 90s 142 stretchy 141 kenneth 141 roll 140 bell 140 silver-tone 140 wayfarer 140 best 140 ariana 140 marble 140 lashes 140 mary 140 grande 140 military 139 mom 139 fine 139 edge 139 long-wear 138 karl 138 crime 138 polo 138 transparent 138 ugg 137 tilbury 137 coco 137 jewellery 137 oval 137 goth 137 wall 137 baby 136 jamie 136 destroyed 136 shape 135 magnetic 134 lambskin 134 garavani 134 sport 134 bamboo 134 ct. 
134 resin 134 john 133 sea 133 david 133 cz 133 spf 133 spaghetti 133 warehouse 133 jennifer 133 bohemian 132 edgy 132 stila 131 compact 131 camera 131 tribal 131 ruby 131 little 131 5s 131 professional 131 french 131 calfskin 131 engagement 130 bronze 130 handmade 130 inch 130 fake 130 acrylic 130 hipster 129 palm 129 w/ 129 shearling 129 nylon 129 paper 128 high-top 128 bridal 128 lucluc 128 glam 128 bear 128 queen 128 wild 128 stars 127 wide-leg 127 current/elliott 127 triple 127 betsey 127 johnson 127 deep 127 night 126 off-the-shoulder 126 designs 126 laura 126 high-rise 126 tunic 126 berry 126 power 126 lane 125 false 125 perfume 125 hard 125 lagerfeld 125 stick 125 point 124 essie 124 multicolor 124 lenses 124 banana 124 ea 123 hermes 123 apricot 123 boy 122 pale 122 slit 122 decay 122 luxury 121 aldo 121 madewell 121 see 120 antique 120 nine 120 tree 120 kylie 120 society 120 army 120 scoop 119 cut-out 119 lo 119 cord 119 slouchy 119 oasis 119 nly 119 oxford 119 knot 119 bottle 118 pinterest 118 rib 118 casadei 117 fox 117 grace 117 kiss 117 adjustable 117 stripes 117 chair 116 gradient 116 3d 116 plant 116 vera 115 layer 115 louis 115 strapless 115 tumblr 115 kim 115 tartan 115 clinique 114 official 114 lands 114 bendel 114 roberto 114 ferragamo 114 cartier 114 lauder 114 legging 114 maurices 113 tops 113 gavriel 113 cold 113 coin 113 cool 113 rainbow 112 smooth 112 fresh 112 alexis 112 wildfox 112 two-tone 112 l 111 blossom 111 salvatore 111 teardrop 111 vero 111 olive 111 puma 111 lamp 111 tibi 111 stand 111 work 111 sparkle 111 three 111 music 110 freshwater 110 decor 110 bouquet 110 card 110 brooch 110 cosmetic 110 smashbox 110 mara 110 shopper 109 kenzo 109 sophie 109 women’s 109 lancome 109 henri 109 karen 109 pull 108 ballerina 108 seconds 108 hollister 108 5/5s 108 vase 108 velvetine 108 fragrance 107 bustier 107 mansur 107 micro 107 flip 107 t-strap 107 camuto 107 pandora 107 head 107 around 107 flannel 107 chevron 106 zirconia 106 carven 106 boxy 
106 parker 106 nudes 106 gemstone 106 mineral 106 cherry 105 tube 105 les 105 audacious 105 leather-trimmed 105 high-waisted 105 fitted 105 cluster 105 ceramic 104 sand 104 scott 104 twill 104 festival 104 cross-body 104 cuffed 104 bird 104 python 104 cc 104 drew 104 inc 104 cavalli 104 silk-blend 104 brass 104 sophia 104 lined 104 pillow 103 allure 103 wig 103 jessica 103 martin 103 gypsy 102 n 102 jeggings 102 t.w. 102 merino 102 bleach 102 stuart 102 midnight 102 book 102 bralette 101 dangle 101 perforated 101 joni 101 chocolate 101 warm 101 mix 101 time 100 jay 99 faye 99 people 99 twist 99 cubic 99 checked 99 throw 99 asymmetrical 99 smokey 99 fabric 98 brogues 98 emilio 98 beautiful 98 united 98 skate 98 draped 98 piece 98 accent 98 avenue 97 animal 97 céline 97 carat 97 reversible 97 bardot 97 sale 97 ribbon 97 sky 97 royal 96 loafer 96 slip-on 96 hippie 96 stack 96 club 96 low-rise 96 cheap 95 brow 95 floral-print 95 celine 95 deluxe 95 vuitton 95 shades 94 happy 94 cole 94 necklaces 94 ii 94 paint 94 aztec 94 athletic 94 thong 94 mankind 94 apparel 94 drape 94 raw 94 shawl 93 100mm 93 mulberry 93 weitzman 93 kendall 93 mcq 93 dream 93 shift 93 jeffrey 93 rolled 93 chine 92 stay 92 5sos 92 beads 92 sports 92 bcbgmaxazria 92 campbell 92 eyewear 92 cashmere-blend 92 fossil 92 gem 92 xl 92 giorgio 92 pierre 92 fly 92 mark 92 abercrombie 92 eyeglasses 92 watches 91 espadrille 91 tiffany 91 fitch 91 silicone 91 kendra 91 cult 91 guerlain 91 pop 91 pucci 91 zimmermann 91 5c 90 direction 90 acid 90 philosophy 90 extra 90 fleece 90 suedette 90 thick 90 mono 90 ideas 90 cheek 90 jour 90 sans 89 terry 89 holiday 89 webster 89 link 89 pins 89 witchery 89 allurez 89 pyramid 89 essential 89 cushion 89 oliver 89 vogue 89 thigh 88 lightweight 88 roses 88 woolen 88 gorgeous 88 mother 88 sweat 88 turtle 88 jil 87 snapback 87 chronograph 87 autumn 87 sander 87 halo 87 opi 87 brushes 87 jewel 87 us 87 site 87 shiny 87 topaz 87 park 87 tapered 87 iconic 87 custom 87 uniqlo 87 
dkny 87 faced 87 souci 86 lana 86 faceted 86 mm 86 holder 86 magic 86 billabong 86 crystal-embellished 86 lord 86 snakeskin 86 tommy 86 hilfiger 86 over-the-knee 85 outdoor 85 culottes 85 rug 85 gothic 85 hole 85 wire 85 tiny 85 caviar 85 target 85 modcloth 85 champagne 85 iro 84 cup 84 peep-toe 84 rental 84 bandeau 84 vernis 84 piercing 84 helmut 84 pineapple 84 keds 84 pleat 84 ribkoff 84 naked 84 clip-on 84 vanessa 84 padded 83 nile 83 bittar 83 ag 83 agate 83 mohair 83 vinyl 83 hardy 83 timberland 83 runway 83 bella 83 coach 83 tattoos 83 anne 83 caged 82 burton 82 trend 82 tailored 82 core 82 painted 82 convertible 82 crystals 82 forever21 82 slippers 82 bradley 82 tropical 81 alex 81 tassels 81 republic 81 lucy 81 funny 81 estee 81 heritage 81 hobo 81 hydrating 81 hairstyles 81 mink 81 eugenia 81 bottega 81 sac 81 ponte 80 lang 80 teal 80 alloy 80 insert 80 tea 80 ink 80 parka 80 sugar 80 poppy 80 veneta 80 backless 79 perry 79 leaves 79 nappa 79 des 79 edp 79 playsuit 79 thin 79 120mm 79 stacking 79 straight-leg 79 loeffler 78 patterned 78 colorful 78 vivienne 78 mock 78 nina 78 black/white 78 nose 78 kelly 78 trouser 78 photo 78 note 78 randall 78 gap 78 candle 77 levi's 77 selma 77 trends 77 sicily 77 mask 77 scallop 77 buttons 77 goldtone 77 longline 77 tshirt 77 cotton-jersey 77 chandelier 77 honey 77 jumpsuit 77 shirts 76 details 76 slim-fit 76 100ml 76 get 76 angel 76 pur 76 waterfall 76 bodysuit 76 westwood 76 anna 76 stitch 75 across 75 organic 75 unique 75 mercier 75 go 75 disney 75 straps 75 emerald 75 batwing 75 hudson 75 irregular 75 rihanna 75 religion 75 bowknot 75 clean 75 glamorous 75 berricle 75 bath 74 elephant 74 capri 74 co 74 highlighter 74 gg 74 filigree 74 jaeger 74 monsoon 74 camo 74 contour 74 zizzi 74 deborah 74 hayden 74 monochrome 74 nearly 74 muscle 74 peoples 74 monday 74 market 74 st. 
74 dip 74 molly 74 30ml 73 like 73 mug 73 m·a·c 73 sequined 73 ruched 73 doll 73 lasting 73 theory 73 owl 73 arrangement 73 sam 73 sleeved 73 buttoned 73 slingback 73 fashionable 72 bubble 72 pressed 72 splatter 72 corduroy 72 shaped 72 ivy 72 disc 72 good 72 dye 72 stackable 71 furla 71 bb 71 ca 71 bennett 71 horn 71 barbara 71 oil 71 macbook 71 ipad 71 elie 71 blonde 70 houndstooth 70 beats 70 pcs 70 anya 70 tuxedo 70 juicy 70 sergio 70 foldover 70 crescent 70 regular 70 low-top 70 goddess 70 heather 70 plunge 70 amber 70 lulu 70 nubuck 70 panama 70 sequins 69 g 69 scuba 69 greek 69 run 69 slouch 69 10k 69 snow 69 easy 69 lucky 69 stones 69 princess 69 snap 69 lilly 69 bangles 69 diorshow 69 company 68 concealer 68 Étoile 68 lotion 68 turn 68 cultured 68 swimsuit 68 donna 68 lux 68 brushed 68 pearls 68 pusheen 68 post 68 falabella 68 ny 68 duster 68 stitching 68 melissa 68 strand 68 eos 68 cell 68 18-karat 68 pilot 67 laser 67 succulent 67 illesteva 67 pavé 67 quay 67 wolf 67 mickey 67 office 67 mist 67 roksanda 67 roshe 66 assorted 66 hammered 66 shredded 66 amethyst 66 wedges 66 sonia 66 scalloped 66 b 66 lippmann 66 yurman 66 clubmaster 66 mermaid 66 d'orsay 66 duffle 66 italian 66 bridesmaid 66 teen 66 etro 66 five 66 raglan 66 harlow 66 south 66 hearts 66 ballerinas 66 paisley 66 opal 65 away 65 rare 65 crocodile 65 soap 65 touch 65 platinum 65 instant 65 cargo 65 rope 65 copper 65 keychain 65 deco 65 nyc 65 di 65 fancy 65 pier 64 instagram 64 bandana 64 geo 64 luggage 64 spliced 64 flag 64 ct 64 knitwear 64 k.i.s.s.i.n.g 64 temporary 64 hood 64 monster 64 reading 64 peace 64 steampunk 64 rimmel 64 jeanne 64 addict 64 snowflake 64 simpson 64 coast 64 boss 64 pot 64 saab 64 rich 63 singlet 63 diamante 63 kisses 63 rachel 63 smart 63 fold 63 citizens 63 hindmarch 63 w 63 wallpaper 63 cm 63 rebel 63 digital 63 humanity 63 redvalentino 63 bandage 63 motorcycle 63 tennis 63 leopard-print 63 clips 63 laptop 63 factory 63 opening 62 frames 62 union 61 wave 61 
ashley 61 stan 61 basket 61 yeezy 61 blackfive 61 missoni 61 western 61 double-breasted 61 harry 61 a.l.c. 61 peter 61 solitaire 61 vila 61 antonio 61 floor 61 collarless 61 bronzer 61 running 61 rain 61 mason 60 plate 60 emma 60 zipped 60 edie 60 charcoal 60 toms 60 polished 60 lee 60 oxfords 60 pair 60 beret 60 garnet 60 typography 60 arden 60 d 60 knotted 60 hardware 60 4s 60 rolex 60 states 60 indie 60 abstract 60 michel 60 nicholas 60 aqua 60 rick 59 frill 59 no. 59 quote 59 versatile 59 cases 59 goose 59 et 59 coconut 59 blahnik 59 four 59 manolo 59 jumbo 59 mustard 59 rolling 59 decorative 59 cambridge 59 topman 59 wooden 58 faded 58 letters 58 puffer 58 m&co 58 samsung 58 mm6 58 hidden 58 walker 58 blanket 58 tortoise 58 l.k. 58 lemon 58 edelman 58 delpozo 58 semi 58 jack 58 passport 58 dome 58 boat 58 shopping 58 chains 58 pencils 58 sensational 57 rectangle 57 kurt 57 eagle 57 luminous 57 ankle-strap 57 applique 57 halloween 57 pebbled 57 birkin 57 uk 57 specs 57 plum 57 virgin 57 relaxed 57 espadrilles 57 quotes 57 plus/6/5/5s/5c 57 season 57 zoe 56 4/4s 56 skool 56 derek 56 clarins 56 andrew 56 precision 56 lipgloss 56 rochas 56 i'm 56 vacation 56 base 56 anchor 56 primer 56 poncho 56 usa 56 k 56 flatform 56 polarized 56 fluffy 56 rosie 56 soho 56 edt 56 diesel 56 bleached 56 celebrity 56 native 56 wristlet 55 clock 55 cuffs 55 scrunchie 55 marie 55 date 55 leo 55 geiger 55 eyelashes 55 series 55 quad 55 deer 55 forest 55 cartoon 55 faux-leather 55 lolita 55 minaudiere 55 sofa 55 ethnic 55 14kt 54 chino 54 zippers 54 chambray 54 gigi 54 taupe 54 jackets 54 express 54 millen 54 camisole 54 space 54 doublju 54 charles 54 varsity 54 corset 54 owens 54 text 54 schutz 54 levis 54 splicing 54 cage 54 kane 54 rips 54 need 54 muse 54 sk8-hi 54 rocket 54 organza 53 crewneck 53 amy 53 birger 53 finger 53 delicate 53 beverly 53 hills 53 flash 53 dune 53 mcm 53 vermeil 53 bun 53 dots 53 jade 53 neoprene 53 monica 53 belle 53 peony 53 weave 53 fluid 53 flowy 53 
crisscross 53 marmont 53 calypso 53 imports 53 rabbit 53 locket 52 rykiel 52 costume 52 maria 52 colorblock 52 search 52 legendary 52 infinite 52 bunny 52 teaspoon 52 adult 52 vita 52 21+ 52 dionysus 52 curly 52 perla 52 flora 52 dahlia 52 pvc 52 violet 52 photos 51 brunello 51 bailey 51 robinson 51 curl 51 wing 51 neo 51 sydney 51 carved 51 jordan 51 grained 51 rivets 51 supply 51 hats 51 mouret 51 baublebar 51 xs 51 dre 51 point-toe 51 cucinelli 51 padlock 51 black/gold 51 bobby 51 fleur 51 woman 51 orchid 51 poplin 51 roland 51 lizzie 51 diana 51 lewis 50 equipment 50 label 50 tiered 50 moonstone 50 moisturizing 50 stretch-jersey 50 slim-leg 50 smoky 50 audrey 50 raffia 50 p 50 natasha 50 sunset 50 rhodium 50 rupert 50 ysl 50 ilia 50 leigh 50 cara 50 mouse 50 rosa 50 junior 50 van 50 ann 50 sweetheart 50 ippolita 50 intarsia 50 illamasqua 50 lorac 50 gilet 49 amazing 49 jules 49 international 49 harrods 49 sanderson 49 always 49 planter 49 longwear 49 whistles 49 fallon 49 malene 49 friendship 49 structured 49 38mm 49 ever 49 twisted 49 backpacks 49 cotton-poplin 49 synthetic 49 fan 49 giambattista 48 kensington 48 barrel 48 pulitzer 48 petal 48 unicorn 48 metro 48 first 48 braid 48 funnel 48 lavender 48 girly 48 plus/7/6 48 lipcolor 48 croc 48 bui 48 pointed-toe 48 dual 48 indigo 48 l'absolu 48 fishnet 48 ounce 48 vetements 48 graham 48 effy 48 gifts 48 rhea 48 ally 48 operandi 48 tips 48 norman 48 marilyn 48 curved 48 sleek 48 valli 48 tinted 48 tarte 48 various 48 18ct 47 aspinal 47 margot 47 lisa 47 wrist 47 trimmed 47 spiral 47 maroon 47 atelier 47 bo 47 opaque 47 products 47 sunday 47 holland 47 brocade 47 cozy 47 shower 47 hulme 47 suit 47 matthew 47 leisure 47 loop 47 minimal 47 brooks 47 wireless 47 chinese 47 plants 47 dance 47 messy 47 translucent 47 kirkwood 47 force 47 cactus 47 creamy 47 radiant 47 branch 47 waistcoat 46 world 46 sarah 46 humble 46 almond 46 tint 46 80s 46 kevyn 46 rustic 46 blade 46 scarves 46 1/2 46 bed 46 bee 46 georgia 46 fun 
46 rx 46 vertical 46 eva 46 refill 46 goldschmied 46 thing 46 grid 46 low-tops 46 tortoiseshell 46 temple 46 scotch 46 bronzing 46 aucoin 46 hi-top 46 williamson 46 adriano 46 tag 46 sephora 46 comb 46 lola 46 watercolor 46 manon 45 jar 45 aeo 45 ancient 45 colours 45 icon 45 engraved 45 pajama 45 facial 45 potter 45 a5 45 comme 45 romance 45 faith 45 buckled 45 dreamcatcher 45 pigalle 45 iris 45 money 45 boys 45 peekaboo 45 sailor 45 clasp 45 christopher 45 elle 45 rucksack 45 silk-satin 45 detachable 45 essentials 45 rim 45 chan 44 totes 44 wrapped 44 simons 44 rb3025 44 performance 44 mule 44 monogramme 44 bruno 44 alien 44 cocoon 44 press 44 harris 44 eddie 44 cloud 44 ricci 44 narrow 44 larger 44 buy 44 baroque 44 curve 44 frye 44 tight 44 shaping 44 classics 44 off-shoulder 44 verdugo 44 matt 44 tutorial 44 rhinestones 44 swag 44 hermès 44 o 44 tod's 44 glossy 44 great 44 90's 44 desk 44 radiance 44 lam 44 crossover 44 arm 44 dusty 43 sock 43 slimming 43 ears 43 ella 43 ultimate 43 hour 43 justin 43 soda 43 towel 43 hunter 43 miller 43 clutches 43 dr 43 spitfire 43 nerd 43 electric 43 paolo 43 evan 43 kitty 43 perspex 43 charms 43 latest 43 sign 43 nautical 43 care 43 bvlgari 43 crème 43 camouflage 43 jonathan 43 friends 43 disco 43 crocheted 43 lion 43 lamb 43 tiger 43 pony 43 baptiste 43 elyse 43 lights 43 lapis 43 moi 43 trapeze 43 toast 43 lacoste 43 styles 43 cutoff 43 velvetines 42 nature 42 web 42 varnish 42 bottoms 42 seven 42 ocean 42 mules 42 alexa 42 curling 42 criss 42 anastasia 42 palazzo 42 lizard 42 personalized 42 strawberry 42 baked 42 brick 42 continental 42 basics 42 simulated 42 classy 42 amazon.co.uk 42 los 42 55mm 42 gym 42 canada 42 l.a. 42 wonderland 42 printing 42 phase 42 zac 42 single-breasted 42 better 42 stacked 42 lovers 42 giant 42 arizona 42 jelly 42 closed 42 keyhole 42 . 
41 two-piece 41 18kt 41 pochette 41 moisture 41 seam 41 comfort 41 valentine 41 ceremony 41 clic 41 velour 41 twin 41 baguette 41 fruit 41 colored 41 high-waist 41 skort 41 y 41 paradise 41 champion 41 milly 41 cedar 41 neutral 41 barth 41 bareminerals 41 belly 41 leonard 41 embellishment 41 ctw 41 iron 41 penny 41 briefcase 41 bad 41 plush 41 sunscreen 41 hip 41 duffel 40 blair 40 vibrant 40 erickson 40 thierry 40 fuchsia 40 appliqué 40 envy 40 broken 40 dolce&gabbana 40 cameo 40 coats 40 lengthening 40 openwork 40 brothers 40 michelle 40 glittered 40 cloth 40 ipod 40 pompom 40 brooklyn 40 snapmade.com 40 barneys 40 quality 40 lucite 40 sling 40 italy 40 cabochon 40 supreme 40 notes 40 shoedazzle 40 romper 39 tech 39 dyed 39 bonded 39 alexandre 39 product 39 simone 39 femme 39 flops 39 magazine 39 eyelet 39 katrantzou 39 fl 39 agent 39 victorian 39 gunmetal 39 decoration 39 seamed 39 miranda 39 polka-dot 39 available 39 pizza 39 amanda 39 wings 39 lorenzo 39 part 39 manicure 39 yang 39 dylan 39 gorjana 39 timeless 39 brian 39 raey 39 15ml 39 barely 39 dog 39 pour 39 sieraden 39 edited 39 overall 39 105mm 39 cady 39 appliquéd 39 movado 39 10mm 39 chiara 38 vivier 38 paneled 38 faux-fur 38 daniel 38 grand 38 charming 38 sofia 38 room 38 lovely 38 r13 38 gowns 38 splendid 38 knuckle 38 king 38 bezel 38 nixon 38 gauze 38 peacock 38 carolina 38 angeles 38 lizzy 38 marco 38 buckles 38 eyebrow 38 neckline 38 pom-pom 38 j.w.anderson 38 sylvie 38 kitten 38 knee-high 38 nano 38 stem 38 katy 38 wellington 38 romantic 38 polyvore 38 50s 38 make-up 38 mademoiselle 38 rails 38 wharf 38 shaggy 38 pan 38 gazelle 38 36mm 38 tulip 37 spectrum 37 bottom 37 cobalt 37 passion 37 year 37 starbucks 37 iantorno 37 narciso 37 eyelash 37 live 37 3x1 37 stretch-cotton 37 8mm 37 off-white 37 se 37 /jean 37 eve 37 luna 37 wars 37 show 37 folding 37 steven 37 silvertone 37 mia 37 fabulous 37 shadows 37 temperley 37 anouk 37 roman 37 racerback 37 lilac 37 laundry 37 dainty 37 hinge 37 frost 37 
jeweled 37 40mm 37 supra 37 view 37 hanging 36 one-shoulder 36 graffiti 36 self 36 alpaca 36 cognac 36 ps1 36 slide 36 turn-down 36 u 36 borgo 36 nicole 36 tower 36 lighting 36 joy 36 skirts 36 keyring 36 flock 36 du 36 emoji 36 plexi 36 ash 36 henna 36 mod 36 edit 36 harper 36 100% 36 cleansing 36 creme 36 lotus 36 georgette 36 inches 36 mid-length 36 kat 36 ready 36 demi 36 safari 36 moss 36 bing 36 mother-of-pearl 36 shade 36 hamilton 36 skyline 36 roger 36 complete 36 pierced 36 jogger 36 mytheresa.com 36 bieber 36 laque 36 daily 36 milk 36 homme 36 everyday 36 simmons 36 silk-chiffon 36 smythson 36 tab 36 peacoat 35 era 35 judith 35 bouclé 35 sheath 35 match 35 pamela 35 spiked 35 holographic 35 m&s 35 joe 35 babies 35 carvela 35 silky 35 sharon 35 rotita 35 monarch 35 fling 35 en 35 chico's 35 nordstrom 35 marciano 35 string 35 girlfriend 35 bloom 35 twenty 35 eight 35 ava 35 s4 35 bite 35 zippered 35 brit 35 gentle 35 citrine 35 barrette 35 heavy 35 step 35 coated 35 journal 35 dry 35 mat 35 simply 35 extensions 35 emporio 35 wardrobe 35 illuminating 35 violeta 35 plein 35 brogue 35 tools 35 sticker 35 bare 35 balance 35 accessory 35 irene 35 styling 35 cable-knit 35 linea 35 looks 35 wide-brim 35 don't 35 pigment 34 minnie 34 shahida 34 pumpkin 34 special 34 jegging 34 resistant 34 weekend 34 mineralize 34 thicken 34 selected 34 coachella 34 fern 34 color-block 34 dannijo 34 fair 34 future 34 rodriguez 34 qupid 34 nero 34 robert 34 alice+olivia 34 underwear 34 luke 34 follies 34 meyer 34 heat 34 ani 34 dining 34 parides 34 young 34 betty 34 24k 34 voyage 34 loewe 34 jungle 34 automatic 34 robyn 34 rita 34 h 34 puff 34 baume 34 bowler 34 holly 34 japanese 34 jennings 34 hermès 34 jansport 34 patches 34 raf 34 kill 34 lancôme 34 grain 34 kohl 33 incase 33 books 33 gallery 33 lost 33 foil 33 cosmic 33 zero 33 broderie 33 brief 33 tabitha 33 cube 33 cuba 33 take 33 sheet 33 sheepskin 33 1/4 33 luu 33 precious 33 briefs 33 thomas 33 pots 33 hourglass 33 bees 33 
zebra 33 use 33 nirvana 33 allen 33 wink 33 catcher 33 man 33 splash 33 cynthia 33 miniskirt 33 cyber 33 magnolia 33 louise 33 things 33 neuwirth 33 tied 33 lattice 33 sparkling 33 bebe 33 slogan 33 ribbed-knit 33 skagen 33 bay 33 amelie 33 shark 33 atwood 33 tip 33 superdry 33 barn 33 vanilla 33 slub 33 master 32 panelled 32 bianca 32 babe 32 wet 32 vampire 32 broad 32 nwt 32 morning 32 spikes 32 illuminator 32 bridge 32 larsson 32 zodiac 32 wreath 32 alaïa 32 novica 32 slipper 32 play 32 fire 32 hello 32 hoops 32 juniors 32 leiber 32 lingerie 32 block-heel 32 rio 32 clover 32 kingdom 32 blank 32 pr 32 martini 32 brilliant 32 footwear 32 veau 32 neiman 32 brightening 32 1980s 32 willow 32 chantecaille 32 credit 32 etched 32 stylo 32 marcus 32 right 32 fenty 32 edward 32 anita 32 self-tie 32 maya 32 brows 32 aerin 32 umbrella 32 lurex 32 birman 32 chest 32 road 32 viva 32 birthday 32 inspirational 32 yoga 31 analog 31 derby 31 illusion 31 stretch-crepe 31 falke 31 headpiece 31 wavy 31 high-low 31 north 31 marchesa 31 watermelon 31 batman 31 ferragni 31 stilettos 31 barbour 31 liberty 31 spirit 31 motif 31 jasmine 31 ae 31 iridescent 31 results 31 hope 31 wool-crepe 31 medusa 31 sandy 31 yoni 31 stocking 31 lasry 31 romy 31 national 31 mr 31 eiffel 31 datejust 31 icing 31 awesome 31 75ml 31 terre 31 re/done 31 glitz 31 pear 31 way 31 chestnut 31 papier 31 erin 31 t-bar 31 viparo 31 ray 31 lara 31 morganite 31 rocha 31 haider 31 poison 31 lariat 31 push 31 l'oreal 31 business 31 utility 31 smoke 31 jones 31 high-heel 31 track 31 marl 31 please 31 desert 31 lattori 31 bean 31 a.p.c. 
31 bidermann 31 velours 31 aluminum 31 thread 31 premiere 31 crossbar 30 menswear 30 connection 30 flawless 30 e 30 sigma 30 high-heeled 30 patrick 30 polyester 30 collections 30 gold/black 30 stretch-knit 30 ace 30 let 30 1990s 30 famous 30 bordeaux 30 cicihot 30 petits 30 monroe 30 dakota 30 lookbook 30 model 30 1970s 30 stain 30 checkered 30 polly 30 mandala 30 racer 30 crosby 30 veil 30 posh 30 message 30 skeleton 30 cowl 30 lazy 30 valentines 30 sutton 30 college 30 bermuda 30 reiss 30 flex 30 east 30 tees 30 diorific 30 karan 30 word 30 carpet 30 fujifilm 30 cut-off 30 arms 30 sparkly 30 mila 30
================================================
FILE: extract_feature.sh
================================================
#!/bin/bash
# Extract image features for the test outfits with the default model by
# running polyvore/run_inference.py.
#
# NOTE: despite its name, CHECKPOINT_DIR holds a checkpoint path (it is passed
# to --checkpoint_path), not a directory.
CHECKPOINT_DIR="model/model_final/model.ckpt-34865"

# The expansion is quoted so the flag stays a single argument even if the
# checkpoint path is later changed to one containing spaces or glob characters.
python polyvore/run_inference.py \
  --checkpoint_path="${CHECKPOINT_DIR}" \
  --json_file="data/label/test_no_dup.json" \
  --image_dir="data/images/" \
  --feature_file="data/features/test_features.pkl" \
  --rnn_type="lstm"

# # Extract features of Bi-LSTM without VSE
# CHECKPOINT_DIR="model/model_final/model_bi_no_emb.ckpt"
# python polyvore/run_inference.py \
#   --checkpoint_path="${CHECKPOINT_DIR}" \
#   --json_file="data/label/test_no_dup.json" \
#   --image_dir="data/images/" \
#   --feature_file="data/features/test_features_bi_no_emb.pkl" \
#   --rnn_type="lstm"

# # Extract features of VSE model without LSTM
# CHECKPOINT_DIR="model/model_final/model_emb.ckpt"
# python polyvore/run_inference_vse.py \
#   --checkpoint_path="${CHECKPOINT_DIR}" \
#   --json_file="data/label/test_no_dup.json" \
#   --image_dir="data/images/" \
#   --feature_file="data/features/test_features_emb.pkl"

# # Extract features of Siamese Network
# CHECKPOINT_DIR="model/model_final/model_siamese.ckpt"
# python polyvore/run_inference_siamese.py \
#   --checkpoint_path="${CHECKPOINT_DIR}" \
#   --json_file="data/label/test_no_dup.json" \
#   --image_dir="data/images/" \
#   --feature_file="data/features/test_features_siamese.pkl"
================================================
FILE: fill_in_blank.sh
================================================
#!/bin/bash
# Run the fill-in-the-blank evaluation (polyvore/fill_in_blank.py) on the
# test questions, using previously extracted test features.
#
# NOTE: despite its name, CHECKPOINT_DIR holds a checkpoint path (it is passed
# to --checkpoint_path), not a directory.
CHECKPOINT_DIR="model/model_final/model.ckpt-34865"

# --direction="2": presumably bidirectional, matching the flag help text in
# polyvore/fashion_compatibility.py -- confirm against fill_in_blank.py.
# The expansion is quoted so the flag stays a single argument even if the
# checkpoint path is later changed to one containing spaces or glob characters.
python polyvore/fill_in_blank.py \
  --checkpoint_path="${CHECKPOINT_DIR}" \
  --json_file="data/label/fill_in_blank_test.json" \
  --feature_file="data/features/test_features.pkl" \
  --rnn_type="lstm" \
  --direction="2" \
  --result_file="fill_in_blank_result.pkl"

# # Fill in the blank Siamese Network
# CHECKPOINT_DIR="model/model_final/model_siamese.ckpt"
# python polyvore/fill_in_blank_siamese.py \
#   --checkpoint_path="${CHECKPOINT_DIR}" \
#   --json_file="data/label/fill_in_blank_test.json" \
#   --feature_file="data/features/test_features_siamese.pkl" \
#   --result_file="fill_in_blank_siamese_result.pkl"
================================================
FILE: outfit_generation.sh
================================================
#!/bin/bash
# Generate outfits for the queries in query.json by running
# polyvore/set_generation.py with the default model.
#
# NOTE: despite its name, CHECKPOINT_DIR holds a checkpoint path (it is passed
# to --checkpoint_path), not a directory.
CHECKPOINT_DIR="model/model_final/model.ckpt-34865"

# Run inference on images.
python polyvore/set_generation.py \
  --checkpoint_path="${CHECKPOINT_DIR}" \
  --image_dir="data/images/test_no_dup/" \
  --feature_file="data/features/test_features.pkl" \
  --query_file="query.json" \
  --word_dict_file="data/final_word_dict.txt" \
  --result_dir="results/"
================================================
FILE: polyvore/configuration.py
================================================
# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class ModelConfig(object):
  """Wrapper class for model hyperparameters.

  Every hyperparameter is a plain instance attribute set to its default in
  `__init__`; callers are expected to overwrite attributes on the instance
  after construction (e.g. `input_file_pattern`, which defaults to None).
  """

  def __init__(self):
    """Sets the default model hyperparameters."""
    # File pattern of sharded TFRecord file containing SequenceExample protos.
    # Must be provided in training and evaluation modes.
    self.input_file_pattern = None

    # Image format ("jpeg" or "png").
    self.image_format = "jpeg"

    # Approximate number of values per input shard. Used to ensure sufficient
    # mixing between shards in training.
    self.values_per_input_shard = 135
    # Minimum number of shards to keep in the input queue.
    self.input_queue_capacity_factor = 2
    # Number of threads for prefetching SequenceExample protos.
    self.num_input_reader_threads = 1

    # Name of the SequenceExample context feature containing set ids.
    self.set_id_name = "set_id"
    # Names of the SequenceExample feature lists containing captions and
    # images.
    self.image_feature_name = "images"
    self.image_index_name = "image_index"
    self.caption_feature_name = "caption_ids"

    # Number of unique words in the vocab (plus 1, presumably for the
    # unknown-word token; the original comment lost the token name).
    # The default value is larger than the expected actual vocab size to allow
    # for differences between tokenizer versions used in preprocessing. There
    # is no harm in using a value greater than the actual vocab size, but using
    # a value less than the actual vocab size will result in an error.
    self.vocab_size = 2757

    # Number of threads for image preprocessing.
    self.num_preprocess_threads = 1

    # Batch size.
    self.batch_size = 10

    # File containing an Inception v3 checkpoint to initialize the variables
    # of the Inception model. Must be provided when starting training for the
    # first time.
    self.inception_checkpoint_file = None

    # Dimensions of Inception v3 input images.
    self.image_height = 299
    self.image_width = 299

    # Scale used to initialize model variables.
    self.initializer_scale = 0.08

    # LSTM input and output dimensionality, respectively. embedding_size is
    # also the embedding size in the visual-semantic joint space.
    self.embedding_size = 512
    self.num_lstm_units = 512

    # If < 1.0, the dropout keep probability applied to LSTM variables.
    self.lstm_dropout_keep_prob = 0.7

    # Largest number of images in a fashion set.
    self.number_set_images = 8

    # Margin for the embedding loss.
    self.emb_margin = 0.2

    # Balance factors of all losses.
    self.emb_loss_factor = 1.0  # VSE loss
    self.f_rnn_loss_factor = 1.0  # Forward LSTM
    # Backward LSTM; might give it a lower weight because it is harder to
    # predict backward than forward in our scenario.
    self.b_rnn_loss_factor = 1.0

    # RNN type. "lstm", "gru", "rnn"
    self.rnn_type = "lstm"


class TrainingConfig(object):
  """Wrapper class for training hyperparameters.

  Like `ModelConfig`, this is a bag of instance attributes initialised to
  defaults in `__init__`.
  """

  def __init__(self):
    """Sets the default training hyperparameters."""
    # Number of examples per epoch of training data.
    self.num_examples_per_epoch = 17316

    # Optimizer for training the model.
    self.optimizer = "SGD"

    # Learning rate for the initial phase of training.
    # NOTE(review): the original comment continued "by the FLAGS in train.py";
    # presumably this default is overridden by a flag there -- confirm.
    self.initial_learning_rate = 0.2
    self.learning_rate_decay_factor = 0.5
    self.num_epochs_per_decay = 2.0

    # If not None, clip gradients to this value.
    self.clip_gradients = 5.0

    # How many model checkpoints to keep.
    self.max_checkpoints_to_keep = 10
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Predict the fashion compatibility of a given image sequence.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import json import tensorflow as tf import numpy as np import pickle as pkl from sklearn import metrics import configuration import polyvore_model_bi as polyvore_model FLAGS = tf.flags.FLAGS tf.flags.DEFINE_string("checkpoint_path", "", "Model checkpoint file or directory containing a " "model checkpoint file.") tf.flags.DEFINE_string("label_file", "", "Txt file containing test outfits.") tf.flags.DEFINE_string("feature_file", "", "Files containing image features") tf.flags.DEFINE_string("rnn_type", "", "Type of RNN.") tf.flags.DEFINE_string("result_file", "", "File to store the results.") tf.flags.DEFINE_integer("direction", 2, "2: bidirectional; 1: forward only;" "-1: backward only.") def run_compatibility_inference(sess, image_seqs, test_feat, num_lstm_units, model): emb_seqs = test_feat[image_seqs,:] num_images = float(len(image_seqs)) if FLAGS.rnn_type == "lstm": zero_state = np.zeros([1, 2 * num_lstm_units]) else: zero_state = np.zeros([1, num_lstm_units]) f_score = 0 b_score = 0 if FLAGS.direction != -1: # Forward RNN. outputs = [] input_feed = np.reshape(emb_seqs[0], [1,-1]) # Run first step with all zeros initial state. 
[lstm_state, lstm_output] = sess.run( fetches=["lstm/f_state:0","f_logits/f_logits/BiasAdd:0"], feed_dict={"lstm/f_input_feed:0":input_feed, "lstm/f_state_feed:0":zero_state}) outputs.append(lstm_output) # Run remaining steps. for step in range(int(num_images)-1): input_feed = np.reshape(emb_seqs[step+1], [1,-1]) [lstm_state, lstm_output] = sess.run( fetches=["lstm/f_state:0","f_logits/f_logits/BiasAdd:0"], feed_dict={"lstm/f_input_feed:0":input_feed, "lstm/f_state_feed:0":lstm_state}) outputs.append(lstm_output) # Calculate the loss. # Different from the training process where the loss is calculated in each # mini batch, during testing, we get the loss againist the whole test set. # This is pretty slow, maybe a better method could be used. s = np.squeeze(np.dot(np.asarray(outputs), np.transpose(test_feat))) f_score = sess.run(model.lstm_xent_loss, feed_dict={"lstm/pred_feed:0":s, "lstm/next_index_feed:0":image_seqs[1:] + [test_feat.shape[0]-1]}) f_score = - np.mean(f_score) if FLAGS.direction != 1: # Backward RNN. outputs = [] input_feed = np.reshape(emb_seqs[-1], [1,-1]) [lstm_state, lstm_output] = sess.run( fetches=["lstm/b_state:0","b_logits/b_logits/BiasAdd:0"], feed_dict={"lstm/b_input_feed:0":input_feed, "lstm/b_state_feed:0":zero_state}) outputs.append(lstm_output) for step in range(int(num_images)-1): input_feed = np.reshape(emb_seqs[int(num_images)-2-step], [1,-1]) [lstm_state, lstm_output] = sess.run( fetches=["lstm/b_state:0","b_logits/b_logits/BiasAdd:0"], feed_dict={"lstm/b_input_feed:0":input_feed, "lstm/b_state_feed:0":lstm_state}) outputs.append(lstm_output) # Calculate the loss. s = np.squeeze(np.dot(np.asarray(outputs), np.transpose(test_feat))) b_score = sess.run(model.lstm_xent_loss, feed_dict={"lstm/pred_feed:0":s, "lstm/next_index_feed:0": image_seqs[-2::-1] + [test_feat.shape[0]-1]}) b_score = - np.mean(b_score) return [f_score, b_score] def main(_): # Build the inference graph. 
g = tf.Graph() with g.as_default(): model_config = configuration.ModelConfig() model_config.rnn_type = FLAGS.rnn_type model = polyvore_model.PolyvoreModel(model_config, mode="inference") model.build() saver = tf.train.Saver() # Load pre-computed image features. with open(FLAGS.feature_file, "rb") as f: test_data = pkl.load(f) test_ids = test_data.keys() test_feat = np.zeros((len(test_ids) + 1, len(test_data[test_ids[0]]["image_rnn_feat"]))) # test_feat has one more zero vector as the representation of END of # RNN prediction. for i, test_id in enumerate(test_ids): # Image feature in the RNN space. test_feat[i] = test_data[test_id]["image_rnn_feat"] g.finalize() with tf.Session() as sess: saver.restore(sess, FLAGS.checkpoint_path) all_f_scores = [] all_b_scores = [] all_scores = [] all_labels = [] testset = open(FLAGS.label_file).read().splitlines() k = 0 for test_outfit in testset: k += 1 if k % 100 == 0: print("Finish %d outfits." % k) image_seqs = [] for test_image in test_outfit.split()[1:]: image_seqs.append(test_ids.index(test_image)) [f_score, b_score] = run_compatibility_inference(sess, image_seqs, test_feat, model_config.num_lstm_units, model) all_f_scores.append(f_score) all_b_scores.append(b_score) all_scores.append(f_score + b_score) all_labels.append(int(test_outfit[0])) # calculate AUC and AP fpr, tpr, thresholds = metrics.roc_curve(all_labels, all_scores, pos_label=1) print("Compatibility AUC: %f for %d outfits" % (metrics.auc(fpr, tpr), len(all_labels))) with open(FLAGS.result_file, "wb") as f: pkl.dump({"all_labels": all_labels, "all_f_scores": all_f_scores, "all_b_scores": all_b_scores}, f) if __name__ == "__main__": tf.app.run() ================================================ FILE: polyvore/fill_in_blank.py ================================================ # Copyright 2017 Xintong Han. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Fill in blank evaluation.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import json import tensorflow as tf import numpy as np import pickle as pkl import configuration import polyvore_model_bi as polyvore_model FLAGS = tf.flags.FLAGS tf.flags.DEFINE_string("checkpoint_path", "", "Model checkpoint file or directory containing a " "model checkpoint file.") tf.flags.DEFINE_string("json_file", "", "Json file containing questions and answers.") tf.flags.DEFINE_string("feature_file", "", "pkl files containing the features") tf.flags.DEFINE_string("rnn_type", "lstm", "Type of RNN.") tf.flags.DEFINE_string("result_file", "", "File to store the results.") tf.flags.DEFINE_integer("direction", 2, "2: bidirectional; 1: forward only;" "-1: backward only; 0: Average pooling no RNN.") def run_question_inference(sess, question, test_ids, test_feat, test_rnn_feat, num_lstm_units): question_ids = [] answer_ids = [] for q in question["question"]: try: question_ids.append(test_ids.index(q)) except: return [], [] for a in question["answers"]: try: answer_ids.append(test_ids.index(a)) except: return [], [] blank_posi = question["blank_position"] # Average pooling of the VSE embeddings question_emb = np.reshape(np.mean(test_feat[question_ids], 0), [1,-1]) q_emb = question_emb / np.linalg.norm(question_emb, axis=1)[:, np.newaxis] a_emb = (test_feat[answer_ids] / np.linalg.norm(test_feat[answer_ids], axis=1)[:, np.newaxis]) vse_score = 
(np.dot(q_emb, np.transpose(a_emb)) + 1) / 2 # scale to [0,1] vse_score = vse_score #/ np.sum(vse_score) # normalize to sum to 1. if FLAGS.direction == 0: # Only use VSE predicted_answer = np.argsort(-vse_score)[0] return vse_score, predicted_answer if FLAGS.rnn_type == "lstm": # LSTM has two states. zero_state = np.zeros([1, 2 * num_lstm_units]) else: zero_state = np.zeros([1, num_lstm_units]) # Blank is the last item. if blank_posi == len(question_ids) + 1: if FLAGS.direction == -1: return [], [] # Only do forward rnn input_feed = np.reshape(test_rnn_feat[question_ids[0]], [1,-1]) # Run first step with all zeros initial state. [lstm_state, lstm_output] = sess.run( fetches=["lstm/f_state:0","f_logits/f_logits/BiasAdd:0"], feed_dict={"lstm/f_input_feed:0":input_feed, "lstm/f_state_feed:0":zero_state}) for step in range(len(question_ids)-1): input_feed = np.reshape(test_rnn_feat[question_ids[step + 1]], [1,-1]) [lstm_state, lstm_output] = sess.run( fetches=["lstm/f_state:0","f_logits/f_logits/BiasAdd:0"], feed_dict={"lstm/f_input_feed:0":input_feed, "lstm/f_state_feed:0":lstm_state}) # Search in answers rnn_score = np.exp(np.dot(lstm_output, np.transpose(test_rnn_feat[answer_ids]))) rnn_score = rnn_score / np.sum(rnn_score) # Blank is the frist item elif blank_posi == 1: if FLAGS.direction == 1: return [], [] # only do backward rnn input_feed = np.reshape(test_rnn_feat[question_ids[-1]], [1,-1]) # Run first step with all zeros initial state. 
[lstm_state, lstm_output] = sess.run( fetches=["lstm/b_state:0","b_logits/b_logits/BiasAdd:0"], feed_dict={"lstm/b_input_feed:0":input_feed, "lstm/b_state_feed:0":zero_state}) for step in range(len(question_ids)-1): input_feed = np.reshape(test_rnn_feat[question_ids[-step-2]], [1,-1]) [lstm_state, lstm_output] = sess.run( fetches=["lstm/b_state:0","b_logits/b_logits/BiasAdd:0"], feed_dict={"lstm/b_input_feed:0":input_feed, "lstm/b_state_feed:0":lstm_state}) rnn_score = np.exp(np.dot(lstm_output, np.transpose(test_rnn_feat[answer_ids]))) rnn_score = rnn_score / np.sum(rnn_score) # Blank is in the middle. else: # Do bidirectional rnn. # Forward: input_feed = np.reshape(test_rnn_feat[question_ids[0]], [1,-1]) # Run first step with all zeros initial state. [lstm_state, lstm_output] = sess.run( fetches=["lstm/f_state:0","f_logits/f_logits/BiasAdd:0"], feed_dict={"lstm/f_input_feed:0":input_feed, "lstm/f_state_feed:0":zero_state}) for step in range(blank_posi - 2): input_feed = np.reshape(test_rnn_feat[question_ids[step+1]], [1,-1]) [lstm_state, lstm_output] = sess.run( fetches=["lstm/f_state:0","f_logits/f_logits/BiasAdd:0"], feed_dict={"lstm/f_input_feed:0":input_feed, "lstm/f_state_feed:0":lstm_state}) # Search in answers. f_softmax = np.exp(np.dot(lstm_output, np.transpose(test_rnn_feat[answer_ids]))) # Backward: input_feed = np.reshape(test_rnn_feat[question_ids[-1]], [1,-1]) # Run first step with all zeros initial state. 
[lstm_state, lstm_output] = sess.run( fetches=["lstm/b_state:0","b_logits/b_logits/BiasAdd:0"], feed_dict={"lstm/b_input_feed:0":input_feed, "lstm/b_state_feed:0":zero_state}) for step in range(len(question_ids)-blank_posi): input_feed = np.reshape(test_rnn_feat[question_ids[-step-2]], [1,-1]) [lstm_state, lstm_output] = sess.run( fetches=["lstm/b_state:0","b_logits/b_logits/BiasAdd:0"], feed_dict={"lstm/b_input_feed:0":input_feed, "lstm/b_state_feed:0":lstm_state}) b_softmax = np.exp(np.dot(lstm_output, np.transpose(test_rnn_feat[answer_ids]))) if FLAGS.direction == 2: rnn_score = (f_softmax / np.sum(f_softmax) + b_softmax / np.sum(b_softmax)) rnn_score /= 2 elif FLAGS.direction == 1: rnn_score = f_softmax / np.sum(f_softmax) else: rnn_score = b_softmax / np.sum(b_softmax) predicted_answer = np.argsort(-rnn_score)[0] return rnn_score, predicted_answer def main(_): # Build the inference graph. top_k = 4 # Print the top_k accuracy. true_pred = np.zeros(top_k) # Load pre-computed image features. with open(FLAGS.feature_file, "rb") as f: test_data = pkl.load(f) test_ids = test_data.keys() test_feat = np.zeros((len(test_ids), len(test_data[test_ids[0]]["image_feat"]))) test_rnn_feat = np.zeros((len(test_ids), len(test_data[test_ids[0]]["image_rnn_feat"]))) for i, test_id in enumerate(test_ids): # Image feature in visual-semantic embedding space. test_feat[i] = test_data[test_id]["image_feat"] # Image feature in the RNN space. 
test_rnn_feat[i] = test_data[test_id]["image_rnn_feat"] g = tf.Graph() with g.as_default(): model_config = configuration.ModelConfig() model_config.rnn_type = FLAGS.rnn_type model = polyvore_model.PolyvoreModel(model_config, mode="inference") model.build() saver = tf.train.Saver() g.finalize() with tf.Session() as sess: saver.restore(sess, FLAGS.checkpoint_path) questions = json.load(open(FLAGS.json_file)) all_pred = [] set_ids = [] all_scores = [] for question in questions: score, pred = run_question_inference(sess, question, test_ids, test_feat, test_rnn_feat, model_config.num_lstm_units) if pred != []: all_pred.append(pred) all_scores.append(score) set_ids.append(question["question"][0].split("_")[0]) # 0 is the correct answer, iterate over top_k. for i in range(top_k): if 0 in pred[:i+1]: true_pred[i] += 1 # Print all top-k accuracy. for i in range(top_k): print("Top %d Accuracy: " % (i + 1)) print("%d correct answers in %d valid questions." % (true_pred[i], len(all_pred))) print("Accuracy: %f" % (true_pred[i] / len(all_pred))) s = np.empty((len(all_scores),), dtype=np.object) for i in range(len(all_scores)): s[i] = all_scores[i] with open(FLAGS.result_file, "wb") as f: pkl.dump({"set_ids": set_ids, "pred": all_pred, "score": s}, f) if __name__ == "__main__": tf.app.run() ================================================ FILE: polyvore/fill_in_blank_siamese.py ================================================ # Copyright 2017 Xintong Han. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Fill in blank evaluation.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import json import tensorflow as tf import numpy as np import pickle as pkl import configuration import polyvore_model_siamese as polyvore_model FLAGS = tf.flags.FLAGS tf.flags.DEFINE_string("checkpoint_path", "", "Model checkpoint file or directory containing a " "model checkpoint file.") tf.flags.DEFINE_string("json_file", "", "Json file containing questions and answers.") tf.flags.DEFINE_string("feature_file", "", "pkl files containing the features") tf.flags.DEFINE_string("result_file", "", "File to store the results.") def run_question_inference(sess, question, test_ids, test_feat): question_ids = [] answer_ids = [] for q in question["question"]: try: question_ids.append(test_ids.index(q)) except: return [], [] for a in question["answers"]: try: answer_ids.append(test_ids.index(a)) except: return [], [] blank_posi = question["blank_position"] # Average pooling of the VSE embeddings question_emb = np.reshape(np.mean(test_feat[question_ids], 0), [1,-1]) q_emb = question_emb / np.linalg.norm(question_emb, axis=1)[:, np.newaxis] a_emb = (test_feat[answer_ids] / np.linalg.norm(test_feat[answer_ids], axis=1)[:, np.newaxis]) score = (np.dot(q_emb, np.transpose(a_emb)) + 1) / 2 # scale to [0,1] predicted_answer = np.argsort(-score)[0] return score, predicted_answer def main(_): # Build the inference graph. top_k = 4 # Print the top_k accuracy. true_pred = np.zeros(top_k) # Load pre-computed image features. 
with open(FLAGS.feature_file, "rb") as f: test_data = pkl.load(f) test_ids = test_data.keys() test_feat = np.zeros((len(test_ids), len(test_data[test_ids[0]]["image_feat"]))) for i, test_id in enumerate(test_ids): # Image feature in visual-semantic embedding space. test_feat[i] = test_data[test_id]["image_feat"] g = tf.Graph() with g.as_default(): model_config = configuration.ModelConfig() model = polyvore_model.PolyvoreModel(model_config, mode="inference") model.build() saver = tf.train.Saver() g.finalize() with tf.Session() as sess: saver.restore(sess, FLAGS.checkpoint_path) questions = json.load(open(FLAGS.json_file)) all_pred = [] set_ids = [] all_scores = [] for question in questions: score, pred = run_question_inference(sess, question, test_ids, test_feat) if pred != []: all_pred.append(pred) all_scores.append(score) set_ids.append(question["question"][0].split("_")[0]) # 0 is the correct answer, iterate over top_k. for i in range(top_k): if 0 in pred[:i+1]: true_pred[i] += 1 # Print all top-k accuracy. for i in range(top_k): print("Top %d Accuracy: " % (i + 1)) print("%d correct answers in %d valid questions." % (true_pred[i], len(all_pred))) print("Accuracy: %f" % (true_pred[i] / len(all_pred))) s = np.empty((len(all_scores),), dtype=np.object) for i in range(len(all_scores)): s[i] = all_scores[i] with open(FLAGS.result_file, "wb") as f: pkl.dump({"set_ids": set_ids, "pred": all_pred, "score": s}, f) if __name__ == "__main__": tf.app.run() ================================================ FILE: polyvore/ops/__init__.py ================================================ ================================================ FILE: polyvore/ops/image_embedding.py ================================================ # Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Image embedding ops."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import tensorflow as tf

from tensorflow.contrib.slim.python.slim.nets.inception_v3 import inception_v3_base

slim = tf.contrib.slim


def inception_v3(images,
                 trainable=True,
                 is_training=True,
                 weight_decay=0.00004,
                 stddev=0.1,
                 dropout_keep_prob=0.8,
                 use_batch_norm=True,
                 batch_norm_params=None,
                 add_summaries=True,
                 scope="InceptionV3"):
  """Builds an Inception V3 subgraph for image embeddings.

  The base network output is average-pooled over its spatial dimensions,
  passed through dropout and flattened.

  Args:
    images: A float32 Tensor of shape [batch, height, width, channels].
    trainable: Whether the inception submodel should be trainable or not.
    is_training: Boolean indicating training mode or not.
    weight_decay: Coefficient for weight regularization. Only used when
      `trainable` is true.
    stddev: The standard deviation of the truncated normal weight initializer.
    dropout_keep_prob: Dropout keep probability.
    use_batch_norm: Whether to use batch normalization.
    batch_norm_params: Parameters for batch normalization. See
      tf.contrib.layers.batch_norm for details. Defaults are filled in when
      this is empty and `use_batch_norm` is true.
    add_summaries: Whether to add activation summaries.
    scope: Optional Variable scope.

  Returns:
    net: The pooled, flattened output Tensor of the network (not the
      end_points dictionary, which is only used for the summaries).
  """
  # Only consider the inception model to be in training mode if it's trainable.
  is_inception_model_training = trainable and is_training

  if use_batch_norm:
    # Default parameters for batch normalization.
    if not batch_norm_params:
      batch_norm_params = {
          "is_training": is_inception_model_training,
          "trainable": trainable,
          # Decay for the moving averages.
          "decay": 0.9997,
          # Epsilon to prevent 0s in variance.
          "epsilon": 0.001,
          # Collection containing the moving mean and moving variance.
          "variables_collections": {
              "beta": None,
              "gamma": None,
              "moving_mean": ["moving_vars"],
              "moving_variance": ["moving_vars"],
          }
      }
  else:
    # Caller-supplied parameters are discarded when batch norm is disabled.
    batch_norm_params = None

  # Weights are L2-regularized only when the submodel is trainable.
  if trainable:
    weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
  else:
    weights_regularizer = None

  with tf.variable_scope(scope, "InceptionV3", [images]) as scope:
    with slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
        weights_regularizer=weights_regularizer,
        trainable=trainable):
      # NOTE(review): normalizer_fn is slim.batch_norm regardless of
      # use_batch_norm; only normalizer_params becomes None in that case --
      # confirm whether that is intended.
      with slim.arg_scope(
          [slim.conv2d],
          weights_initializer=tf.truncated_normal_initializer(stddev=stddev),
          activation_fn=tf.nn.relu,
          normalizer_fn=slim.batch_norm,
          normalizer_params=batch_norm_params):
        net, end_points = inception_v3_base(images, scope=scope)
        with tf.variable_scope("logits"):
          # Pool over the full spatial extent (dimensions 1 and 2) of the
          # base network output.
          shape = net.get_shape()
          net = slim.avg_pool2d(net, shape[1:3], padding="VALID", scope="pool")
          net = slim.dropout(
              net,
              keep_prob=dropout_keep_prob,
              is_training=is_inception_model_training,
              scope="dropout")
          net = slim.flatten(net, scope="flatten")

  # Add summaries.
  if add_summaries:
    for v in end_points.values():
      tf.contrib.layers.summaries.summarize_activation(v)

  return net

================================================
FILE: polyvore/ops/image_embedding_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests for tensorflow_models.im2txt.ops.image_embedding.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf from polyvore.ops import image_embedding class InceptionV3Test(tf.test.TestCase): def setUp(self): super(InceptionV3Test, self).setUp() batch_size = 4 height = 299 width = 299 num_channels = 3 self._images = tf.placeholder(tf.float32, [batch_size, height, width, num_channels]) self._batch_size = batch_size def _countInceptionParameters(self): """Counts the number of parameters in the inception model at top scope.""" counter = {} for v in tf.all_variables(): name_tokens = v.op.name.split("/") if name_tokens[0] == "InceptionV3": name = "InceptionV3/" + name_tokens[1] num_params = v.get_shape().num_elements() assert num_params counter[name] = counter.get(name, 0) + num_params return counter def _verifyParameterCounts(self): """Verifies the number of parameters in the inception model.""" param_counts = self._countInceptionParameters() expected_param_counts = { "InceptionV3/Conv2d_1a_3x3": 960, "InceptionV3/Conv2d_2a_3x3": 9312, "InceptionV3/Conv2d_2b_3x3": 18624, "InceptionV3/Conv2d_3b_1x1": 5360, "InceptionV3/Conv2d_4a_3x3": 138816, "InceptionV3/Mixed_5b": 256368, "InceptionV3/Mixed_5c": 277968, "InceptionV3/Mixed_5d": 285648, "InceptionV3/Mixed_6a": 1153920, "InceptionV3/Mixed_6b": 1298944, "InceptionV3/Mixed_6c": 1692736, "InceptionV3/Mixed_6d": 1692736, "InceptionV3/Mixed_6e": 2143872, 
"InceptionV3/Mixed_7a": 1699584, "InceptionV3/Mixed_7b": 5047872, "InceptionV3/Mixed_7c": 6080064, } self.assertDictEqual(expected_param_counts, param_counts) def _assertCollectionSize(self, expected_size, collection): actual_size = len(tf.get_collection(collection)) if expected_size != actual_size: self.fail("Found %d items in collection %s (expected %d)." % (actual_size, collection, expected_size)) def testTrainableTrueIsTrainingTrue(self): embeddings = image_embedding.inception_v3( self._images, trainable=True, is_training=True) self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list()) self._verifyParameterCounts() self._assertCollectionSize(376, tf.GraphKeys.VARIABLES) self._assertCollectionSize(188, tf.GraphKeys.TRAINABLE_VARIABLES) self._assertCollectionSize(188, tf.GraphKeys.UPDATE_OPS) self._assertCollectionSize(94, tf.GraphKeys.REGULARIZATION_LOSSES) self._assertCollectionSize(0, tf.GraphKeys.LOSSES) self._assertCollectionSize(23, tf.GraphKeys.SUMMARIES) def testTrainableTrueIsTrainingFalse(self): embeddings = image_embedding.inception_v3( self._images, trainable=True, is_training=False) self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list()) self._verifyParameterCounts() self._assertCollectionSize(376, tf.GraphKeys.VARIABLES) self._assertCollectionSize(188, tf.GraphKeys.TRAINABLE_VARIABLES) self._assertCollectionSize(0, tf.GraphKeys.UPDATE_OPS) self._assertCollectionSize(94, tf.GraphKeys.REGULARIZATION_LOSSES) self._assertCollectionSize(0, tf.GraphKeys.LOSSES) self._assertCollectionSize(23, tf.GraphKeys.SUMMARIES) def testTrainableFalseIsTrainingTrue(self): embeddings = image_embedding.inception_v3( self._images, trainable=False, is_training=True) self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list()) self._verifyParameterCounts() self._assertCollectionSize(376, tf.GraphKeys.VARIABLES) self._assertCollectionSize(0, tf.GraphKeys.TRAINABLE_VARIABLES) self._assertCollectionSize(0, 
tf.GraphKeys.UPDATE_OPS) self._assertCollectionSize(0, tf.GraphKeys.REGULARIZATION_LOSSES) self._assertCollectionSize(0, tf.GraphKeys.LOSSES) self._assertCollectionSize(23, tf.GraphKeys.SUMMARIES) def testTrainableFalseIsTrainingFalse(self): embeddings = image_embedding.inception_v3( self._images, trainable=False, is_training=False) self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list()) self._verifyParameterCounts() self._assertCollectionSize(376, tf.GraphKeys.VARIABLES) self._assertCollectionSize(0, tf.GraphKeys.TRAINABLE_VARIABLES) self._assertCollectionSize(0, tf.GraphKeys.UPDATE_OPS) self._assertCollectionSize(0, tf.GraphKeys.REGULARIZATION_LOSSES) self._assertCollectionSize(0, tf.GraphKeys.LOSSES) self._assertCollectionSize(23, tf.GraphKeys.SUMMARIES) if __name__ == "__main__": tf.test.main() ================================================ FILE: polyvore/ops/image_processing.py ================================================ # Copyright 2017 Xintong Han. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Helper functions for image preprocessing.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf def distort_image(image): """Perform random distortions on an image. Args: image: A float32 Tensor of shape [height, width, 3] with values in [0, 1). 
Returns: distorted_image: A float32 Tensor of shape [height, width, 3] with values in [0, 1]. """ # Randomly flip horizontally. No color distortion. with tf.name_scope("flip_horizontal", values=[image]): image = tf.image.random_flip_left_right(image) return image def process_image(encoded_image, is_training, height, width, resize_height=299, resize_width=299, image_format="jpeg", image_idx=0): """Decode an image, resize and apply random distortions. Args: encoded_image: String Tensor containing the image. is_training: Boolean; whether preprocessing for training or eval. height: Height of the output image. width: Width of the output image. resize_height: If > 0, resize height before crop to final dimensions. resize_width: If > 0, resize width before crop to final dimensions. image_format: "jpeg" or "png". image_idx: image index of the image in an outfit. Returns: A float32 Tensor of shape [height, width, 3] with values in [-1, 1]. Raises: ValueError: If image_format is invalid. """ # Helper function to log an image summary to the visualizer. Summaries are # only logged in thread 0. def image_summary(name, image): tf.image_summary(name, tf.expand_dims(image, 0)) # Decode image into a float32 Tensor of shape [?, ?, 3] with values in [0, 1). with tf.name_scope("decode", values=[encoded_image]): if image_format == "jpeg": image = tf.image.decode_jpeg(encoded_image, channels=3) elif image_format == "png": image = tf.image.decode_png(encoded_image, channels=3) else: raise ValueError("Invalid image format: %s" % image_format) image = tf.image.convert_image_dtype(image, dtype=tf.float32) image_summary("original_image/" + str(image_idx), image) # Resize image. assert (resize_height > 0) == (resize_width > 0) if resize_height: image = tf.image.resize_images(image, size=[resize_height, resize_width], method=tf.image.ResizeMethod.BILINEAR) # Crop to final dimensions. 
In the Polyvore model, no cropping is used # since we set height=resize_height and width=resize_width if is_training: image = tf.random_crop(image, [height, width, 3]) else: image = tf.image.resize_image_with_crop_or_pad(image, height, width) image_summary("resized_image/" + str(image_idx), image) # Randomly distort the image. if is_training: image = distort_image(image) image_summary("final_image/" + str(image_idx), image) # Rescale to [-1,1] instead of [0, 1] image = tf.sub(image, 0.5) image = tf.mul(image, 2.0) return image ================================================ FILE: polyvore/ops/inputs.py ================================================ # Copyright 2017 Xintong Han. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Input ops.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf def parse_sequence_example(serialized, set_id, image_feature, image_index, caption_feature, number_set_images): """Parses a tensorflow.SequenceExample into a set of images and caption. Args: serialized: A scalar string Tensor; a single serialized SequenceExample. set_id: Name of SequenceExample context feature containing the id of the outfit. image_feature: Name of SequenceExample context feature containing image data. 
def parse_sequence_example(serialized, set_id, image_feature,
                           image_index, caption_feature, number_set_images):
  """Splits one serialized outfit SequenceExample into its component tensors.

  Args:
    serialized: A scalar string Tensor; a single serialized SequenceExample.
    set_id: Name of the context feature holding the outfit id.
    image_feature: Prefix of the context features holding image data; the
      i-th image is read from "<image_feature>/<i>".
    image_index: Name of the feature list holding the index of each item in
      the outfit.
    caption_feature: Name of the feature list holding integer captions.
    number_set_images: Number of image slots read per outfit.

  Returns:
    set_id: Set id of the outfit (string Tensor).
    encoded_images: List of number_set_images string Tensors; a slot that is
      absent from the example parses to the default ''.
    image_ids: int64 Tensor parsed from the image_index feature list.
    captions: Dense int64 Tensor converted from the sparse caption feature.
    likes: Number of likes of the outfit (context key 'likes', default 0).
      Hard coded name, not used in our model.
  """
  image_keys = [image_feature + '/' + str(slot)
                for slot in range(number_set_images)]

  # Context (per-outfit) features: id, like count and one entry per image slot.
  context_spec = {}
  context_spec[set_id] = tf.FixedLenFeature([], dtype=tf.string)
  context_spec['likes'] = tf.FixedLenFeature([], dtype=tf.int64,
                                             default_value=0)
  for key in image_keys:
    context_spec[key] = tf.FixedLenFeature([], dtype=tf.string,
                                           default_value='')

  # Sequence (per-item) features: item indices and variable-length captions.
  sequence_spec = {
      image_index: tf.FixedLenSequenceFeature([], dtype=tf.int64),
      caption_feature: tf.VarLenFeature(dtype=tf.int64),
  }

  parsed_context, parsed_sequence = tf.parse_single_sequence_example(
      serialized,
      context_features=context_spec,
      sequence_features=sequence_spec)

  outfit_id = parsed_context[set_id]
  like_count = parsed_context['likes']
  encoded_images = [parsed_context[key] for key in image_keys]
  dense_captions = tf.sparse_tensor_to_dense(parsed_sequence[caption_feature])
  item_ids = parsed_sequence[image_index]

  return outfit_id, encoded_images, item_ids, dense_captions, like_count
def prefetch_input_data(reader,
                        file_pattern,
                        is_training,
                        batch_size,
                        values_per_shard,
                        input_queue_capacity_factor=16,
                        num_reader_threads=1,
                        shard_queue_name="filename_queue",
                        value_queue_name="input_queue"):
  """Reads string records from sharded files into a prefetch queue.

  For training a RandomShuffleQueue is used and keeps at least
  values_per_shard * input_queue_capacity_factor values, so a larger factor
  mixes examples from different shards better at the price of more memory.
  For eval a FIFOQueue is used and the files are read in order.

  Args:
    reader: Instance of tf.ReaderBase.
    file_pattern: Comma-separated list of file patterns (e.g.
      /tmp/train_data-?????-of-00100).
    is_training: Boolean; whether prefetching for training or eval.
    batch_size: Model batch size used to determine queue capacity.
    values_per_shard: Approximate number of values per shard.
    input_queue_capacity_factor: Minimum number of values to keep in the
      queue, in multiples of values_per_shard (training only).
    num_reader_threads: Number of reader threads to fill the queue.
    shard_queue_name: Name for the shards filename queue.
    value_queue_name: Name for the values input queue.

  Returns:
    A Queue containing prefetched string values.
  """
  shard_files = []
  for single_pattern in file_pattern.split(","):
    shard_files.extend(tf.gfile.Glob(single_pattern))

  if shard_files:
    tf.logging.info("Prefetching values from %d files matching %s",
                    len(shard_files), file_pattern)
  else:
    tf.logging.fatal("Found no input files matching %s", file_pattern)

  if not is_training:
    # Eval: deterministic file order and a plain FIFO queue.
    filename_queue = tf.train.string_input_producer(
        shard_files, shuffle=False, capacity=1, name=shard_queue_name)
    capacity = values_per_shard + 3 * batch_size
    values_queue = tf.FIFOQueue(
        capacity=capacity,
        dtypes=[tf.string],
        name="fifo_" + value_queue_name)
  else:
    # Training: shuffled file order and a shuffling value queue.
    filename_queue = tf.train.string_input_producer(
        shard_files, shuffle=True, capacity=16, name=shard_queue_name)
    min_after_dequeue = values_per_shard * input_queue_capacity_factor
    capacity = min_after_dequeue + 100 * batch_size
    values_queue = tf.RandomShuffleQueue(
        capacity=capacity,
        min_after_dequeue=min_after_dequeue,
        dtypes=[tf.string],
        name="random_" + value_queue_name)

  # One read + enqueue op per reader thread.
  enqueue_ops = []
  for _ in range(num_reader_threads):
    _, record = reader.read(filename_queue)
    enqueue_ops.append(values_queue.enqueue([record]))

  runner = tf.train.queue_runner.QueueRunner(values_queue, enqueue_ops)
  tf.train.queue_runner.add_queue_runner(runner)

  summary_tag = "queue/%s/fraction_of_%d_full" % (values_queue.name, capacity)
  tf.scalar_summary(
      summary_tag,
      tf.cast(values_queue.size(), tf.float32) * (1. / capacity))

  return values_queue
def batch_with_dynamic_pad(images_and_captions,
                           batch_size,
                           queue_capacity,
                           add_summaries=True):
  """Batches per-outfit tensors, padding variable-length ones per batch.

  For every outfit two all-ones int32 vectors whose length is the number of
  image ids are built (one used as the input mask, one as the loss mask),
  together with a 0/1 indicator of the non-zero caption entries. All tensors
  are then batched with tf.train.batch_join(dynamic_pad=True), so shorter
  sequences are zero-padded up to the longest one in the batch and the
  padded mask entries are 0.

  Example of a padded mask for outfits of 4, 3 and 2 items:
    mask:
      [
        [ 1 1 1 1 ],
        [ 1 1 1 0 ],
        [ 1 1 0 0 ],
      ]

  Args:
    images_and_captions: A list of [set_id, images, image_ids, captions,
      likes] entries, one per preprocessing thread.
    batch_size: Batch size.
    queue_capacity: Queue capacity.
    add_summaries: If true, add sequence length summaries (they keep the
      historical "caption_length/*" tags).

  Returns:
    A tuple (set_ids, images, image_ids, mask, loss_mask, captions,
    cap_mask, likes) of batched tensors.
  """
  per_thread_tensors = []
  for set_id, images, image_ids, captions, likes in images_and_captions:
    num_items = tf.shape(image_ids)[0]
    input_length = tf.sub(num_items, 0)  # change 1 to 0

    # 1 where the caption token is non-zero, 0 elsewhere.
    caption_is_word = tf.not_equal(captions, tf.zeros_like(captions))
    cap_indicator = tf.cast(caption_is_word, tf.int32)

    indicator = tf.ones(tf.expand_dims(input_length, 0), dtype=tf.int32)
    loss_indicator = tf.ones(tf.expand_dims(num_items, 0), dtype=tf.int32)
    stacked_images = tf.pack(images)

    per_thread_tensors.append([set_id, stacked_images, indicator,
                               loss_indicator, image_ids, captions,
                               cap_indicator, likes])

  batched = tf.train.batch_join(
      per_thread_tensors,
      batch_size=batch_size,
      capacity=queue_capacity,
      dynamic_pad=True,
      name="batch_and_pad")
  (set_ids, images, mask, loss_mask,
   image_ids, captions, cap_mask, likes) = batched

  if add_summaries:
    lengths = tf.add(tf.reduce_sum(mask, 1), 1)
    tf.scalar_summary("caption_length/batch_min", tf.reduce_min(lengths))
    tf.scalar_summary("caption_length/batch_max", tf.reduce_max(lengths))
    tf.scalar_summary("caption_length/batch_mean", tf.reduce_mean(lengths))

  return (set_ids, images, image_ids, mask,
          loss_mask, captions, cap_mask, likes)
class PolyvoreModel(object):
  """Bidirectional-RNN outfit model with a joint visual-semantic embedding.

  Call build() to assemble the graph; the tensors it creates are exposed as
  the attributes declared in __init__.
  """

  def __init__(self, config, mode, train_inception=False):
    """Basic setup.

    Args:
      config: Object containing configuration parameters.
      mode: "train", "eval" or "inference".
      train_inception: Whether the inception submodel variables are trainable.
    """
    assert mode in ["train", "eval", "inference"]
    self.config = config
    self.mode = mode
    self.train_inception = train_inception

    # Reader for the input data.
    self.reader = tf.TFRecordReader()

    # To match the "Show and Tell" paper we initialize all variables with a
    # random uniform initializer.
    self.initializer = tf.random_uniform_initializer(
        minval=-self.config.initializer_scale,
        maxval=self.config.initializer_scale)

    # A float32 Tensor with shape
    # [batch_size, num_images, height, width, channels].
    # num_images is the number of images in one outfit, default is 8.
    self.images = None

    # Forward RNN input and target sequences.
    # An int32 Tensor with shape [batch_size, padded_length].
    self.f_input_seqs = None
    # An int32 Tensor with shape [batch_size, padded_length].
    self.f_target_seqs = None

    # Backward RNN input and target sequences.
    # An int32 Tensor with shape [batch_size, padded_length].
    self.b_input_seqs = None
    # An int32 Tensor with shape [batch_size, padded_length].
    self.b_target_seqs = None

    # An int32 0/1 Tensor with shape [batch_size, padded_length].
    self.input_mask = None

    # Image caption sequence and masks.
    # An int32 Tensor with shape [batch_size, num_images, padded_length].
    self.cap_seqs = None
    # An int32 0/1 Tensor; non-zero caption positions of cap_seqs.
    self.cap_mask = None

    # Caption sequence embeddings, we use a simple bag of words model.
    # A float32 Tensor with shape [batch_size, num_images, embedding_size].
    self.seq_embeddings = None

    # Image embeddings in the joint visual-semantic space.
    # A float32 Tensor with shape [batch_size, num_images, embedding_size].
    self.image_embeddings = None

    # Image embeddings in the RNN output/prediction space.
    self.rnn_image_embeddings = None

    # Word embedding map.
    self.embedding_map = None

    # A float32 scalar Tensor; the total loss for the trainer to optimize.
    self.total_loss = None

    # Forward and backward RNN loss.
    # A float32 Tensor with shape [batch_size * padded_length].
    self.forward_losses = None
    # A float32 Tensor with shape [batch_size * padded_length].
    self.backward_losses = None
    # RNN loss, forward + backward.
    self.lstm_losses = None

    # Loss mask for lstm loss.
    self.loss_mask = None

    # Visual Semantic Embedding loss.
    # A float32 Tensor with shape [batch_size * padded_length].
    self.emb_losses = None

    # A float32 Tensor with shape [batch_size * padded_length].
    self.target_weights = None

    # Collection of variables from the inception submodel.
    self.inception_variables = []

    # Function to restore the inception submodel from checkpoint.
    self.init_fn = None

    # Global step Tensor.
    self.global_step = None

    # Some output for debugging purposes.
    self.target_embeddings = None
    self.input_embeddings = None
    self.set_ids = None
    self.f_lstm_state = None
    self.b_lstm_state = None
    self.lstm_output = None
    self.lstm_xent_loss = None

  def is_training(self):
    """Returns true if the model is built for training mode."""
    return self.mode == "train"

  def process_image(self, encoded_image, thread_id=0, image_idx=0):
    """Decodes and processes an image string.

    Args:
      encoded_image: A scalar string Tensor; the encoded image.
      thread_id: Preprocessing thread id used to select the ordering of color
        distortions. Not used in our model.
      image_idx: Index of the image in an outfit. Only used for summaries.

    Returns:
      A float32 Tensor of shape [height, width, 3]; the processed image.
    """
    return image_processing.process_image(encoded_image,
                                          is_training=self.is_training(),
                                          height=self.config.image_height,
                                          width=self.config.image_width,
                                          image_format=self.config.image_format,
                                          image_idx=image_idx)

  def build_inputs(self):
    """Input prefetching, preprocessing and batching.

    Outputs:
      self.images
      self.input_mask
      self.loss_mask (None in inference mode)
      self.cap_seqs
      self.cap_mask (None in inference mode)
      self.set_ids (None in inference mode)
    """
    if self.mode == "inference":
      # In inference mode, images and inputs are fed via placeholders.
      image_feed = tf.placeholder(dtype=tf.string, shape=[], name="image_feed")
      # Decode and preprocess the single fed image.
      image_feed = self.process_image(image_feed)

      input_feed = tf.placeholder(dtype=tf.int64,
                                  shape=[None],  # batch_size
                                  name="input_feed")

      # Insert batch dimensions.
      image_seqs = tf.expand_dims(image_feed, 0)
      cap_seqs = tf.expand_dims(input_feed, 1)

      # No target sequences in inference mode; the input mask is fed.
      # NOTE(review): the width 8 is hard coded here; it looks like it should
      # track config.number_set_images -- confirm before changing.
      input_mask = tf.placeholder(dtype=tf.int64,
                                  shape=[1, 8],  # batch_size
                                  name="input_mask")
      cap_mask = None
      loss_mask = None
      set_ids = None

    else:
      # Prefetch serialized SequenceExample protos.
      input_queue = input_ops.prefetch_input_data(
          self.reader,
          self.config.input_file_pattern,
          is_training=self.is_training(),
          batch_size=self.config.batch_size,
          values_per_shard=self.config.values_per_input_shard,
          input_queue_capacity_factor=self.config.input_queue_capacity_factor,
          num_reader_threads=self.config.num_input_reader_threads)

      # Image processing and random distortion. Split across multiple threads
      # with each thread applying a slightly different distortion. But we only
      # use one thread in our Polyvore model. likes are not used.
      images_and_captions = []
      for thread_id in range(self.config.num_preprocess_threads):
        serialized_sequence_example = input_queue.dequeue()
        set_id, encoded_images, image_ids, captions, likes = (
            input_ops.parse_sequence_example(
                serialized_sequence_example,
                set_id=self.config.set_id_name,
                image_feature=self.config.image_feature_name,
                image_index=self.config.image_index_name,
                caption_feature=self.config.caption_feature_name,
                number_set_images=self.config.number_set_images))

        images = []
        for i in range(self.config.number_set_images):
          images.append(self.process_image(encoded_images[i], image_idx=i))

        images_and_captions.append([set_id, images, image_ids,
                                    captions, likes])

      # Batch inputs.
      queue_capacity = (5 * self.config.num_preprocess_threads *
                        self.config.batch_size)

      (set_ids, image_seqs, image_ids, input_mask,
       loss_mask, cap_seqs, cap_mask, likes) = (
           input_ops.batch_with_dynamic_pad(images_and_captions,
                                            batch_size=self.config.batch_size,
                                            queue_capacity=queue_capacity))

    self.images = image_seqs
    self.input_mask = input_mask
    self.loss_mask = loss_mask
    self.cap_seqs = cap_seqs
    self.cap_mask = cap_mask
    self.set_ids = set_ids

  def build_image_embeddings(self):
    """Builds the image model subgraph and generates image embeddings
      in visual semantic joint space and RNN prediction space.

    Inputs:
      self.images

    Outputs:
      self.image_embeddings
      self.rnn_image_embeddings
      self.inception_variables
    """
    # Flatten the outfit dimension: every image becomes one batch entry.
    # The spatial dims are [height, width]; using image_height for both only
    # worked while the configured images were square.
    images = tf.reshape(self.images, [-1,
                                      self.config.image_height,
                                      self.config.image_width,
                                      3])

    inception_output = image_embedding.inception_v3(
        images,
        trainable=self.train_inception,
        is_training=self.is_training())
    self.inception_variables = tf.get_collection(
        tf.GraphKeys.VARIABLES, scope="InceptionV3")

    # Map inception output into embedding space.
    with tf.variable_scope("image_embedding") as scope:
      image_embeddings = tf.contrib.layers.fully_connected(
          inputs=inception_output,
          num_outputs=self.config.embedding_size,
          activation_fn=None,
          weights_initializer=self.initializer,
          biases_initializer=None,
          scope=scope)

    with tf.variable_scope("rnn_image_embedding") as scope:
      rnn_image_embeddings = tf.contrib.layers.fully_connected(
          inputs=inception_output,
          num_outputs=self.config.embedding_size,
          activation_fn=None,
          weights_initializer=self.initializer,
          biases_initializer=None,
          scope=scope)

    # Save the embedding size in the graph.
    tf.constant(self.config.embedding_size, name="embedding_size")

    # Restore the [batch, num_images, embedding_size] layout.
    self.image_embeddings = tf.reshape(image_embeddings,
                                       [tf.shape(self.images)[0],
                                        -1,
                                        self.config.embedding_size])

    self.rnn_image_embeddings = tf.reshape(rnn_image_embeddings,
                                           [tf.shape(self.images)[0],
                                            -1,
                                            self.config.embedding_size])

  def build_seq_embeddings(self):
    """Builds the caption (bag of words) embeddings.

    Inputs:
      self.cap_seqs
      self.cap_mask (training and eval only)

    Outputs:
      self.seq_embeddings
      self.embedding_map
    """
    with tf.variable_scope("seq_embedding"), tf.device("/cpu:0"):
      embedding_map = tf.get_variable(
          name="map",
          shape=[self.config.vocab_size, self.config.embedding_size],
          initializer=self.initializer)
      seq_embeddings = tf.nn.embedding_lookup(embedding_map, self.cap_seqs)

      # Pool the word embeddings of each caption (bag of words). The mask
      # product is a masked sum; build_model L2-normalizes the result.
      if self.mode != "inference":
        seq_embeddings = tf.batch_matmul(
            tf.cast(tf.expand_dims(self.cap_mask, 2), tf.float32),
            seq_embeddings)
        seq_embeddings = tf.squeeze(seq_embeddings, [2])

    self.embedding_map = embedding_map
    self.seq_embeddings = seq_embeddings

  def build_model(self):
    """Builds the model.

    The original code is written with Tensorflow r0.10; for Tensorflow > r1.0
    many functions can be simplified. For example Tensors support slicing
    now, so there is no need to use tf.slice().

    Inputs:
      self.image_embeddings
      self.rnn_image_embeddings
      self.seq_embeddings
      self.input_mask, self.loss_mask, self.cap_mask (training and eval only)

    Outputs (training and eval only unless noted):
      self.total_loss
      self.emb_losses
      self.lstm_losses
      self.target_weights
      self.target_embeddings, self.input_embeddings
      self.loss_mask, self.input_mask (overwritten with reshaped versions)
      self.lstm_xent_loss (inference only)
      self.f_lstm_state, self.b_lstm_state (inference with rnn_type "lstm")
    """
    norm_image_embeddings = tf.nn.l2_normalize(self.image_embeddings, 2,
                                               name="norm_image_embeddings")
    norm_seq_embeddings = tf.nn.l2_normalize(self.seq_embeddings, 2)

    # Pad the caption embeddings up to number_set_images items per outfit.
    norm_seq_embeddings = (
        tf.pad(norm_seq_embeddings,
               [[0, 0],
                [0, self.config.number_set_images -
                 tf.shape(norm_seq_embeddings)[1]],
                [0, 0]],
               name="norm_seq_embeddings"))

    if self.mode != "inference":
      # Compute losses for joint embedding.
      # Only look at the captions that have length >= 2.
      emb_loss_mask = tf.greater(tf.reduce_sum(self.cap_mask, 2), 1)

      # The mask is padded to the max outfit length.
      emb_loss_mask = tf.pad(emb_loss_mask,
                             [[0, 0],
                              [0, self.config.number_set_images -
                               tf.shape(emb_loss_mask)[1]]])

      # Select the valid image-caption pairs.
      emb_loss_mask = tf.reshape(emb_loss_mask, [-1])
      norm_image_embeddings = tf.reshape(
          norm_image_embeddings,
          [self.config.number_set_images * self.config.batch_size,
           self.config.embedding_size])
      norm_image_embeddings = tf.boolean_mask(norm_image_embeddings,
                                              emb_loss_mask)
      norm_seq_embeddings = tf.reshape(
          norm_seq_embeddings,
          [self.config.number_set_images * self.config.batch_size,
           self.config.embedding_size])
      norm_seq_embeddings = tf.boolean_mask(norm_seq_embeddings,
                                            emb_loss_mask)

      # The following defines contrastive loss in the joint space.
      # Reference: https://github.com/ryankiros/visual-semantic-embedding/blob/master/model.py#L39
      scores = tf.matmul(norm_seq_embeddings, norm_image_embeddings,
                         transpose_a=False, transpose_b=True, name="scores")
      diagonal = tf.expand_dims(tf.diag_part(scores), 1)
      cost_s = tf.maximum(0.0, self.config.emb_margin - diagonal + scores)
      cost_im = tf.maximum(
          0.0, self.config.emb_margin - tf.transpose(diagonal) + scores)
      # Matching pairs (the diagonal) do not contribute to the loss.
      cost_s = cost_s - tf.diag(tf.diag_part(cost_s))
      cost_im = cost_im - tf.diag(tf.diag_part(cost_im))

      emb_batch_loss = tf.reduce_sum(cost_s) + tf.reduce_sum(cost_im)
      emb_batch_loss = (emb_batch_loss /
                        tf.cast(tf.shape(norm_seq_embeddings)[0],
                                tf.float32) ** 2)

      if self.config.emb_loss_factor > 0.0:
        tf.contrib.losses.add_loss(emb_batch_loss *
                                   self.config.emb_loss_factor)

    # Compute image LSTM loss.
    # Start with one direction.
    tf.logging.info("Rnn_type: %s" % self.config.rnn_type)
    if self.config.rnn_type == "lstm":
      tf.logging.info("----- RNN Type: LSTM ------")
      # Forward LSTM.
      f_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(
          num_units=self.config.num_lstm_units, state_is_tuple=True)
      # Backward LSTM.
      b_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(
          num_units=self.config.num_lstm_units, state_is_tuple=True)
    elif self.config.rnn_type == "gru":
      tf.logging.info("----- RNN Type: GRU ------")
      # Forward GRU.
      f_lstm_cell = tf.nn.rnn_cell.GRUCell(
          num_units=self.config.num_lstm_units)
      # Backward GRU.
      b_lstm_cell = tf.nn.rnn_cell.GRUCell(
          num_units=self.config.num_lstm_units)
    else:
      tf.logging.info("----- RNN Type: RNN ------")
      # Forward RNN.
      f_lstm_cell = tf.nn.rnn_cell.BasicRNNCell(
          num_units=self.config.num_lstm_units)
      # Backward RNN.
      b_lstm_cell = tf.nn.rnn_cell.BasicRNNCell(
          num_units=self.config.num_lstm_units)

    # Dropout is only applied while training.
    if self.mode == "train":
      f_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
          f_lstm_cell,
          input_keep_prob=self.config.lstm_dropout_keep_prob,
          output_keep_prob=self.config.lstm_dropout_keep_prob)
      b_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
          b_lstm_cell,
          input_keep_prob=self.config.lstm_dropout_keep_prob,
          output_keep_prob=self.config.lstm_dropout_keep_prob)

    with tf.variable_scope("lstm", initializer=self.initializer) as lstm_scope:
      if self.mode == "inference":
        # Inference for Bi-LSTM.
        pred_feed = tf.placeholder(dtype=tf.float32,
                                   shape=[None, None],
                                   name="pred_feed")
        next_index_feed = tf.placeholder(dtype=tf.int64,
                                         shape=[None],
                                         name="next_index_feed")

        self.lstm_xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=pred_feed,
            labels=next_index_feed,
            name="lstm_xent")

        if self.config.rnn_type == "lstm":
          # In inference mode, use concatenated states for convenient feeding
          # and fetching.
          # Forward:
          # Placeholder for feeding a batch of concatenated states.
          f_state_feed = tf.placeholder(
              dtype=tf.float32,
              shape=[None, sum(f_lstm_cell.state_size)],
              name="f_state_feed")
          f_input_feed = tf.placeholder(
              dtype=tf.float32,
              shape=[None, self.config.embedding_size],
              name="f_input_feed")

          # Backward:
          # Placeholder for feeding a batch of concatenated states.
          b_state_feed = tf.placeholder(
              dtype=tf.float32,
              shape=[None, sum(b_lstm_cell.state_size)],
              name="b_state_feed")
          b_input_feed = tf.placeholder(
              dtype=tf.float32,
              shape=[None, self.config.embedding_size],
              name="b_input_feed")

          f_state_tuple = tf.split(1, 2, f_state_feed)
          # Run a single LSTM step.
          with tf.variable_scope("FW"):
            f_lstm_outputs, f_state_tuple = f_lstm_cell(
                inputs=f_input_feed,
                state=f_state_tuple)
          # Concatenate the resulting state.
          self.f_lstm_state = tf.concat(1, f_state_tuple, name="f_state")

          b_state_tuple = tf.split(1, 2, b_state_feed)
          # Run a single LSTM step.
          with tf.variable_scope("BW"):
            b_lstm_outputs, b_state_tuple = b_lstm_cell(
                inputs=b_input_feed,
                state=b_state_tuple)
          # Concatenate the resulting state.
          self.b_lstm_state = tf.concat(1, b_state_tuple, name="b_state")

        else:
          # For non-LSTM RNN models, no tuple is used.
          # Forward:
          # Placeholder for feeding a batch of states.
          f_state_feed = tf.placeholder(
              dtype=tf.float32,
              shape=[None, f_lstm_cell.state_size],
              name="f_state_feed")
          f_input_feed = tf.placeholder(
              dtype=tf.float32,
              shape=[None, self.config.embedding_size],
              name="f_input_feed")

          # Backward:
          # Placeholder for feeding a batch of states.
          b_state_feed = tf.placeholder(
              dtype=tf.float32,
              shape=[None, b_lstm_cell.state_size],
              name="b_state_feed")
          b_input_feed = tf.placeholder(
              dtype=tf.float32,
              shape=[None, self.config.embedding_size],
              name="b_input_feed")

          # Run a single RNN step.
          with tf.variable_scope("FW"):
            f_lstm_outputs, f_state_tuple = f_lstm_cell(
                inputs=f_input_feed,
                state=f_state_feed)
          f_state_tuple = tf.identity(f_state_tuple, name="f_state")

          with tf.variable_scope("BW"):
            b_lstm_outputs, b_state_tuple = b_lstm_cell(
                inputs=b_input_feed,
                state=b_state_feed)
          b_state_tuple = tf.identity(b_state_tuple, name="b_state")

        lstm_outputs = (f_lstm_outputs, b_lstm_outputs)
        sequence_length = None

      else:
        # Run the batch of sequence embeddings through the LSTM.
        sequence_length = tf.reduce_sum(self.input_mask, 1)
        lstm_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=f_lstm_cell,
            cell_bw=b_lstm_cell,
            inputs=self.rnn_image_embeddings,
            initial_state_fw=None,
            initial_state_bw=None,
            sequence_length=sequence_length,
            dtype=tf.float32,
            scope=lstm_scope)

    # Stack batches vertically.
    f_lstm_outputs = tf.reshape(lstm_outputs[0],
                                [-1, f_lstm_cell.output_size])
    if self.mode == "inference":
      b_lstm_outputs = lstm_outputs[1]
    else:
      # Reverse each sequence (within its own length) before stacking.
      b_lstm_outputs = tf.reverse_sequence(lstm_outputs[1],
                                           seq_lengths=sequence_length,
                                           seq_dim=1,
                                           batch_dim=0)

    b_lstm_outputs = tf.reshape(b_lstm_outputs,
                                [-1, b_lstm_cell.output_size])

    # Project the RNN outputs back into the image embedding space.
    with tf.variable_scope("f_logits") as logits_scope:
      f_input_embeddings = tf.contrib.layers.fully_connected(
          inputs=f_lstm_outputs,
          num_outputs=self.config.embedding_size,
          activation_fn=None,
          weights_initializer=self.initializer,
          scope=logits_scope)

    with tf.variable_scope("b_logits") as logits_scope:
      b_input_embeddings = tf.contrib.layers.fully_connected(
          inputs=b_lstm_outputs,
          num_outputs=self.config.embedding_size,
          activation_fn=None,
          weights_initializer=self.initializer,
          scope=logits_scope)

    if self.mode != "inference":
      # Padding input_mask to match dimension, then drop its first column so
      # it lines up with the (shifted by one) target embeddings.
      input_mask = tf.pad(self.input_mask,
                          [[0, 0],
                           [0, self.config.number_set_images + 1 -
                            tf.shape(self.input_mask)[1]]])
      input_mask = tf.to_float(
          tf.reshape(tf.slice(input_mask, [0, 1], [-1, -1]), [-1, 1]))
      loss_mask = tf.pad(self.loss_mask,
                         [[0, 0],
                          [0, self.config.number_set_images -
                           tf.shape(self.loss_mask)[1]]])
      loss_mask = tf.reshape(
          tf.to_float(loss_mask),
          [self.config.number_set_images * self.config.batch_size, 1])

      # Forward rnn: the target of step t is the embedding of item t + 1.
      f_target_embeddings = tf.slice(
          tf.pad(self.rnn_image_embeddings, [[0, 0], [0, 1], [0, 0]]),
          [0, 1, 0], [-1, -1, -1])
      f_target_embeddings = tf.reshape(
          f_target_embeddings,
          [self.config.number_set_images * self.config.batch_size,
           self.config.embedding_size])
      f_target_embeddings = tf.mul(f_target_embeddings,
                                   input_mask,
                                   name="target_embeddings")

      # Softmax loss over all items in this minibatch.
      loss_mask = tf.squeeze(loss_mask)
      f_input_embeddings = tf.boolean_mask(f_input_embeddings,
                                           tf.cast(loss_mask, tf.bool))
      f_target_embeddings = tf.boolean_mask(f_target_embeddings,
                                            tf.cast(loss_mask, tf.bool))

      f_lstm_scores = tf.matmul(f_input_embeddings,
                                f_target_embeddings,
                                transpose_a=False,
                                transpose_b=True)
      # Row i should score highest against target i.
      f_lstm_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=f_lstm_scores,
          labels=tf.range(tf.shape(f_lstm_scores)[0]))
      f_lstm_loss = tf.div(tf.reduce_sum(f_lstm_loss),
                           tf.reduce_sum(loss_mask),
                           name="f_lstm_loss")

      # Backward rnn.
      # It would be better to write a function to calculate lstm_loss from
      # loss_mask, inputs, and targets, so the code can be reused; for now
      # the forward computation is just duplicated for the backward loss.
      reverse_embeddings = tf.reverse_sequence(self.rnn_image_embeddings,
                                               seq_lengths=sequence_length,
                                               seq_dim=1,
                                               batch_dim=0)
      b_target_embeddings = tf.slice(
          tf.pad(reverse_embeddings, [[0, 0], [0, 1], [0, 0]]),
          [0, 1, 0], [-1, -1, -1])
      b_target_embeddings = tf.reshape(
          b_target_embeddings,
          [self.config.number_set_images * self.config.batch_size,
           self.config.embedding_size])
      b_target_embeddings = tf.mul(b_target_embeddings,
                                   input_mask,
                                   name="target_embeddings")

      # Softmax loss over all items in this minibatch.
      b_input_embeddings = tf.boolean_mask(b_input_embeddings,
                                           tf.cast(loss_mask, tf.bool))
      b_target_embeddings = tf.boolean_mask(b_target_embeddings,
                                            tf.cast(loss_mask, tf.bool))

      b_lstm_scores = tf.matmul(b_input_embeddings,
                                b_target_embeddings,
                                transpose_a=False,
                                transpose_b=True)
      b_lstm_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=b_lstm_scores,
          labels=tf.range(tf.shape(b_lstm_scores)[0]))
      b_lstm_loss = tf.div(tf.reduce_sum(b_lstm_loss),
                           tf.reduce_sum(loss_mask),
                           name="b_lstm_loss")

      if self.config.f_rnn_loss_factor > 0:
        tf.contrib.losses.add_loss(f_lstm_loss *
                                   self.config.f_rnn_loss_factor)
      if self.config.b_rnn_loss_factor > 0:
        tf.contrib.losses.add_loss(b_lstm_loss *
                                   self.config.b_rnn_loss_factor)

      # Merge all losses and stats.
      total_loss = tf.contrib.losses.get_total_loss()

      # Add summaries.
      tf.scalar_summary("emb_batch_loss", emb_batch_loss)
      tf.scalar_summary("f_lstm_loss", f_lstm_loss)
      tf.scalar_summary("b_lstm_loss", b_lstm_loss)
      tf.scalar_summary("lstm_loss",
                        (f_lstm_loss * self.config.f_rnn_loss_factor +
                         b_lstm_loss * self.config.b_rnn_loss_factor))
      tf.scalar_summary("total_loss", total_loss)
      for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)

      weights = tf.to_float(tf.reshape(emb_loss_mask, [-1]))

      self.loss_mask = loss_mask
      self.input_mask = input_mask
      self.target_embeddings = (f_target_embeddings, b_target_embeddings)
      self.input_embeddings = (f_input_embeddings, b_input_embeddings)
      self.total_loss = total_loss
      self.emb_losses = emb_batch_loss  # Used in evaluation.
      self.lstm_losses = (f_lstm_loss * self.config.f_rnn_loss_factor +
                          b_lstm_loss * self.config.b_rnn_loss_factor)  # Used in evaluation.
      self.target_weights = weights  # Used in evaluation.

  def setup_inception_initializer(self):
    """Sets up the function to restore inception variables from checkpoint."""
    if self.mode != "inference":
      # Restore inception variables only.
      saver = tf.train.Saver(self.inception_variables)

      def restore_fn(sess):
        tf.logging.info("Restoring Inception variables from checkpoint %s" %
                        self.config.inception_checkpoint_file)
        saver.restore(sess, self.config.inception_checkpoint_file)

      self.init_fn = restore_fn

  def setup_global_step(self):
    """Sets up the global step Tensor."""
    global_step = tf.Variable(
        initial_value=0,
        name="global_step",
        trainable=False,
        collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.VARIABLES])

    self.global_step = global_step

  def build(self):
    """Creates all ops for training and evaluation."""
    self.build_inputs()
    self.build_image_embeddings()
    self.build_seq_embeddings()
    self.build_model()
    self.setup_inception_initializer()
    self.setup_global_step()
# ============================================================================== """Siamese Network for compatibility modeling/ """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np import tensorflow as tf import scipy.io as sio from scipy.linalg import block_diag from ops import image_embedding from ops import image_processing from ops import inputs as input_ops class PolyvoreModel(object): """ Model for fashion set on Polyvore dataset """ def __init__(self, config, mode, train_inception=False): """Basic setup. Args: config: Object containing configuration parameters. mode: "train", "eval" or "inference". train_inception: Whether the inception submodel variables are trainable. """ assert mode in ["train", "eval", "inference"] self.config = config self.mode = mode self.train_inception = train_inception # Reader for the input data. self.reader = tf.TFRecordReader() # To match the "Show and Tell" paper we initialize all variables with a # random uniform initializer. self.initializer = tf.random_uniform_initializer( minval=-self.config.initializer_scale, maxval=self.config.initializer_scale) # A float32 Tensor with shape [batch_size, num_images, height, width, channels]. self.images = None # An int32 0/1 Tensor with shape [batch_size, padded_length]. self.input_mask = None # A float32 Tensor with shape [batch_size, num_images, embedding_size]. self.image_embeddings = None # A float32 scalar Tensor; the total loss for the trainer to optimize. self.total_loss = None # Collection of variables from the inception submodel. self.inception_variables = [] # Function to restore the inception submodel from checkpoint. self.init_fn = None # Global step Tensor. self.global_step = None def is_training(self): """Returns true if the model is built for training mode.""" return self.mode == "train" def process_image(self, encoded_image, thread_id=0, image_idx=0): """Decodes and processes an image string. 
Args: encoded_image: A scalar string Tensor; the encoded image. thread_id: Preprocessing thread id used to select the ordering of color distortions. Returns: A float32 Tensor of shape [height, width, 3]; the processed image. """ return image_processing.process_image(encoded_image, is_training=self.is_training(), height=self.config.image_height, width=self.config.image_width, image_format=self.config.image_format, image_idx=image_idx) def build_inputs(self): """Input prefetching, preprocessing and batching. Outputs: images and seqs """ if self.mode == "inference": # In inference mode, images and inputs are fed via placeholders. image_feed = tf.placeholder(dtype=tf.string, shape=[], name="image_feed") # Process image and insert batch dimensions. image_feed = self.process_image(image_feed) # Process image and insert batch dimensions. image_seqs = tf.expand_dims(image_feed, 0) # No target sequences or input mask in inference mode. input_mask = tf.placeholder(dtype=tf.int64, shape=[1,8], # batch_size name="input_mask") else: # Prefetch serialized SequenceExample protos. input_queue = input_ops.prefetch_input_data( self.reader, self.config.input_file_pattern, is_training=self.is_training(), batch_size=self.config.batch_size, values_per_shard=self.config.values_per_input_shard, input_queue_capacity_factor=self.config.input_queue_capacity_factor, num_reader_threads=self.config.num_input_reader_threads) # Image processing and random distortion. Split across multiple threads # with each thread applying a slightly different distortion. 
# assert self.config.num_preprocess_threads % 2 == 0 images_and_captions = [] for thread_id in range(self.config.num_preprocess_threads): serialized_sequence_example = input_queue.dequeue() set_id, encoded_images, image_ids, captions, likes = ( input_ops.parse_sequence_example( serialized_sequence_example, set_id =self.config.set_id_name, image_feature=self.config.image_feature_name, image_index=self.config.image_index_name, caption_feature=self.config.caption_feature_name, number_set_images=self.config.number_set_images)) images = [] for i in range(self.config.number_set_images): images.append(self.process_image(encoded_images[i],image_idx=i)) images_and_captions.append([set_id, images, image_ids, captions, likes]) # Batch inputs. queue_capacity = (5 * self.config.num_preprocess_threads * self.config.batch_size) #(set_ids, image_seqs, image_ids, f_input_seqs, f_target_seqs, # b_input_seqs, b_target_seqs, input_mask, cap_seqs, cap_mask) = ( (set_ids, image_seqs, image_ids, input_mask, loss_mask, cap_seqs, cap_mask, likes) = ( input_ops.batch_with_dynamic_pad(images_and_captions, batch_size=self.config.batch_size, queue_capacity=queue_capacity)) self.images = image_seqs self.input_mask = input_mask def build_image_embeddings(self): """Builds the image model subgraph and generates image embeddings. Inputs: self.images Outputs: self.image_embeddings """ # Reshape 5D image tensor. images = tf.reshape(self.images, [-1, self.config.image_height, self.config.image_height, 3]) inception_output = image_embedding.inception_v3( images, trainable=self.train_inception, is_training=self.is_training()) self.inception_variables = tf.get_collection( tf.GraphKeys.VARIABLES, scope="InceptionV3") # Map inception output into embedding space. 
with tf.variable_scope("image_embedding") as scope: image_embeddings = tf.contrib.layers.fully_connected( inputs=inception_output, num_outputs=self.config.embedding_size, activation_fn=None, weights_initializer=self.initializer, biases_initializer=None, scope=scope) # Save the embedding size in the graph. tf.constant(self.config.embedding_size, name="embedding_size") self.image_embeddings = tf.reshape(image_embeddings, [tf.shape(self.images)[0], -1, self.config.embedding_size]) def build_model(self): """Builds the model. Inputs: self.image_embeddings self.seq_embeddings self.target_seqs (training and eval only) self.input_mask (training and eval only) Outputs: self.total_loss (training and eval only) self.target_cross_entropy_losses (training and eval only) self.target_cross_entropy_loss_weights (training and eval only) """ norm_image_embeddings = tf.nn.l2_normalize(self.image_embeddings, 2, name="norm_image_embeddings") if self.mode == "inference": pass else: # Select the valid siamese pairs. Hacky for now! 
emb_loss_mask = np.ones((self.config.number_set_images, self.config.number_set_images)) # Manually replicate for 8 times emb_loss_mask = block_diag(emb_loss_mask, emb_loss_mask, emb_loss_mask, emb_loss_mask, emb_loss_mask, emb_loss_mask, emb_loss_mask, emb_loss_mask, emb_loss_mask, emb_loss_mask) norm_image_embeddings = tf.reshape(norm_image_embeddings, [self.config.number_set_images * self.config.batch_size, self.config.embedding_size]) scores = tf.matmul(norm_image_embeddings, norm_image_embeddings, transpose_a=False, transpose_b=True, name="scores") posi_scores = tf.reduce_sum(tf.mul(scores, emb_loss_mask)) / np.sum(emb_loss_mask) emb_loss_mask = 1.0 - emb_loss_mask m = 0.8 # magin in Siamese network nega_scores = tf.maximum(tf.mul(scores, emb_loss_mask) - 0.8, 0.0) nega_scores = tf.reduce_sum(nega_scores) / np.sum(emb_loss_mask) # nega_scores = (tf.reduce_sum(nega_scores) - # m * np.sum(1 - emb_loss_mask)) / np.sum(emb_loss_mask) emb_batch_loss = tf.sub(nega_scores, posi_scores, name="emb_batch_loss") tf.contrib.losses.add_loss(emb_batch_loss) # Merge all losses and stats. total_loss = tf.contrib.losses.get_total_loss() # Add summaries. tf.scalar_summary("emb_batch_loss", emb_batch_loss) tf.scalar_summary("total_loss", total_loss) for var in tf.trainable_variables(): tf.histogram_summary(var.op.name, var) self.total_loss = total_loss def setup_inception_initializer(self): """Sets up the function to restore inception variables from checkpoint.""" if self.mode != "inference": # Restore inception variables only. 
saver = tf.train.Saver(self.inception_variables) def restore_fn(sess): tf.logging.info("Restoring Inception variables from checkpoint file %s", self.config.inception_checkpoint_file) saver.restore(sess, self.config.inception_checkpoint_file) self.init_fn = restore_fn def setup_global_step(self): """Sets up the global step Tensor.""" global_step = tf.Variable( initial_value=0, name="global_step", trainable=False, collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.VARIABLES]) self.global_step = global_step def build(self): """Creates all ops for training and evaluation.""" self.build_inputs() self.build_image_embeddings() self.build_model() self.setup_inception_initializer() self.setup_global_step() ================================================ FILE: polyvore/polyvore_model_vse.py ================================================ # Copyright 2017 Xintong Han. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """ Polyvore model used in ACM MM"17 paper "Learning Fashion Compatibility with Bidirectional LSTMs" Link: https://arxiv.org/pdf/1707.05691.pdf """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np import tensorflow as tf from ops import image_embedding from ops import image_processing from ops import inputs as input_ops class PolyvoreModel(object): """ Model for fashion set on Polyvore dataset. 
""" def __init__(self, config, mode, train_inception=False): """Basic setup. Args: config: Object containing configuration parameters. mode: "train", "eval" or "inference". train_inception: Whether the inception submodel variables are trainable. """ assert mode in ["train", "eval", "inference"] self.config = config self.mode = mode self.train_inception = train_inception # Reader for the input data. self.reader = tf.TFRecordReader() # To match the "Show and Tell" paper we initialize all variables with a # random uniform initializer. self.initializer = tf.random_uniform_initializer( minval=-self.config.initializer_scale, maxval=self.config.initializer_scale) # A float32 Tensor with shape # [batch_size, num_images, height, width, channels]. # num_images is the number of images in one outfit, default is 8. self.images = None # Forward RNN input and target sequences. # An int32 Tensor with shape [batch_size, padded_length]. self.f_input_seqs = None # An int32 Tensor with shape [batch_size, padded_length]. self.f_target_seqs = None # Backward RNN input and target sequences. # An int32 Tensor with shape [batch_size, padded_length]. self.b_input_seqs = None # An int32 Tensor with shape [batch_size, padded_length]. self.b_target_seqs = None # An int32 0/1 Tensor with shape [batch_size, padded_length]. self.input_mask = None # Image caption sequence and masks. # An int32 Tensor with shape [batch_size, num_images, padded_length]. self.cap_seqs = None # An int32 0/1 Tensor with shape [batch_size, padded_length]. self.cap_mask = None # Caption sequence embeddings, we use simple bag of word model. # A float32 Tensor with shape [batch_size, num_images, embedding_size]. self.seq_embeddings = None # Image embeddings in the joint visual-semantic space # A float32 Tensor with shape [batch_size, num_images, embedding_size]. self.image_embeddings = None # Image embeddings in the RNN output/prediction space. self.rnn_image_embeddings = None # Word embedding map. 
self.embedding_map = None # A float32 scalar Tensor; the total loss for the trainer to optimize. self.total_loss = None # Forward and backward RNN loss. # A float32 Tensor with shape [batch_size * padded_length]. self.forward_losses = None # A float32 Tensor with shape [batch_size * padded_length]. self.backward_losses = None # RNN loss, forward + backward. self.lstm_losses = None # Loss mask for lstm loss. self.loss_mask = None # Visual Semantic Embedding loss. # A float32 Tensor with shape [batch_size * padded_length]. self.emb_losses = None # A float32 Tensor with shape [batch_size * padded_length]. self.target_weights = None # Collection of variables from the inception submodel. self.inception_variables = [] # Function to restore the inception submodel from checkpoint. self.init_fn = None # Global step Tensor. self.global_step = None def is_training(self): """Returns true if the model is built for training mode.""" return self.mode == "train" def process_image(self, encoded_image, thread_id=0, image_idx=0): """Decodes and processes an image string. Args: encoded_image: A scalar string Tensor; the encoded image. thread_id: Preprocessing thread id used to select the ordering of color distortions. Not used in our model. image_idx: Index of the image in an outfit. Only used for summaries. Returns: A float32 Tensor of shape [height, width, 3]; the processed image. """ return image_processing.process_image(encoded_image, is_training=self.is_training(), height=self.config.image_height, width=self.config.image_width, image_format=self.config.image_format, image_idx=image_idx) def build_inputs(self): """Input prefetching, preprocessing and batching. Outputs: Inputs of the model. """ if self.mode == "inference": # In inference mode, images and inputs are fed via placeholders. image_feed = tf.placeholder(dtype=tf.string, shape=[], name="image_feed") # Process image and insert batch dimensions. 
image_feed = self.process_image(image_feed) input_feed = tf.placeholder(dtype=tf.int64, shape=[None], # batch_size name="input_feed") # Process image and insert batch dimensions. image_seqs = tf.expand_dims(image_feed, 0) cap_seqs = tf.expand_dims(input_feed, 1) # No target sequences or input mask in inference mode. input_mask = tf.placeholder(dtype=tf.int64, shape=[1, 8], # batch_size name="input_mask") cap_mask = None loss_mask = None set_ids = None else: # Prefetch serialized SequenceExample protos. input_queue = input_ops.prefetch_input_data( self.reader, self.config.input_file_pattern, is_training=self.is_training(), batch_size=self.config.batch_size, values_per_shard=self.config.values_per_input_shard, input_queue_capacity_factor=self.config.input_queue_capacity_factor, num_reader_threads=self.config.num_input_reader_threads) # Image processing and random distortion. Split across multiple threads # with each thread applying a slightly different distortion. But we only # use one thread in our Polyvore model. likes are not used. images_and_captions = [] for thread_id in range(self.config.num_preprocess_threads): serialized_sequence_example = input_queue.dequeue() set_id, encoded_images, image_ids, captions, likes = ( input_ops.parse_sequence_example( serialized_sequence_example, set_id =self.config.set_id_name, image_feature=self.config.image_feature_name, image_index=self.config.image_index_name, caption_feature=self.config.caption_feature_name, number_set_images=self.config.number_set_images)) images = [] for i in range(self.config.number_set_images): images.append(self.process_image(encoded_images[i],image_idx=i)) images_and_captions.append([set_id, images, image_ids, captions, likes]) # Batch inputs. 
queue_capacity = (5 * self.config.num_preprocess_threads * self.config.batch_size) (set_ids, image_seqs, image_ids, input_mask, loss_mask, cap_seqs, cap_mask, likes) = ( input_ops.batch_with_dynamic_pad(images_and_captions, batch_size=self.config.batch_size, queue_capacity=queue_capacity)) self.images = image_seqs self.input_mask = input_mask self.loss_mask = loss_mask self.cap_seqs = cap_seqs self.cap_mask = cap_mask self.set_ids = set_ids def build_image_embeddings(self): """Builds the image model subgraph and generates image embeddings in visual semantic joint space and RNN prediction space. Inputs: self.images Outputs: self.image_embeddings self.rnn_image_embeddings """ # Reshape 5D image tensor. images = tf.reshape(self.images, [-1, self.config.image_height, self.config.image_height, 3]) inception_output = image_embedding.inception_v3( images, trainable=self.train_inception, is_training=self.is_training()) self.inception_variables = tf.get_collection( tf.GraphKeys.VARIABLES, scope="InceptionV3") # Map inception output into embedding space. with tf.variable_scope("image_embedding") as scope: image_embeddings = tf.contrib.layers.fully_connected( inputs=inception_output, num_outputs=self.config.embedding_size, activation_fn=None, weights_initializer=self.initializer, biases_initializer=None, scope=scope) # Save the embedding size in the graph. tf.constant(self.config.embedding_size, name="embedding_size") self.image_embeddings = tf.reshape(image_embeddings, [tf.shape(self.images)[0], -1, self.config.embedding_size]) def build_seq_embeddings(self): """Builds the input sequence embeddings. 
Inputs: self.input_seqs Outputs: self.seq_embeddings self.embedding_map """ with tf.variable_scope("seq_embedding"), tf.device("/cpu:0"): embedding_map = tf.get_variable( name="map", shape=[self.config.vocab_size, self.config.embedding_size], initializer=self.initializer) seq_embeddings = tf.nn.embedding_lookup(embedding_map, self.cap_seqs) # Average pooling the seq_embeddings (bag of words). if self.mode != "inference": seq_embeddings = tf.batch_matmul( tf.cast(tf.expand_dims(self.cap_mask, 2), tf.float32), seq_embeddings) seq_embeddings = tf.squeeze(seq_embeddings, [2]) self.embedding_map = embedding_map self.seq_embeddings = seq_embeddings def build_model(self): """Builds the model. The original code is written with Tensorflow r0.10 for Tensorflow > r1.0, many functions can be simplified. For example Tensors support slicing now, so no need to use tf.slice() """ norm_image_embeddings = tf.nn.l2_normalize(self.image_embeddings, 2, name="norm_image_embeddings") norm_seq_embeddings = tf.nn.l2_normalize(self.seq_embeddings, 2) norm_seq_embeddings = ( tf.pad(norm_seq_embeddings, [[0, 0], [0, self.config.number_set_images - tf.shape(norm_seq_embeddings)[1]], [0, 0]], name="norm_seq_embeddings")) if self.mode == "inference": pass else: # Compute losses for joint embedding. # Only look at the captions that have length >= 2. emb_loss_mask = tf.greater(tf.reduce_sum(self.cap_mask, 2), 1) # Image mask is padded it to max length. emb_loss_mask = tf.pad(emb_loss_mask, [[0,0], [0, self.config.number_set_images - tf.shape(emb_loss_mask)[1]]]) # Select the valid image-caption pair. 
emb_loss_mask = tf.reshape(emb_loss_mask, [-1]) norm_image_embeddings = tf.reshape(norm_image_embeddings, [self.config.number_set_images * self.config.batch_size, self.config.embedding_size]) norm_image_embeddings = tf.boolean_mask(norm_image_embeddings, emb_loss_mask) norm_seq_embeddings = tf.reshape(norm_seq_embeddings, [self.config.number_set_images * self.config.batch_size, self.config.embedding_size]) norm_seq_embeddings = tf.boolean_mask(norm_seq_embeddings, emb_loss_mask) # The following defines contrastive loss in the joint space. # Reference: https://github.com/ryankiros/visual-semantic-embedding/blob/master/model.py#L39 scores = tf.matmul(norm_seq_embeddings, norm_image_embeddings, transpose_a=False, transpose_b=True, name="scores") diagonal = tf.expand_dims(tf.diag_part(scores), 1) cost_s = tf.maximum(0.0, self.config.emb_margin - diagonal + scores) cost_im = tf.maximum(0.0, self.config.emb_margin - tf.transpose(diagonal) + scores) cost_s = cost_s - tf.diag(tf.diag_part(cost_s)) cost_im = cost_im - tf.diag(tf.diag_part(cost_im)) emb_batch_loss = tf.reduce_sum(cost_s) + tf.reduce_sum(cost_im) emb_batch_loss = (emb_batch_loss / tf.cast(tf.shape(norm_seq_embeddings)[0], tf.float32) ** 2) tf.contrib.losses.add_loss(emb_batch_loss * self.config.emb_loss_factor) total_loss = tf.contrib.losses.get_total_loss() # Add summaries. tf.scalar_summary("emb_batch_loss", emb_batch_loss) tf.scalar_summary("total_loss", total_loss) for var in tf.trainable_variables(): tf.histogram_summary(var.op.name, var) weights = tf.to_float(tf.reshape(emb_loss_mask, [-1])) self.loss_mask = loss_mask self.input_mask = input_mask self.total_loss = total_loss self.emb_losses = emb_batch_loss # Used in evaluation. def setup_inception_initializer(self): """Sets up the function to restore inception variables from checkpoint.""" if self.mode != "inference": # Restore inception variables only. 
saver = tf.train.Saver(self.inception_variables) def restore_fn(sess): tf.logging.info("Restoring Inception variables from checkpoint %s" % self.config.inception_checkpoint_file) saver.restore(sess, self.config.inception_checkpoint_file) self.init_fn = restore_fn def setup_global_step(self): """Sets up the global step Tensor.""" global_step = tf.Variable( initial_value=0, name="global_step", trainable=False, collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.VARIABLES]) self.global_step = global_step def build(self): """Creates all ops for training and evaluation.""" self.build_inputs() self.build_image_embeddings() self.build_seq_embeddings() self.build_model() self.setup_inception_initializer() self.setup_global_step() ================================================ FILE: polyvore/run_inference.py ================================================ # Copyright 2017 Xintong Han. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Run the inference of Bi-LSTM model given input images.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import json import tensorflow as tf import pickle as pkl import numpy as np import configuration import polyvore_model_bi as polyvore_model FLAGS = tf.flags.FLAGS tf.flags.DEFINE_string("checkpoint_path", "", "Model checkpoint file or directory containing a " "model checkpoint file.") tf.flags.DEFINE_string("json_file", "data/label/test-no-dup.json", "Json file containing the inference data.") tf.flags.DEFINE_string("image_dir", "data/images", "Directory containing images.") tf.flags.DEFINE_string("feature_file", "data/features/test_features.pkl", "Directory to save the features") tf.flags.DEFINE_string("rnn_type", "", "Type of RNN.") def main(_): if os.path.isfile(FLAGS.feature_file): print("Feature file already exist.") return # Build the inference graph. g = tf.Graph() with g.as_default(): model_config = configuration.ModelConfig() model_config.rnn_type = FLAGS.rnn_type model = polyvore_model.PolyvoreModel(model_config, mode="inference") model.build() saver = tf.train.Saver() g.finalize() sess = tf.Session(graph=g) saver.restore(sess, FLAGS.checkpoint_path) test_json = json.load(open(FLAGS.json_file)) k = 0 # Save image ids and features in a dictionary. 
test_features = dict() for image_set in test_json: set_id = image_set["set_id"] image_feat = [] image_rnn_feat = [] ids = [] k = k + 1 print(str(k) + " : " + set_id) for image in image_set["items"]: filename = os.path.join(FLAGS.image_dir, set_id, str(image["index"]) + ".jpg") with tf.gfile.GFile(filename, "r") as f: image_feed = f.read() [feat, rnn_feat] = sess.run([model.image_embeddings, model.rnn_image_embeddings], feed_dict={"image_feed:0": image_feed}) image_name = set_id + "_" + str(image["index"]) test_features[image_name] = dict() test_features[image_name]["image_feat"] = np.squeeze(feat) test_features[image_name]["image_rnn_feat"] = np.squeeze(rnn_feat) with open(FLAGS.feature_file, "wb") as f: pkl.dump(test_features, f) if __name__ == "__main__": tf.app.run() ================================================ FILE: polyvore/run_inference_siamese.py ================================================ # Copyright 2017 Xintong Han. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Run the inference of Siamese Network given input images.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import json import tensorflow as tf import pickle as pkl import numpy as np import configuration import polyvore_model_siamese as polyvore_model FLAGS = tf.flags.FLAGS tf.flags.DEFINE_string("checkpoint_path", "", "Model checkpoint file or directory containing a " "model checkpoint file.") tf.flags.DEFINE_string("json_file", "data/label/test-no-dup.json", "Json file containing the inference data.") tf.flags.DEFINE_string("image_dir", "data/images", "Directory containing images.") tf.flags.DEFINE_string("feature_file", "data/features/test_features_siamese.pkl", "Directory to save the features") def main(_): if os.path.isfile(FLAGS.feature_file): print("Feature file already exist.") return # Build the inference graph. g = tf.Graph() with g.as_default(): model_config = configuration.ModelConfig() model = polyvore_model.PolyvoreModel(model_config, mode="inference") model.build() saver = tf.train.Saver() g.finalize() sess = tf.Session(graph=g) saver.restore(sess, FLAGS.checkpoint_path) test_json = json.load(open(FLAGS.json_file)) k = 0 # Save image ids and features in a dictionary. 
test_features = dict() for image_set in test_json: set_id = image_set["set_id"] image_feat = [] image_rnn_feat = [] ids = [] k = k + 1 print(str(k) + " : " + set_id) for image in image_set["items"]: filename = os.path.join(FLAGS.image_dir, set_id, str(image["index"]) + ".jpg") with tf.gfile.GFile(filename, "r") as f: image_feed = f.read() [feat] = sess.run([model.image_embeddings], feed_dict={"image_feed:0": image_feed}) image_name = set_id + "_" + str(image["index"]) test_features[image_name] = dict() test_features[image_name]["image_feat"] = np.squeeze(feat) with open(FLAGS.feature_file, "wb") as f: pkl.dump(test_features, f) if __name__ == "__main__": tf.app.run() ================================================ FILE: polyvore/run_inference_vse.py ================================================ # Copyright 2017 Xintong Han. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Run the inference of Siamese Network given input images.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import json import tensorflow as tf import pickle as pkl import numpy as np import configuration import polyvore_model_vse as polyvore_model FLAGS = tf.flags.FLAGS tf.flags.DEFINE_string("checkpoint_path", "", "Model checkpoint file or directory containing a " "model checkpoint file.") tf.flags.DEFINE_string("json_file", "data/label/test-no-dup.json", "Json file containing the inference data.") tf.flags.DEFINE_string("image_dir", "data/images", "Directory containing images.") tf.flags.DEFINE_string("feature_file", "data/features/test_features_siamese.pkl", "Directory to save the features") def main(_): if os.path.isfile(FLAGS.feature_file): print("Feature file already exist.") return # Build the inference graph. g = tf.Graph() with g.as_default(): model_config = configuration.ModelConfig() model = polyvore_model.PolyvoreModel(model_config, mode="inference") model.build() saver = tf.train.Saver() g.finalize() sess = tf.Session(graph=g) saver.restore(sess, FLAGS.checkpoint_path) test_json = json.load(open(FLAGS.json_file)) k = 0 # Save image ids and features in a dictionary. 
test_features = dict() for image_set in test_json: set_id = image_set["set_id"] image_feat = [] image_rnn_feat = [] ids = [] k = k + 1 print(str(k) + " : " + set_id) for image in image_set["items"]: filename = os.path.join(FLAGS.image_dir, set_id, str(image["index"]) + ".jpg") with tf.gfile.GFile(filename, "r") as f: image_feed = f.read() [feat] = sess.run([model.image_embeddings], feed_dict={"image_feed:0": image_feed}) image_name = set_id + "_" + str(image["index"]) test_features[image_name] = dict() test_features[image_name]["image_feat"] = np.squeeze(feat) with open(FLAGS.feature_file, "wb") as f: pkl.dump(test_features, f) if __name__ == "__main__": tf.app.run() ================================================ FILE: polyvore/set_generation.py ================================================ # Copyright 2017 Xintong Han. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== r"""Given multimodal queries, complete the outfit wiht bi-LSTM and VSE model.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import json import math import os import pickle as pkl import tensorflow as tf import numpy as np import configuration import polyvore_model_bi as polyvore_model FLAGS = tf.flags.FLAGS tf.flags.DEFINE_string("checkpoint_path", "", "Model checkpoint file or directory containing a " "model checkpoint file.") tf.flags.DEFINE_string("image_dir", "", "Directory containing images.") tf.flags.DEFINE_string("feature_file", "", "File which contains the features.") tf.flags.DEFINE_string("word_dict_file", "", "File containing word list.") tf.flags.DEFINE_string("query_file", "", "A json file containing the query to generate outfit.") tf.flags.DEFINE_string("result_dir", "results", "Directory to save the results.") tf.flags.DEFINE_float("balance_factor", 2.0, "Trade off between image and text input." 
"Larger balance_factor encourages higher correlation with text query") def norm_row(a): """L2 normalize each row of a given set.""" try: return a / np.linalg.norm(a, axis=1)[:, np.newaxis] except: return a / np.linalg.norm(a) def rnn_one_step(sess, input_feed, lstm_state, direction='f'): """Run one step of the RNN.""" if direction == 'f': # Forward [lstm_state, lstm_output] = sess.run( fetches=['lstm/f_state:0', 'f_logits/f_logits/BiasAdd:0'], feed_dict={'lstm/f_input_feed:0': input_feed, 'lstm/f_state_feed:0': lstm_state}) else: # Backward [lstm_state, lstm_output] = sess.run( fetches=['lstm/b_state:0', 'b_logits/b_logits/BiasAdd:0'], feed_dict={'lstm/b_input_feed:0': input_feed, 'lstm/b_state_feed:0': lstm_state}) return lstm_state, lstm_output def run_forward_rnn(sess, test_idx, test_feat, num_lstm_units): """ Run forward RNN given a query.""" res_set = [] lstm_state = np.zeros([1, 2 * num_lstm_units]) for test_id in test_idx: input_feed = np.reshape(test_feat[test_id], [1, -1]) # Run first step with all zeros initial state. [lstm_state, lstm_output] = rnn_one_step( sess, input_feed, lstm_state, direction='f') # Maximum length of the outfit is set to 10. for step in range(10): curr_score = np.exp(np.dot(lstm_output, np.transpose(test_feat))) curr_score /= np.sum(curr_score) next_image = np.argsort(-curr_score)[0][0] # 0.00001 is used as a probablity threshold to stop the generation. # i.e, if the prob of end-of-set is larger than 0.00001, then stop. 
if next_image == test_feat.shape[0] - 1 or curr_score[0][-1] > 0.00001: # print('OVER') break else: input_feed = np.reshape(test_feat[next_image], [1, -1]) [lstm_state, lstm_output] = rnn_one_step( sess, input_feed, lstm_state, direction='f') res_set.append(next_image) return res_set def run_backward_rnn(sess, test_idx, test_feat, num_lstm_units): """ Run backward RNN given a query.""" res_set = [] lstm_state = np.zeros([1, 2 * num_lstm_units]) for test_id in reversed(test_idx): input_feed = np.reshape(test_feat[test_id], [1, -1]) [lstm_state, lstm_output] = rnn_one_step( sess, input_feed, lstm_state, direction='b') for step in range(10): curr_score = np.exp(np.dot(lstm_output, np.transpose(test_feat))) curr_score /= np.sum(curr_score) next_image = np.argsort(-curr_score)[0][0] # 0.00001 is used as a probablity threshold to stop the generation. # i.e, if the prob of end-of-set is larger than 0.00001, then stop. if next_image == test_feat.shape[0] - 1 or curr_score[0][-1] > 0.00001: # print('OVER') break else: input_feed = np.reshape(test_feat[next_image], [1, -1]) [lstm_state, lstm_output] = rnn_one_step( sess, input_feed, lstm_state, direction='b') res_set.append(next_image) return res_set def run_fill_rnn(sess, start_id, end_id, num_blank, test_feat, num_lstm_units): """Fill in the blanks between start and end.""" if num_blank == 0: return [start_id, end_id] lstm_f_outputs = [] lstm_state = np.zeros([1, 2 * num_lstm_units]) input_feed = np.reshape(test_feat[start_id], [1, -1]) [lstm_state, lstm_output] = rnn_one_step( sess, input_feed, lstm_state, direction='f') f_outputs = [] for i in range(num_blank): f_outputs.append(lstm_output[0]) curr_score = np.exp(np.dot(lstm_output, np.transpose(test_feat))) curr_score /= np.sum(curr_score) next_image = np.argsort(-curr_score)[0][0] input_feed = np.reshape(test_feat[next_image], [1, -1]) [lstm_state, lstm_output] = rnn_one_step( sess, input_feed, lstm_state, direction='f') lstm_state = np.zeros([1, 2 * num_lstm_units]) 
input_feed = np.reshape(test_feat[end_id], [1, -1]) [lstm_state, lstm_output] = rnn_one_step( sess, input_feed, lstm_state, direction='b') b_outputs = [] for i in range(num_blank): b_outputs.insert(0, lstm_output[0]) curr_score = np.exp(np.dot(lstm_output, np.transpose(test_feat))) curr_score /= np.sum(curr_score) next_image = np.argsort(-curr_score)[0][0] input_feed = np.reshape(test_feat[next_image], [1, -1]) [lstm_state, lstm_output] = rnn_one_step( sess, input_feed, lstm_state, direction='b') outputs = np.asarray(f_outputs) + np.asarray(b_outputs) score = np.exp(np.dot(outputs, np.transpose(test_feat))) score /= np.sum(score, axis=1)[:, np.newaxis] blank_ids = np.argmax(score, axis=1) return [start_id] + list(blank_ids) + [end_id] def run_set_inference(sess, set_name, test_ids, test_feat, num_lstm_units): test_idx = [] for name in set_name: try: test_idx.append(test_ids.index(name)) except: print('not found') return # dynamic search # run the whole bi-LSTM on the first item first_f_set = run_forward_rnn(sess, test_idx[:1], test_feat, num_lstm_units) first_b_set = run_backward_rnn(sess, test_idx[:1], test_feat, num_lstm_units) first_posi = len(first_b_set) first_set = first_b_set + test_idx[:1] + first_f_set image_set = [] for i in first_set: image_set.append(test_ids[i]) # # Write results into folder. 
# os.system('mkdir %s/%s' % (FLAGS.result_dir, 'first')) # for i, image in enumerate(image_set): # name = image.split('_') # os.system('cp %s/%s/%s.jpg %s/%s/%d_%s.jpg' % (FLAGS.image_dir, # name[0], name[1], FLAGS.result_dir, 'first', i, image)) if len(set_name) >= 2: current_set = norm_row(test_feat[first_set, :]) all_position = [first_posi] for test_id in test_idx[1:]: # gradually adding items into it # findng nn of the next item insert_posi = np.argmax( np.dot(norm_row(test_feat[test_id, :]), np.transpose(current_set))) all_position.append(insert_posi) # run bi LSTM to fill items between first item and this item start_posi = np.min(all_position) end_posi = np.max(all_position) sets = run_fill_rnn(sess, test_idx[0], test_idx[1], end_posi - start_posi - 1, test_feat, num_lstm_units) else: # run bi LSTM again sets = test_idx f_set = run_forward_rnn(sess, sets, test_feat, num_lstm_units) b_set = run_backward_rnn(sess, sets, test_feat, num_lstm_units) image_set = [] for i in b_set[::-1] + sets+f_set: image_set.append(test_ids[i]) # os.system('mkdir %s/%s' % (FLAGS.result_dir, 'final')) # for i, image in enumerate(image_set): # name = image.split('_') # os.system('cp %s/%s/%s.jpg %s/%s/%d_%s.jpg' % (FLAGS.image_dir, # name[0], name[1], FLAGS.result_dir, 'final', i, image)) return b_set[::-1] + sets + f_set def nn_search(i, test_emb, word_vec): # score = np.dot(test_emb, np.transpose(test_emb[i] + word_vec)) score = np.dot(test_emb, np.transpose(test_emb[i] + FLAGS.balance_factor * word_vec)) return np.argmax(score) def main(_): # Build the inference graph. 
g = tf.Graph() with g.as_default(): model_config = configuration.ModelConfig() model = polyvore_model.PolyvoreModel(model_config, mode="inference") model.build() saver = tf.train.Saver() g.finalize() with tf.Session() as sess: saver.restore(sess, FLAGS.checkpoint_path) with open(FLAGS.feature_file, "rb") as f: test_data = pkl.load(f) test_ids = test_data.keys() test_feat = np.zeros((len(test_ids) + 1, len(test_data[test_ids[0]]["image_rnn_feat"]))) test_emb = np.zeros((len(test_ids), len(test_data[test_ids[0]]["image_feat"]))) for i, test_id in enumerate(test_ids): # Image feature in the RNN space. test_feat[i] = test_data[test_id]["image_rnn_feat"] # Image feature in the joint embedding space. test_emb[i] = test_data[test_id]["image_feat"] test_emb = norm_row(test_emb) # load queries from JSON file queries = json.load(open(FLAGS.query_file)) # Get the word embedding. [word_emb] = sess.run([model.embedding_map]) # Read word name words = open(FLAGS.word_dict_file).read().splitlines() for i, w in enumerate(words): words[i] = w.split()[0] # Calculate the embedding of the word query # only run the first query for demo for q in queries[:1]: set_name = q['image_query'] print(set_name) # Run Bi-LSTM model using the image query. rnn_sets = run_set_inference(sess, set_name, test_ids, test_feat, model_config.num_lstm_units) print(rnn_sets) # Reranking the LSTM prediction with similarity with the text query word_query = str(q['text_query']) print(word_query) if word_query != "": # Get the indices of images. 
test_idx = [] for name in set_name: try: test_idx.append(test_ids.index(name)) except: print('not found') return # Calculate the word embedding word_query = [i+1 for i in range(len(words)) if words[i] in word_query.split()] print(word_query) query_emb = norm_row(np.sum(word_emb[word_query], axis=0)) for i, j in enumerate(rnn_sets): if j not in test_idx: rnn_sets[i] = nn_search(j, test_emb, query_emb) print(rnn_sets) # write images image_set = [] for i in rnn_sets: image_set.append(test_ids[i]) # write results # os.system('mkdir %s/%s' % (FLAGS.result_dir, 'emb_final')) # for i, image in enumerate(image_set): # name = image.split('_') # os.system('cp %s/%s/%s.jpg %s/%s/%d_%s.jpg' % (FLAGS.image_dir, # name[0], name[1], FLAGS.result_dir, 'emb_final', i, image)) for i, image in enumerate(image_set): name = image.split('_') os.system('cp %s/%s/%s.jpg %s/%d_%s.jpg' % (FLAGS.image_dir, name[0], name[1], FLAGS.result_dir, i, image)) if __name__ == "__main__": tf.app.run() ================================================ FILE: polyvore/train.py ================================================ # Copyright 2017 Xintong Han. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Train the model.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf import configuration import polyvore_model_bi as polyvore_model FLAGS = tf.app.flags.FLAGS tf.flags.DEFINE_string("input_file_pattern", "", "File pattern of sharded TFRecord input files.") tf.flags.DEFINE_string("inception_checkpoint_file", "", "Path to a pretrained inception_v3 model.") tf.flags.DEFINE_string("train_dir", "", "Directory for saving and loading model checkpoints.") tf.flags.DEFINE_boolean("train_inception", False, "Whether to train inception submodel variables.") tf.flags.DEFINE_integer("number_of_steps", 1000000, "Number of training steps.") tf.flags.DEFINE_integer("log_every_n_steps", 1, "Frequency at which loss and global step are logged.") tf.logging.set_verbosity(tf.logging.INFO) def main(unused_argv): assert FLAGS.input_file_pattern, "--input_file_pattern is required" assert FLAGS.train_dir, "--train_dir is required" model_config = configuration.ModelConfig() model_config.input_file_pattern = FLAGS.input_file_pattern model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file training_config = configuration.TrainingConfig() # Create training directory. train_dir = FLAGS.train_dir if not tf.gfile.IsDirectory(train_dir): tf.logging.info("Creating training directory: %s", train_dir) tf.gfile.MakeDirs(train_dir) # Build the TensorFlow graph. g = tf.Graph() with g.as_default(): # Build the model. 
model = polyvore_model.PolyvoreModel( model_config, mode="train", train_inception=FLAGS.train_inception) model.build() learning_rate = tf.constant(training_config.initial_learning_rate) learning_rate_decay_fn = None if training_config.learning_rate_decay_factor > 0: num_batches_per_epoch = (training_config.num_examples_per_epoch / model_config.batch_size) decay_steps = int(num_batches_per_epoch * training_config.num_epochs_per_decay) def _learning_rate_decay_fn(learning_rate, global_step): return tf.train.exponential_decay( learning_rate, global_step, decay_steps=decay_steps, decay_rate=training_config.learning_rate_decay_factor, staircase=True) learning_rate_decay_fn = _learning_rate_decay_fn # Set up the training ops. train_op = tf.contrib.layers.optimize_loss( loss=model.total_loss, global_step=model.global_step, learning_rate=learning_rate, optimizer=training_config.optimizer, clip_gradients=training_config.clip_gradients, learning_rate_decay_fn=learning_rate_decay_fn) # Set up the Saver for saving and restoring model checkpoints. saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep) # Run training. tf.contrib.slim.learning.train( train_op, train_dir, log_every_n_steps=FLAGS.log_every_n_steps, graph=g, global_step=model.global_step, number_of_steps=FLAGS.number_of_steps, init_fn=model.init_fn, saver=saver) if __name__ == "__main__": tf.app.run() ================================================ FILE: polyvore/train_siamese.py ================================================ # Copyright 2017 Xintong Han. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Train the Siamese Network.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf import configuration import polyvore_model_siamese as polyvore_model FLAGS = tf.app.flags.FLAGS tf.flags.DEFINE_string("input_file_pattern", "", "File pattern of sharded TFRecord input files.") tf.flags.DEFINE_string("inception_checkpoint_file", "", "Path to a pretrained inception_v3 model.") tf.flags.DEFINE_string("train_dir", "", "Directory for saving and loading model checkpoints.") tf.flags.DEFINE_boolean("train_inception", False, "Whether to train inception submodel variables.") tf.flags.DEFINE_integer("number_of_steps", 1000000, "Number of training steps.") tf.flags.DEFINE_integer("log_every_n_steps", 1, "Frequency at which loss and global step are logged.") tf.flags.DEFINE_float("learning_rate", 0.2, "Initial learning rate.") tf.flags.DEFINE_string("rnn_type", "", "Types of rnn, lstm, gru or basic rnn.") tf.logging.set_verbosity(tf.logging.INFO) def main(unused_argv): assert FLAGS.input_file_pattern, "--input_file_pattern is required" assert FLAGS.train_dir, "--train_dir is required" model_config = configuration.ModelConfig() model_config.input_file_pattern = FLAGS.input_file_pattern model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file training_config = configuration.TrainingConfig() # May use a different learning rate training_config.initial_learning_rate = FLAGS.learning_rate # Create training 
directory. train_dir = FLAGS.train_dir if not tf.gfile.IsDirectory(train_dir): tf.logging.info("Creating training directory: %s", train_dir) tf.gfile.MakeDirs(train_dir) # Build the TensorFlow graph. g = tf.Graph() with g.as_default(): # Build the model. model = polyvore_model.PolyvoreModel( model_config, mode="train", train_inception=FLAGS.train_inception) model.build() # Set up the learning rate. learning_rate = tf.constant(training_config.initial_learning_rate) learning_rate_decay_fn = None if training_config.learning_rate_decay_factor > 0: num_batches_per_epoch = (training_config.num_examples_per_epoch / model_config.batch_size) decay_steps = int(num_batches_per_epoch * training_config.num_epochs_per_decay) def _learning_rate_decay_fn(learning_rate, global_step): return tf.train.exponential_decay( learning_rate, global_step, decay_steps=decay_steps, decay_rate=training_config.learning_rate_decay_factor, staircase=True) learning_rate_decay_fn = _learning_rate_decay_fn # Set up the training ops. train_op = tf.contrib.layers.optimize_loss( loss=model.total_loss, global_step=model.global_step, learning_rate=learning_rate, optimizer=training_config.optimizer, clip_gradients=training_config.clip_gradients, learning_rate_decay_fn=learning_rate_decay_fn) # Set up the Saver for saving and restoring model checkpoints. saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep) # saver = tf.train.Saver(keep_checkpoint_every_n_hours=0.1) # Run training. 
tf.contrib.slim.learning.train( train_op, train_dir, log_every_n_steps=FLAGS.log_every_n_steps, graph=g, global_step=model.global_step, number_of_steps=FLAGS.number_of_steps, init_fn=model.init_fn, saver=saver) if __name__ == "__main__": tf.app.run() ================================================ FILE: predict_compatibility.sh ================================================ #!/bin/bash CHECKPOINT_DIR="model/model_final/model.ckpt-34865" python polyvore/fashion_compatibility.py \ --checkpoint_path=${CHECKPOINT_DIR} \ --label_file="data/label/fashion_compatibility_prediction.txt" \ --feature_file="data/features/test_features.pkl" \ --rnn_type="lstm" \ --direction="2" \ --result_file="fashion_compatibility.pkl" ================================================ FILE: query.json ================================================ [ { "image_query": [ "131138376_1", "131138376_3" ], "text_query": "blue" } ] ================================================ FILE: results/README.md ================================================ The generated outfit goes here. ================================================ FILE: train.sh ================================================ #!/bin/bash # Inception v3 checkpoint file. INCEPTION_CHECKPOINT="model/inception_v3.ckpt" # Directory to save the model. MODEL_DIR="model/bi_lstm/" # Run the training code. python polyvore/train.py \ --input_file_pattern="data/tf_records/train-no-dup-?????-of-00128" \ --inception_checkpoint_file="${INCEPTION_CHECKPOINT}" \ --train_dir="${MODEL_DIR}/train" \ --train_inception=true \ --number_of_steps=100000 # # Training Siamese Network # # Directory to save the model. # MODEL_DIR="model/siamese/" # # Run the training code. # python polyvore/train_siamese.py \ # --input_file_pattern="data/tf_records/train-no-dup-?????-of-00128" \ # --inception_checkpoint_file="${INCEPTION_CHECKPOINT}" \ # --train_dir="${MODEL_DIR}/train" \ # --train_inception=true \ # --number_of_steps=100000