Repository: MILVLG/openvqa
Branch: master
Commit: f8f9966f202d
Files: 95
Total size: 465.4 KB
Directory structure:
gitextract_8y0cfniw/
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── configs/
│ ├── clevr/
│ │ └── mcan_small.yml
│ ├── gqa/
│ │ ├── ban_4.yml
│ │ ├── ban_8.yml
│ │ ├── butd.yml
│ │ ├── mcan_large.yml
│ │ └── mcan_small.yml
│ └── vqa/
│ ├── ban_4.yml
│ ├── ban_8.yml
│ ├── butd.yml
│ ├── mcan_large.yml
│ ├── mcan_small.yml
│ ├── mfb.yml
│ ├── mfh.yml
│ ├── mmnasnet_large.yml
│ └── mmnasnet_small.yml
├── data/
│ ├── clevr/
│ │ ├── clevr_extract_feat.py
│ │ ├── feats/
│ │ │ └── .gitkeep
│ │ └── raw/
│ │ └── .gitkeep
│ ├── gqa/
│ │ ├── feats/
│ │ │ └── .gitkeep
│ │ ├── gqa_feat_preproc.py
│ │ └── raw/
│ │ └── .gitkeep
│ └── vqa/
│ ├── feats/
│ │ └── .gitkeep
│ └── raw/
│ └── .gitkeep
├── docs/
│ ├── Makefile
│ ├── _source/
│ │ ├── _static/
│ │ │ ├── custom.css
│ │ │ ├── mathjax_mathml.user.js
│ │ │ └── mathjax_wikipedia.user.js
│ │ ├── advanced/
│ │ │ ├── adding_model.md
│ │ │ └── contributing.md
│ │ ├── basic/
│ │ │ ├── getting_started.md
│ │ │ ├── install.md
│ │ │ └── model_zoo.md
│ │ ├── conf.py
│ │ └── index.rst
│ ├── _templates/
│ │ └── layout.html
│ ├── make.bat
│ ├── readme.md
│ └── requirements.txt
├── openvqa/
│ ├── core/
│ │ ├── base_cfgs.py
│ │ ├── base_dataset.py
│ │ └── path_cfgs.py
│ ├── datasets/
│ │ ├── clevr/
│ │ │ ├── clevr_loader.py
│ │ │ └── eval/
│ │ │ └── result_eval.py
│ │ ├── dataset_loader.py
│ │ ├── gqa/
│ │ │ ├── dicts.json
│ │ │ ├── eval/
│ │ │ │ ├── gqa_eval.py
│ │ │ │ └── result_eval.py
│ │ │ └── gqa_loader.py
│ │ └── vqa/
│ │ ├── answer_dict.json
│ │ ├── eval/
│ │ │ ├── result_eval.py
│ │ │ ├── vqa.py
│ │ │ └── vqaEval.py
│ │ └── vqa_loader.py
│ ├── models/
│ │ ├── ban/
│ │ │ ├── adapter.py
│ │ │ ├── ban.py
│ │ │ ├── model_cfgs.py
│ │ │ └── net.py
│ │ ├── butd/
│ │ │ ├── adapter.py
│ │ │ ├── model_cfgs.py
│ │ │ ├── net.py
│ │ │ └── tda.py
│ │ ├── mcan/
│ │ │ ├── adapter.py
│ │ │ ├── mca.py
│ │ │ ├── model_cfgs.py
│ │ │ └── net.py
│ │ ├── mfb/
│ │ │ ├── adapter.py
│ │ │ ├── mfb.py
│ │ │ ├── model_cfgs.py
│ │ │ └── net.py
│ │ ├── mmnasnet/
│ │ │ ├── adapter.py
│ │ │ ├── model_cfgs.py
│ │ │ ├── nasnet.py
│ │ │ └── net.py
│ │ └── model_loader.py
│ ├── ops/
│ │ ├── fc.py
│ │ └── layer_norm.py
│ └── utils/
│ ├── ans_punct.py
│ ├── feat_filter.py
│ ├── make_mask.py
│ └── optim.py
├── requirements.txt
├── results/
│ ├── cache/
│ │ └── .gitkeep
│ ├── log/
│ │ └── .gitkeep
│ ├── pred/
│ │ └── .gitkeep
│ └── result_test/
│ └── .gitkeep
├── run.py
└── utils/
├── exec.py
├── proc_dict_gqa.py
├── proc_dict_vqa.py
├── test_engine.py
└── train_engine.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
ckpts/
results/cache/*.json
results/cache/*.txt
results/result_test/*.json
results/result_test/*.txt
results/pred/*.pkl
results/log/*.txt
data/clevr/raw/images/
data/clevr/raw/questions/
data/clevr/raw/scenes/
data/clevr/feats/train/
data/clevr/feats/val/
data/clevr/feats/test/
data/gqa/raw/eval/
data/gqa/raw/questions1.2/
data/gqa/raw/sceneGraphs/
data/gqa/feats/gqa-frcn/
data/gqa/feats/gqa-grid/
data/vqa/raw/*.json
data/vqa/feats/train2014/
data/vqa/feats/val2014/
data/vqa/feats/test2015/
.idea/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# Microsoft
.vscode
.vscode/
.vs
.vs/
================================================
FILE: Dockerfile
================================================
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04

ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
ENV PATH=/opt/conda/bin:$PATH

# OS packages for anaconda, tini and the build itself.
# update + install in ONE layer (a later standalone `apt-get install` would reuse a
# stale package index), and drop the apt lists to keep the layer small.
RUN apt-get update --fix-missing && apt-get install -y --no-install-recommends \
        bzip2 \
        ca-certificates \
        curl \
        dpkg \
        git \
        grep \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender1 \
        mercurial \
        sed \
        subversion \
        wget \
    && rm -rf /var/lib/apt/lists/*

# install anaconda 5.2.0
RUN wget --quiet https://repo.anaconda.com/archive/Anaconda3-5.2.0-Linux-x86_64.sh -O ~/anaconda.sh && \
    /bin/bash ~/anaconda.sh -b -p /opt/conda && \
    rm ~/anaconda.sh && \
    ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
    echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
    echo "conda activate base" >> ~/.bashrc

# tini (init process for signal forwarding / zombie reaping).
# Pin the version instead of scraping the GitHub "latest" redirect, so builds are
# reproducible and don't break when the upstream HTML changes.
ARG TINI_VERSION=0.18.0
RUN curl -fsSL "https://github.com/krallin/tini/releases/download/v${TINI_VERSION}/tini_${TINI_VERSION}.deb" -o tini.deb && \
    dpkg -i tini.deb && \
    rm tini.deb

ENTRYPOINT [ "/usr/bin/tini", "--" ]
CMD [ "/bin/bash" ]

# install pytorch 1.1 and cudatoolkit (-y: builds are non-interactive, conda must not prompt)
RUN conda install -y pytorch==1.1.0 torchvision==0.3.0 cudatoolkit=10.0 -c pytorch

# install openvqa dependencies.
# Clone and delete the repo in the SAME layer: a `rm` in a later layer would not
# remove the repo from the image, only hide it.
WORKDIR /workspace
RUN git clone https://github.com/MILVLG/openvqa.git && \
    pip install --no-cache-dir -r openvqa/requirements.txt && \
    wget --quiet https://github.com/explosion/spacy-models/releases/download/en_vectors_web_lg-2.1.0/en_vectors_web_lg-2.1.0.tar.gz -O en_vectors_web_lg-2.1.0.tar.gz && \
    pip install --no-cache-dir en_vectors_web_lg-2.1.0.tar.gz && \
    rm en_vectors_web_lg-2.1.0.tar.gz && \
    rm -rf openvqa
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [2019] [Vision and Language Group@ MIL]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
# OpenVQA
OpenVQA is a general platform for visual question answering (VQA) research, with implementing state-of-the-art approaches (e.g., [BUTD](https://arxiv.org/abs/1707.07998), [MFH](https://arxiv.org/abs/1708.03619), [BAN](https://arxiv.org/abs/1805.07932), [MCAN](https://arxiv.org/abs/1906.10770) and [MMNasNet](https://arxiv.org/pdf/2004.12070.pdf)) on different benchmark datasets like [VQA-v2](https://visualqa.org/), [GQA](https://cs.stanford.edu/people/dorarad/gqa/index.html) and [CLEVR](https://cs.stanford.edu/people/jcjohns/clevr/). Supports for more methods and datasets will be updated continuously.
## Documentation
Getting started and learn more about OpenVQA [here](https://openvqa.readthedocs.io/en/latest/).
## Benchmark and Model Zoo
Supported methods and benchmark datasets are shown in the below table.
Results and models are available in [MODEL ZOO](https://openvqa.readthedocs.io/en/latest/basic/model_zoo.html).
| | [VQA-v2](https://visualqa.org/) | [GQA](https://cs.stanford.edu/people/dorarad/gqa/index.html) | [CLEVR](https://cs.stanford.edu/people/jcjohns/clevr/) |
|:-----------------------------------------:|:-------------------------------:|:------------------------------------------------------------:|:------------------------------------------------------:|
| [BUTD](https://arxiv.org/abs/1707.07998) | ✓ | ✓ | |
| [MFB](https://arxiv.org/abs/1708.01471v1) | ✓ | | |
| [MFH](https://arxiv.org/abs/1708.03619) | ✓ | | |
| [BAN](https://arxiv.org/abs/1805.07932) | ✓ | ✓ | |
| [MCAN](https://arxiv.org/abs/1906.10770) | ✓ | ✓ | ✓ |
| [MMNasNet](https://arxiv.org/pdf/2004.12070.pdf) | ✓ | | |
## News & Updates
#### v0.7.5 (30/12/2019)
- Add supports and pre-trained models for the approaches on CLEVR.
#### v0.7 (29/11/2019)
- Add supports and pre-trained models for the approaches on GQA.
- Add a document to tell developers how to add a new model to OpenVQA.
#### v0.6 (18/09/2019)
- Refactoring the documents and using Sphinx to build the whole documents.
#### v0.5 (31/07/2019)
- Implement the basic framework for OpenVQA.
- Add supports and pre-trained models for BUTD, MFB, MFH, BAN, MCAN on VQA-v2.
## License
This project is released under the [Apache 2.0 license](LICENSE).
## Contact
This repo is currently maintained by Zhou Yu ([@yuzcccc](https://github.com/yuzcccc)) and Yuhao Cui ([@cuiyuhao1996](https://github.com/cuiyuhao1996)).
## Citation
If this repository is helpful for your research or you want to refer to the provided results in the model zoo, you could cite the work using the following BibTeX entry:
```
@misc{yu2019openvqa,
author = {Yu, Zhou and Cui, Yuhao and Shao, Zhenwei and Gao, Pengbing and Yu, Jun},
title = {OpenVQA},
howpublished = {\url{https://github.com/MILVLG/openvqa}},
year = {2019}
}
```
================================================
FILE: configs/clevr/mcan_small.yml
================================================
# Network
MODEL_USE: mcan
LAYER: 6
HIDDEN_SIZE: 512
FF_SIZE: 2048
MULTI_HEAD: 8
DROPOUT_R: 0.1
FLAT_MLP_SIZE: 512
FLAT_GLIMPSES: 1
FLAT_OUT_SIZE: 1024
# Execution
BATCH_SIZE: 64
LR_BASE: 0.00004
LR_DECAY_R: 0.2
LR_DECAY_LIST: [13, 15]
WARMUP_EPOCH: 3
MAX_EPOCH: 16
GRAD_NORM_CLIP: -1
GRAD_ACCU_STEPS: 2
LOSS_FUNC: ce
LOSS_REDUCTION: sum
OPT: Adam
OPT_PARAMS: {betas: '(0.9, 0.98)', eps: '1e-9'}
================================================
FILE: configs/gqa/ban_4.yml
================================================
# Network
MODEL_USE: ban
GLIMPSE: 4
HIDDEN_SIZE: 1024
K_TIMES: 3
DROPOUT_R: 0.2
CLASSIFER_DROPOUT_R: 0.5
FLAT_OUT_SIZE: 2048
USE_BBOX_FEAT: True
BBOXFEAT_EMB_SIZE: 1024
IMG_FEAT_SIZE: 1024
# Execution
BATCH_SIZE: 512
LR_BASE: 0.002
LR_DECAY_R: 0.25
LR_DECAY_LIST: [10, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: 0.25
GRAD_ACCU_STEPS: 8
LOSS_FUNC: ce
LOSS_REDUCTION: sum
OPT: Adamax
OPT_PARAMS: {betas: '(0.9, 0.999)', eps: '1e-9'}
================================================
FILE: configs/gqa/ban_8.yml
================================================
# Network
MODEL_USE: ban
GLIMPSE: 8
HIDDEN_SIZE: 1024
K_TIMES: 3
DROPOUT_R: 0.2
CLASSIFER_DROPOUT_R: 0.5
FLAT_OUT_SIZE: 2048
USE_BBOX_FEAT: True
BBOXFEAT_EMB_SIZE: 1024
IMG_FEAT_SIZE: 1024
# Execution
BATCH_SIZE: 512
LR_BASE: 0.001
LR_DECAY_R: 0.25
LR_DECAY_LIST: [10]
WARMUP_EPOCH: 3
MAX_EPOCH: 11
GRAD_NORM_CLIP: 0.25
GRAD_ACCU_STEPS: 8
LOSS_FUNC: ce
LOSS_REDUCTION: sum
OPT: Adamax
OPT_PARAMS: {betas: '(0.9, 0.999)', eps: '1e-9'}
================================================
FILE: configs/gqa/butd.yml
================================================
# Network
MODEL_USE: butd
HIDDEN_SIZE: 1024
DROPOUT_R: 0.2
CLASSIFER_DROPOUT_R: 0.5
FLAT_OUT_SIZE: 2048
USE_BBOX_FEAT: True
BBOXFEAT_EMB_SIZE: 1024
IMG_FEAT_SIZE: 1024
# Execution
BATCH_SIZE: 512
LR_BASE: 0.002
LR_DECAY_R: 0.2
LR_DECAY_LIST: [10, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: 0.25
GRAD_ACCU_STEPS: 1
LOSS_FUNC: ce
LOSS_REDUCTION: sum
OPT: Adamax
OPT_PARAMS: {betas: '(0.9, 0.999)', eps: '1e-9'}
================================================
FILE: configs/gqa/mcan_large.yml
================================================
# Network
MODEL_USE: mcan
LAYER: 6
HIDDEN_SIZE: 1024
FF_SIZE: 4096
MULTI_HEAD: 8
DROPOUT_R: 0.1
FLAT_MLP_SIZE: 512
FLAT_GLIMPSES: 1
FLAT_OUT_SIZE: 2048
USE_BBOX_FEAT: True
USE_AUX_FEAT: True
# Execution
BATCH_SIZE: 64
LR_BASE: 0.00005
LR_DECAY_R: 0.2
LR_DECAY_LIST: [8, 10]
WARMUP_EPOCH: 2
MAX_EPOCH: 11
GRAD_NORM_CLIP: -1
GRAD_ACCU_STEPS: 4
LOSS_FUNC: ce
LOSS_REDUCTION: sum
OPT: Adam
OPT_PARAMS: {betas: '(0.9, 0.98)', eps: '1e-9'}
================================================
FILE: configs/gqa/mcan_small.yml
================================================
# Network
MODEL_USE: mcan
LAYER: 6
HIDDEN_SIZE: 512
FF_SIZE: 2048
MULTI_HEAD: 8
DROPOUT_R: 0.1
FLAT_MLP_SIZE: 512
FLAT_GLIMPSES: 1
FLAT_OUT_SIZE: 1024
USE_BBOX_FEAT: True
USE_AUX_FEAT: True
# Execution
BATCH_SIZE: 64
LR_BASE: 0.0001
LR_DECAY_R: 0.2
LR_DECAY_LIST: [8, 10]
WARMUP_EPOCH: 2
MAX_EPOCH: 11
GRAD_NORM_CLIP: -1
GRAD_ACCU_STEPS: 1
LOSS_FUNC: ce
LOSS_REDUCTION: sum
OPT: Adam
OPT_PARAMS: {betas: '(0.9, 0.98)', eps: '1e-9'}
================================================
FILE: configs/vqa/ban_4.yml
================================================
# Network
MODEL_USE: ban
GLIMPSE: 4
HIDDEN_SIZE: 1024
K_TIMES: 3
DROPOUT_R: 0.2
CLASSIFER_DROPOUT_R: 0.5
FLAT_OUT_SIZE: 2048
# Execution
BATCH_SIZE: 512
LR_BASE: 0.002
LR_DECAY_R: 0.25
LR_DECAY_LIST: [10, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: 0.25
GRAD_ACCU_STEPS: 8
LOSS_FUNC: bce
LOSS_REDUCTION: sum
OPT: Adamax
OPT_PARAMS: {betas: '(0.9, 0.999)', eps: '1e-9'}
================================================
FILE: configs/vqa/ban_8.yml
================================================
# Network
MODEL_USE: ban
GLIMPSE: 8
HIDDEN_SIZE: 1024
K_TIMES: 3
DROPOUT_R: 0.2
CLASSIFER_DROPOUT_R: 0.5
FLAT_OUT_SIZE: 2048
# Execution
BATCH_SIZE: 512
LR_BASE: 0.002
LR_DECAY_R: 0.25
LR_DECAY_LIST: [10, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: 0.25
GRAD_ACCU_STEPS: 8
LOSS_FUNC: bce
LOSS_REDUCTION: sum
OPT: Adamax
OPT_PARAMS: {betas: '(0.9, 0.999)', eps: '1e-9'}
================================================
FILE: configs/vqa/butd.yml
================================================
# Network
MODEL_USE: butd
HIDDEN_SIZE: 1024
DROPOUT_R: 0.2
CLASSIFER_DROPOUT_R: 0.5
FLAT_OUT_SIZE: 2048
# Execution
BATCH_SIZE: 512
LR_BASE: 0.002
LR_DECAY_R: 0.2
LR_DECAY_LIST: [10, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: 0.25
GRAD_ACCU_STEPS: 1
LOSS_FUNC: bce
LOSS_REDUCTION: sum
OPT: Adamax
OPT_PARAMS: {betas: '(0.9, 0.999)', eps: '1e-9'}
================================================
FILE: configs/vqa/mcan_large.yml
================================================
# Network
MODEL_USE: mcan
LAYER: 6
HIDDEN_SIZE: 1024
FF_SIZE: 4096
MULTI_HEAD: 8
DROPOUT_R: 0.1
FLAT_MLP_SIZE: 512
FLAT_GLIMPSES: 1
FLAT_OUT_SIZE: 2048
USE_BBOX_FEAT: False
# Execution
BATCH_SIZE: 64
LR_BASE: 0.00007 # 0.00005 for train+val+vg->test
LR_DECAY_R: 0.2
LR_DECAY_LIST: [10, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: -1
GRAD_ACCU_STEPS: 2 # to reduce GPU memory cost
LOSS_FUNC: bce
LOSS_REDUCTION: sum
OPT: Adam
OPT_PARAMS: {betas: '(0.9, 0.98)', eps: '1e-9'}
================================================
FILE: configs/vqa/mcan_small.yml
================================================
# Network
MODEL_USE: mcan
LAYER: 6
HIDDEN_SIZE: 512
FF_SIZE: 2048
MULTI_HEAD: 8
DROPOUT_R: 0.1
FLAT_MLP_SIZE: 512
FLAT_GLIMPSES: 1
FLAT_OUT_SIZE: 1024
USE_BBOX_FEAT: False
# Execution
BATCH_SIZE: 64
LR_BASE: 0.0001
LR_DECAY_R: 0.2
LR_DECAY_LIST: [10, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: -1
GRAD_ACCU_STEPS: 1
LOSS_FUNC: bce
LOSS_REDUCTION: sum
OPT: Adam
OPT_PARAMS: {betas: '(0.9, 0.98)', eps: '1e-9'}
================================================
FILE: configs/vqa/mfb.yml
================================================
# Network
MODEL_USE: mfb
HIGH_ORDER: False # True for MFH, False for MFB
HIDDEN_SIZE: 512
MFB_K: 5
MFB_O: 1000
LSTM_OUT_SIZE: 1024
DROPOUT_R: 0.1
I_GLIMPSES: 2
Q_GLIMPSES: 2
# Execution
BATCH_SIZE: 64
LR_BASE: 0.0007
LR_DECAY_R: 0.5
LR_DECAY_LIST: [6, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: -1
GRAD_ACCU_STEPS: 1
LOSS_FUNC: kld
LOSS_REDUCTION: sum
OPT: Adam
OPT_PARAMS: {betas: '(0.9, 0.99)', eps: '1e-9'}
================================================
FILE: configs/vqa/mfh.yml
================================================
# Network
MODEL_USE: mfb
HIGH_ORDER: True # True for MFH, False for MFB
HIDDEN_SIZE: 512
MFB_K: 5
MFB_O: 1000
LSTM_OUT_SIZE: 1024
DROPOUT_R: 0.1
I_GLIMPSES: 2
Q_GLIMPSES: 2
# Execution
BATCH_SIZE: 64
LR_BASE: 0.0007
LR_DECAY_R: 0.25
LR_DECAY_LIST: [10, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: -1
GRAD_ACCU_STEPS: 1
LOSS_FUNC: kld
LOSS_REDUCTION: sum
OPT: Adam
OPT_PARAMS: {betas: '(0.9, 0.99)', eps: '1e-9'}
================================================
FILE: configs/vqa/mmnasnet_large.yml
================================================
# Network
MODEL_USE: mmnasnet
ARCH: {
enc: [SA, SA, SA, SA, FFN, FFN, FFN, FFN, SA, FFN, FFN, FFN],
dec: [GA, GA, FFN, FFN, GA, FFN, RSA, GA, FFN, GA, RSA, FFN, RSA, SA, FFN, RSA, GA, FFN]
}
HIDDEN_SIZE: 1024
REL_HBASE: 128
REL_SIZE: 64
MULTI_HEAD: 8
DROPOUT_R: 0.1
FLAT_MLP_SIZE: 1024
FLAT_GLIMPSES: 1
FLAT_OUT_SIZE: 2048
# Execution
BATCH_SIZE: 64
LR_BASE: 0.00007 # 5e-5 for train+val+vg->test
LR_DECAY_R: 0.2
LR_DECAY_LIST: [10, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: 1.0
GRAD_ACCU_STEPS: 1
LOSS_FUNC: bce
LOSS_REDUCTION: sum
OPT: Adam
OPT_PARAMS: {betas: '(0.9, 0.98)', eps: '1e-9'}
================================================
FILE: configs/vqa/mmnasnet_small.yml
================================================
# Network
MODEL_USE: mmnasnet
ARCH: {
enc: [SA, SA, SA, SA, FFN, FFN, FFN, FFN, SA, FFN, FFN, FFN],
dec: [GA, GA, FFN, FFN, GA, FFN, RSA, GA, FFN, GA, RSA, FFN, RSA, SA, FFN, RSA, GA, FFN]
}
HIDDEN_SIZE: 512
REL_HBASE: 64
REL_SIZE: 64
MULTI_HEAD: 8
DROPOUT_R: 0.1
FLAT_MLP_SIZE: 512
FLAT_GLIMPSES: 1
FLAT_OUT_SIZE: 1024
# Execution
BATCH_SIZE: 64
LR_BASE: 0.00012 # 1e-4 for train+val+vg->test
LR_DECAY_R: 0.2
LR_DECAY_LIST: [10, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: 1.0
GRAD_ACCU_STEPS: 1
LOSS_FUNC: bce
LOSS_REDUCTION: sum
OPT: Adam
OPT_PARAMS: {betas: '(0.9, 0.98)', eps: '1e-9'}
================================================
FILE: data/clevr/clevr_extract_feat.py
================================================
# --------------------------------------------------------
# OpenVQA
# CLEVR images feature extraction script
# Written by Pengbing Gao https://github.com/nbgao
# --------------------------------------------------------
'''
Command line example:
python clevr_extract_feat.py --mode=all --gpu=0
python clevr_extract_feat.py --mode=train --gpu=0 --model=resnet101 --model_stage=3 --batch_size=128 --image_height=224 --image_width=224
'''
import argparse, os, json
import numpy as np
from scipy.misc import imread, imresize
import torch
import torchvision
torch.set_num_threads(5)
def build_model(args):
    """Build a truncated ResNet feature extractor on the GPU.

    Takes the torchvision model named by ``args.model`` (ResNet variants only),
    keeps the stem (conv1/bn1/relu/maxpool) plus the first ``args.model_stage``
    residual stages, and returns it as an eval-mode ``nn.Sequential`` on CUDA.

    Raises:
        ValueError: if ``args.model`` is not a torchvision model, or not a ResNet.
    """
    if not hasattr(torchvision.models, args.model):
        raise ValueError('Invalid model "%s"' % args.model)
    if 'resnet' not in args.model:
        raise ValueError('Feature extraction only supports ResNets')

    backbone = getattr(torchvision.models, args.model)(pretrained=True)

    # Stem layers followed by the requested number of residual stages
    # (layer1, layer2, ... on torchvision ResNets).
    selected = [backbone.conv1, backbone.bn1, backbone.relu, backbone.maxpool]
    selected.extend(
        getattr(backbone, 'layer%d' % stage)
        for stage in range(1, args.model_stage + 1)
    )

    extractor = torch.nn.Sequential(*selected)
    extractor.cuda()
    extractor.eval()
    return extractor
def batch_feat(cur_batch, model):
    """Normalize one batch of images and run it through the CNN.

    Args:
        cur_batch: list of numpy arrays of shape (1, 3, H, W) with uint8-range
            pixel values (0-255).
        model: CNN feature extractor living on the GPU.

    Returns:
        numpy array of feature maps, shape (batch, C, H', W').
    """
    # ImageNet channel statistics (RGB). Fixed: the blue-channel std is 0.225;
    # the original code repeated 0.224 by mistake.
    mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1)
    std = np.array([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1)
    image_batch = np.concatenate(cur_batch, 0).astype(np.float32)
    image_batch = (image_batch / 255.0 - mean) / std
    image_batch = torch.FloatTensor(image_batch).cuda()
    # `Variable(volatile=True)` was removed in PyTorch 0.4; torch.no_grad() is
    # the supported way to disable autograd for inference.
    with torch.no_grad():
        feats = model(image_batch)
    return feats.data.cpu().clone().numpy()
def extract_feature(args, images_path, feats_npz_path):
    '''
    Extract CNN grid features for every .png image under images_path and
    save one .npz file per image (key 'x', shape [196, 1024]) in
    feats_npz_path, named by the image's numeric index.

    feats_npz_path is expected to end with '/' (as the callers pass it),
    since output paths are built by string concatenation.
    '''
    # Collect (path, index) pairs; the index comes from the filename,
    # e.g. CLEVR_train_000123.png -> 123.
    input_paths = []
    idx_set = set()
    for file in os.listdir(images_path):
        if not file.endswith('.png'):
            continue
        idx = int(os.path.splitext(file)[0].split('_')[-1])
        input_paths.append((os.path.join(images_path, file), idx))
        idx_set.add(idx)
    input_paths.sort(key=lambda x: x[1])
    # Indices must be unique and form a contiguous 0..N-1 range.
    assert len(idx_set) == len(input_paths)
    assert min(idx_set) == 0 and max(idx_set) == len(idx_set) - 1
    print('Image number:', len(input_paths))

    model = build_model(args)

    if not os.path.exists(feats_npz_path):
        # makedirs (rather than mkdir) also creates missing parents,
        # e.g. './feats/' on a fresh checkout.
        os.makedirs(feats_npz_path)
        print('Create dir:', feats_npz_path)

    def _save_batch(batch, ix):
        # Forward one batch through the CNN and write one .npz per image.
        # Returns the next unused image index.
        feats = batch_feat(batch, model)
        for j in range(feats.shape[0]):
            np.savez(feats_npz_path + str(ix) + '.npz',
                     x=feats[j].reshape(1024, 196).transpose(1, 0))
            ix += 1
        print('Processed %d/%d images' % (ix, len(input_paths)), end='\r')
        return ix

    img_size = (args.image_height, args.image_width)
    ix = 0
    cur_batch = []
    for i, (path, idx) in enumerate(input_paths):
        img = imread(path, mode='RGB')
        img = imresize(img, img_size, interp='bicubic')
        # HWC -> CHW, plus a leading batch axis.
        img = img.transpose(2, 0, 1)[None]
        cur_batch.append(img)
        if len(cur_batch) == args.batch_size:
            ix = _save_batch(cur_batch, ix)
            cur_batch = []
    # Flush the final partial batch, if any.
    if cur_batch:
        ix = _save_batch(cur_batch, ix)

    print('Extract image features to generate npz files successfully!')
# Command-line interface for the CLEVR feature-extraction script.
parser = argparse.ArgumentParser(description='clevr_extract_feat')
parser.add_argument('--mode', '-mode', choices=['all', 'train', 'val', 'test'], default='all', help='mode', type=str)
parser.add_argument('--gpu', '-gpu', default='0', type=str)
parser.add_argument('--model', '-model', default='resnet101')
# The remaining options are all integers; register them table-driven.
for _name, _default in [('model_stage', 3),
                        ('batch_size', 128),
                        ('image_height', 224),
                        ('image_width', 224)]:
    parser.add_argument('--' + _name, '-' + _name, default=_default, type=int)
if __name__ == '__main__':
    # Per-split input-image and output-feature directories.
    images_path = {
        'train': './raws/images/train/',
        'val': './raws/images/val/',
        'test': './raws/images/test/',
    }
    feats_npz_path = {
        'train': './feats/train/',
        'val': './feats/val/',
        'test': './feats/test/',
    }

    args = parser.parse_args()
    # Echo the effective configuration.
    for name in ['mode', 'gpu', 'model', 'model_stage',
                 'batch_size', 'image_height', 'image_width']:
        print(name + ':', getattr(args, name))

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # Extract features for each requested split ('all' selects every split).
    for split in ['train', 'val', 'test']:
        if args.mode in [split, 'all']:
            print('\nProcess [%s] images features:' % split)
            extract_feature(args, images_path[split], feats_npz_path[split])
================================================
FILE: data/clevr/feats/.gitkeep
================================================
================================================
FILE: data/clevr/raw/.gitkeep
================================================
================================================
FILE: data/gqa/feats/.gitkeep
================================================
================================================
FILE: data/gqa/gqa_feat_preproc.py
================================================
# --------------------------------------------------------
# OpenVQA
# GQA spatial features & object features .h5 files to .npz files transform script
# Written by Pengbing Gao https://github.com/nbgao
# --------------------------------------------------------
'''
Command line example:
(1) Process spatial features
python gqa_feat_preproc.py --mode=spatial --spatial_dir=./spatialFeatures --out_dir=./feats/gqa-grid
(2) Process object features
python gqa_feat_preproc.py --mode=object --object_dir=./objectFeatures --out_dir=./feats/gqa-frcn
'''
import h5py, glob, json, cv2, argparse
import numpy as np
# spatial features
# spatial features
def process_spatial_features(feat_path, out_path):
    '''
    Convert the official GQA spatial (grid) features from their 16 .h5
    shards into one .npz file per image under out_path.

    Each output file stores key 'x' with shape (49, 2048).
    Prints a message and returns early if the info json or any .h5 shard
    cannot be opened.
    '''
    info_file = feat_path + '/gqa_spatial_info.json'
    try:
        info = json.load(open(info_file, 'r'))
    # Narrowed from a bare `except:` which also swallowed KeyboardInterrupt;
    # OSError covers missing/unreadable files, ValueError covers bad JSON.
    except (OSError, ValueError):
        print('Failed to open info file:', info_file)
        return
    print('Total grid features', len(info))

    # Build a '<h5 file>_<row>' -> image-id lookup so each row of every
    # shard can be mapped back to its image.
    print('Building the (h5 file, row) -> image id dict...')
    h5idx_to_imgid = {}
    for img_id in info:
        h5idx_to_imgid[str(info[img_id]['file']) + '_' + str(info[img_id]['idx'])] = img_id

    # The official release ships exactly 16 shards: gqa_spatial_{0..15}.h5
    for ix in range(16):
        feat_file = feat_path + '/gqa_spatial_' + str(ix) + '.h5'
        print('Processing', feat_file)
        try:
            feat_dict = h5py.File(feat_file, 'r')
        except OSError:
            print('Failed to open feat file:', feat_file)
            return
        features = feat_dict['features']
        for iy in range(features.shape[0]):
            img_id = h5idx_to_imgid[str(ix) + '_' + str(iy)]
            feature = features[iy]
            # save to .npz file ['x']
            np.savez(
                out_path + '/' + img_id + '.npz',
                x=feature.reshape(2048, 49).transpose(1, 0),  # (49, 2048)
            )
    print('Process spatial features successfully!')
# object features
# object features
def process_object_features(feat_path, out_path):
    '''
    Convert the official GQA object (bottom-up) features from their 16 .h5
    shards into one .npz file per image under out_path.

    Each output file stores keys 'x' (features of the real, non-padded
    objects), 'bbox', 'width' and 'height'.
    Prints a message and returns early if the info json or any .h5 shard
    cannot be opened.
    '''
    info_file = feat_path + '/gqa_objects_info.json'
    try:
        info = json.load(open(info_file, 'r'))
    # Narrowed from a bare `except:` which also swallowed KeyboardInterrupt;
    # OSError covers missing/unreadable files, ValueError covers bad JSON.
    except (OSError, ValueError):
        print('Failed to open info file:', info_file)
        return
    print('Total frcn features', len(info))

    # Build a '<h5 file>_<row>' -> image-id lookup so each row of every
    # shard can be mapped back to its image.
    print('Building the (h5 file, row) -> image id dict...')
    h5idx_to_imgid = {}
    for img_id in info:
        h5idx_to_imgid[str(info[img_id]['file']) + '_' + str(info[img_id]['idx'])] = img_id

    # The official release ships exactly 16 shards: gqa_objects_{0..15}.h5
    for ix in range(16):
        feat_file = feat_path + '/gqa_objects_' + str(ix) + '.h5'
        print('Processing', feat_file)
        try:
            feat_dict = h5py.File(feat_file, 'r')
        except OSError:
            print('Failed to open feat file:', feat_file)
            return
        bboxes = feat_dict['bboxes']
        features = feat_dict['features']
        for iy in range(features.shape[0]):
            img_id = h5idx_to_imgid[str(ix) + '_' + str(iy)]
            img_info = info[img_id]
            # Rows are zero-padded up to a fixed object count; keep only
            # the real objects.
            objects_num = img_info['objectsNum']
            # save to .npz file ['x', 'bbox', 'width', 'height']
            np.savez(
                out_path + '/' + img_id + '.npz',
                x=features[iy, :objects_num],
                bbox=bboxes[iy, :objects_num],
                width=img_info['width'],
                height=img_info['height'],
            )
    print('Process object features successfully!')
# Command-line interface: choose which feature type to convert and where
# the inputs/outputs live.
parser = argparse.ArgumentParser(description='gqa_h52npz')
parser.add_argument('--mode', '-mode', choices=['object', 'spatial', 'frcn', 'grid'], help='mode', type=str)
for _name, _help in [('object_dir', 'object features dir'),
                     ('spatial_dir', 'spatial features dir'),
                     ('out_dir', 'output dir')]:
    parser.add_argument('--' + _name, '-' + _name, help=_help, type=str)
args = parser.parse_args()

mode = args.mode
object_path = args.object_dir
spatial_path = args.spatial_dir
out_path = args.out_dir

# Echo the effective configuration.
for _label, _value in [('mode:', mode),
                       ('object_path:', object_path),
                       ('spatial_path:', spatial_path),
                       ('out_path:', out_path)]:
    print(_label, _value)

# Dispatch on the requested feature type ('grid' aliases 'spatial',
# 'frcn' aliases 'object').
# process spatial features
if mode in ['spatial', 'grid']:
    process_spatial_features(spatial_path, out_path)
# process object features
if mode in ['object', 'frcn']:
    process_object_features(object_path, out_path)
================================================
FILE: data/gqa/raw/.gitkeep
================================================
================================================
FILE: data/vqa/feats/.gitkeep
================================================
================================================
FILE: data/vqa/raw/.gitkeep
================================================
================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = _source
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
# Any target name (html, latexpdf, ...) is forwarded verbatim via $@.
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
================================================
FILE: docs/_source/_static/custom.css
================================================
/* Restyle inline code literals in the RTD theme: inherit the surrounding
   text color and sit on a light grey chip instead of the theme default. */
.rst-content code.literal {
color: inherit;
font-size: 85%;
border: none;
background: #F0F0F0;
padding: 2px 3px 1px;
}
================================================
FILE: docs/_source/_static/mathjax_mathml.user.js
================================================
// ==UserScript==
// @name MathJax MathML
// @namespace http://www.mathjax.org/
// @description Insert MathJax into pages containing MathML
// @include *
// ==/UserScript==
// Inject the MathJax CDN script only when the page contains MathML and
// MathJax is not already loaded. (unsafeWindow is the page's real window
// under Greasemonkey; fall back to window elsewhere.)
if ((window.unsafeWindow == null ? window : unsafeWindow).MathJax == null) {
  // Look for <math> elements both in the default namespace and in the
  // MathML namespace (getElementsByTagNameNS may be missing in old UAs).
  if ((document.getElementsByTagName("math").length > 0) ||
      (document.getElementsByTagNameNS == null ? false :
      (document.getElementsByTagNameNS("http://www.w3.org/1998/Math/MathML","math").length > 0))) {
    var script = document.createElement("script");
    script.type = "text/javascript";
    // Configuration is carried in the ?config= query string; the unused
    // `config` variable from the original script has been removed.
    script.src = "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_CHTML-full";
    document.getElementsByTagName("head")[0].appendChild(script);
  }
}
================================================
FILE: docs/_source/_static/mathjax_wikipedia.user.js
================================================
// ==UserScript==
// @name MathJax in Wikipedia
// @namespace http://www.mathjax.org/
// @description Insert MathJax into Wikipedia pages
// @include http://en.wikipedia.org/wiki/*
// ==/UserScript==
// Skip if MathJax is already present on the page (unsafeWindow is the
// page's real window under Greasemonkey; fall back to window elsewhere).
if ((window.unsafeWindow == null ? window : unsafeWindow).MathJax == null) {
//
// Replace the images with MathJax scripts of type math/tex
// (Wikipedia renders formulas as <img class="tex" alt="<TeX source>">).
//
var images = document.getElementsByTagName('img'), count = 0;
// Iterate backwards because replaceChild mutates the live NodeList.
for (var i = images.length - 1; i >= 0; i--) {
var img = images[i];
if (img.className === "tex") {
var script = document.createElement("script"); script.type = "math/tex";
// Opera requires innerHTML; other browsers use the text property.
if (window.opera) {script.innerHTML = img.alt} else {script.text = img.alt}
img.parentNode.replaceChild(script,img); count++;
}
}
if (count) {
//
// Load MathJax and have it process the page
// (only if at least one formula image was converted above).
//
var script = document.createElement("script");
script.type = "text/javascript";
script.src = "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_CHTML-full";
document.getElementsByTagName("head")[0].appendChild(script);
}
}
================================================
FILE: docs/_source/advanced/adding_model.md
================================================
# Adding a custom VQA model
This is a tutorial on how to add a custom VQA model into OpenVQA. Follow the steps below, you will obtain a model that can run across VQA/GQA/CLEVR datasets.
## 1. Preliminary
All implemented models are placed at ```/openvqa/models/```, so the first thing to do is to create a folder there for your VQA model named by ``. After that, all your model related files will be placed in the folder ```/openvqa/models//```.
## 2. Dataset Adapter
Create a python file `/openvqa/models//adapter.py` to bridge your model and different datasets. Different datasets have different input features, thus resulting in different operators to handle the features.
#### Input
Input features (packed as `feat_dict`) for different datasets.
#### Output
Customized pre-processed features to be fed into the model.
#### Adapter Template
```
from openvqa.core.base_dataset import BaseAdapter
class Adapter(BaseAdapter):
def __init__(self, __C):
super(Adapter, self).__init__(__C)
self.__C = __C
def vqa_init(self, __C):
# Your Implementation
def gqa_init(self, __C):
# Your Implementation
def clevr_init(self, __C):
# Your Implementation
def vqa_forward(self, feat_dict):
# Your Implementation
def gqa_forward(self, feat_dict):
# Your Implementation
def clevr_forward(self, feat_dict):
# Your Implementation
```
Each dataset-specific initiation function `def _init(self, __C)` corresponds to one feed-forward function `def _forward(self, feat_dict)`, your implementations should follow the principles ```torch.nn.Module.__init__()``` and ```torch.nn.Module.forward()```, respectively.
The variable `feat_dict` consists of the input feature names for the datasets, which correspond to the definitions in `/openvqa/core/base_cfgs.py`
```
vqa:{
'FRCN_FEAT': bottom-up features -> [batchsize, num_bbox, 2048],
'BBOX_FEAT': bbox coordinates -> [batchsize, num_bbox, 5],
}
gqa:{
'FRCN_FEAT': official bottom-up features -> [batchsize, num_bbox, 2048],
'BBOX_FEAT': official bbox coordinates -> [batchsize, num_bbox, 5],
'GRID_FEAT': official resnet grid features -> [batchsize, num_grid, 2048],
}
clevr:{
'GRID_FEAT': resnet grid features -> [batchsize, num_grid, 1024],
}
```
More detailed examples can be referred to the adapter for the [MCAN](https://github.com/MILVLG/openvqa/tree/master/openvqa/models/mcan/adapter.py) model.
## 3. Definition of model hyper-parameters
Create a python file named ```/openvqa/models//model_cfgs.py```
#### Configuration Template
```
from openvqa.core.base_cfgs import BaseCfgs
class Cfgs(BaseCfgs):
def __init__(self):
super(Cfgs, self).__init__()
# Your Implementation
```
Only the variables you define here can be used in the network. The variable values can be overridden in the running configuration file described later.
#### Example
```
# model_cfgs.py
from openvqa.core.base_cfgs import BaseCfgs
class Cfgs(BaseCfgs):
def __init__(self):
super(Cfgs, self).__init__()
self.LAYER = 6
```
```
# net.py
class Net(nn.Module):
def __init__(self, __C, pretrained_emb, token_size, answer_size):
super(Net, self).__init__()
self.__C = __C
print(__C.LAYER)
```
```
Output: 6
```
## 4. Main body
Create a python file for the main body of the model as ```/openvqa/models//net.py```. Note that the filename must be `net.py` since this filename will be invoked by the running script. Except the file, other auxiliary model files invoked by `net.py` can be named arbitrarily.
When implementation, you should pay attention to the following restrictions:
- The main module should be named `Net`, i.e., `class Net(nn.Module):`
- The `init` function has three input variables: *pretrained_emb* corresponds to the GloVe embedding features for the question; *token\_size* corresponds to the number of all dataset words; *answer_size* corresponds to the number of classes for prediction.
- The `forward` function has four input variables: *frcn_feat*, *grid_feat*, *bbox_feat*, *ques_ix*.
- In the `init` function, you should initialize the `Adapter` which you've already defined above. In the `forward` function, you should feed *frcn_feat*, *grid_feat*, *bbox_feat* into the `Adapter` to obtain the processed image features.
- Return a prediction tensor of size [batch\_size, answer_size]. Note that no activation function like ```sigmoid``` or ```softmax``` is appended on the prediction. The activation has been designed for the prediction in the loss function outside.
#### Model Template
```
import torch.nn as nn
from openvqa.models.mcan.adapter import Adapter
class Net(nn.Module):
def __init__(self, __C, pretrained_emb, token_size, answer_size):
super(Net, self).__init__()
self.__C = __C
self.adapter = Adapter(__C)
def forward(self, frcn_feat, grid_feat, bbox_feat, ques_ix):
img_feat = self.adapter(frcn_feat, grid_feat, bbox_feat)
# model implementation
...
return pred
```
## 5. Declaration of running configurations
Create a `yml` file at```/configs//.yml``` and define your hyper-parameters here. We suggest that ``= ``. If you have the requirement to have one base model support the running scripts for different variants. (e.g., MFB and MFH), you can have different yml files (e.g., `mfb.yml` and `mfh.yml`) and use the `MODEL_USE` param in the yml file to specify the actual used model (i.e., mfb).
### Example:
```
MODEL_USE: # Must be defined
LAYER: 6
LOSS_FUNC: bce
LOSS_REDUCTION: sum
```
Finally, to register the added model to the running script, you can modify `` by adding your `` into the arguments for models [here](https://github.com/MILVLG/openvqa/tree/master/run.py#L22).
By doing all the steps above, you are able to use ```--MODEL=``` to train/val/test your model like other provided models. For more information about the usage of the running script, please refer to the [Getting Started](https://openvqa.readthedocs.io/en/latest/basic/getting_started.html) page.
================================================
FILE: docs/_source/advanced/contributing.md
================================================
# Contributing to OpenVQA
All kinds of contributions are welcome, including but not limited to the following.
- Fixes (typo, bugs)
- New features and components
## Workflow
1. fork and pull the latest version of OpenVQA
2. checkout a new branch (do not use master branch for PRs)
3. commit your changes
4. create a PR
## Code style
### Python
We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style.
We use [flake8](http://flake8.pycqa.org/en/latest/) as the linter and [yapf](https://github.com/google/yapf) as the formatter.
Please upgrade to the latest yapf (>=0.27.0) and refer to the configuration.
>Before you create a PR, make sure that your code lints and is formatted by yapf.
### C++ and CUDA
We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).
================================================
FILE: docs/_source/basic/getting_started.md
================================================
# Getting Started
This page provides basic tutorials about the usage of OpenVQA.
For installation instructions, please see [Installation](install).
## Training
The following script will start training a `mcan_small` model on the `VQA-v2` dataset:
```bash
$ python3 run.py --RUN='train' --MODEL='mcan_small' --DATASET='vqa'
```
- ```--RUN={'train','val','test'}``` to set the mode to be executed.
- ```--MODEL=str```, e.g., to assign the model to be executed.
- ```--DATASET={'vqa','gqa','clevr'}``` to choose the dataset to be executed.
All checkpoint files will be saved to:
```
ckpts/ckpt_/epoch.pkl
```
and the training log file will be placed at:
```
results/log/log_run_.txt
```
To add:
- ```--VERSION=str```, e.g., ```--VERSION='v1'``` to assign a name for this model.
- ```--GPU=str```, e.g., ```--GPU='2'``` to train the model on specified GPU device.
- ```--SEED=int```, e.g., ```--SEED=123``` to use a fixed seed to initialize the model, which obtains exactly the same model. Unset it results in random seeds.
- ```--NW=int```, e.g., ```--NW=8``` to accelerate I/O speed.
- ```--SPLIT=str``` to set the training sets as you want. Setting ```--SPLIT='train'``` will trigger the evaluation script to run the validation score after every epoch automatically.
- ```--RESUME=True``` to start training with saved checkpoint parameters. In this stage, you should assign the checkpoint version```--CKPT_V=str``` and the resumed epoch number ```CKPT_E=int```.
- ```--MAX_EPOCH=int``` to stop training at a specified epoch number.
If you want to resume training from an existing checkpoint, you can use the following script:
```bash
$ python3 run.py --RUN='train' --MODEL='mcan_small' --DATASET='vqa' --CKPT_V=str --CKPT_E=int
```
where the args `CKPT_V` and `CKPT_E` must be specified, corresponding to the version and epoch number of the loaded model.
#### Multi-GPU Training and Gradient Accumulation
We recommend to use the GPU with at least 8 GB memory, but if you don't have such device, we provide two solutions to deal with it:
- _Multi-GPU Training_:
If you want to accelerate training or train the model on a device with limited GPU memory, you can use more than one GPUs:
Add ```--GPU='0, 1, 2, 3...'```
The batch size on each GPU will be adjusted to `BATCH_SIZE`/#GPUs automatically.
- _Gradient Accumulation_:
If you only have one GPU less than 8GB, an alternative strategy is provided to use the gradient accumulation during training:
Add ```--ACCU=n```
This makes the optimizer accumulate gradients for `n` small batches and update the model weights at once. It is worth noting that `BATCH_SIZE` must be divisible by `n` to run this mode correctly.
## Validation and Testing
**Warning**: The args ```--MODEL``` and `--DATASET` should be set to the same values as those in the training stage.
### Validation on Local Machine
Offline evaluation on a local machine only supports evaluation on the *val* split. If you want to evaluate the *test* split, please see [Evaluation on online server](#Evaluation on online server).
There are two ways to start:
(Recommend)
```bash
$ python3 run.py --RUN='val' --MODEL=str --DATASET='{vqa,gqa,clevr}' --CKPT_V=str --CKPT_E=int
```
or use the absolute path instead:
```bash
$ python3 run.py --RUN='val' --MODEL=str --DATASET='{vqa,gqa,clevr}' --CKPT_PATH=str
```
- For VQA-v2, the results on *val* split
### Testing on Online Server
All the evaluations on the test split of VQA-v2, GQA and CLEVR benchmarks can be achieved by using
```bash
$ python3 run.py --RUN='test' --MODEL=str --DATASET='{vqa,gqa,clevr}' --CKPT_V=str --CKPT_E=int
```
Result file are saved at: ```results/result_test/result_run__.json```
- For VQA-v2, the result file is uploaded to the [VQA challenge website](https://evalai.cloudcv.org/web/challenges/challenge-page/163/overview) to evaluate the scores on *test-dev* or *test-std* split.
- For GQA, the result file is uploaded to the [GQA Challenge website]() to evaluate the scores on *test* or *test-dev* split.
- For CLEVR, the result file can be evaluated via sending an email to the author [Justin Johnson]( ) with attaching this file, and he will reply the scores via email too.
================================================
FILE: docs/_source/basic/install.md
================================================
# Installation
This page provides basic prerequisites to run OpenVQA, including the setups of hardware, software, and datasets.
## Hardware & Software Setup
A machine with at least **1 GPU (>= 8GB)**, **20GB memory** and **50GB free disk space** is required. We strongly recommend to use a SSD drive to guarantee high-speed I/O.
The following packages are required to build the project correctly.
- [Python](https://www.python.org/downloads/) >= 3.5
- [Cuda](https://developer.nvidia.com/cuda-toolkit) >= 9.0 and [cuDNN](https://developer.nvidia.com/cudnn)
- [PyTorch](http://pytorch.org/) >= 0.4.1 with CUDA (**PyTorch 1.x is also supported**).
- [SpaCy](https://spacy.io/) and initialize the [GloVe](https://github.com/explosion/spacy-models/releases/download/en_vectors_web_lg-2.1.0/en_vectors_web_lg-2.1.0.tar.gz) as follows:
```bash
$ pip install -r requirements.txt
$ wget https://github.com/explosion/spacy-models/releases/download/en_vectors_web_lg-2.1.0/en_vectors_web_lg-2.1.0.tar.gz -O en_vectors_web_lg-2.1.0.tar.gz
$ pip install en_vectors_web_lg-2.1.0.tar.gz
```
## Dataset Setup
The following datasets should be prepared before running the experiments.
**Note that if you only want to run experiments on one specific dataset, you can focus on the setup for that and skip the rest.**
### VQA-v2
- Image Features
The image features are extracted using the [bottom-up-attention](https://github.com/peteanderson80/bottom-up-attention) strategy, with each image being represented as a dynamic number (from 10 to 100) of 2048-D features. We store the features for each image in a `.npz` file. You can prepare the visual features by yourself or download the extracted features from [OneDrive](https://awma1-my.sharepoint.com/:f:/g/personal/yuz_l0_tn/EsfBlbmK1QZFhCOFpr4c5HUBzUV0aH2h1McnPG1jWAxytQ?e=2BZl8O) or [BaiduYun](https://pan.baidu.com/s/1C7jIWgM3hFPv-YXJexItgw#list/path=%2F). The downloaded archive contains three files: **train2014.tar.gz, val2014.tar.gz, and test2015.tar.gz**, corresponding to the features of the train/val/test images for *VQA-v2*, respectively.
All the image feature files are unzipped and placed in the `data/vqa/feats` folder to form the following tree structure:
```
|-- data
|-- vqa
| |-- feats
| | |-- train2014
| | | |-- COCO_train2014_...jpg.npz
| | | |-- ...
| | |-- val2014
| | | |-- COCO_val2014_...jpg.npz
| | | |-- ...
| | |-- test2015
| | | |-- COCO_test2015_...jpg.npz
| | | |-- ...
```
- QA Annotations
Download all the annotation `json` files for VQA-v2, including the [train questions](https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Questions_Train_mscoco.zip), [val questions](https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Questions_Val_mscoco.zip), [test questions](https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Questions_Test_mscoco.zip), [train answers](https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Annotations_Train_mscoco.zip), and [val answers](https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Annotations_Val_mscoco.zip).
In addition, we use the VQA samples from the Visual Genome to augment the training samples. We pre-processed these samples by two rules:
1. Select the QA pairs with the corresponding images appear in the MS-COCO *train* and *val* splits;
2. Select the QA pairs with the answer appear in the processed answer list (occurs more than 8 times in whole *VQA-v2* answers).
We provide our processed vg questions and annotations files, you can download them from [OneDrive](https://awma1-my.sharepoint.com/:f:/g/personal/yuz_l0_tn/EmVHVeGdck1IifPczGmXoaMBFiSvsegA6tf_PqxL3HXclw) or [BaiduYun](https://pan.baidu.com/s/1QCOtSxJGQA01DnhUg7FFtQ#list/path=%2F).
All the QA annotation files are unzipped and placed in the `data/vqa/raw` folder to form the following tree structure:
```
|-- data
|-- vqa
| |-- raw
| | |-- v2_OpenEnded_mscoco_train2014_questions.json
| | |-- v2_OpenEnded_mscoco_val2014_questions.json
| | |-- v2_OpenEnded_mscoco_test2015_questions.json
| | |-- v2_OpenEnded_mscoco_test-dev2015_questions.json
| | |-- v2_mscoco_train2014_annotations.json
| | |-- v2_mscoco_val2014_annotations.json
| | |-- VG_questions.json
| | |-- VG_annotations.json
```
### GQA
- Image Features
Download the [spatial features](https://nlp.stanford.edu/data/gqa/spatialFeatures.zip) and [object features](https://nlp.stanford.edu/data/gqa/objectFeatures.zip) for GQA from its official website. **Spatial Features Files** include `gqa_spatial_*.h5` and `gqa_spatial_info.json`. **Object Features Files** include `gqa_objects_*.h5` and `gqa_objects_info.json`.
To make the input features consistent with those for VQA-v2, we provide a [script](https://github.com/MILVLG/openvqa/tree/master/data/gqa/gqa_feat_preproc.py) to transform `.h5` feature files into multiple `.npz` files, with each file corresponding to one image.
```bash
$ cd data/gqa
$ unzip spatialFeatures.zip
$ python gqa_feat_preproc.py --mode=spatial --spatial_dir=./spatialFeatures --out_dir=./feats/gqa-grid
$ rm -r spatialFeatures.zip ./spatialFeatures
$ unzip objectFeatures.zip
$ python gqa_feat_preproc.py --mode=object --object_dir=./objectFeatures --out_dir=./feats/gqa-frcn
$ rm -r objectFeatures.zip ./objectFeatures
```
All the processed feature files are placed in the `data/gqa/feats` folder to form the following tree structure:
```
|-- data
|-- gqa
| |-- feats
| | |-- gqa-frcn
| | | |-- 1.npz
| | | |-- ...
| | |-- gqa-grid
| | | |-- 1.npz
| | | |-- ...
```
- Questions and Scene Graphs
Download all the GQA [QA files](https://nlp.stanford.edu/data/gqa/questions1.2.zip) from the official site, including all the splits needed for training, validation and testing. Download the [scene graphs files](https://nlp.stanford.edu/data/gqa/sceneGraphs.zip) for `train` and `val` splits from the official site. Download the [supporting files](https://nlp.stanford.edu/data/gqa/eval.zip) from the official site, including the `train` and `val` choices supporting files for the evaluation.
All the question files and scene graph files are unzipped and placed in the `data/gqa/raw` folder to form the following tree structure:
```
|-- data
|-- gqa
| |-- raw
| | |-- questions1.2
| | | |-- train_all_questions
| | | | |-- train_all_questions_0.json
| | | | |-- ...
| | | | |-- train_all_questions_9.json
| | | |-- train_balanced_questions.json
| | | |-- val_all_questions.json
| | | |-- val_balanced_questions.json
| | | |-- testdev_all_questions.json
| | | |-- testdev_balanced_questions.json
| | | |-- test_all_questions.json
| | | |-- test_balanced_questions.json
| | | |-- challenge_all_questions.json
| | | |-- challenge_balanced_questions.json
| | | |-- submission_all_questions.json
| | |-- eval
| | | |-- train_choices
| | | | |-- train_all_questions_0.json
| | | | |-- ...
| | | | |-- train_all_questions_9.json
| | | |-- val_choices.json
| | |-- sceneGraphs
| | | |-- train_sceneGraphs.json
| | | |-- val_sceneGraphs.json
```
### CLEVR
- Images, Questions and Scene Graphs
Download all the [CLEVR v1.0](https://dl.fbaipublicfiles.com/clevr/CLEVR_v1.0.zip) from the official site, including all the splits needed for training, validation and testing.
All the image files, question files and scene graph files are unzipped and placed in the `data/clevr/raw` folder to form the following tree structure:
```
|-- data
|-- clevr
| |-- raw
| | |-- images
| | | |-- train
| | | | |-- CLEVR_train_000000.json
| | | | |-- ...
| | | | |-- CLEVR_train_069999.json
| | | |-- val
| | | | |-- CLEVR_val_000000.json
| | | | |-- ...
| | | | |-- CLEVR_val_014999.json
| | | |-- test
| | | | |-- CLEVR_test_000000.json
| | | | |-- ...
| | | | |-- CLEVR_test_014999.json
| | |-- questions
| | | |-- CLEVR_train_questions.json
| | | |-- CLEVR_val_questions.json
| | | |-- CLEVR_test_questions.json
| | |-- scenes
| | | |-- CLEVR_train_scenes.json
| | | |-- CLEVR_val_scenes.json
```
- Image Features
To make the input features consistent with those for VQA-v2, we provide a [script](https://github.com/MILVLG/openvqa/tree/master/data/clevr/clevr_extract_feat.py) to extract image features using a pre-trained ResNet-101 model like most previous works did and generate `.npz` files, with each file corresponding to one image.
```bash
$ cd data/clevr
$ python clevr_extract_feat.py --mode=all --gpu=0
```
All the processed feature files are placed in the `data/clevr/feats` folder to form the following tree structure:
```
|-- data
|-- clevr
| |-- feats
| | |-- train
| | | |-- 1.npz
| | | |-- ...
| | |-- val
| | | |-- 1.npz
| | | |-- ...
| | |-- test
| | | |-- 1.npz
| | | |-- ...
```
================================================
FILE: docs/_source/basic/model_zoo.md
================================================
# Benchmark and Model Zoo
## Environment
We use the following environment to run all the experiments in this page.
- Python 3.6
- PyTorch 0.4.1
- CUDA 9.0.176
- CUDNN 7.0.4
## VQA-v2
We provide three groups of results (including the accuracies of *Overall*, *Yes/No*, *Number* and *Other*) for each model on VQA-v2 using different training schemes as follows. We provide pre-trained models for the latter two schemes.
- **Train -> Val**: trained on the `train` split and evaluated on the `val` split.
- **Train+val -> Test-dev**: trained on the `train+val` splits and evaluated on the `test-dev` split.
- **Train+val+vg -> Test-dev**: trained on the `train+val+vg` splits and evaluated on the `test-dev` split.
**Note that for one model, the used base learning rate in the two schemes may be different, you should modify this setting in the config file to reproduce the results.**
#### Train -> Val
| Model | Base lr | Overall (%) | Yes/No (%) | Number (%) | Other (%) |
|:--------------------------------------------------------------------------------------:|:-------:|:-----------:|:----------:|:----------:|:---------:|
| [BUTD](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/butd.yml) | 2e-3 | 63.84 | 81.40 | 43.81 | 55.78 |
| [MFB](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mfb.yml) | 7e-4 | 65.35 | 83.23 | 45.31 | 57.05 |
| [MFH](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mfh.yml) | 7e-4 | 66.18 | 84.07 | 46.55 | 57.78 |
| [BAN-4](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/ban_4.yml) | 2e-3 | 65.86 | 83.53 | 46.36 | 57.56 |
| [BAN-8](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/ban_8.yml) | 2e-3 | 66.00 | 83.61 | 47.04 | 57.62 |
| [MCAN-small](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mcan_small.yml) | 1e-4 | 67.17 | 84.82 | 49.31 | 58.48 |
| [MCAN-large](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mcan_large.yml) | 7e-5 | 67.50 | 85.14 | 49.66 | 58.80 |
| [MMNasNet-small](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mmnasnet_small.yml) | 1.2e-4 | 67.79 | 85.02 | 52.25 | 58.80 |
| [MMNasNet-large](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mmnasnet_large.yml) | 7e-5 | 67.98 | 85.22 | 52.04 | 59.09 |
#### Train+val -> Test-dev
| Model | Base lr | Overall (%) | Yes/No (%) | Number (%) | Other (%) | Download |
|:--------------------------------------------------------------------------------------:|:-------:|:-----------:|:----------:|:----------:|:---------:|:-------------------------------------------------------------------------------------------------------------------------:|
| [BUTD](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/butd.yml) | 2e-3 | 66.98 | 83.28 | 46.19 | 57.85 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EWSOkcCVGMpAot9ol0IJP3ABv3cWFRvGFB67980PHiCk3Q?download=1) |
| [MFB](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mfb.yml) | 7e-4 | 68.29 | 84.64 | 48.29 | 58.89 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/ET-B23hG7UNPrQ0hha77V5kBMxAokIr486lB3YwMt-zhow?download=1) |
| [MFH](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mfh.yml) | 7e-4 | 69.11 | 85.56 | 48.81 | 59.69 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EUpvJD3c7NZJvBAbFOXTS0IBk1jCSz46bi7Pfq1kzJ35PA?download=1) |
| [BAN-4](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/ban_4.yml) | 1.4e-3 | 68.9 | 85.0 | 49.5 | 59.56 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EVUabhYppDBImgV6b0DdGr0BrxTdSLm7ux9rN65T_8DZ0Q?download=1) |
| [BAN-8](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/ban_8.yml) | 1.4e-3 | 69.07 | 85.2 | 49.63 | 59.71 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EbJgyL7FPTFAqzMm3HB1xDIBjXpWygOoXrdnDZKEIu34rg?download=1) |
| [MCAN-small](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mcan_small.yml) | 1e-4 | 70.33 | 86.77 | 52.14 | 60.40 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EcFeQCi_9MVBn6MeESly8OYBZCeBEuaPQqZjT-oXidgKKg?download=1) |
| [MCAN-large](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mcan_large.yml) | 5e-5 | 70.48 | 86.90 | 52.11 | 60.63 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/Ee6HdFN_FcZAsQEm85WesHgBZBkY8dZ-278dDYG_ty_IwA?download=1) |
#### Train+val+vg -> Test-dev
| Model | Base lr | Overall (%) | Yes/No (%) | Number (%) | Other (%) | Download |
|:--------------------------------------------------------------------------------------:|:-------:|:-----------:|:----------:|:----------:|:---------:|:-------------------------------------------------------------------------------------------------------------------------:|
| [BUTD](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/butd.yml) | 2e-3 | 67.54 | 83.48 | 46.97 | 58.62 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EbLMhJsx9AVJi-ipqtkzHckBS5TWo_au3T8wHPEdDKMgPQ?download=1) |
| [MFB](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mfb.yml) | 7e-4 | 68.25 | 84.79 | 48.24 | 58.68 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EfLYkv1XBgNJgOMU5PAo04YBHxAVmpeJtnZecqJztJdNig?download=1) |
| [MFH](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mfh.yml) | 7e-4 | 68.86 | 85.38 | 49.27 | 59.21 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EXGNuWmba8JOnQkkpfqokqcBzJ6Yw1ID6hl7hj2nyJaNJA?download=1) |
| [BAN-4](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/ban_4.yml) | 1.4e-3 | 69.31 | 85.42 | 50.15 | 59.91 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/ERAUbsBJzcNHjXcINxDoWOQByR0jSbdNp8nonuFdbyc8yA?download=1) |
| [BAN-8](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/ban_8.yml) | 1.4e-3 | 69.48 | 85.40 | 50.82 | 60.14 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EW6v-dZOdJhFoKwT3bIx8M8B_U998hE8YD9zUJsUpo0rjQ?download=1) |
| [MCAN-small](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mcan_small.yml) | 1e-4 | 70.69 | 87.08 | 53.16 | 60.66 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EWSniKgB8Y9PropErzcAedkBKwJCeBP6b5x5oT_I4LiWtg?download=1) |
| [MCAN-large](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mcan_large.yml) | 5e-5 | 70.82 | 87.19 | 52.56 | 60.98 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EQvT2mjBm4ZGnE-jBgAJCbIBC9RBiHwl-XEDr8T63DS10w?download=1) |
| [MMNasNet-small](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mmnasnet_small.yml) | 1e-4 | 71.24 | 87.11 | 56.15 | 61.08 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EaUf4tRcw0FPghbwRoVcMo8BQT9SWzgiZBpD2CrFRfS54w?download=1) |
| [MMNasNet-large](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mmnasnet_large.yml) | 5e-5 | 71.45 | 87.29 | 55.71 | 61.45 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EQwNsq0AVehGqhWS4iwuWsYBPtP78xEqRgFKuRGKodkQWA?download=1) |
## GQA
We provide a group of results (including *Accuracy*, *Binary*, *Open*, *Validity*, *Plausibility*, *Consistency*, *Distribution*) for each model on GQA as follows.
- **Train+val -> Test-dev**: trained on the `train(balance) + val(balance)` splits and evaluated on the `test-dev(balance)` split.
**The results shown in the following are obtained from the [online server](https://evalai.cloudcv.org/web/challenges/challenge-page/225/overview). Note that the offline Test-dev result is evaluated by the provided official script, which results in a slight difference compared to the online result for unknown reasons.**
#### Train+val -> Test-dev
| Model | Base lr | Accuracy (%) | Binary (%) | Open (%) | Validity (%) | Plausibility (%) | Consistency (%) | Distribution | Download |
|:------:|:-------:|:------------:|:----------:|:--------:|:------------:|:----------------:|:----------------:|:------------:|:--------:|
| [BUTD (frcn+bbox)](https://github.com/MILVLG/openvqa/tree/master/configs/gqa/butd.yml) | 2e-3 | 53.38 | 67.78 | 40.72 | 96.62 | 84.81 | 77.62 | 1.26 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EaalaQ6VmBJCgeoZiPp45_gBn20g7tpkp-Uq8IVFcun64w?download=1) |
| [BAN-4 (frcn+bbox)](https://github.com/MILVLG/openvqa/tree/master/configs/gqa/ban_4.yml) | 2e-3 | 55.01 | 72.02 | 40.06 | 96.94 | 85.67 | 81.85 | 1.04 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EdRIuVXaJqBJoXg3T7N0xfYBsPl-GlgW2hq2toqm2gOxXg?download=1) |
| [BAN-8 (frcn+bbox)](https://github.com/MILVLG/openvqa/tree/master/configs/gqa/ban_8.yml) | 1e-3 | 56.19 | 73.31 | 41.13 | 96.77 | 85.58 | 84.64 | 1.09 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/ES8FCQxFsqJBnvdoOcF_724BJgJml6iStYYK9UeUbI8Uyw?download=1) |
| [MCAN-small (frcn)](https://github.com/MILVLG/openvqa/tree/master/configs/gqa/mcan_small.yml) | 1e-4 | 53.41 | 70.29 | 38.56 | 96.77 | 85.32 | 82.29 | 1.40 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/ER_i5xbPuXNCiC15iVtxBvgBTe7IBRpqpWTmeAY5svv3Ew?download=1) |
| [MCAN-small (frcn+grid)](https://github.com/MILVLG/openvqa/tree/master/configs/gqa/mcan_small.yml) | 1e-4 | 54.28 | 71.68 | 38.97 | 96.79 | 85.11 | 84.49 | 1.20 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EbsPhIGkvpNKtqBbFmIFIucBQO_dM6lDgQL-gdd3RnzziQ?download=1) |
| [MCAN-small (frcn+bbox)](https://github.com/MILVLG/openvqa/tree/master/configs/gqa/mcan_small.yml) | 1e-4 | 58.20 | 75.87 | 42.66 | 97.01 | 85.41 | 87.99 | 1.25 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EQCUNFPnpC1HliLDFCSDUc4BUdbdq40iPZVi5tLOCrVaQA?download=1) |
| [MCAN-small (frcn+bbox+grid)](https://github.com/MILVLG/openvqa/tree/master/configs/gqa/mcan_small.yml) | 1e-4 | 58.38 | 76.49 | 42.45 | 96.98 | 84.47 | 87.36 | 1.29 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EcrY2vDlzERLksouT5_cbcIBM1BCPkPdg4MyPmci8xrQig?download=1) |
| [MCAN-large (frcn+bbox+grid)](https://github.com/MILVLG/openvqa/tree/master/configs/gqa/mcan_large.yml) | 5e-5 | 58.10 | 76.98 | 41.50 | 97.01 | 85.43 | 87.34 | 1.20 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/Ed6PBjIDEHpDot3vY__T-OIBJGdW51RFo2u_pm-7S5TMPA?download=1) |
## CLEVR
We provide a group of results (including *Overall*, *Count*, *Exist*, *Compare Numbers*, *Query Attribute*, *Compare Attribute*) for each model on CLEVR as follows.
- **Train -> Val**: trained on the `train` split and evaluated on the `val` split.
#### Train -> Val
| Model | Base lr | Overall (%) | Count (%) | Exist (%) | Compare Numbers (%) | Query Attribute (%) | Compare Attribute (%) | Download |
|:-----:|:-------:|:-------------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
| [MCAN-small](https://github.com/MILVLG/openvqa/tree/master/configs/clevr/mcan_small.yml) | 4e-5 | 98.74 | 96.81 | 99.27 | 98.89 | 99.53 | 99.19 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/ERtwnuAoeHNKjs0qTkWC3cYBWVuUk7BLk88cnCKNFxYYlQ?download=1) |
================================================
FILE: docs/_source/conf.py
================================================
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# http://www.sphinx-doc.org/en/master/config
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath('../..'))
RELEASE = os.environ.get('RELEASE', False)
# -- Project information -----------------------------------------------------
project = u'OpenVQA'
copyright = u'2019, MILVLG'
author = u'MILVLG'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
# version = '1.0'
# The full version, including alpha/beta/rc tags.
# release = '0.0'
# -- General configuration ---------------------------------------------------
master_doc = 'index'
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
source_suffix = {
'.rst': 'restructuredtext',
'.txt': 'markdown',
'.md': 'markdown',
}
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.autosummary',
'sphinx.ext.doctest',
'sphinx.ext.intersphinx',
'sphinx.ext.todo',
'sphinx.ext.coverage',
'sphinx.ext.napoleon',
'sphinx.ext.viewcode',
'sphinx_markdown_tables',
'recommonmark',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Add cusotm css overrides
def setup(app):
app.add_stylesheet( "custom.css" )
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
if RELEASE:
templates_path = ['_templates-stable']
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# Disable docstring inheritance
autodoc_inherit_docstrings = False
# -- Other Options ------------------------------------------------------------
# intersphinx_mapping = {
# 'python': ('https://docs.python.org/3', None)
# }
================================================
FILE: docs/_source/index.rst
================================================
.. OpenVQA documentation master file, created by
sphinx-quickstart on Sun Aug 4 13:54:29 2019.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
:github_url: https://github.com/MILVLG/openvqa
OpenVQA Documentation
=====================
.. raw:: html
OpenVQA is a general platform for visual question answering (VQA) research,
implementing state-of-the-art approaches on different benchmark datasets.
Support for more methods and datasets will be added continuously.
.. toctree::
:caption: The Basics
:name: basics
:maxdepth: 1
Installation
Getting Started
Model Zoo
.. toctree::
:caption: Advanced topics
:name: advanced-topics
:maxdepth: 1
Adding a Model
Contributing
------
This repo is currently maintained by Zhou Yu (`@yuzcccc`_) and Yuhao Cui (`@cuiyuhao1996`_).
This version of the documentation was built on |today|.
.. _@yuzcccc: https://github.com/yuzcccc
.. _@cuiyuhao1996: https://github.com/cuiyuhao1996
================================================
FILE: docs/_templates/layout.html
================================================
{# Sphinx theme override: extends the active theme's own layout.html. #}
{% extends "!layout.html" %}
{# Sidebar menu block: currently keeps the theme default via super(); a
   placeholder for future custom menu entries. #}
{% block menu %}
{{ super() }}
{% endblock %}
{# Footer block: currently keeps the theme default via super(); a
   placeholder for future custom footer content. #}
{% block footer %}
{{ super() }}
{% endblock %}
================================================
FILE: docs/make.bat
================================================
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
REM Fall back to the sphinx-build executable on PATH when the SPHINXBUILD
REM environment variable is not set by the user.
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=_source
set BUILDDIR=_build
REM No build target supplied: show Sphinx's help text instead.
if "%1" == "" goto help
REM Probe that sphinx-build can actually be launched
REM (errorlevel 9009 = "command not found" on Windows).
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/
exit /b 1
)
REM Delegate the requested target (html, clean, ...) to Sphinx's make mode.
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd
================================================
FILE: docs/readme.md
================================================
# How to Edit OpenVQA Document
The OpenVQA documentation is built with [Sphinx](https://www.sphinx-doc.org/en/master/) and hosted on [Read the Docs](https://readthedocs.org/).
You need to know both [Markdown](https://markdown-zh.readthedocs.io/) and
[reStructuredText](http://docutils.sourceforge.net/rst.html) plaintext markup syntax.
We use the `.md` and `.rst` suffixes to distinguish them.
Usually OpenVQA source coders will participate in the maintenance of the document.
In most cases, programmers have learned markdown syntax. So the markdown syntax is used for simple content.
In order to use the [autodoc](https://www.sphinx-doc.org/ext/autodoc.html) feature in Sphinx,
you must be familiar with the documentation content mentioned above.
## Edit and Debug
Different developers have different document maintenance habits,
it is recommended to maintain the document with a separate `docs: xxxx` branch
instead of directly making Pull Requests to the master branch.
When debugging locally, we usually use two instructions:
```shell
.\make.bat clean
.\make.bat html
```
Note:
- Make sure the current path is under the `docs` folder and have installed all things in `requirements.txt`.
- `clean` operation must be performed before `build`, otherwise undetectable errors may occur.
## Push to GitHub
In order to simplify the code review process and reduce `.git` size, changes to the `_build` folder are usually not logged.
(Check the `.gitignore` file in the root path of the project and find `docs/_build/` line for Sphinx documentation)
Only the contents in the `_source` folder will be submitted to GitHub (unless `_template` or `_theme` is used).
## Build and Host on Readthedocs
Read the Docs detects changes to the documentation source through webhooks.
After the source code is updated, you need to check whether the documentation hosted on Read the Docs was built successfully.
================================================
FILE: docs/requirements.txt
================================================
sphinx
sphinx_rtd_theme
recommonmark
sphinx-markdown-tables
================================================
FILE: openvqa/core/base_cfgs.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
from openvqa.core.path_cfgs import PATH
import os, torch, random
import numpy as np
from types import MethodType
class BaseCfgs(PATH):
    """Base configuration object shared by every model/dataset combination.

    Holds the default value of every runtime hyper-parameter. Concrete model
    configs (YML files) and command-line args override these attributes;
    ``proc()`` then validates the final configuration and derives dependent
    settings (splits, loss, optimizer defaults, etc.).
    """

    def __init__(self):
        super(BaseCfgs, self).__init__()

        # Set Devices
        # If use multi-gpu training, you can set e.g.'0, 1, 2' instead
        self.GPU = '0'

        # Set Seed For CPU And GPUs
        self.SEED = random.randint(0, 9999999)

        # -------------------------
        # ---- Version Control ----
        # -------------------------

        # You can set a name to start new training
        self.VERSION = str(self.SEED)

        # Use checkpoint to resume training
        self.RESUME = False

        # Resume training version or testing version
        self.CKPT_VERSION = self.VERSION

        # Resume training epoch or testing epoch
        self.CKPT_EPOCH = 0

        # if set 'CKPT_PATH', -> 'CKPT_VERSION' and 'CKPT_EPOCH' will not work any more
        self.CKPT_PATH = None

        # Print loss every iteration
        self.VERBOSE = True

        # ------------------------------
        # ---- Data Provider Params ----
        # ------------------------------

        self.MODEL = ''
        self.MODEL_USE = ''
        self.DATASET = ''

        # Run as 'train' 'val' or 'test'
        self.RUN_MODE = ''

        # Set True to evaluate offline when an epoch finished
        # (only work when train with 'train' split)
        self.EVAL_EVERY_EPOCH = True

        # Set True to save the prediction vector
        # (use in ensemble)
        self.TEST_SAVE_PRED = False

        # A external method to set train split
        # will override the SPLIT['train']
        self.TRAIN_SPLIT = 'train'

        # Set True to use pretrained GloVe word embedding
        # (GloVe: spaCy https://spacy.io/)
        self.USE_GLOVE = True

        # Word embedding matrix size
        # (token size x WORD_EMBED_SIZE)
        self.WORD_EMBED_SIZE = 300

        # All features size, per dataset
        # (each entry is a (num_items, feat_dim) pair)
        self.FEAT_SIZE = {
            'vqa': {
                'FRCN_FEAT_SIZE': (100, 2048),
                'BBOX_FEAT_SIZE': (100, 5),
            },
            'gqa': {
                'FRCN_FEAT_SIZE': (100, 2048),
                'GRID_FEAT_SIZE': (49, 2048),
                'BBOX_FEAT_SIZE': (100, 5),
            },
            'clevr': {
                'GRID_FEAT_SIZE': (196, 1024),
            },
        }

        # Set if bbox_feat need be normalize by image size, default: False
        self.BBOX_NORMALIZE = False

        # Default training batch size: 64
        self.BATCH_SIZE = 64

        # Multi-thread I/O
        self.NUM_WORKERS = 8

        # Use pin memory
        # (Warning: pin memory can accelerate GPU loading but may
        # increase the CPU memory usage when NUM_WORKS is big)
        self.PIN_MEM = True

        # Large model can not training with batch size 64
        # Gradient accumulate can split batch to reduce gpu memory usage
        # (Warning: BATCH_SIZE should be divided by GRAD_ACCU_STEPS)
        self.GRAD_ACCU_STEPS = 1

        # --------------------------
        # ---- Optimizer Params ----
        # --------------------------

        # Define the loss function
        '''
        Loss(case-sensitive):
        'ce'    : Cross Entropy -> NLLLoss(LogSoftmax(output), label) = CrossEntropyLoss(output, label)
        'bce'   : Binary Cross Entropy -> BCELoss(Sigmoid(output), label) = BCEWithLogitsLoss(output, label)
        'kld'   : Kullback-Leibler Divergence -> KLDivLoss(LogSoftmax(output), Softmax(label))
        'mse'   : Mean Squared Error -> MSELoss(output, label)

        Reduction(case-sensitive):
        'none': no reduction will be applied
        'elementwise_mean': the sum of the output will be divided by the number of elements in the output
        'sum': the output will be summed
        '''
        self.LOSS_FUNC = ''
        self.LOSS_REDUCTION = ''

        # The base learning rate
        self.LR_BASE = 0.0001

        # Learning rate decay ratio
        self.LR_DECAY_R = 0.2

        # Learning rate decay at {x, y, z...} epoch
        self.LR_DECAY_LIST = [10, 12]

        # Warmup epoch lr*{1/(n+1), 2/(n+1), ... , n/(n+1)}
        self.WARMUP_EPOCH = 3

        # Max training epoch
        self.MAX_EPOCH = 13

        # Gradient clip
        # (default: -1 means not using)
        self.GRAD_NORM_CLIP = -1

        # Optimizer
        '''
        Optimizer(case-sensitive):
        'Adam'      : default -> {betas:(0.9, 0.999), eps:1e-8, weight_decay:0, amsgrad:False}
        'Adamax'    : default -> {betas:(0.9, 0.999), eps:1e-8, weight_decay:0}
        'RMSprop'   : default -> {alpha:0.99, eps:1e-8, weight_decay:0, momentum:0, centered:False}
        'SGD'       : default -> {momentum:0, dampening:0, weight_decay:0, nesterov:False}
        'Adadelta'  : default -> {rho:0.9, eps:1e-6, weight_decay:0}
        'Adagrad'   : default -> {lr_decay:0, weight_decay:0, initial_accumulator_value:0}

        In YML files:
        If you want to self-define the optimizer parameters, set a dict named OPT_PARAMS contains the keys you want to modify.
        !!! Warning: keys: ['params, 'lr'] should not be set.
        !!! Warning: To avoid ambiguity, the value of keys should be defined as string type.
        If you not define the OPT_PARAMS, all parameters of optimizer will be set as default.
        Example:
        mcan_small.yml ->
            OPT: Adam
            OPT_PARAMS: {betas: '(0.9, 0.98)', eps: '1e-9'}
        '''
        # case-sensitive
        self.OPT = ''
        self.OPT_PARAMS = {}

    def str_to_bool(self, args):
        """Convert string-valued boolean CLI args to real booleans in place.

        Returns the same args namespace with each listed attribute replaced
        by ``eval()`` of its string value.
        """
        bool_list = [
            'EVAL_EVERY_EPOCH',
            'TEST_SAVE_PRED',
            'RESUME',
            'PIN_MEM',
            'VERBOSE',
        ]

        for arg in dir(args):
            if arg in bool_list and getattr(args, arg) is not None:
                # NOTE(review): eval() on a CLI-supplied string; assumes the
                # value is exactly 'True' or 'False' -- anything else either
                # raises or injects arbitrary code.
                setattr(args, arg, eval(getattr(args, arg)))

        return args

    def parse_to_dict(self, args):
        """Collect all public, non-method, non-None attributes of the args
        namespace into a plain dict (used to merge CLI args into the cfg)."""
        args_dict = {}
        for arg in dir(args):
            if not arg.startswith('_') and not isinstance(getattr(args, arg), MethodType):
                if getattr(args, arg) is not None:
                    args_dict[arg] = getattr(args, arg)

        return args_dict

    def add_args(self, args_dict):
        """Set every key/value pair of args_dict as an attribute on this cfg,
        overriding any default defined in __init__."""
        for arg in args_dict:
            setattr(self, arg, args_dict[arg])

    def proc(self):
        """Validate the configuration and derive all dependent settings.

        Must be called after all overrides are applied; mutates this object
        (devices, seeds, splits, loss tables, optimizer defaults) and exits
        the process on invalid optimizer parameters.
        """
        assert self.RUN_MODE in ['train', 'val', 'test']

        # ------------ Devices setup
        # Restrict visible GPUs before torch initializes CUDA.
        os.environ['CUDA_VISIBLE_DEVICES'] = self.GPU
        self.N_GPU = len(self.GPU.split(','))
        self.DEVICES = [_ for _ in range(self.N_GPU)]
        torch.set_num_threads(2)

        # ------------ Path check
        self.check_path(self.DATASET)

        # ------------ Model setup (Deprecated)
        # self.MODEL_USE = self.MODEL.split('_')[0]

        # ------------ Seed setup
        # fix pytorch seed
        torch.manual_seed(self.SEED)
        if self.N_GPU < 2:
            torch.cuda.manual_seed(self.SEED)
        else:
            torch.cuda.manual_seed_all(self.SEED)
        torch.backends.cudnn.deterministic = True

        # fix numpy seed
        np.random.seed(self.SEED)

        # fix random seed
        random.seed(self.SEED)

        if self.CKPT_PATH is not None:
            print("Warning: you are now using 'CKPT_PATH' args, "
                  "'CKPT_VERSION' and 'CKPT_EPOCH' will not work")
            # Derive a unique version tag from the checkpoint file name so
            # result files from different runs do not collide.
            self.CKPT_VERSION = self.CKPT_PATH.split('/')[-1] + '_' + str(random.randint(0, 9999999))

        # ------------ Split setup
        self.SPLIT = self.SPLITS[self.DATASET]
        self.SPLIT['train'] = self.TRAIN_SPLIT
        # Disable per-epoch offline evaluation when the val split is part of
        # the training data, or when we are not training at all.
        if self.SPLIT['val'] in self.SPLIT['train'].split('+') or self.RUN_MODE not in ['train']:
            self.EVAL_EVERY_EPOCH = False

        if self.RUN_MODE not in ['test']:
            self.TEST_SAVE_PRED = False

        # ------------ Gradient accumulate setup
        assert self.BATCH_SIZE % self.GRAD_ACCU_STEPS == 0
        self.SUB_BATCH_SIZE = int(self.BATCH_SIZE / self.GRAD_ACCU_STEPS)

        # Set small eval batch size will reduce gpu memory usage
        self.EVAL_BATCH_SIZE = int(self.SUB_BATCH_SIZE / 2)

        # ------------ Loss process
        assert self.LOSS_FUNC in ['ce', 'bce', 'kld', 'mse']
        assert self.LOSS_REDUCTION in ['none', 'elementwise_mean', 'sum']

        # Short loss key -> torch.nn loss class name.
        self.LOSS_FUNC_NAME_DICT = {
            'ce': 'CrossEntropyLoss',
            'bce': 'BCEWithLogitsLoss',
            'kld': 'KLDivLoss',
            'mse': 'MSELoss',
        }

        # Per-loss [prediction, label] non-linearities applied before the
        # loss ('flat' / 'log_softmax' / None).
        self.LOSS_FUNC_NONLINEAR = {
            'ce': [None, 'flat'],
            'bce': [None, None],
            'kld': ['log_softmax', None],
            'mse': [None, None],
        }

        # Which losses each dataset supports.
        self.TASK_LOSS_CHECK = {
            'vqa': ['bce', 'kld'],
            'gqa': ['ce'],
            'clevr': ['ce'],
        }

        assert self.LOSS_FUNC in self.TASK_LOSS_CHECK[self.DATASET], \
            self.DATASET + 'task only support' + str(self.TASK_LOSS_CHECK[self.DATASET]) + 'loss.' + \
            'Modify the LOSS_FUNC in configs to get a better score.'

        # ------------ Optimizer parameters process
        assert self.OPT in ['Adam', 'Adamax', 'RMSprop', 'SGD', 'Adadelta', 'Adagrad']
        optim = getattr(torch.optim, self.OPT)
        # Introspect the optimizer's __init__ signature to recover its default
        # keyword arguments, skipping (self, params, lr).
        default_params_dict = dict(zip(optim.__init__.__code__.co_varnames[3: optim.__init__.__code__.co_argcount],
                                       optim.__init__.__defaults__[1:]))

        # Local re-implementation shadowing the builtin all() with the same
        # semantics for iterables of truthy/falsy values.
        def all(iterable):
            for element in iterable:
                if not element:
                    return False
            return True

        # Every user-supplied OPT_PARAMS key must be a real keyword argument
        # of the chosen optimizer.
        assert all(list(map(lambda x: x in default_params_dict, self.OPT_PARAMS)))

        for key in self.OPT_PARAMS:
            if isinstance(self.OPT_PARAMS[key], str):
                # Values come from YML as strings (e.g. '(0.9, 0.98)') and are
                # eval'ed here into Python objects.
                self.OPT_PARAMS[key] = eval(self.OPT_PARAMS[key])
            else:
                print("To avoid ambiguity, set the value of 'OPT_PARAMS' to string type")
                exit(-1)
        # Merge: user-provided values override the introspected defaults.
        self.OPT_PARAMS = {**default_params_dict, **self.OPT_PARAMS}

    def __str__(self):
        """Render every public, non-method attribute as '{ NAME }->value' lines."""
        __C_str = ''
        for attr in dir(self):
            if not attr.startswith('__') and not isinstance(getattr(self, attr), MethodType):
                __C_str += '{ %-17s }->' % attr + str(getattr(self, attr)) + '\n'

        return __C_str
#
#
# if __name__ == '__main__':
# __C = Cfgs()
# __C.proc()
================================================
FILE: openvqa/core/base_dataset.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
import numpy as np
import glob, json, torch, random
import torch.utils.data as Data
import torch.nn as nn
from openvqa.utils.feat_filter import feat_filter
class BaseDataSet(Data.Dataset):
    """Abstract VQA dataset.

    Concrete subclasses populate the vocabulary/size attributes and implement
    load_ques_ans() / load_img_feats(); this base class composes their numpy
    outputs into a tuple of torch tensors per sample.
    """

    def __init__(self):
        # Vocabulary and embedding tables (filled in by the subclass).
        self.token_to_ix = None
        self.pretrained_emb = None
        self.ans_to_ix = None
        self.ix_to_ans = None
        # Dataset statistics (filled in by the subclass).
        self.data_size = None
        self.token_size = None
        self.ans_size = None

    def load_ques_ans(self, idx):
        # Subclass hook: must return (ques_ix, ans, image_id) numpy data.
        raise NotImplementedError()

    def load_img_feats(self, idx, iid):
        # Subclass hook: must return (frcn_feat, grid_feat, bbox_feat) arrays.
        raise NotImplementedError()

    def __getitem__(self, idx):
        """Return one sample as a 5-tuple of tensors:
        (frcn_feat, grid_feat, bbox_feat, ques_ix, ans)."""
        ques_ix, ans, iid = self.load_ques_ans(idx)
        frcn, grid, bbox = self.load_img_feats(idx, iid)
        arrays = (frcn, grid, bbox, ques_ix, ans)
        return tuple(torch.from_numpy(arr) for arr in arrays)

    def __len__(self):
        # data_size is set by the subclass; None until then.
        return self.data_size

    def shuffle_list(self, list):
        # In-place shuffle (note: the parameter shadows the builtin `list`).
        random.shuffle(list)
class BaseAdapter(nn.Module):
    """Dataset-dispatching feature adapter.

    Routes construction (__init__) and the forward pass to the dataset
    specific hooks (vqa_*/gqa_*/clevr_*) that each model's Adapter subclass
    implements; exits the process on an unknown dataset name.
    """

    def __init__(self, __C):
        super(BaseAdapter, self).__init__()
        self.__C = __C

        dataset = self.__C.DATASET
        if dataset == 'vqa':
            self.vqa_init(__C)
        elif dataset == 'gqa':
            self.gqa_init(__C)
        elif dataset == 'clevr':
            self.clevr_init(__C)
        else:
            # Unknown dataset name: abort, matching the other loaders.
            exit(-1)

    def vqa_init(self, __C):
        raise NotImplementedError()

    def gqa_init(self, __C):
        raise NotImplementedError()

    def clevr_init(self, __C):
        raise NotImplementedError()

    def forward(self, frcn_feat, grid_feat, bbox_feat):
        """Filter the raw features for the active dataset and delegate to the
        matching *_forward hook."""
        feat_dict = feat_filter(self.__C.DATASET, frcn_feat, grid_feat, bbox_feat)

        dataset = self.__C.DATASET
        if dataset == 'vqa':
            return self.vqa_forward(feat_dict)
        elif dataset == 'gqa':
            return self.gqa_forward(feat_dict)
        elif dataset == 'clevr':
            return self.clevr_forward(feat_dict)
        else:
            exit(-1)

    def vqa_forward(self, feat_dict):
        raise NotImplementedError()

    def gqa_forward(self, feat_dict):
        raise NotImplementedError()

    def clevr_forward(self, feat_dict):
        raise NotImplementedError()
================================================
FILE: openvqa/core/path_cfgs.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
import os
class PATH:
def __init__(self):
self.init_path()
# self.check_path()
def init_path(self):
self.DATA_ROOT = './data'
# self.DATA_ROOT = '/data/datasets'
# self.DATA_ROOT = '/data1/datasets'
# self.DATA_ROOT = '/home/features'
self.DATA_PATH = {
'vqa': self.DATA_ROOT + '/vqa',
'gqa': self.DATA_ROOT + '/gqa',
'clevr': self.DATA_ROOT + '/clevr',
}
self.FEATS_PATH = {
'vqa': {
'train': self.DATA_PATH['vqa'] + '/feats' + '/train2014',
'val': self.DATA_PATH['vqa'] + '/feats' + '/val2014',
'test': self.DATA_PATH['vqa'] + '/feats' + '/test2015',
},
'gqa': {
'default-frcn': self.DATA_PATH['gqa'] + '/feats' + '/gqa-frcn',
'default-grid': self.DATA_PATH['gqa'] + '/feats' + '/gqa-grid',
},
'clevr': {
'train': self.DATA_PATH['clevr'] + '/feats' + '/train',
'val': self.DATA_PATH['clevr'] + '/feats' + '/val',
'test': self.DATA_PATH['clevr'] + '/feats' + '/test',
},
}
self.RAW_PATH = {
'vqa': {
'train': self.DATA_PATH['vqa'] + '/raw' + '/v2_OpenEnded_mscoco_train2014_questions.json',
'train-anno': self.DATA_PATH['vqa'] + '/raw' + '/v2_mscoco_train2014_annotations.json',
'val': self.DATA_PATH['vqa'] + '/raw' + '/v2_OpenEnded_mscoco_val2014_questions.json',
'val-anno': self.DATA_PATH['vqa'] + '/raw' + '/v2_mscoco_val2014_annotations.json',
'vg': self.DATA_PATH['vqa'] + '/raw' + '/VG_questions.json',
'vg-anno': self.DATA_PATH['vqa'] + '/raw' + '/VG_annotations.json',
'test': self.DATA_PATH['vqa'] + '/raw' + '/v2_OpenEnded_mscoco_test2015_questions.json',
},
'gqa': {
'train': self.DATA_PATH['gqa'] + '/raw' + '/questions1.2/train_balanced_questions.json',
'val': self.DATA_PATH['gqa'] + '/raw' + '/questions1.2/val_balanced_questions.json',
'testdev': self.DATA_PATH['gqa'] + '/raw' + '/questions1.2/testdev_balanced_questions.json',
'test': self.DATA_PATH['gqa'] + '/raw' + '/questions1.2/submission_all_questions.json',
'val_all': self.DATA_PATH['gqa'] + '/raw' + '/questions1.2/val_all_questions.json',
'testdev_all': self.DATA_PATH['gqa'] + '/raw' + '/questions1.2/testdev_all_questions.json',
'train_choices': self.DATA_PATH['gqa'] + '/raw' + '/eval/train_choices',
'val_choices': self.DATA_PATH['gqa'] + '/raw' + '/eval/val_choices.json',
},
'clevr': {
'train': self.DATA_PATH['clevr'] + '/raw' + '/questions/CLEVR_train_questions.json',
'val': self.DATA_PATH['clevr'] + '/raw' + '/questions/CLEVR_val_questions.json',
'test': self.DATA_PATH['clevr'] + '/raw' + '/questions/CLEVR_test_questions.json',
},
}
self.SPLITS = {
'vqa': {
'train': '',
'val': 'val',
'test': 'test',
},
'gqa': {
'train': '',
'val': 'testdev',
'test': 'test',
},
'clevr': {
'train': '',
'val': 'val',
'test': 'test',
},
}
self.RESULT_PATH = './results/result_test'
self.PRED_PATH = './results/pred'
self.CACHE_PATH = './results/cache'
self.LOG_PATH = './results/log'
self.CKPTS_PATH = './ckpts'
if 'result_test' not in os.listdir('./results'):
os.mkdir('./results/result_test')
if 'pred' not in os.listdir('./results'):
os.mkdir('./results/pred')
if 'cache' not in os.listdir('./results'):
os.mkdir('./results/cache')
if 'log' not in os.listdir('./results'):
os.mkdir('./results/log')
if 'ckpts' not in os.listdir('./'):
os.mkdir('./ckpts')
def check_path(self, dataset=None):
    """Verify that every configured feature/raw path exists on disk.

    Args:
        dataset: optional dataset key (e.g. 'vqa', 'gqa', 'clevr'). When
            given, only that dataset's paths are checked; otherwise every
            dataset registered in FEATS_PATH / RAW_PATH is checked.

    Exits the process with status -1 on the first missing path.
    """
    def _verify(path_map):
        # Fail fast: report the first missing path and abort the run,
        # exactly as the four original copy-pasted loops did.
        for item in path_map:
            if not os.path.exists(path_map[item]):
                print(path_map[item], 'NOT EXIST')
                exit(-1)

    print('Checking dataset ........')
    if dataset:
        _verify(self.FEATS_PATH[dataset])
        _verify(self.RAW_PATH[dataset])
    else:
        # Check all feature paths first, then all raw paths (original order).
        for ds in self.FEATS_PATH:
            _verify(self.FEATS_PATH[ds])
        for ds in self.RAW_PATH:
            _verify(self.RAW_PATH[ds])
    print('Finished!')
    print('')
================================================
FILE: openvqa/datasets/clevr/clevr_loader.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
import numpy as np
import glob, json, re, en_vectors_web_lg
from openvqa.core.base_dataset import BaseDataSet
from openvqa.utils.ans_punct import prep_ans
class DataSet(BaseDataSet):
    """CLEVR dataset: loads the raw question JSONs, builds the question-token
    and answer vocabularies, and maps image ids to pre-extracted grid-feature
    files (.npz)."""

    def __init__(self, __C):
        super(DataSet, self).__init__()
        self.__C = __C

        # --------------------------
        # ---- Raw data loading ----
        # --------------------------

        # Parse each raw question file exactly once and cache the result.
        # The original code re-read the same JSON up to three times (once
        # for question stats, once for answer stats, once per run split)
        # and never closed the file handles.
        def _load_questions(split):
            with open(__C.RAW_PATH[__C.DATASET][split], 'r') as f:
                return json.load(f)['questions']

        ques_cache = {split: _load_questions(split) for split in ('train', 'val', 'test')}

        # Question-token statistics are gathered over every split.
        stat_ques_list = ques_cache['train'] + ques_cache['val'] + ques_cache['test']

        # Answer statistics come only from annotated splits (test carries no answers).
        stat_ans_list = ques_cache['train'] + ques_cache['val']

        # Loading question and feature-path lists for the active run splits
        self.ques_list = []
        grid_feat_path_list = []
        split_list = __C.SPLIT[__C.RUN_MODE].split('+')
        for split in split_list:
            self.ques_list += ques_cache[split] if split in ques_cache else _load_questions(split)
            grid_feat_path_list += glob.glob(__C.FEATS_PATH[__C.DATASET][split] + '/*.npz')

        # Define run data size
        self.data_size = len(self.ques_list)
        print(' ========== Dataset size:', self.data_size)

        # ------------------------
        # ---- Data statistic ----
        # ------------------------

        # {image id} -> {image feature absolute path}
        self.iid_to_grid_feat_path = self.img_feat_path_load(grid_feat_path_list)

        # Tokenize
        self.token_to_ix, self.pretrained_emb, max_token = self.tokenize(stat_ques_list, __C.USE_GLOVE)
        self.token_size = len(self.token_to_ix)
        print(' ========== Question token vocab size:', self.token_size)

        # Pad/trim every question to the longest length seen across splits.
        # (The previous `self.max_token = -1` followed by `if == -1` was dead
        # code: the sentinel was always immediately replaced by max_token.)
        self.max_token = max_token
        print('Max token length:', max_token, 'Trimmed to:', self.max_token)

        # Answers statistic
        self.ans_to_ix, self.ix_to_ans = self.ans_stat(stat_ans_list)
        self.ans_size = len(self.ans_to_ix)
        print(' ========== Answer token vocab size:', self.ans_size)
        print('Finished!')
        print('')

    def img_feat_path_load(self, path_list):
        """Map each feature file's image id (the file stem) to its path."""
        iid_to_path = {}
        for ix, path in enumerate(path_list):
            # e.g. '.../feats/val/12345.npz' -> '12345'
            iid = path.split('/')[-1].split('.')[0]
            iid_to_path[iid] = path
        return iid_to_path

    def tokenize(self, stat_ques_list, use_glove):
        """Build the question vocabulary (plus GloVe embeddings if requested).

        Returns:
            token_to_ix: word -> index, with PAD/UNK/CLS reserved at 0/1/2.
            pretrained_emb: ndarray of per-token GloVe vectors (empty array
                when use_glove is False).
            max_token: length of the longest tokenized question.
        """
        token_to_ix = {
            'PAD': 0,
            'UNK': 1,
            'CLS': 2,
        }

        spacy_tool = None
        pretrained_emb = []
        if use_glove:
            spacy_tool = en_vectors_web_lg.load()
            pretrained_emb.append(spacy_tool('PAD').vector)
            pretrained_emb.append(spacy_tool('UNK').vector)
            pretrained_emb.append(spacy_tool('CLS').vector)

        max_token = 0
        for ques in stat_ques_list:
            # Strip punctuation, split hyphens/slashes, lowercase, tokenize.
            words = re.sub(
                r"([.,'!?\"()*#:;])",
                '',
                ques['question'].lower()
            ).replace('-', ' ').replace('/', ' ').split()

            if len(words) > max_token:
                max_token = len(words)

            for word in words:
                if word not in token_to_ix:
                    token_to_ix[word] = len(token_to_ix)
                    if use_glove:
                        pretrained_emb.append(spacy_tool(word).vector)

        pretrained_emb = np.array(pretrained_emb)

        return token_to_ix, pretrained_emb, max_token

    def ans_stat(self, stat_ans_list):
        """Build the answer vocabulary (both directions) from annotated questions."""
        ans_to_ix = {}
        ix_to_ans = {}

        for ans_stat in stat_ans_list:
            ans = ans_stat['answer']
            if ans not in ans_to_ix:
                ix_to_ans[len(ans_to_ix)] = ans
                ans_to_ix[ans] = len(ans_to_ix)

        return ans_to_ix, ix_to_ans

    # ----------------------------------------------
    # ---- Real-Time Processing Implementations ----
    # ----------------------------------------------

    def load_ques_ans(self, idx):
        """Return (question index vector, answer index vector, image id) for idx."""
        ques = self.ques_list[idx]
        iid = str(ques['image_index'])

        # Process question
        ques_ix_iter = self.proc_ques(ques, self.token_to_ix, max_token=self.max_token)
        ans_iter = np.zeros(1)

        if self.__C.RUN_MODE in ['train']:
            # Process answers (only training-time questions carry answers here).
            ans = ques['answer']
            ans_iter = self.proc_ans(ans, self.ans_to_ix)

        return ques_ix_iter, ans_iter, iid

    def load_img_feats(self, idx, iid):
        """Load the grid features for image `iid`; frcn/bbox slots are unused
        for CLEVR and returned as placeholder arrays."""
        # Use a context manager so the .npz file handle is closed promptly
        # instead of being leaked (np.load keeps the archive open otherwise).
        with np.load(self.iid_to_grid_feat_path[iid]) as grid_feat:
            grid_feat_iter = grid_feat['x']

        return np.zeros(1), grid_feat_iter, np.zeros(1)

    # ------------------------------------
    # ---- Real-Time Processing Utils ----
    # ------------------------------------

    def proc_ques(self, ques, token_to_ix, max_token):
        """Convert a question dict into a fixed-length index vector (0-padded,
        truncated at max_token, unknown words mapped to UNK)."""
        ques_ix = np.zeros(max_token, np.int64)

        words = re.sub(
            r"([.,'!?\"()*#:;])",
            '',
            ques['question'].lower()
        ).replace('-', ' ').replace('/', ' ').split()

        for ix, word in enumerate(words):
            ques_ix[ix] = token_to_ix.get(word, token_to_ix['UNK'])
            if ix + 1 == max_token:
                break

        return ques_ix

    def proc_ans(self, ans, ans_to_ix):
        """Convert an answer string into a length-1 index vector."""
        ans_ix = np.zeros(1, np.int64)
        ans_ix[0] = ans_to_ix[ans]
        return ans_ix
================================================
FILE: openvqa/datasets/clevr/eval/result_eval.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
import json, pickle
import numpy as np
from collections import defaultdict
def eval(__C, dataset, ans_ix_list, pred_list, result_eval_file, ensemble_file, log_file, valid=False):
    """Write CLEVR predictions to a text file and (optionally) score them.

    Args:
        __C: global config (uses TEST_SAVE_PRED, RAW_PATH, DATASET, SPLIT).
        dataset: loaded DataSet (uses data_size, ans_size, ix_to_ans).
        ans_ix_list: predicted answer indices, one per question.
        pred_list: raw prediction vectors; pickled only if TEST_SAVE_PRED.
        result_eval_file: output path ('.txt' is appended).
        ensemble_file: pickle path for the raw prediction vectors.
        log_file: accuracy log path (appended to).
        valid: when True, compare predictions against the validation
            annotations and log per-question-type accuracies.
    """
    result_eval_file = result_eval_file + '.txt'
    ans_size = dataset.ans_size

    # One predicted answer string per line, in question order.
    with open(result_eval_file, 'w') as result_eval_file_fs:
        for qix in range(dataset.data_size):
            result_eval_file_fs.write(dataset.ix_to_ans[ans_ix_list[qix]])
            result_eval_file_fs.write("\n")

    if __C.TEST_SAVE_PRED:
        print('Save the prediction vector to file: {}'.format(ensemble_file))

        pred_list = np.array(pred_list).reshape(-1, ans_size)
        result_pred = [{
            'pred': pred_list[qix],
            'qid': qix
        } for qix in range(dataset.data_size)]

        with open(ensemble_file, 'wb+') as ensemble_fs:
            pickle.dump(result_pred, ensemble_fs, protocol=-1)

    if valid:
        # Ground-truth answers come from the raw validation question file.
        ques_file_path = __C.RAW_PATH[__C.DATASET][__C.SPLIT['val']]

        true_answers = []
        with open(ques_file_path, 'r') as f:
            questions = json.load(f)['questions']
            for ques in questions:
                true_answers.append(ques['answer'])

        correct_by_q_type = defaultdict(list)

        # Load predicted answers (one per line, written above).
        predicted_answers = []
        with open(result_eval_file, 'r') as f:
            for line in f:
                predicted_answers.append(line.strip())

        num_true, num_pred = len(true_answers), len(predicted_answers)
        assert num_true == num_pred, 'Expected %d answers but got %d' % (
            num_true, num_pred)

        # Bucket per-question correctness by the final program function,
        # which identifies the CLEVR question type.
        for i, (true_answer, predicted_answer) in enumerate(zip(true_answers, predicted_answers)):
            correct = 1 if true_answer == predicted_answer else 0
            correct_by_q_type['Overall'].append(correct)
            q_type = questions[i]['program'][-1]['function']
            correct_by_q_type[q_type].append(correct)

        print('Write to log file: {}'.format(log_file))
        with open(log_file, 'a+') as logfile:
            # q_type -> [num correct, num total]
            q_dict = {}
            for q_type, vals in sorted(correct_by_q_type.items()):
                vals = np.asarray(vals)
                q_dict[q_type] = [vals.sum(), vals.shape[0]]

            # Score Summary: aggregate the fine-grained program types into
            # the reported groups (one loop instead of three copies).
            score_type = ['Overall', 'Count', 'Exist', 'Compare_Numbers', 'Query_Attribute', 'Compare_Attribute']
            group_members = {
                'Compare_Numbers': ['greater_than', 'less_than'],
                'Query_Attribute': ['query_color', 'query_material', 'query_shape', 'query_size'],
                'Compare_Attribute': ['equal_color', 'equal_integer', 'equal_material', 'equal_shape', 'equal_size'],
            }

            score_dict = {
                'Overall': q_dict['Overall'],
                'Count': q_dict['count'],
                'Exist': q_dict['exist'],
            }
            for group, members in group_members.items():
                correct_num = sum(q_dict[m][0] for m in members)
                total_num = sum(q_dict[m][1] for m in members)
                score_dict[group] = [correct_num, total_num]

            for q_type in score_type:
                val, tol = score_dict[q_type]
                print(q_type, '%d / %d = %.2f' % (val, tol, 100.0 * val / tol))
                logfile.write(q_type + ' : ' + '%d / %d = %.2f\n' % (val, tol, 100.0 * val / tol))
            logfile.write("\n")
================================================
FILE: openvqa/datasets/dataset_loader.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
from importlib import import_module
class DatasetLoader:
    """Dynamically resolves the dataset class that matches __C.DATASET by
    importing 'openvqa.datasets.<dataset>.<dataset>_loader'."""

    def __init__(self, __C):
        self.__C = __C
        self.dataset = __C.DATASET
        # Build the dotted module path for the dataset-specific loader.
        dataset_moudle_path = '.'.join(
            ['openvqa', 'datasets', self.dataset, self.dataset + '_loader'])
        self.dataset_moudle = import_module(dataset_moudle_path)

    def DataSet(self):
        # Every <dataset>_loader module exposes a DataSet class.
        return self.dataset_moudle.DataSet(self.__C)
class EvalLoader:
    """Dynamically resolves the result-evaluation module for __C.DATASET
    ('openvqa.datasets.<dataset>.eval.result_eval')."""

    def __init__(self, __C):
        self.__C = __C
        self.dataset = __C.DATASET
        # Build the dotted module path for the dataset-specific evaluator.
        eval_moudle_path = '.'.join(
            ['openvqa', 'datasets', self.dataset, 'eval', 'result_eval'])
        self.eval_moudle = import_module(eval_moudle_path)

    def eval(self, __arg1, __arg2, __arg3, __arg4, __arg5, __arg6, __arg7):
        # Delegate to the dataset-specific result_eval.eval, config first.
        return self.eval_moudle.eval(
            self.__C, __arg1, __arg2, __arg3, __arg4, __arg5, __arg6, __arg7)
================================================
FILE: openvqa/datasets/gqa/dicts.json
================================================
[{"yes": 0, "pipe": 1, "no": 2, "large": 3, "girl": 4, "bed": 5, "sofa": 6, "right": 7, "dark": 8, "cabinet": 9, "left": 10, "bird": 11, "brick": 12, "rock": 13, "children": 14, "brown": 15, "blond": 16, "pants": 17, "top": 18, "horse": 19, "blue": 20, "hot dog": 21, "banana": 22, "laptop": 23, "desk": 24, "bottom": 25, "eating": 26, "man": 27, "grass": 28, "dog": 29, "silver": 30, "bag": 31, "pedestrian": 32, "cabinets": 33, "green": 34, "window": 35, "giraffe": 36, "tiny": 37, "child": 38, "yellow": 39, "wooden": 40, "parking meter": 41, "fries": 42, "plants": 43, "kiosk": 44, "orange": 45, "van": 46, "shirt": 47, "coat": 48, "controller": 49, "bench": 50, "television": 51, "black": 52, "carrot": 53, "sandwich": 54, "city": 55, "street": 56, "couch": 57, "closed": 58, "field": 59, "pink": 60, "boy": 61, "lady": 62, "tomato": 63, "horses": 64, "white": 65, "beef": 66, "cat": 67, "cutting board": 68, "overcast": 69, "vegetables": 70, "gray": 71, "onions": 72, "wood": 73, "toaster": 74, "bread": 75, "fence": 76, "player": 77, "roof": 78, "meadow": 79, "baby": 80, "calf": 81, "branch": 82, "street sign": 83, "backpack": 84, "jacket": 85, "teddy bear": 86, "game controller": 87, "herd": 88, "zoo": 89, "truck": 90, "red": 91, "printer": 92, "yard": 93, "end table": 94, "wetsuit": 95, "building": 96, "carrots": 97, "train": 98, "pans": 99, "giraffes": 100, "coffee maker": 101, "bathroom": 102, "woman": 103, "monitor": 104, "sheep": 105, "trees": 106, "dining table": 107, "park": 108, "nightstand": 109, "car": 110, "table": 111, "bicycle": 112, "donkey": 113, "cell phone": 114, "teal": 115, "chair": 116, "bathtub": 117, "waiting": 118, "purple": 119, "small": 120, "airport": 121, "colorful": 122, "stuffed bear": 123, "light brown": 124, "piano": 125, "lying": 126, "clock": 127, "pavement": 128, "snow": 129, "lemon": 130, "sandy": 131, "shelf": 132, "cheese": 133, "light blue": 134, "plant": 135, "bowl": 136, "bus": 137, "dishwasher": 138, "pepperoni": 139, "pole": 140, 
"bear": 141, "monkey": 142, "shore": 143, "hedges": 144, "wall": 145, "elephant": 146, "sidewalk": 147, "swimming pool": 148, "blender": 149, "bookshelves": 150, "mountain": 151, "pizza": 152, "birds": 153, "people": 154, "radiator": 155, "metal": 156, "striped": 157, "playing": 158, "kitten": 159, "dirty": 160, "runway": 161, "salad": 162, "sailboat": 163, "zebra": 164, "counter": 165, "lettuce": 166, "seat": 167, "asparagus": 168, "color": 169, "plastic": 170, "racket": 171, "dress": 172, "frisbee": 173, "standing": 174, "sea": 175, "keyboard": 176, "motorcycle": 177, "phone": 178, "tree": 179, "computer": 180, "pointing": 181, "iron": 182, "skis": 183, "blouse": 184, "onion": 185, "bat": 186, "light switch": 187, "hook": 188, "mirror": 189, "surfboard": 190, "candle": 191, "catcher": 192, "bricks": 193, "newspaper": 194, "handbag": 195, "knife": 196, "branches": 197, "cap": 198, "stove": 199, "pots": 200, "lawn": 201, "computer mouse": 202, "chef": 203, "steps": 204, "tan": 205, "eggplant": 206, "mountains": 207, "open": 208, "refrigerator": 209, "oranges": 210, "snowboarding": 211, "oven": 212, "utensil": 213, "bedroom": 214, "olives": 215, "little": 216, "cow": 217, "boat": 218, "microwave": 219, "pizza oven": 220, "taxi": 221, "young": 222, "drawers": 223, "tablet": 224, "choppy": 225, "foggy": 226, "apron": 227, "syrup": 228, "plate": 229, "coffee cup": 230, "taking picture": 231, "shoe": 232, "basket": 233, "pigeon": 234, "water": 235, "stop sign": 236, "mailbox": 237, "leather": 238, "remote control": 239, "home plate": 240, "spinach": 241, "tea kettle": 242, "cereal": 243, "tall": 244, "helmet": 245, "celery": 246, "vase": 247, "alien": 248, "collar": 249, "shorts": 250, "suit": 251, "supermarket": 252, "carpet": 253, "donuts": 254, "batter": 255, "outdoors": 256, "girls": 257, "skier": 258, "entertainment center": 259, "floor": 260, "chain": 261, "lamp": 262, "rope": 263, "pepper": 264, "tomatoes": 265, "drawer": 266, "forest": 267, "cars": 268, 
"balcony": 269, "guy": 270, "boats": 271, "scooter": 272, "flower": 273, "wii controller": 274, "down": 275, "shopping bag": 276, "grape": 277, "ski": 278, "ocean": 279, "comforter": 280, "mattress": 281, "lamb": 282, "customer": 283, "pan": 284, "highway": 285, "long": 286, "display": 287, "shower": 288, "nuts": 289, "sign": 290, "clear": 291, "letters": 292, "surfer": 293, "mannequin": 294, "checkered": 295, "ground": 296, "fisherman": 297, "egg": 298, "zebras": 299, "shoes": 300, "dish": 301, "coffee": 302, "paper": 303, "store": 304, "thin": 305, "glass": 306, "hat": 307, "station": 308, "spatula": 309, "train car": 310, "skateboard": 311, "lake": 312, "airplane": 313, "concrete": 314, "stainless steel": 315, "bushes": 316, "hill": 317, "road": 318, "spoon": 319, "lobby": 320, "indoors": 321, "armchair": 322, "flowers": 323, "broccoli": 324, "suv": 325, "umbrella": 326, "glasses": 327, "ham": 328, "rubber duck": 329, "croissants": 330, "carriage": 331, "burger": 332, "beach": 333, "pen": 334, "laptops": 335, "athlete": 336, "pickles": 337, "dark brown": 338, "trains": 339, "living room": 340, "screen": 341, "bikes": 342, "beige": 343, "napkin": 344, "gravel": 345, "papers": 346, "door": 347, "gold": 348, "cloudy": 349, "tofu": 350, "cows": 351, "hillside": 352, "sun": 353, "behind": 354, "jet": 355, "mushroom": 356, "material": 357, "snowboard": 358, "produce": 359, "dvd player": 360, "camera": 361, "sky": 362, "bun": 363, "walkway": 364, "vest": 365, "watch": 366, "sunny": 367, "locomotive": 368, "sausage": 369, "shop": 370, "ball": 371, "sneakers": 372, "sea foam": 373, "clouds": 374, "leaves": 375, "dresser": 376, "chili": 377, "gate": 378, "flag": 379, "stick": 380, "leggings": 381, "rubber": 382, "mugs": 383, "parsley": 384, "merchandise": 385, "grill": 386, "shallow": 387, "medicine cabinet": 388, "chairs": 389, "ceiling": 390, "curtains": 391, "peppers": 392, "huge": 393, "kettle": 394, "crouching": 395, "deer": 396, "picture": 397, "passenger": 398, 
"bears": 399, "ship": 400, "belt": 401, "umpire": 402, "short": 403, "driver": 404, "thick": 405, "reading": 406, "tape": 407, "doll": 408, "bookshelf": 409, "basil": 410, "tongs": 411, "cream colored": 412, "oil": 413, "flames": 414, "gift": 415, "rocks": 416, "apple": 417, "blanket": 418, "menu": 419, "lego": 420, "wine": 421, "kite": 422, "aquarium": 423, "swan": 424, "mask": 425, "boot": 426, "dessert": 427, "wide": 428, "headphones": 429, "baseball bat": 430, "tables": 431, "drain": 432, "logo": 433, "tie": 434, "crates": 435, "blueberries": 436, "worker": 437, "chocolate": 438, "kiwi": 439, "cookie": 440, "wine glass": 441, "boys": 442, "jumping": 443, "cloudless": 444, "rain": 445, "cord": 446, "books": 447, "speaker": 448, "heater": 449, "mickey mouse": 450, "cake": 451, "microphone": 452, "computer desk": 453, "coleslaw": 454, "wet": 455, "video games": 456, "uncomfortable": 457, "canoe": 458, "hotel room": 459, "giant": 460, "dolls": 461, "desserts": 462, "pear": 463, "roses": 464, "apples": 465, "cooker": 466, "tablecloth": 467, "unpeeled": 468, "coffee pot": 469, "bucket": 470, "buildings": 471, "gas station": 472, "staring": 473, "snow pants": 474, "gloves": 475, "couple": 476, "fireplace": 477, "stone": 478, "skate park": 479, "statue": 480, "cyclist": 481, "money": 482, "jeans": 483, "cup": 484, "cinnamon": 485, "pie": 486, "tissues": 487, "fork": 488, "at camera": 489, "rabbit": 490, "off": 491, "stuffed animal": 492, "sweet potato": 493, "skinny": 494, "duck": 495, "deep": 496, "pedestrians": 497, "sleeveless": 498, "maroon": 499, "wheelchair": 500, "skillet": 501, "snowsuit": 502, "potatoes": 503, "glove": 504, "dogs": 505, "elephants": 506, "chimney": 507, "pillow": 508, "stickers": 509, "suitcase": 510, "toilet paper": 511, "lion": 512, "drink": 513, "potato": 514, "stormy": 515, "bananas": 516, "empty": 517, "grassy": 518, "scarf": 519, "trailer": 520, "sandals": 521, "aircraft": 522, "pot": 523, "parking lot": 524, "bunny": 525, "puppy": 526, 
"sauce": 527, "ladder": 528, "smoke": 529, "t shirt": 530, "biker": 531, "fish": 532, "tangerine": 533, "coffee table": 534, "mixer": 535, "beer": 536, "tinted": 537, "sweater": 538, "doorway": 539, "berries": 540, "cooking": 541, "meats": 542, "towel": 543, "fire truck": 544, "round": 545, "donut": 546, "short sleeved": 547, "toast": 548, "hotel": 549, "squash": 550, "raincoat": 551, "talking": 552, "bagel": 553, "sour cream": 554, "lid": 555, "bandana": 556, "tissue": 557, "shelves": 558, "cupcake": 559, "men": 560, "sand": 561, "above": 562, "router": 563, "bridge": 564, "trunks": 565, "steak": 566, "long sleeved": 567, "wires": 568, "pancakes": 569, "moon": 570, "up": 571, "noodles": 572, "arrow": 573, "goose": 574, "american flag": 575, "wagon": 576, "beach umbrella": 577, "airplanes": 578, "river": 579, "heart": 580, "cone": 581, "countertop": 582, "poster": 583, "meat": 584, "drinking": 585, "rice": 586, "toilet": 587, "running": 588, "clean": 589, "roadway": 590, "dishes": 591, "fake": 592, "path": 593, "players": 594, "toothbrush": 595, "corn": 596, "skateboarder": 597, "controllers": 598, "tennis": 599, "dark blue": 600, "number": 601, "on": 602, "pastries": 603, "graffiti": 604, "place": 605, "shuttle": 606, "barrier": 607, "mustard": 608, "tray": 609, "calculator": 610, "sleeping": 611, "dispenser": 612, "cupboard": 613, "skateboarding": 614, "shape": 615, "ketchup": 616, "strawberries": 617, "kitchen": 618, "buoy": 619, "cups": 620, "skater": 621, "flower pot": 622, "match": 623, "buses": 624, "ribs": 625, "socks": 626, "grater": 627, "pipes": 628, "railroad": 629, "tree branches": 630, "jersey": 631, "air conditioner": 632, "train station": 633, "pastry": 634, "bookcase": 635, "chinese food": 636, "outfit": 637, "bike": 638, "nut": 639, "onion rings": 640, "museum": 641, "tractor": 642, "toddler": 643, "cooking pot": 644, "mushrooms": 645, "mat": 646, "bleachers": 647, "stained": 648, "walking": 649, "full": 650, "uniform": 651, "power line": 652, 
"pumpkin": 653, "watermelon": 654, "crust": 655, "bush": 656, "picture frame": 657, "looking down": 658, "grapes": 659, "character": 660, "honey": 661, "olive": 662, "power lines": 663, "serving tray": 664, "dragon": 665, "toys": 666, "helicopter": 667, "bacon": 668, "eye glasses": 669, "sword": 670, "garage": 671, "women": 672, "sitting": 673, "light bulb": 674, "chicken breast": 675, "video camera": 676, "decorations": 677, "platform": 678, "hamburger": 679, "boots": 680, "porcelain": 681, "bare": 682, "soda": 683, "light": 684, "radio": 685, "stadium": 686, "pine tree": 687, "action figure": 688, "coach": 689, "pasture": 690, "seal": 691, "cooler": 692, "guitar": 693, "sandwiches": 694, "vegetable": 695, "curly": 696, "front": 697, "toothpaste": 698, "polo shirt": 699, "dotted": 700, "restaurant": 701, "life preserver": 702, "rackets": 703, "tomato sauce": 704, "rose": 705, "eggs": 706, "fan": 707, "bedding": 708, "zucchini": 709, "dried": 710, "closet": 711, "star": 712, "khaki": 713, "artichoke": 714, "peas": 715, "umbrellas": 716, "bull": 717, "sandal": 718, "lemons": 719, "painting": 720, "blinds": 721, "magazine": 722, "cookies": 723, "luggage": 724, "pig": 725, "snake": 726, "cappuccino": 727, "intersection": 728, "beans": 729, "school bus": 730, "square": 731, "trash can": 732, "briefcase": 733, "sunglasses": 734, "dining room": 735, "juice": 736, "dirt": 737, "baseball": 738, "toy": 739, "log": 740, "cherry": 741, "toothbrushes": 742, "tennis ball": 743, "book": 744, "narrow": 745, "skirt": 746, "fire hydrant": 747, "game": 748, "house": 749, "clock tower": 750, "chicken": 751, "stroller": 752, "mud": 753, "bronze": 754, "jeep": 755, "seagull": 756, "street light": 757, "traffic lights": 758, "strawberry": 759, "computer monitor": 760, "hallway": 761, "ambulance": 762, "costume": 763, "panda bear": 764, "market": 765, "wii": 766, "raw": 767, "still": 768, "pineapple": 769, "goat": 770, "trunk": 771, "vacuum": 772, "tent": 773, "curtain": 774, "dock": 
775, "liquid": 776, "pitcher": 777, "cage": 778, "folding chair": 779, "console": 780, "ring": 781, "dry": 782, "workers": 783, "bell": 784, "shampoo bottle": 785, "new": 786, "bushy": 787, "heavy": 788, "cats": 789, "mangoes": 790, "dome": 791, "label": 792, "pasta salad": 793, "light fixture": 794, "can": 795, "traffic light": 796, "male": 797, "jockey": 798, "spectators": 799, "letter": 800, "town": 801, "sheet": 802, "powder": 803, "ice cream": 804, "toy car": 805, "video game": 806, "turkey": 807, "utensils": 808, "pond": 809, "cupcakes": 810, "riding": 811, "lime": 812, "sock": 813, "cucumber": 814, "saucer": 815, "plantains": 816, "spectator": 817, "rectangular": 818, "dugout": 819, "tea": 820, "dress shirt": 821, "owl": 822, "cables": 823, "mouse pad": 824, "rug": 825, "purse": 826, "ostrich": 827, "bottles": 828, "toppings": 829, "net": 830, "gun": 831, "cooked": 832, "adult": 833, "trash bag": 834, "undershirt": 835, "snowy": 836, "driving": 837, "fruit": 838, "symbol": 839, "receipt": 840, "ipod": 841, "figure": 842, "resting": 843, "soup": 844, "terminal": 845, "machine": 846, "cherries": 847, "pretzel": 848, "surfing": 849, "hose": 850, "binder": 851, "old": 852, "mother": 853, "sink": 854, "vine": 855, "appetizers": 856, "earring": 857, "porch": 858, "steam": 859, "calm": 860, "posing": 861, "brunette": 862, "bicycles": 863, "polar bear": 864, "paddle": 865, "microwave oven": 866, "garden": 867, "jackets": 868, "cauliflower": 869, "spices": 870, "farm": 871, "antelope": 872, "container": 873, "fence post": 874, "octopus": 875, "garnish": 876, "planter": 877, "stones": 878, "sheets": 879, "coke": 880, "bottle": 881, "swimsuit": 882, "spice": 883, "walnut": 884, "word": 885, "policeman": 886, "waffle": 887, "ottoman": 888, "desktop computer": 889, "cart": 890, "dryer": 891, "candy": 892, "skiing": 893, "broth": 894, "dip": 895, "milkshake": 896, "plates": 897, "wire": 898, "silverware": 899, "asian": 900, "office chair": 901, "outlet": 902, "projector": 
903, "flags": 904, "partly cloudy": 905, "buns": 906, "sad": 907, "feta cheese": 908, "pasta": 909, "hard drive": 910, "balloon": 911, "tv stand": 912, "tank top": 913, "lunch box": 914, "stuffed dog": 915, "picnic table": 916, "real": 917, "pillows": 918, "rainy": 919, "air": 920, "berry": 921, "soft drink": 922, "nike": 923, "trucks": 924, "harbor": 925, "restroom": 926, "tower": 927, "post": 928, "squirrel": 929, "crosswalk": 930, "mixing bowl": 931, "ducks": 932, "crackers": 933, "wine bottle": 934, "soccer player": 935, "pouch": 936, "necklace": 937, "wicker": 938, "pecan": 939, "hills": 940, "mashed potatoes": 941, "straw": 942, "boulders": 943, "peach": 944, "snowboarder": 945, "tag": 946, "pizza slice": 947, "fountain": 948, "magazines": 949, "chrome": 950, "side table": 951, "alligator": 952, "salt": 953, "rocky": 954, "frosting": 955, "orchard": 956, "hospital": 957, "map": 958, "gas stove": 959, "raisin": 960, "metallic": 961, "words": 962, "monitors": 963, "sticky notes": 964, "popcorn": 965, "parmesan cheese": 966, "cafe": 967, "placemat": 968, "hard": 969, "platter": 970, "bar stool": 971, "passengers": 972, "parachute": 973, "avocado": 974, "vases": 975, "bracelet": 976, "games": 977, "making face": 978, "fried": 979, "black and white": 980, "chocolate chips": 981, "seagulls": 982, "scissors": 983, "dinosaur": 984, "outfits": 985, "soccer ball": 986, "shirts": 987, "office": 988, "employee": 989, "kites": 990, "ropes": 991, "bracelets": 992, "gym": 993, "bison": 994, "sushi": 995, "soldier": 996, "rifle": 997, "cracker": 998, "notebook": 999, "mozzarella": 1000, "box": 1001, "cabbage": 1002, "onion ring": 1003, "trash": 1004, "palm tree": 1005, "coffee cups": 1006, "marina": 1007, "paintings": 1008, "rainbow colored": 1009, "chopsticks": 1010, "cross": 1011, "bikini": 1012, "bee": 1013, "paved": 1014, "frog": 1015, "burrito": 1016, "cheeseburger": 1017, "milk": 1018, "mug": 1019, "wristband": 1020, "dvds": 1021, "sweatshirt": 1022, "shark": 1023, 
"computers": 1024, "grilled": 1025, "pita": 1026, "burner": 1027, "train tracks": 1028, "sticker": 1029, "rhino": 1030, "parrot": 1031, "rough": 1032, "performer": 1033, "drapes": 1034, "courtyard": 1035, "decoration": 1036, "chandelier": 1037, "beverages": 1038, "sweet potatoes": 1039, "crumbs": 1040, "flour": 1041, "frame": 1042, "paint": 1043, "candles": 1044, "beverage": 1045, "cheesecake": 1046, "steel": 1047, "cream": 1048, "shopping center": 1049, "leafy": 1050, "brownie": 1051, "benches": 1052, "happy": 1053, "goats": 1054, "cactus": 1055, "charger": 1056, "antenna": 1057, "breakfast": 1058, "crate": 1059, "washing machine": 1060, "omelette": 1061, "packet": 1062, "beer mug": 1063, "wii controllers": 1064, "crab": 1065, "ripe": 1066, "feeder": 1067, "herb": 1068, "tunnel": 1069, "step": 1070, "flamingo": 1071, "muffin": 1072, "speakers": 1073, "baking pan": 1074, "carts": 1075, "cream cheese": 1076, "pigeons": 1077, "camel": 1078, "tool": 1079, "cafeteria": 1080, "gown": 1081, "blueberry": 1082, "jumpsuit": 1083, "crowd": 1084, "potato chips": 1085, "raisins": 1086, "minivan": 1087, "cobblestone": 1088, "dough": 1089, "panda": 1090, "brush": 1091, "wristwatch": 1092, "pizzas": 1093, "unpaved": 1094, "skateboards": 1095, "jars": 1096, "moss": 1097, "magnets": 1098, "jar": 1099, "hair": 1100, "hot dogs": 1101, "numbers": 1102, "penguin": 1103, "blazer": 1104, "roll": 1105, "figurines": 1106, "dolphin": 1107, "stairs": 1108, "safety jacket": 1109, "shrimp": 1110, "styrofoam": 1111, "officers": 1112, "soda bottle": 1113, "gummy bear": 1114, "cans": 1115, "watermelons": 1116, "wine glasses": 1117, "soldiers": 1118, "desert": 1119, "pine trees": 1120, "garlic": 1121, "lush": 1122, "gentleman": 1123, "dressing": 1124, "soap bottle": 1125, "unripe": 1126, "towels": 1127, "containers": 1128, "liquor": 1129, "murky": 1130, "whale": 1131, "potato salad": 1132, "waffles": 1133, "poodle": 1134, "hay": 1135, "yogurt": 1136, "sculpture": 1137, "alcohol": 1138, "tiles": 
1139, "palm trees": 1140, "pajamas": 1141, "copper": 1142, "croissant": 1143, "swimming": 1144, "church": 1145, "bags": 1146, "snail": 1147, "diaper": 1148, "wavy": 1149, "library": 1150, "wool": 1151, "sprinkles": 1152, "fire extinguisher": 1153, "bowls": 1154, "light bulbs": 1155, "hats": 1156, "spoons": 1157, "peacock": 1158, "boxes": 1159, "upward": 1160, "eagle": 1161, "cinnamon roll": 1162, "granite": 1163, "roasted": 1164, "daughter": 1165, "foil": 1166, "icing": 1167, "peaches": 1168, "bath towel": 1169, "officer": 1170, "pesto": 1171, "telephone pole": 1172, "artwork": 1173, "bedspread": 1174, "caucasian": 1175, "bending": 1176, "female": 1177, "plain": 1178, "toaster oven": 1179, "walnuts": 1180, "triangular": 1181, "beet": 1182, "headband": 1183, "drawings": 1184, "beach chair": 1185, "donkeys": 1186, "below": 1187, "bread loaf": 1188, "paper towel": 1189, "gourd": 1190, "rotten": 1191, "mound": 1192, "whipped cream": 1193, "low": 1194, "parent": 1195, "bus stop": 1196, "bar stools": 1197, "gadget": 1198, "cakes": 1199, "phones": 1200, "cupboards": 1201, "wine bottles": 1202, "gravy": 1203, "covered": 1204, "cockpit": 1205, "mayonnaise": 1206, "marble": 1207, "cereal box": 1208, "butterfly": 1209, "kimono": 1210, "clocks": 1211, "tea pot": 1212, "food truck": 1213, "cords": 1214, "urinal": 1215, "bamboo": 1216, "peanut": 1217, "tissue box": 1218, "fire": 1219, "nutella": 1220, "ramekin": 1221, "leaf": 1222, "village": 1223, "name tag": 1224, "rolling pin": 1225, "olive oil": 1226, "hummus": 1227, "balls": 1228, "wines": 1229, "pizza shop": 1230, "pea": 1231, "goggles": 1232, "dragons": 1233, "drinks": 1234, "marshmallow": 1235, "audience": 1236, "dumplings": 1237, "traffic sign": 1238, "oreo": 1239, "raspberry": 1240, "skating": 1241, "patio": 1242, "bone": 1243, "classroom": 1244, "beer bottle": 1245, "chalkboard": 1246, "life jacket": 1247, "lemonade": 1248, "deck": 1249, "pancake": 1250, "cathedral": 1251, "toiletries": 1252, "backyard": 1253, "mall": 
1254, "whisk": 1255, "brass": 1256, "vending machine": 1257, "island": 1258, "fog": 1259, "water bottle": 1260, "canopy": 1261, "drape": 1262, "topping": 1263, "parking sign": 1264, "antique": 1265, "mesh": 1266, "pens": 1267, "cowboy hat": 1268, "fruits": 1269, "cucumbers": 1270, "grapefruit": 1271, "fans": 1272, "meatballs": 1273, "houses": 1274, "under": 1275, "farmer": 1276, "crane": 1277, "hand dryer": 1278, "cowboy": 1279, "beds": 1280, "macaroni": 1281, "cheetah": 1282, "puddle": 1283, "stuffed animals": 1284, "coffee mug": 1285, "bakery": 1286, "lamps": 1287, "herbs": 1288, "bouquet": 1289, "hair clip": 1290, "cable": 1291, "biscuit": 1292, "cell phones": 1293, "tree leaves": 1294, "pizza pan": 1295, "drum": 1296, "raspberries": 1297, "ice maker": 1298, "shut": 1299, "cards": 1300, "pocket": 1301, "faucet": 1302, "guacamole": 1303, "coconut": 1304, "baseball players": 1305, "bug": 1306, "high": 1307, "brushing teeth": 1308, "ice": 1309, "toothpicks": 1310, "waiter": 1311, "tortilla": 1312, "spider": 1313, "snoopy": 1314, "weeds": 1315, "stew": 1316, "asphalt": 1317, "buoys": 1318, "family": 1319, "logs": 1320, "adidas": 1321, "underwear": 1322, "cliff": 1323, "sailboats": 1324, "robe": 1325, "casserole": 1326, "ketchup bottle": 1327, "teddy bears": 1328, "lock": 1329, "couches": 1330, "figurine": 1331, "pencil": 1332, "leafless": 1333, "drawing": 1334, "flip flops": 1335, "hippo": 1336, "paper dispenser": 1337, "cigarette": 1338, "barn": 1339, "hardwood": 1340, "staircase": 1341, "entrance": 1342, "windows": 1343, "picnic tables": 1344, "fudge": 1345, "performing trick": 1346, "blind": 1347, "vinegar": 1348, "beets": 1349, "curved": 1350, "away": 1351, "roast beef": 1352, "spray bottle": 1353, "chopstick": 1354, "soap dispenser": 1355, "dog food": 1356, "bus driver": 1357, "banana bunch": 1358, "dumpster": 1359, "twigs": 1360, "napkins": 1361, "bagels": 1362, "stage": 1363, "baskets": 1364, "ceramic": 1365, "pineapples": 1366, "street lights": 1367, "soap": 
1368, "brownies": 1369, "christmas lights": 1370, "cameras": 1371, "fruit stand": 1372, "soda can": 1373, "hotdog bun": 1374, "fat": 1375, "pizza boxes": 1376, "melon": 1377, "customers": 1378, "athletic shoe": 1379, "peeled": 1380, "food container": 1381, "powdered sugar": 1382, "rice cooker": 1383, "spots": 1384, "sugar": 1385, "hair dryer": 1386, "tractors": 1387, "broom": 1388, "skin": 1389, "pillowcase": 1390, "smoothie": 1391, "ear buds": 1392, "garment": 1393, "soft": 1394, "walls": 1395, "ravioli": 1396, "seafood": 1397, "hammer": 1398, "sack": 1399, "blenders": 1400, "sponge": 1401, "sunflower": 1402, "cabin": 1403, "tuna": 1404, "beautiful": 1405, "heels": 1406, "butter": 1407, "scooters": 1408, "wardrobe": 1409, "taking pictures": 1410, "forks": 1411, "lambs": 1412, "tin": 1413, "cat food": 1414, "engineer": 1415, "oatmeal": 1416, "clay": 1417, "butterflies": 1418, "team": 1419, "lipstick": 1420, "ladle": 1421, "food processor": 1422, "wok": 1423, "shelter": 1424, "lobster": 1425, "snacks": 1426, "vests": 1427, "face mask": 1428, "peanut butter": 1429, "balloons": 1430, "peanuts": 1431, "wallpaper": 1432, "cranberries": 1433, "crown": 1434, "caramel": 1435, "floor lamp": 1436, "shower curtain": 1437, "blankets": 1438, "hangar": 1439, "surfboards": 1440, "meal": 1441, "wolf": 1442, "gifts": 1443, "father": 1444, "cd": 1445, "chains": 1446, "tourist": 1447, "canister": 1448, "spear": 1449, "pilot": 1450, "mountain side": 1451, "pencils": 1452, "trumpet": 1453, "knives": 1454, "mango": 1455, "magnet": 1456, "guys": 1457, "satellite dish": 1458, "table lamp": 1459, "keyboards": 1460, "swimmer": 1461, "stump": 1462, "amusement park": 1463, "goal": 1464, "roadside": 1465, "wig": 1466, "chickens": 1467, "card": 1468, "looking up": 1469, "aluminum": 1470, "pandas": 1471, "soap dish": 1472, "pomegranate": 1473, "tourists": 1474, "parrots": 1475, "toilet brush": 1476, "remote controls": 1477, "suits": 1478, "cotton": 1479, "tents": 1480, "water glass": 1481, 
"healthy": 1482, "envelope": 1483, "baking sheet": 1484, "marker": 1485, "muffins": 1486, "salon": 1487, "snow flakes": 1488, "dry erase board": 1489, "fishing pole": 1490, "lighthouse": 1491, "earphones": 1492, "photographer": 1493, "gorilla": 1494, "seeds": 1495, "sticks": 1496, "shopping cart": 1497, "pears": 1498, "alarm clock": 1499, "tree branch": 1500, "almonds": 1501, "theater": 1502, "tiger": 1503, "forward": 1504, "temple": 1505, "hedge": 1506, "kitchen towel": 1507, "motorcycles": 1508, "garland": 1509, "pudding": 1510, "vintage": 1511, "coarse": 1512, "swans": 1513, "pretzels": 1514, "swamp": 1515, "dense": 1516, "auditorium": 1517, "daisy": 1518, "dish soap": 1519, "opaque": 1520, "french toast": 1521, "straight": 1522, "tennis balls": 1523, "orchid": 1524, "champagne": 1525, "pizza pie": 1526, "egg roll": 1527, "flatbread": 1528, "coconuts": 1529, "flip flop": 1530, "skyscraper": 1531, "fur": 1532, "denim": 1533, "scaffolding": 1534, "coin": 1535, "policemen": 1536, "stuffed bears": 1537, "cane": 1538, "cloth": 1539, "cake stand": 1540, "pump": 1541, "soccer": 1542, "loaf": 1543, "knife block": 1544, "ski lift": 1545, "modern": 1546, "old fashioned": 1547, "cotton dessert": 1548, "trays": 1549, "smooth": 1550, "lunch": 1551, "dull": 1552, "shield": 1553, "dinner": 1554, "cloths": 1555, "waterfall": 1556, "waste basket": 1557, "scarce": 1558, "shaking hands": 1559, "salt shaker": 1560, "pocket watch": 1561, "unhealthy": 1562, "lounge": 1563, "moose": 1564, "seaweed": 1565, "panda bears": 1566, "candies": 1567, "batteries": 1568, "comb": 1569, "wallet": 1570, "students": 1571, "school": 1572, "geese": 1573, "apartment building": 1574, "stars": 1575, "granola": 1576, "leopard": 1577, "cardboard": 1578, "shoe laces": 1579, "hairbrush": 1580, "chef hat": 1581, "crystal": 1582, "pizza tray": 1583, "bread box": 1584, "luggage cart": 1585, "apartment": 1586, "angry": 1587, "characters": 1588, "oak tree": 1589, "angry bird": 1590, "backpacks": 1591, "shaving 
cream": 1592, "cemetery": 1593, "lace": 1594, "anchovies": 1595, "dresses": 1596, "paper towels": 1597, "garage door": 1598, "vanilla": 1599, "uncooked": 1600, "battery": 1601, "butter knife": 1602, "mint": 1603, "package": 1604, "biscuits": 1605, "son": 1606, "cake pan": 1607, "snack": 1608, "riding boots": 1609, "rooftop": 1610, "irregular": 1611, "baked": 1612, "kittens": 1613, "sconce": 1614, "serving dish": 1615, "mirrors": 1616, "taking photo": 1617, "bubble": 1618, "printers": 1619, "ice cube": 1620, "knee pads": 1621, "doors": 1622, "ceiling light": 1623, "cotton candy": 1624, "helmets": 1625, "cheese cube": 1626, "bartender": 1627, "pistachio": 1628, "ugly": 1629, "sausages": 1630, "beer can": 1631, "baker": 1632, "coffee beans": 1633, "almond": 1634, "ovens": 1635, "curled": 1636, "underneath": 1637, "suitcases": 1638, "food": 1639, "taking bath": 1640, "vendor": 1641, "lizard": 1642, "homes": 1643, "shops": 1644, "mannequins": 1645, "turtle": 1646, "blossom": 1647, "chickpeas": 1648, "outside": 1649, "ornament": 1650, "milk carton": 1651, "mexican food": 1652, "seed": 1653, "avocados": 1654, "masks": 1655, "pumpkins": 1656, "papaya": 1657, "stapler": 1658, "hamburgers": 1659, "earrings": 1660, "back": 1661, "wildflowers": 1662, "bats": 1663, "hand soap": 1664, "fresh": 1665, "manhole cover": 1666, "dolphins": 1667, "thermometer": 1668, "castle": 1669, "cones": 1670, "pizza cutter": 1671, "pizza box": 1672, "heel": 1673, "salmon": 1674, "door frame": 1675, "taco": 1676, "pork": 1677, "wedding": 1678, "bubbles": 1679, "eiffel tower": 1680, "cranberry": 1681, "napkin dispenser": 1682, "bandage": 1683, "elmo": 1684, "notepad": 1685, "pepper shaker": 1686, "artichokes": 1687, "tools": 1688, "window frame": 1689, "steamed": 1690, "groceries": 1691, "lily": 1692, "cookbook": 1693, "paper container": 1694, "hippos": 1695, "hilltop": 1696, "twig": 1697, "animal": 1698, "wii game": 1699, "beads": 1700, "lilies": 1701, "towel dispenser": 1702, "blood": 1703, 
"ladles": 1704, "jewelry": 1705, "hearts": 1706, "snow boots": 1707, "ahead": 1708, "utensil holder": 1709, "football": 1710, "bird cage": 1711, "dish drainer": 1712, "cds": 1713, "banana peel": 1714, "vines": 1715, "pizza crust": 1716, "shopper": 1717, "tags": 1718, "keypad": 1719, "dinosaurs": 1720, "stir fry": 1721, "bomb": 1722, "necklaces": 1723, "packages": 1724, "uniforms": 1725, "sparse": 1726, "unhappy": 1727, "control panel": 1728, "antennas": 1729, "spray can": 1730, "feathers": 1731, "electric toothbrush": 1732, "potted": 1733, "juice box": 1734, "toolbox": 1735, "visitor": 1736, "ornaments": 1737, "sign post": 1738, "baseball mitt": 1739, "robot": 1740, "blackberries": 1741, "desk lamp": 1742, "glaze": 1743, "melons": 1744, "cookie dough": 1745, "paint brush": 1746, "mustard bottle": 1747, "apple logo": 1748, "salad dressing": 1749, "mattresses": 1750, "cash register": 1751, "nest": 1752, "knee pad": 1753, "out": 1754, "toasted": 1755, "price tag": 1756, "canisters": 1757, "christmas light": 1758, "antelopes": 1759, "dream catcher": 1760, "student": 1761, "fine": 1762, "kangaroo": 1763, "smoke stack": 1764, "music": 1765, "cages": 1766, "soccer balls": 1767, "ostriches": 1768, "coffee shop": 1769, "ice cubes": 1770, "downward": 1771, "televisions": 1772, "candle holder": 1773, "grinder": 1774, "xbox controller": 1775, "cricket": 1776, "hurdle": 1777, "obstacle": 1778, "lab coat": 1779, "gas pump": 1780, "banana bunches": 1781, "bell tower": 1782, "waitress": 1783, "in mirror": 1784, "coats": 1785, "attic": 1786, "sugar packet": 1787, "taking photograph": 1788, "mountain peak": 1789, "pub": 1790, "silk": 1791, "blossoms": 1792, "pillars": 1793, "scrub brush": 1794, "kiwis": 1795, "octagonal": 1796, "parachutes": 1797, "lions": 1798, "sideways": 1799, "egg carton": 1800, "visitors": 1801, "sunflowers": 1802, "shoe lace": 1803, "rhinos": 1804, "elbow pad": 1805, "egg yolk": 1806, "outlets": 1807, "baseball bats": 1808, "life jackets": 1809, "snakes": 
1810, "vitamins": 1811, "cigar": 1812, "upwards": 1813, "beneath": 1814, "taking photos": 1815, "storage box": 1816, "armor": 1817, "cookie jar": 1818, "rounded": 1819, "seat belt": 1820, "owls": 1821, "appetizer": 1822, "beer cans": 1823, "stores": 1824, "shoppers": 1825, "bird house": 1826, "sugar packets": 1827, "wild": 1828, "dvd players": 1829, "towers": 1830, "water bottles": 1831, "waves": 1832, "pikachu": 1833, "wolves": 1834, "immature": 1835, "shampoo": 1836, "orchids": 1837, "elevator": 1838, "taking notes": 1839, "wave": 1840, "horse hoof": 1841, "bottle cap": 1842}, {"0": "yes", "1": "pipe", "2": "no", "3": "large", "4": "girl", "5": "bed", "6": "sofa", "7": "right", "8": "dark", "9": "cabinet", "10": "left", "11": "bird", "12": "brick", "13": "rock", "14": "children", "15": "brown", "16": "blond", "17": "pants", "18": "top", "19": "horse", "20": "blue", "21": "hot dog", "22": "banana", "23": "laptop", "24": "desk", "25": "bottom", "26": "eating", "27": "man", "28": "grass", "29": "dog", "30": "silver", "31": "bag", "32": "pedestrian", "33": "cabinets", "34": "green", "35": "window", "36": "giraffe", "37": "tiny", "38": "child", "39": "yellow", "40": "wooden", "41": "parking meter", "42": "fries", "43": "plants", "44": "kiosk", "45": "orange", "46": "van", "47": "shirt", "48": "coat", "49": "controller", "50": "bench", "51": "television", "52": "black", "53": "carrot", "54": "sandwich", "55": "city", "56": "street", "57": "couch", "58": "closed", "59": "field", "60": "pink", "61": "boy", "62": "lady", "63": "tomato", "64": "horses", "65": "white", "66": "beef", "67": "cat", "68": "cutting board", "69": "overcast", "70": "vegetables", "71": "gray", "72": "onions", "73": "wood", "74": "toaster", "75": "bread", "76": "fence", "77": "player", "78": "roof", "79": "meadow", "80": "baby", "81": "calf", "82": "branch", "83": "street sign", "84": "backpack", "85": "jacket", "86": "teddy bear", "87": "game controller", "88": "herd", "89": "zoo", "90": "truck", 
"91": "red", "92": "printer", "93": "yard", "94": "end table", "95": "wetsuit", "96": "building", "97": "carrots", "98": "train", "99": "pans", "100": "giraffes", "101": "coffee maker", "102": "bathroom", "103": "woman", "104": "monitor", "105": "sheep", "106": "trees", "107": "dining table", "108": "park", "109": "nightstand", "110": "car", "111": "table", "112": "bicycle", "113": "donkey", "114": "cell phone", "115": "teal", "116": "chair", "117": "bathtub", "118": "waiting", "119": "purple", "120": "small", "121": "airport", "122": "colorful", "123": "stuffed bear", "124": "light brown", "125": "piano", "126": "lying", "127": "clock", "128": "pavement", "129": "snow", "130": "lemon", "131": "sandy", "132": "shelf", "133": "cheese", "134": "light blue", "135": "plant", "136": "bowl", "137": "bus", "138": "dishwasher", "139": "pepperoni", "140": "pole", "141": "bear", "142": "monkey", "143": "shore", "144": "hedges", "145": "wall", "146": "elephant", "147": "sidewalk", "148": "swimming pool", "149": "blender", "150": "bookshelves", "151": "mountain", "152": "pizza", "153": "birds", "154": "people", "155": "radiator", "156": "metal", "157": "striped", "158": "playing", "159": "kitten", "160": "dirty", "161": "runway", "162": "salad", "163": "sailboat", "164": "zebra", "165": "counter", "166": "lettuce", "167": "seat", "168": "asparagus", "169": "color", "170": "plastic", "171": "racket", "172": "dress", "173": "frisbee", "174": "standing", "175": "sea", "176": "keyboard", "177": "motorcycle", "178": "phone", "179": "tree", "180": "computer", "181": "pointing", "182": "iron", "183": "skis", "184": "blouse", "185": "onion", "186": "bat", "187": "light switch", "188": "hook", "189": "mirror", "190": "surfboard", "191": "candle", "192": "catcher", "193": "bricks", "194": "newspaper", "195": "handbag", "196": "knife", "197": "branches", "198": "cap", "199": "stove", "200": "pots", "201": "lawn", "202": "computer mouse", "203": "chef", "204": "steps", "205": "tan", 
"206": "eggplant", "207": "mountains", "208": "open", "209": "refrigerator", "210": "oranges", "211": "snowboarding", "212": "oven", "213": "utensil", "214": "bedroom", "215": "olives", "216": "little", "217": "cow", "218": "boat", "219": "microwave", "220": "pizza oven", "221": "taxi", "222": "young", "223": "drawers", "224": "tablet", "225": "choppy", "226": "foggy", "227": "apron", "228": "syrup", "229": "plate", "230": "coffee cup", "231": "taking picture", "232": "shoe", "233": "basket", "234": "pigeon", "235": "water", "236": "stop sign", "237": "mailbox", "238": "leather", "239": "remote control", "240": "home plate", "241": "spinach", "242": "tea kettle", "243": "cereal", "244": "tall", "245": "helmet", "246": "celery", "247": "vase", "248": "alien", "249": "collar", "250": "shorts", "251": "suit", "252": "supermarket", "253": "carpet", "254": "donuts", "255": "batter", "256": "outdoors", "257": "girls", "258": "skier", "259": "entertainment center", "260": "floor", "261": "chain", "262": "lamp", "263": "rope", "264": "pepper", "265": "tomatoes", "266": "drawer", "267": "forest", "268": "cars", "269": "balcony", "270": "guy", "271": "boats", "272": "scooter", "273": "flower", "274": "wii controller", "275": "down", "276": "shopping bag", "277": "grape", "278": "ski", "279": "ocean", "280": "comforter", "281": "mattress", "282": "lamb", "283": "customer", "284": "pan", "285": "highway", "286": "long", "287": "display", "288": "shower", "289": "nuts", "290": "sign", "291": "clear", "292": "letters", "293": "surfer", "294": "mannequin", "295": "checkered", "296": "ground", "297": "fisherman", "298": "egg", "299": "zebras", "300": "shoes", "301": "dish", "302": "coffee", "303": "paper", "304": "store", "305": "thin", "306": "glass", "307": "hat", "308": "station", "309": "spatula", "310": "train car", "311": "skateboard", "312": "lake", "313": "airplane", "314": "concrete", "315": "stainless steel", "316": "bushes", "317": "hill", "318": "road", "319": "spoon", 
"320": "lobby", "321": "indoors", "322": "armchair", "323": "flowers", "324": "broccoli", "325": "suv", "326": "umbrella", "327": "glasses", "328": "ham", "329": "rubber duck", "330": "croissants", "331": "carriage", "332": "burger", "333": "beach", "334": "pen", "335": "laptops", "336": "athlete", "337": "pickles", "338": "dark brown", "339": "trains", "340": "living room", "341": "screen", "342": "bikes", "343": "beige", "344": "napkin", "345": "gravel", "346": "papers", "347": "door", "348": "gold", "349": "cloudy", "350": "tofu", "351": "cows", "352": "hillside", "353": "sun", "354": "behind", "355": "jet", "356": "mushroom", "357": "material", "358": "snowboard", "359": "produce", "360": "dvd player", "361": "camera", "362": "sky", "363": "bun", "364": "walkway", "365": "vest", "366": "watch", "367": "sunny", "368": "locomotive", "369": "sausage", "370": "shop", "371": "ball", "372": "sneakers", "373": "sea foam", "374": "clouds", "375": "leaves", "376": "dresser", "377": "chili", "378": "gate", "379": "flag", "380": "stick", "381": "leggings", "382": "rubber", "383": "mugs", "384": "parsley", "385": "merchandise", "386": "grill", "387": "shallow", "388": "medicine cabinet", "389": "chairs", "390": "ceiling", "391": "curtains", "392": "peppers", "393": "huge", "394": "kettle", "395": "crouching", "396": "deer", "397": "picture", "398": "passenger", "399": "bears", "400": "ship", "401": "belt", "402": "umpire", "403": "short", "404": "driver", "405": "thick", "406": "reading", "407": "tape", "408": "doll", "409": "bookshelf", "410": "basil", "411": "tongs", "412": "cream colored", "413": "oil", "414": "flames", "415": "gift", "416": "rocks", "417": "apple", "418": "blanket", "419": "menu", "420": "lego", "421": "wine", "422": "kite", "423": "aquarium", "424": "swan", "425": "mask", "426": "boot", "427": "dessert", "428": "wide", "429": "headphones", "430": "baseball bat", "431": "tables", "432": "drain", "433": "logo", "434": "tie", "435": "crates", "436": 
"blueberries", "437": "worker", "438": "chocolate", "439": "kiwi", "440": "cookie", "441": "wine glass", "442": "boys", "443": "jumping", "444": "cloudless", "445": "rain", "446": "cord", "447": "books", "448": "speaker", "449": "heater", "450": "mickey mouse", "451": "cake", "452": "microphone", "453": "computer desk", "454": "coleslaw", "455": "wet", "456": "video games", "457": "uncomfortable", "458": "canoe", "459": "hotel room", "460": "giant", "461": "dolls", "462": "desserts", "463": "pear", "464": "roses", "465": "apples", "466": "cooker", "467": "tablecloth", "468": "unpeeled", "469": "coffee pot", "470": "bucket", "471": "buildings", "472": "gas station", "473": "staring", "474": "snow pants", "475": "gloves", "476": "couple", "477": "fireplace", "478": "stone", "479": "skate park", "480": "statue", "481": "cyclist", "482": "money", "483": "jeans", "484": "cup", "485": "cinnamon", "486": "pie", "487": "tissues", "488": "fork", "489": "at camera", "490": "rabbit", "491": "off", "492": "stuffed animal", "493": "sweet potato", "494": "skinny", "495": "duck", "496": "deep", "497": "pedestrians", "498": "sleeveless", "499": "maroon", "500": "wheelchair", "501": "skillet", "502": "snowsuit", "503": "potatoes", "504": "glove", "505": "dogs", "506": "elephants", "507": "chimney", "508": "pillow", "509": "stickers", "510": "suitcase", "511": "toilet paper", "512": "lion", "513": "drink", "514": "potato", "515": "stormy", "516": "bananas", "517": "empty", "518": "grassy", "519": "scarf", "520": "trailer", "521": "sandals", "522": "aircraft", "523": "pot", "524": "parking lot", "525": "bunny", "526": "puppy", "527": "sauce", "528": "ladder", "529": "smoke", "530": "t shirt", "531": "biker", "532": "fish", "533": "tangerine", "534": "coffee table", "535": "mixer", "536": "beer", "537": "tinted", "538": "sweater", "539": "doorway", "540": "berries", "541": "cooking", "542": "meats", "543": "towel", "544": "fire truck", "545": "round", "546": "donut", "547": "short 
sleeved", "548": "toast", "549": "hotel", "550": "squash", "551": "raincoat", "552": "talking", "553": "bagel", "554": "sour cream", "555": "lid", "556": "bandana", "557": "tissue", "558": "shelves", "559": "cupcake", "560": "men", "561": "sand", "562": "above", "563": "router", "564": "bridge", "565": "trunks", "566": "steak", "567": "long sleeved", "568": "wires", "569": "pancakes", "570": "moon", "571": "up", "572": "noodles", "573": "arrow", "574": "goose", "575": "american flag", "576": "wagon", "577": "beach umbrella", "578": "airplanes", "579": "river", "580": "heart", "581": "cone", "582": "countertop", "583": "poster", "584": "meat", "585": "drinking", "586": "rice", "587": "toilet", "588": "running", "589": "clean", "590": "roadway", "591": "dishes", "592": "fake", "593": "path", "594": "players", "595": "toothbrush", "596": "corn", "597": "skateboarder", "598": "controllers", "599": "tennis", "600": "dark blue", "601": "number", "602": "on", "603": "pastries", "604": "graffiti", "605": "place", "606": "shuttle", "607": "barrier", "608": "mustard", "609": "tray", "610": "calculator", "611": "sleeping", "612": "dispenser", "613": "cupboard", "614": "skateboarding", "615": "shape", "616": "ketchup", "617": "strawberries", "618": "kitchen", "619": "buoy", "620": "cups", "621": "skater", "622": "flower pot", "623": "match", "624": "buses", "625": "ribs", "626": "socks", "627": "grater", "628": "pipes", "629": "railroad", "630": "tree branches", "631": "jersey", "632": "air conditioner", "633": "train station", "634": "pastry", "635": "bookcase", "636": "chinese food", "637": "outfit", "638": "bike", "639": "nut", "640": "onion rings", "641": "museum", "642": "tractor", "643": "toddler", "644": "cooking pot", "645": "mushrooms", "646": "mat", "647": "bleachers", "648": "stained", "649": "walking", "650": "full", "651": "uniform", "652": "power line", "653": "pumpkin", "654": "watermelon", "655": "crust", "656": "bush", "657": "picture frame", "658": "looking 
down", "659": "grapes", "660": "character", "661": "honey", "662": "olive", "663": "power lines", "664": "serving tray", "665": "dragon", "666": "toys", "667": "helicopter", "668": "bacon", "669": "eye glasses", "670": "sword", "671": "garage", "672": "women", "673": "sitting", "674": "light bulb", "675": "chicken breast", "676": "video camera", "677": "decorations", "678": "platform", "679": "hamburger", "680": "boots", "681": "porcelain", "682": "bare", "683": "soda", "684": "light", "685": "radio", "686": "stadium", "687": "pine tree", "688": "action figure", "689": "coach", "690": "pasture", "691": "seal", "692": "cooler", "693": "guitar", "694": "sandwiches", "695": "vegetable", "696": "curly", "697": "front", "698": "toothpaste", "699": "polo shirt", "700": "dotted", "701": "restaurant", "702": "life preserver", "703": "rackets", "704": "tomato sauce", "705": "rose", "706": "eggs", "707": "fan", "708": "bedding", "709": "zucchini", "710": "dried", "711": "closet", "712": "star", "713": "khaki", "714": "artichoke", "715": "peas", "716": "umbrellas", "717": "bull", "718": "sandal", "719": "lemons", "720": "painting", "721": "blinds", "722": "magazine", "723": "cookies", "724": "luggage", "725": "pig", "726": "snake", "727": "cappuccino", "728": "intersection", "729": "beans", "730": "school bus", "731": "square", "732": "trash can", "733": "briefcase", "734": "sunglasses", "735": "dining room", "736": "juice", "737": "dirt", "738": "baseball", "739": "toy", "740": "log", "741": "cherry", "742": "toothbrushes", "743": "tennis ball", "744": "book", "745": "narrow", "746": "skirt", "747": "fire hydrant", "748": "game", "749": "house", "750": "clock tower", "751": "chicken", "752": "stroller", "753": "mud", "754": "bronze", "755": "jeep", "756": "seagull", "757": "street light", "758": "traffic lights", "759": "strawberry", "760": "computer monitor", "761": "hallway", "762": "ambulance", "763": "costume", "764": "panda bear", "765": "market", "766": "wii", "767": 
"raw", "768": "still", "769": "pineapple", "770": "goat", "771": "trunk", "772": "vacuum", "773": "tent", "774": "curtain", "775": "dock", "776": "liquid", "777": "pitcher", "778": "cage", "779": "folding chair", "780": "console", "781": "ring", "782": "dry", "783": "workers", "784": "bell", "785": "shampoo bottle", "786": "new", "787": "bushy", "788": "heavy", "789": "cats", "790": "mangoes", "791": "dome", "792": "label", "793": "pasta salad", "794": "light fixture", "795": "can", "796": "traffic light", "797": "male", "798": "jockey", "799": "spectators", "800": "letter", "801": "town", "802": "sheet", "803": "powder", "804": "ice cream", "805": "toy car", "806": "video game", "807": "turkey", "808": "utensils", "809": "pond", "810": "cupcakes", "811": "riding", "812": "lime", "813": "sock", "814": "cucumber", "815": "saucer", "816": "plantains", "817": "spectator", "818": "rectangular", "819": "dugout", "820": "tea", "821": "dress shirt", "822": "owl", "823": "cables", "824": "mouse pad", "825": "rug", "826": "purse", "827": "ostrich", "828": "bottles", "829": "toppings", "830": "net", "831": "gun", "832": "cooked", "833": "adult", "834": "trash bag", "835": "undershirt", "836": "snowy", "837": "driving", "838": "fruit", "839": "symbol", "840": "receipt", "841": "ipod", "842": "figure", "843": "resting", "844": "soup", "845": "terminal", "846": "machine", "847": "cherries", "848": "pretzel", "849": "surfing", "850": "hose", "851": "binder", "852": "old", "853": "mother", "854": "sink", "855": "vine", "856": "appetizers", "857": "earring", "858": "porch", "859": "steam", "860": "calm", "861": "posing", "862": "brunette", "863": "bicycles", "864": "polar bear", "865": "paddle", "866": "microwave oven", "867": "garden", "868": "jackets", "869": "cauliflower", "870": "spices", "871": "farm", "872": "antelope", "873": "container", "874": "fence post", "875": "octopus", "876": "garnish", "877": "planter", "878": "stones", "879": "sheets", "880": "coke", "881": 
"bottle", "882": "swimsuit", "883": "spice", "884": "walnut", "885": "word", "886": "policeman", "887": "waffle", "888": "ottoman", "889": "desktop computer", "890": "cart", "891": "dryer", "892": "candy", "893": "skiing", "894": "broth", "895": "dip", "896": "milkshake", "897": "plates", "898": "wire", "899": "silverware", "900": "asian", "901": "office chair", "902": "outlet", "903": "projector", "904": "flags", "905": "partly cloudy", "906": "buns", "907": "sad", "908": "feta cheese", "909": "pasta", "910": "hard drive", "911": "balloon", "912": "tv stand", "913": "tank top", "914": "lunch box", "915": "stuffed dog", "916": "picnic table", "917": "real", "918": "pillows", "919": "rainy", "920": "air", "921": "berry", "922": "soft drink", "923": "nike", "924": "trucks", "925": "harbor", "926": "restroom", "927": "tower", "928": "post", "929": "squirrel", "930": "crosswalk", "931": "mixing bowl", "932": "ducks", "933": "crackers", "934": "wine bottle", "935": "soccer player", "936": "pouch", "937": "necklace", "938": "wicker", "939": "pecan", "940": "hills", "941": "mashed potatoes", "942": "straw", "943": "boulders", "944": "peach", "945": "snowboarder", "946": "tag", "947": "pizza slice", "948": "fountain", "949": "magazines", "950": "chrome", "951": "side table", "952": "alligator", "953": "salt", "954": "rocky", "955": "frosting", "956": "orchard", "957": "hospital", "958": "map", "959": "gas stove", "960": "raisin", "961": "metallic", "962": "words", "963": "monitors", "964": "sticky notes", "965": "popcorn", "966": "parmesan cheese", "967": "cafe", "968": "placemat", "969": "hard", "970": "platter", "971": "bar stool", "972": "passengers", "973": "parachute", "974": "avocado", "975": "vases", "976": "bracelet", "977": "games", "978": "making face", "979": "fried", "980": "black and white", "981": "chocolate chips", "982": "seagulls", "983": "scissors", "984": "dinosaur", "985": "outfits", "986": "soccer ball", "987": "shirts", "988": "office", "989": 
"employee", "990": "kites", "991": "ropes", "992": "bracelets", "993": "gym", "994": "bison", "995": "sushi", "996": "soldier", "997": "rifle", "998": "cracker", "999": "notebook", "1000": "mozzarella", "1001": "box", "1002": "cabbage", "1003": "onion ring", "1004": "trash", "1005": "palm tree", "1006": "coffee cups", "1007": "marina", "1008": "paintings", "1009": "rainbow colored", "1010": "chopsticks", "1011": "cross", "1012": "bikini", "1013": "bee", "1014": "paved", "1015": "frog", "1016": "burrito", "1017": "cheeseburger", "1018": "milk", "1019": "mug", "1020": "wristband", "1021": "dvds", "1022": "sweatshirt", "1023": "shark", "1024": "computers", "1025": "grilled", "1026": "pita", "1027": "burner", "1028": "train tracks", "1029": "sticker", "1030": "rhino", "1031": "parrot", "1032": "rough", "1033": "performer", "1034": "drapes", "1035": "courtyard", "1036": "decoration", "1037": "chandelier", "1038": "beverages", "1039": "sweet potatoes", "1040": "crumbs", "1041": "flour", "1042": "frame", "1043": "paint", "1044": "candles", "1045": "beverage", "1046": "cheesecake", "1047": "steel", "1048": "cream", "1049": "shopping center", "1050": "leafy", "1051": "brownie", "1052": "benches", "1053": "happy", "1054": "goats", "1055": "cactus", "1056": "charger", "1057": "antenna", "1058": "breakfast", "1059": "crate", "1060": "washing machine", "1061": "omelette", "1062": "packet", "1063": "beer mug", "1064": "wii controllers", "1065": "crab", "1066": "ripe", "1067": "feeder", "1068": "herb", "1069": "tunnel", "1070": "step", "1071": "flamingo", "1072": "muffin", "1073": "speakers", "1074": "baking pan", "1075": "carts", "1076": "cream cheese", "1077": "pigeons", "1078": "camel", "1079": "tool", "1080": "cafeteria", "1081": "gown", "1082": "blueberry", "1083": "jumpsuit", "1084": "crowd", "1085": "potato chips", "1086": "raisins", "1087": "minivan", "1088": "cobblestone", "1089": "dough", "1090": "panda", "1091": "brush", "1092": "wristwatch", "1093": "pizzas", "1094": 
"unpaved", "1095": "skateboards", "1096": "jars", "1097": "moss", "1098": "magnets", "1099": "jar", "1100": "hair", "1101": "hot dogs", "1102": "numbers", "1103": "penguin", "1104": "blazer", "1105": "roll", "1106": "figurines", "1107": "dolphin", "1108": "stairs", "1109": "safety jacket", "1110": "shrimp", "1111": "styrofoam", "1112": "officers", "1113": "soda bottle", "1114": "gummy bear", "1115": "cans", "1116": "watermelons", "1117": "wine glasses", "1118": "soldiers", "1119": "desert", "1120": "pine trees", "1121": "garlic", "1122": "lush", "1123": "gentleman", "1124": "dressing", "1125": "soap bottle", "1126": "unripe", "1127": "towels", "1128": "containers", "1129": "liquor", "1130": "murky", "1131": "whale", "1132": "potato salad", "1133": "waffles", "1134": "poodle", "1135": "hay", "1136": "yogurt", "1137": "sculpture", "1138": "alcohol", "1139": "tiles", "1140": "palm trees", "1141": "pajamas", "1142": "copper", "1143": "croissant", "1144": "swimming", "1145": "church", "1146": "bags", "1147": "snail", "1148": "diaper", "1149": "wavy", "1150": "library", "1151": "wool", "1152": "sprinkles", "1153": "fire extinguisher", "1154": "bowls", "1155": "light bulbs", "1156": "hats", "1157": "spoons", "1158": "peacock", "1159": "boxes", "1160": "upward", "1161": "eagle", "1162": "cinnamon roll", "1163": "granite", "1164": "roasted", "1165": "daughter", "1166": "foil", "1167": "icing", "1168": "peaches", "1169": "bath towel", "1170": "officer", "1171": "pesto", "1172": "telephone pole", "1173": "artwork", "1174": "bedspread", "1175": "caucasian", "1176": "bending", "1177": "female", "1178": "plain", "1179": "toaster oven", "1180": "walnuts", "1181": "triangular", "1182": "beet", "1183": "headband", "1184": "drawings", "1185": "beach chair", "1186": "donkeys", "1187": "below", "1188": "bread loaf", "1189": "paper towel", "1190": "gourd", "1191": "rotten", "1192": "mound", "1193": "whipped cream", "1194": "low", "1195": "parent", "1196": "bus stop", "1197": "bar 
stools", "1198": "gadget", "1199": "cakes", "1200": "phones", "1201": "cupboards", "1202": "wine bottles", "1203": "gravy", "1204": "covered", "1205": "cockpit", "1206": "mayonnaise", "1207": "marble", "1208": "cereal box", "1209": "butterfly", "1210": "kimono", "1211": "clocks", "1212": "tea pot", "1213": "food truck", "1214": "cords", "1215": "urinal", "1216": "bamboo", "1217": "peanut", "1218": "tissue box", "1219": "fire", "1220": "nutella", "1221": "ramekin", "1222": "leaf", "1223": "village", "1224": "name tag", "1225": "rolling pin", "1226": "olive oil", "1227": "hummus", "1228": "balls", "1229": "wines", "1230": "pizza shop", "1231": "pea", "1232": "goggles", "1233": "dragons", "1234": "drinks", "1235": "marshmallow", "1236": "audience", "1237": "dumplings", "1238": "traffic sign", "1239": "oreo", "1240": "raspberry", "1241": "skating", "1242": "patio", "1243": "bone", "1244": "classroom", "1245": "beer bottle", "1246": "chalkboard", "1247": "life jacket", "1248": "lemonade", "1249": "deck", "1250": "pancake", "1251": "cathedral", "1252": "toiletries", "1253": "backyard", "1254": "mall", "1255": "whisk", "1256": "brass", "1257": "vending machine", "1258": "island", "1259": "fog", "1260": "water bottle", "1261": "canopy", "1262": "drape", "1263": "topping", "1264": "parking sign", "1265": "antique", "1266": "mesh", "1267": "pens", "1268": "cowboy hat", "1269": "fruits", "1270": "cucumbers", "1271": "grapefruit", "1272": "fans", "1273": "meatballs", "1274": "houses", "1275": "under", "1276": "farmer", "1277": "crane", "1278": "hand dryer", "1279": "cowboy", "1280": "beds", "1281": "macaroni", "1282": "cheetah", "1283": "puddle", "1284": "stuffed animals", "1285": "coffee mug", "1286": "bakery", "1287": "lamps", "1288": "herbs", "1289": "bouquet", "1290": "hair clip", "1291": "cable", "1292": "biscuit", "1293": "cell phones", "1294": "tree leaves", "1295": "pizza pan", "1296": "drum", "1297": "raspberries", "1298": "ice maker", "1299": "shut", "1300": "cards", 
"1301": "pocket", "1302": "faucet", "1303": "guacamole", "1304": "coconut", "1305": "baseball players", "1306": "bug", "1307": "high", "1308": "brushing teeth", "1309": "ice", "1310": "toothpicks", "1311": "waiter", "1312": "tortilla", "1313": "spider", "1314": "snoopy", "1315": "weeds", "1316": "stew", "1317": "asphalt", "1318": "buoys", "1319": "family", "1320": "logs", "1321": "adidas", "1322": "underwear", "1323": "cliff", "1324": "sailboats", "1325": "robe", "1326": "casserole", "1327": "ketchup bottle", "1328": "teddy bears", "1329": "lock", "1330": "couches", "1331": "figurine", "1332": "pencil", "1333": "leafless", "1334": "drawing", "1335": "flip flops", "1336": "hippo", "1337": "paper dispenser", "1338": "cigarette", "1339": "barn", "1340": "hardwood", "1341": "staircase", "1342": "entrance", "1343": "windows", "1344": "picnic tables", "1345": "fudge", "1346": "performing trick", "1347": "blind", "1348": "vinegar", "1349": "beets", "1350": "curved", "1351": "away", "1352": "roast beef", "1353": "spray bottle", "1354": "chopstick", "1355": "soap dispenser", "1356": "dog food", "1357": "bus driver", "1358": "banana bunch", "1359": "dumpster", "1360": "twigs", "1361": "napkins", "1362": "bagels", "1363": "stage", "1364": "baskets", "1365": "ceramic", "1366": "pineapples", "1367": "street lights", "1368": "soap", "1369": "brownies", "1370": "christmas lights", "1371": "cameras", "1372": "fruit stand", "1373": "soda can", "1374": "hotdog bun", "1375": "fat", "1376": "pizza boxes", "1377": "melon", "1378": "customers", "1379": "athletic shoe", "1380": "peeled", "1381": "food container", "1382": "powdered sugar", "1383": "rice cooker", "1384": "spots", "1385": "sugar", "1386": "hair dryer", "1387": "tractors", "1388": "broom", "1389": "skin", "1390": "pillowcase", "1391": "smoothie", "1392": "ear buds", "1393": "garment", "1394": "soft", "1395": "walls", "1396": "ravioli", "1397": "seafood", "1398": "hammer", "1399": "sack", "1400": "blenders", "1401": "sponge", 
"1402": "sunflower", "1403": "cabin", "1404": "tuna", "1405": "beautiful", "1406": "heels", "1407": "butter", "1408": "scooters", "1409": "wardrobe", "1410": "taking pictures", "1411": "forks", "1412": "lambs", "1413": "tin", "1414": "cat food", "1415": "engineer", "1416": "oatmeal", "1417": "clay", "1418": "butterflies", "1419": "team", "1420": "lipstick", "1421": "ladle", "1422": "food processor", "1423": "wok", "1424": "shelter", "1425": "lobster", "1426": "snacks", "1427": "vests", "1428": "face mask", "1429": "peanut butter", "1430": "balloons", "1431": "peanuts", "1432": "wallpaper", "1433": "cranberries", "1434": "crown", "1435": "caramel", "1436": "floor lamp", "1437": "shower curtain", "1438": "blankets", "1439": "hangar", "1440": "surfboards", "1441": "meal", "1442": "wolf", "1443": "gifts", "1444": "father", "1445": "cd", "1446": "chains", "1447": "tourist", "1448": "canister", "1449": "spear", "1450": "pilot", "1451": "mountain side", "1452": "pencils", "1453": "trumpet", "1454": "knives", "1455": "mango", "1456": "magnet", "1457": "guys", "1458": "satellite dish", "1459": "table lamp", "1460": "keyboards", "1461": "swimmer", "1462": "stump", "1463": "amusement park", "1464": "goal", "1465": "roadside", "1466": "wig", "1467": "chickens", "1468": "card", "1469": "looking up", "1470": "aluminum", "1471": "pandas", "1472": "soap dish", "1473": "pomegranate", "1474": "tourists", "1475": "parrots", "1476": "toilet brush", "1477": "remote controls", "1478": "suits", "1479": "cotton", "1480": "tents", "1481": "water glass", "1482": "healthy", "1483": "envelope", "1484": "baking sheet", "1485": "marker", "1486": "muffins", "1487": "salon", "1488": "snow flakes", "1489": "dry erase board", "1490": "fishing pole", "1491": "lighthouse", "1492": "earphones", "1493": "photographer", "1494": "gorilla", "1495": "seeds", "1496": "sticks", "1497": "shopping cart", "1498": "pears", "1499": "alarm clock", "1500": "tree branch", "1501": "almonds", "1502": "theater", 
"1503": "tiger", "1504": "forward", "1505": "temple", "1506": "hedge", "1507": "kitchen towel", "1508": "motorcycles", "1509": "garland", "1510": "pudding", "1511": "vintage", "1512": "coarse", "1513": "swans", "1514": "pretzels", "1515": "swamp", "1516": "dense", "1517": "auditorium", "1518": "daisy", "1519": "dish soap", "1520": "opaque", "1521": "french toast", "1522": "straight", "1523": "tennis balls", "1524": "orchid", "1525": "champagne", "1526": "pizza pie", "1527": "egg roll", "1528": "flatbread", "1529": "coconuts", "1530": "flip flop", "1531": "skyscraper", "1532": "fur", "1533": "denim", "1534": "scaffolding", "1535": "coin", "1536": "policemen", "1537": "stuffed bears", "1538": "cane", "1539": "cloth", "1540": "cake stand", "1541": "pump", "1542": "soccer", "1543": "loaf", "1544": "knife block", "1545": "ski lift", "1546": "modern", "1547": "old fashioned", "1548": "cotton dessert", "1549": "trays", "1550": "smooth", "1551": "lunch", "1552": "dull", "1553": "shield", "1554": "dinner", "1555": "cloths", "1556": "waterfall", "1557": "waste basket", "1558": "scarce", "1559": "shaking hands", "1560": "salt shaker", "1561": "pocket watch", "1562": "unhealthy", "1563": "lounge", "1564": "moose", "1565": "seaweed", "1566": "panda bears", "1567": "candies", "1568": "batteries", "1569": "comb", "1570": "wallet", "1571": "students", "1572": "school", "1573": "geese", "1574": "apartment building", "1575": "stars", "1576": "granola", "1577": "leopard", "1578": "cardboard", "1579": "shoe laces", "1580": "hairbrush", "1581": "chef hat", "1582": "crystal", "1583": "pizza tray", "1584": "bread box", "1585": "luggage cart", "1586": "apartment", "1587": "angry", "1588": "characters", "1589": "oak tree", "1590": "angry bird", "1591": "backpacks", "1592": "shaving cream", "1593": "cemetery", "1594": "lace", "1595": "anchovies", "1596": "dresses", "1597": "paper towels", "1598": "garage door", "1599": "vanilla", "1600": "uncooked", "1601": "battery", "1602": "butter 
knife", "1603": "mint", "1604": "package", "1605": "biscuits", "1606": "son", "1607": "cake pan", "1608": "snack", "1609": "riding boots", "1610": "rooftop", "1611": "irregular", "1612": "baked", "1613": "kittens", "1614": "sconce", "1615": "serving dish", "1616": "mirrors", "1617": "taking photo", "1618": "bubble", "1619": "printers", "1620": "ice cube", "1621": "knee pads", "1622": "doors", "1623": "ceiling light", "1624": "cotton candy", "1625": "helmets", "1626": "cheese cube", "1627": "bartender", "1628": "pistachio", "1629": "ugly", "1630": "sausages", "1631": "beer can", "1632": "baker", "1633": "coffee beans", "1634": "almond", "1635": "ovens", "1636": "curled", "1637": "underneath", "1638": "suitcases", "1639": "food", "1640": "taking bath", "1641": "vendor", "1642": "lizard", "1643": "homes", "1644": "shops", "1645": "mannequins", "1646": "turtle", "1647": "blossom", "1648": "chickpeas", "1649": "outside", "1650": "ornament", "1651": "milk carton", "1652": "mexican food", "1653": "seed", "1654": "avocados", "1655": "masks", "1656": "pumpkins", "1657": "papaya", "1658": "stapler", "1659": "hamburgers", "1660": "earrings", "1661": "back", "1662": "wildflowers", "1663": "bats", "1664": "hand soap", "1665": "fresh", "1666": "manhole cover", "1667": "dolphins", "1668": "thermometer", "1669": "castle", "1670": "cones", "1671": "pizza cutter", "1672": "pizza box", "1673": "heel", "1674": "salmon", "1675": "door frame", "1676": "taco", "1677": "pork", "1678": "wedding", "1679": "bubbles", "1680": "eiffel tower", "1681": "cranberry", "1682": "napkin dispenser", "1683": "bandage", "1684": "elmo", "1685": "notepad", "1686": "pepper shaker", "1687": "artichokes", "1688": "tools", "1689": "window frame", "1690": "steamed", "1691": "groceries", "1692": "lily", "1693": "cookbook", "1694": "paper container", "1695": "hippos", "1696": "hilltop", "1697": "twig", "1698": "animal", "1699": "wii game", "1700": "beads", "1701": "lilies", "1702": "towel dispenser", "1703": 
"blood", "1704": "ladles", "1705": "jewelry", "1706": "hearts", "1707": "snow boots", "1708": "ahead", "1709": "utensil holder", "1710": "football", "1711": "bird cage", "1712": "dish drainer", "1713": "cds", "1714": "banana peel", "1715": "vines", "1716": "pizza crust", "1717": "shopper", "1718": "tags", "1719": "keypad", "1720": "dinosaurs", "1721": "stir fry", "1722": "bomb", "1723": "necklaces", "1724": "packages", "1725": "uniforms", "1726": "sparse", "1727": "unhappy", "1728": "control panel", "1729": "antennas", "1730": "spray can", "1731": "feathers", "1732": "electric toothbrush", "1733": "potted", "1734": "juice box", "1735": "toolbox", "1736": "visitor", "1737": "ornaments", "1738": "sign post", "1739": "baseball mitt", "1740": "robot", "1741": "blackberries", "1742": "desk lamp", "1743": "glaze", "1744": "melons", "1745": "cookie dough", "1746": "paint brush", "1747": "mustard bottle", "1748": "apple logo", "1749": "salad dressing", "1750": "mattresses", "1751": "cash register", "1752": "nest", "1753": "knee pad", "1754": "out", "1755": "toasted", "1756": "price tag", "1757": "canisters", "1758": "christmas light", "1759": "antelopes", "1760": "dream catcher", "1761": "student", "1762": "fine", "1763": "kangaroo", "1764": "smoke stack", "1765": "music", "1766": "cages", "1767": "soccer balls", "1768": "ostriches", "1769": "coffee shop", "1770": "ice cubes", "1771": "downward", "1772": "televisions", "1773": "candle holder", "1774": "grinder", "1775": "xbox controller", "1776": "cricket", "1777": "hurdle", "1778": "obstacle", "1779": "lab coat", "1780": "gas pump", "1781": "banana bunches", "1782": "bell tower", "1783": "waitress", "1784": "in mirror", "1785": "coats", "1786": "attic", "1787": "sugar packet", "1788": "taking photograph", "1789": "mountain peak", "1790": "pub", "1791": "silk", "1792": "blossoms", "1793": "pillars", "1794": "scrub brush", "1795": "kiwis", "1796": "octagonal", "1797": "parachutes", "1798": "lions", "1799": "sideways", 
"1800": "egg carton", "1801": "visitors", "1802": "sunflowers", "1803": "shoe lace", "1804": "rhinos", "1805": "elbow pad", "1806": "egg yolk", "1807": "outlets", "1808": "baseball bats", "1809": "life jackets", "1810": "snakes", "1811": "vitamins", "1812": "cigar", "1813": "upwards", "1814": "beneath", "1815": "taking photos", "1816": "storage box", "1817": "armor", "1818": "cookie jar", "1819": "rounded", "1820": "seat belt", "1821": "owls", "1822": "appetizer", "1823": "beer cans", "1824": "stores", "1825": "shoppers", "1826": "bird house", "1827": "sugar packets", "1828": "wild", "1829": "dvd players", "1830": "towers", "1831": "water bottles", "1832": "waves", "1833": "pikachu", "1834": "wolves", "1835": "immature", "1836": "shampoo", "1837": "orchids", "1838": "elevator", "1839": "taking notes", "1840": "wave", "1841": "horse hoof", "1842": "bottle cap"}, {"PAD": 0, "UNK": 1, "CLS": 2, "is": 3, "the": 4, "sky": 5, "dark": 6, "what": 7, "on": 8, "white": 9, "wall": 10, "that": 11, "pipe": 12, "red": 13, "tall": 14, "clock": 15, "small": 16, "or": 17, "large": 18, "who": 19, "wearing": 20, "a": 21, "shirt": 22, "do": 23, "you": 24, "think": 25, "he": 26, "sleeping": 27, "in": 28, "cheese": 29, "to": 30, "left": 31, "of": 32, "food": 33, "plate": 34, "piece": 35, "furniture": 36, "kind": 37, "right": 38, "chair": 39, "steel": 40, "spatula": 41, "top": 42, "image": 43, "woman": 44, "man": 45, "color": 46, "are": 47, "there": 48, "any": 49, "umbrellas": 50, "motorcycles": 51, "photograph": 52, "bird": 53, "coat": 54, "either": 55, "traffic": 56, "light": 57, "stop": 58, "sign": 59, "which": 60, "cups": 61, "hanging": 62, "cup": 63, "people": 64, "mirror": 65, "part": 66, "both": 67, "bikes": 68, "and": 69, "cars": 70, "this": 71, "scene": 72, "animal": 73, "sits": 74, "bench": 75, "side": 76, "ground": 77, "made": 78, "jeans": 79, "look": 80, "bicycles": 81, "helmets": 82, "skier": 83, "helmet": 84, "children": 85, "picture": 86, "rock": 87, "tools": 88, "kids": 
89, "grapefruit": 90, "hair": 91, "street": 92, "where": 93, "photo": 94, "green": 95, "chairs": 96, "bottom": 97, "field": 98, "see": 99, "horses": 100, "eating": 101, "from": 102, "grass": 103, "striped": 104, "ripe": 105, "bananas": 106, "above": 107, "newspaper": 108, "device": 109, "table": 110, "laptop": 111, "couch": 112, "fast": 113, "looks": 114, "does": 115, "seem": 116, "be": 117, "wooden": 118, "bread": 119, "box": 120, "fruit": 121, "called": 122, "fork": 123, "ski": 124, "napkin": 125, "stuffed": 126, "dog": 127, "front": 128, "cell": 129, "phone": 130, "whats": 131, "doing": 132, "tree": 133, "little": 134, "curtain": 135, "soccer": 136, "ball": 137, "have": 138, "different": 139, "colors": 140, "pillow": 141, "bed": 142, "post": 143, "standing": 144, "person": 145, "surf": 146, "board": 147, "wear": 148, "glasses": 149, "how": 150, "rug": 151, "silver": 152, "pink": 153, "sofa": 154, "bicycle": 155, "surfboard": 156, "middle": 157, "pedestrian": 158, "scooters": 159, "bag": 160, "car": 161, "cone": 162, "elbow": 163, "pad": 164, "other": 165, "black": 166, "giraffes": 167, "elephants": 168, "oven": 169, "customer": 170, "under": 171, "yellow": 172, "window": 173, "girl": 174, "pants": 175, "size": 176, "near": 177, "watching": 178, "running": 179, "trousers": 180, "feathers": 181, "blue": 182, "keyboard": 183, "metallic": 184, "boxes": 185, "apples": 186, "bike": 187, "holding": 188, "bat": 189, "wardrobe": 190, "gray": 191, "umpire": 192, "cooking": 193, "utensil": 194, "faucet": 195, "utensils": 196, "tray": 197, "type": 198, "step": 199, "place": 200, "orange": 201, "fries": 202, "vehicle": 203, "isnt": 204, "bottle": 205, "pedestrians": 206, "surfboards": 207, "freezer": 208, "remote": 209, "item": 210, "television": 211, "cabinet": 212, "happy": 213, "baby": 214, "statue": 215, "curtains": 216, "truck": 217, "highway": 218, "socks": 219, "inside": 220, "boots": 221, "bags": 222, "sidewalk": 223, "cap": 224, "church": 225, "women": 226, "men": 
227, "same": 228, "shoe": 229, "lace": 230, "lamps": 231, "books": 232, "backpack": 233, "vegetable": 234, "knee": 235, "pads": 236, "backpacks": 237, "umbrella": 238, "behind": 239, "spectator": 240, "fences": 241, "mirrors": 242, "not": 243, "brown": 244, "bleachers": 245, "by": 246, "lamb": 247, "it": 248, "elephant": 249, "walking": 250, "closed": 251, "open": 252, "coffee": 253, "tables": 254, "glass": 255, "container": 256, "cow": 257, "containers": 258, "peppers": 259, "tomatoes": 260, "frosting": 261, "fence": 262, "pillows": 263, "towels": 264, "balancing": 265, "skateboard": 266, "tent": 267, "sneakers": 268, "metal": 269, "pizza": 270, "cart": 271, "sweater": 272, "mushroom": 273, "blanket": 274, "animals": 275, "officers": 276, "sitting": 277, "hats": 278, "scarves": 279, "parking": 280, "lot": 281, "tea": 282, "kettle": 283, "meat": 284, "spinach": 285, "sandwich": 286, "name": 287, "shopping": 288, "counter": 289, "cat": 290, "clear": 291, "overcast": 292, "breads": 293, "paper": 294, "vegetables": 295, "carrots": 296, "folding": 297, "vases": 298, "spoon": 299, "plastic": 300, "kite": 301, "triangular": 302, "shape": 303, "wears": 304, "appliance": 305, "baked": 306, "good": 307, "platform": 308, "trains": 309, "player": 310, "flag": 311, "sandal": 312, "looking": 313, "at": 314, "weather": 315, "building": 316, "toilet": 317, "than": 318, "dryer": 319, "guys": 320, "frisbee": 321, "bicyclist": 322, "center": 323, "next": 324, "lady": 325, "tents": 326, "lights": 327, "pole": 328, "desks": 329, "lying": 330, "kayak": 331, "wood": 332, "these": 333, "species": 334, "hot": 335, "clothing": 336, "saucer": 337, "mug": 338, "jacket": 339, "crossing": 340, "road": 341, "toy": 342, "teddy": 343, "bear": 344, "doll": 345, "doors": 346, "windows": 347, "shelf": 348, "swinging": 349, "racket": 350, "skateboarders": 351, "air": 352, "toothbrush": 353, "full": 354, "sheep": 355, "purple": 356, "palm": 357, "van": 358, "chain": 359, "train": 360, "ovens": 361, 
"cabinets": 362, "printers": 363, "desk": 364, "frisbees": 365, "fruits": 366, "walnuts": 367, "kid": 368, "indoors": 369, "pot": 370, "pan": 371, "an": 372, "tiles": 373, "pans": 374, "horse": 375, "bowl": 376, "lid": 377, "around": 378, "parachutes": 379, "kites": 380, "biscuit": 381, "down": 382, "hold": 383, "dining": 384, "motorbike": 385, "driving": 386, "tvs": 387, "pen": 388, "plants": 389, "child": 390, "skirt": 391, "beach": 392, "machine": 393, "cats": 394, "sand": 395, "nightstand": 396, "beds": 397, "hat": 398, "cones": 399, "shorts": 400, "big": 401, "plant": 402, "two": 403, "material": 404, "as": 405, "outdoors": 406, "lamp": 407, "cooked": 408, "platter": 409, "presented": 410, "toilets": 411, "shower": 412, "carpet": 413, "beige": 414, "flowers": 415, "benches": 416, "leaves": 417, "logo": 418, "teal": 419, "soap": 420, "bottles": 421, "plates": 422, "vase": 423, "boy": 424, "snow": 425, "refrigerators": 426, "towel": 427, "fridge": 428, "wig": 429, "coats": 430, "placemats": 431, "clocks": 432, "cafe": 433, "dress": 434, "pepper": 435, "shakers": 436, "locks": 437, "plane": 438, "airplane": 439, "cupboards": 440, "end": 441, "cakes": 442, "giraffe": 443, "bucket": 444, "candles": 445, "using": 446, "door": 447, "colorful": 448, "blond": 449, "has": 450, "adidas": 451, "computer": 452, "mouse": 453, "skis": 454, "crate": 455, "instrument": 456, "ladders": 457, "piano": 458, "pictures": 459, "house": 460, "could": 461, "pulls": 462, "stove": 463, "lemon": 464, "cucumber": 465, "square": 466, "smooth": 467, "him": 468, "snowy": 469, "sandy": 470, "skiing": 471, "goggles": 472, "bacon": 473, "can": 474, "donuts": 475, "bus": 476, "laptops": 477, "video": 478, "cameras": 479, "liquid": 480, "bun": 481, "banana": 482, "mannequins": 483, "napkins": 484, "sit": 485, "she": 486, "tiny": 487, "old": 488, "hedges": 489, "feeding": 490, "shown": 491, "boot": 492, "shoes": 493, "necktie": 494, "ties": 495, "ladder": 496, "covering": 497, "carrot": 498, 
"brunette": 499, "magazines": 500, "pieces": 501, "book": 502, "groomed": 503, "onion": 504, "for": 505, "umpires": 506, "potatoes": 507, "belt": 508, "glove": 509, "motorcycle": 510, "bridge": 511, "trash": 512, "bin": 513, "pattern": 514, "batter": 515, "liquor": 516, "kittens": 517, "clean": 518, "bright": 519, "bookcases": 520, "tissue": 521, "drinking": 522, "buildings": 523, "tan": 524, "pictured": 525, "dressing": 526, "soup": 527, "salad": 528, "watercraft": 529, "water": 530, "bush": 531, "riding": 532, "suitcases": 533, "iron": 534, "branches": 535, "trays": 536, "donkey": 537, "computers": 538, "with": 539, "watch": 540, "drawer": 541, "gas": 542, "boys": 543, "bears": 544, "dishwasher": 545, "below": 546, "curly": 547, "asparaguss": 548, "mans": 549, "uses": 550, "tool": 551, "wolves": 552, "scarf": 553, "taxi": 554, "common": 555, "spoons": 556, "comforter": 557, "appear": 558, "athletic": 559, "sock": 560, "murky": 561, "wavy": 562, "duck": 563, "swimming": 564, "game": 565, "tshirt": 566, "pitcher": 567, "t": 568, "microwaves": 569, "sheets": 570, "carrying": 571, "wheels": 572, "ice": 573, "makers": 574, "wires": 575, "olives": 576, "foggy": 577, "stands": 578, "types": 579, "gate": 580, "wire": 581, "hospital": 582, "catcher": 583, "long": 584, "sleeved": 585, "blouse": 586, "drives": 587, "hook": 588, "flags": 589, "located": 590, "fireplace": 591, "butter": 592, "knives": 593, "crates": 594, "waiting": 595, "cake": 596, "knife": 597, "runs": 598, "sugar": 599, "packets": 600, "brush": 601, "covered": 602, "map": 603, "skateboarder": 604, "cookie": 605, "pots": 606, "blender": 607, "apple": 608, "carriage": 609, "speaker": 610, "guy": 611, "balls": 612, "trees": 613, "moss": 614, "stick": 615, "motorbikes": 616, "gold": 617, "trucks": 618, "buses": 619, "snowboarding": 620, "boat": 621, "shrub": 622, "ropes": 623, "basket": 624, "restaurant": 625, "crab": 626, "seat": 627, "bookshelf": 628, "signal": 629, "candies": 630, "cranberries": 631, 
"dried": 632, "bell": 633, "younger": 634, "tomato": 635, "thing": 636, "cloudy": 637, "round": 638, "snowpants": 639, "marina": 640, "bookcase": 641, "wine": 642, "sheet": 643, "pillowcase": 644, "stapler": 645, "ketchup": 646, "zebra": 647, "gloves": 648, "papers": 649, "out": 650, "denim": 651, "kitten": 652, "comfortable": 653, "necklace": 654, "screen": 655, "tablecloth": 656, "calm": 657, "choppy": 658, "deer": 659, "hose": 660, "nightstands": 661, "forks": 662, "ring": 663, "soft": 664, "drinks": 665, "peacocks": 666, "dogs": 667, "beneath": 668, "dragon": 669, "syrup": 670, "mobile": 671, "dirty": 672, "contains": 673, "silverware": 674, "shop": 675, "mixer": 676, "microwave": 677, "control": 678, "american": 679, "rope": 680, "perched": 681, "words": 682, "mailbox": 683, "beside": 684, "handbag": 685, "pavement": 686, "underneath": 687, "squirrel": 688, "outfit": 689, "bridges": 690, "bulbs": 691, "refrigerator": 692, "rectangular": 693, "short": 694, "store": 695, "womans": 696, "up": 697, "wide": 698, "dish": 699, "beverage": 700, "cigarettes": 701, "mats": 702, "alien": 703, "male": 704, "moving": 705, "cowboy": 706, "engine": 707, "screens": 708, "keyboards": 709, "clouds": 710, "ocean": 711, "mugs": 712, "wineglass": 713, "lipstick": 714, "lift": 715, "tennis": 716, "squash": 717, "flower": 718, "pastry": 719, "walks": 720, "berry": 721, "shelves": 722, "hydrant": 723, "fire": 724, "playing": 725, "baseball": 726, "river": 727, "vacuum": 728, "cleaner": 729, "floor": 730, "camera": 731, "bikers": 732, "bartender": 733, "parked": 734, "surfing": 735, "bells": 736, "kitchen": 737, "alarm": 738, "telephone": 739, "was": 740, "taken": 741, "wicker": 742, "brooms": 743, "vehicles": 744, "onions": 745, "apartment": 746, "hill": 747, "cutting": 748, "eyes": 749, "hills": 750, "wii": 751, "controller": 752, "extinguisher": 753, "items": 754, "countertop": 755, "leaning": 756, "performing": 757, "trick": 758, "swim": 759, "suit": 760, "wet": 761, "airplanes": 
762, "skinny": 763, "sandals": 764, "rhinos": 765, "steps": 766, "seagull": 767, "headboard": 768, "mattress": 769, "dresser": 770, "quilt": 771, "number": 772, "walk": 773, "grazing": 774, "drawers": 775, "reflected": 776, "dessert": 777, "baking": 778, "houses": 779, "length": 780, "beard": 781, "cord": 782, "scooter": 783, "like": 784, "letters": 785, "donut": 786, "sprinkles": 787, "birds": 788, "wagon": 789, "croissant": 790, "puppys": 791, "checkered": 792, "coins": 793, "used": 794, "make": 795, "broccoli": 796, "cows": 797, "zebras": 798, "toasts": 799, "eggs": 800, "comforters": 801, "filled": 802, "crosswalk": 803, "gender": 804, "armchair": 805, "pastries": 806, "tee": 807, "hay": 808, "pouch": 809, "rackets": 810, "dispenser": 811, "thick": 812, "crust": 813, "lettuce": 814, "chicken": 815, "pepperoni": 816, "tongs": 817, "clothes": 818, "cds": 819, "turned": 820, "off": 821, "throwing": 822, "snowboarder": 823, "soda": 824, "electrical": 825, "outlet": 826, "pines": 827, "panda": 828, "pizzas": 829, "microphone": 830, "swan": 831, "aircraft": 832, "all": 833, "washer": 834, "stainless": 835, "monkey": 836, "cans": 837, "safety": 838, "vests": 839, "sticker": 840, "garbage": 841, "surrounding": 842, "yard": 843, "bushes": 844, "bar": 845, "stools": 846, "collar": 847, "beans": 848, "mushrooms": 849, "cabbage": 850, "branch": 851, "pulled": 852, "cubes": 853, "stand": 854, "suitcase": 855, "sponge": 856, "golden": 857, "cream": 858, "passengers": 859, "ham": 860, "cupboard": 861, "squashes": 862, "mice": 863, "headphones": 864, "cab": 865, "city": 866, "uniform": 867, "bakery": 868, "juice": 869, "sliced": 870, "ridged": 871, "burger": 872, "purse": 873, "mask": 874, "paddle": 875, "paddles": 876, "routers": 877, "calculators": 878, "dressed": 879, "armors": 880, "pliers": 881, "blonde": 882, "phones": 883, "girls": 884, "cellphone": 885, "tourist": 886, "drink": 887, "salt": 888, "shaker": 889, "pickles": 890, "uncooked": 891, "buns": 892, "bathroom": 
893, "living": 894, "room": 895, "tank": 896, "steering": 897, "wheel": 898, "tablecloths": 899, "rough": 900, "bowls": 901, "potato": 902, "wallet": 903, "gravel": 904, "shampoos": 905, "ladys": 906, "bracelet": 907, "shields": 908, "airport": 909, "furry": 910, "sailboat": 911, "briefcases": 912, "couches": 913, "blazer": 914, "holds": 915, "sink": 916, "opens": 917, "jet": 918, "star": 919, "heart": 920, "snowboard": 921, "produce": 922, "hippos": 923, "dvd": 924, "tv": 925, "her": 926, "planter": 927, "wristband": 928, "controls": 929, "flying": 930, "staring": 931, "envelopes": 932, "pine": 933, "sheeps": 934, "makes": 935, "kick": 936, "kicks": 937, "chocolate": 938, "coconut": 939, "sculpture": 940, "rounded": 941, "garden": 942, "sea": 943, "foam": 944, "lies": 945, "touching": 946, "headband": 947, "toothbrushes": 948, "floating": 949, "policeman": 950, "apron": 951, "slices": 952, "mashed": 953, "corn": 954, "leggings": 955, "balloon": 956, "dumpster": 957, "spectators": 958, "colored": 959, "cereal": 960, "jersey": 961, "pulling": 962, "walls": 963, "trunk": 964, "station": 965, "asparagus": 966, "leather": 967, "rubber": 968, "net": 969, "eats": 970, "notebook": 971, "boars": 972, "rabbits": 973, "sponges": 974, "shade": 975, "soaps": 976, "drainers": 977, "merchandise": 978, "cash": 979, "registers": 980, "shallow": 981, "deep": 982, "life": 983, "hand": 984, "public": 985, "mustard": 986, "rugs": 987, "cages": 988, "menu": 989, "object": 990, "vending": 991, "older": 992, "tractor": 993, "chrome": 994, "tie": 995, "catching": 996, "cover": 997, "pens": 998, "undershirt": 999, "chimney": 1000, "zoo": 1001, "peanuts": 1002, "olive": 1003, "monitor": 1004, "scissors": 1005, "word": 1006, "printed": 1007, "blinds": 1008, "mat": 1009, "huge": 1010, "barn": 1011, "cloth": 1012, "console": 1013, "crouching": 1014, "chewing": 1015, "sauce": 1016, "ship": 1017, "grapes": 1018, "through": 1019, "forest": 1020, "radiator": 1021, "hangs": 1022, "young": 1023, 
"passenger": 1024, "concrete": 1025, "polar": 1026, "topped": 1027, "nose": 1028, "flamingoes": 1029, "sausage": 1030, "cupcake": 1031, "canopy": 1032, "goat": 1033, "cloths": 1034, "poster": 1035, "conditions": 1036, "printer": 1037, "leans": 1038, "against": 1039, "pilot": 1040, "trailer": 1041, "oval": 1042, "wristwatch": 1043, "placemat": 1044, "basil": 1045, "dirt": 1046, "brief": 1047, "case": 1048, "jar": 1049, "pecan": 1050, "eye": 1051, "clips": 1052, "flames": 1053, "home": 1054, "bending": 1055, "lock": 1056, "tape": 1057, "sack": 1058, "balloons": 1059, "frame": 1060, "raw": 1061, "onto": 1062, "trashcan": 1063, "aquarium": 1064, "spider": 1065, "tap": 1066, "bees": 1067, "mother": 1068, "goose": 1069, "parachute": 1070, "tower": 1071, "wetsuit": 1072, "talking": 1073, "entering": 1074, "skateboards": 1075, "narrow": 1076, "ipod": 1077, "bare": 1078, "canister": 1079, "garland": 1080, "picnic": 1081, "driver": 1082, "brick": 1083, "envelope": 1084, "seems": 1085, "healthier": 1086, "chimneys": 1087, "drain": 1088, "bathtub": 1089, "objects": 1090, "trunks": 1091, "rocks": 1092, "direction": 1093, "empty": 1094, "fisherman": 1095, "canoe": 1096, "decorated": 1097, "blueberries": 1098, "raspberries": 1099, "caps": 1100, "reading": 1101, "snowboards": 1102, "park": 1103, "entertainment": 1104, "devices": 1105, "painting": 1106, "ducks": 1107, "blankets": 1108, "skating": 1109, "ear": 1110, "buds": 1111, "pipes": 1112, "jumping": 1113, "stars": 1114, "beer": 1115, "batters": 1116, "players": 1117, "hows": 1118, "bats": 1119, "maker": 1120, "egg": 1121, "sandwiches": 1122, "lions": 1123, "radio": 1124, "mickey": 1125, "cutter": 1126, "tail": 1127, "coleslaw": 1128, "dry": 1129, "televisions": 1130, "mud": 1131, "venue": 1132, "pigeons": 1133, "gulls": 1134, "uncomfortable": 1135, "luggage": 1136, "oranges": 1137, "oar": 1138, "wreath": 1139, "giant": 1140, "toys": 1141, "desserts": 1142, "crackers": 1143, "posing": 1144, "sculptures": 1145, "toddler": 1146, 
"pig": 1147, "marble": 1148, "avocado": 1149, "peeled": 1150, "unpeeled": 1151, "vest": 1152, "leading": 1153, "still": 1154, "meal": 1155, "fur": 1156, "toppings": 1157, "over": 1158, "rice": 1159, "barefoot": 1160, "roof": 1161, "tin": 1162, "mountain": 1163, "wrist": 1164, "band": 1165, "planters": 1166, "rings": 1167, "snacks": 1168, "faucets": 1169, "tracks": 1170, "heavy": 1171, "wrinkled": 1172, "couple": 1173, "facing": 1174, "stone": 1175, "represent": 1176, "resting": 1177, "figurine": 1178, "urinal": 1179, "thin": 1180, "showing": 1181, "blueberry": 1182, "money": 1183, "modern": 1184, "tire": 1185, "arrow": 1186, "ingredient": 1187, "lake": 1188, "waiters": 1189, "articles": 1190, "heater": 1191, "skateboarding": 1192, "sprinkled": 1193, "pie": 1194, "engineers": 1195, "toddlers": 1196, "catchers": 1197, "ostriches": 1198, "granite": 1199, "hallway": 1200, "candle": 1201, "smokes": 1202, "cigarette": 1203, "jumpsuit": 1204, "stoves": 1205, "moon": 1206, "rabbit": 1207, "sweet": 1208, "fat": 1209, "ladles": 1210, "hydrants": 1211, "sword": 1212, "cooler": 1213, "today": 1214, "depth": 1215, "bouquets": 1216, "working": 1217, "pushing": 1218, "stroller": 1219, "mannequin": 1220, "rocky": 1221, "sleeveless": 1222, "partly": 1223, "boats": 1224, "skillet": 1225, "parsley": 1226, "tasty": 1227, "tissues": 1228, "gull": 1229, "angry": 1230, "stickers": 1231, "cube": 1232, "cage": 1233, "maybe": 1234, "stormy": 1235, "pier": 1236, "cabin": 1237, "leopards": 1238, "penguins": 1239, "bandana": 1240, "grassy": 1241, "wrapped": 1242, "workers": 1243, "farm": 1244, "dad": 1245, "female": 1246, "kept": 1247, "strawberries": 1248, "shrubs": 1249, "adult": 1250, "garage": 1251, "ladle": 1252, "monitors": 1253, "stones": 1254, "bath": 1255, "tub": 1256, "speakers": 1257, "kneeling": 1258, "leafy": 1259, "coming": 1260, "jets": 1261, "cookers": 1262, "rides": 1263, "robot": 1264, "dumpsters": 1265, "before": 1266, "charger": 1267, "tangerine": 1268, "soldiers": 1269, 
"babies": 1270, "pineapple": 1271, "tied": 1272, "artichokes": 1273, "tinted": 1274, "staircase": 1275, "suvs": 1276, "shield": 1277, "numbers": 1278, "almond": 1279, "shore": 1280, "vans": 1281, "toast": 1282, "pancake": 1283, "guitars": 1284, "microphones": 1285, "hippo": 1286, "baseballs": 1287, "sour": 1288, "herbs": 1289, "drivers": 1290, "cloudless": 1291, "bagels": 1292, "mixers": 1293, "toasters": 1294, "drapes": 1295, "desktop": 1296, "muffin": 1297, "hang": 1298, "hugging": 1299, "noodles": 1300, "grilled": 1301, "license": 1302, "play": 1303, "plays": 1304, "buckets": 1305, "office": 1306, "omelette": 1307, "mound": 1308, "character": 1309, "dispensers": 1310, "signs": 1311, "sad": 1312, "sunlit": 1313, "broom": 1314, "rooms": 1315, "kitchens": 1316, "panel": 1317, "calf": 1318, "log": 1319, "dishwashers": 1320, "lemons": 1321, "skyscraper": 1322, "chef": 1323, "styrofoam": 1324, "veggies": 1325, "tag": 1326, "conditioner": 1327, "ottoman": 1328, "past": 1329, "cement": 1330, "cobblestone": 1331, "melted": 1332, "armchairs": 1333, "toolboxes": 1334, "puddle": 1335, "vines": 1336, "pomegranate": 1337, "swimmers": 1338, "harbor": 1339, "real": 1340, "sunny": 1341, "canes": 1342, "grinders": 1343, "machines": 1344, "flip": 1345, "flop": 1346, "coca": 1347, "cola": 1348, "coke": 1349, "diapers": 1350, "cardboard": 1351, "doughnut": 1352, "ceiling": 1353, "barrier": 1354, "shuttle": 1355, "shut": 1356, "kicking": 1357, "library": 1358, "sun": 1359, "village": 1360, "lawn": 1361, "patio": 1362, "surrounded": 1363, "artwork": 1364, "magazine": 1365, "fallen": 1366, "bartenders": 1367, "students": 1368, "jackets": 1369, "french": 1370, "throws": 1371, "surfers": 1372, "pears": 1373, "curved": 1374, "dock": 1375, "rose": 1376, "plaid": 1377, "ribs": 1378, "broken": 1379, "antique": 1380, "cookbooks": 1381, "head": 1382, "wheelchairs": 1383, "octagonal": 1384, "persons": 1385, "desert": 1386, "chinese": 1387, "leaf": 1388, "cheesecake": 1389, "binders": 1390, 
"hitting": 1391, "suv": 1392, "navy": 1393, "cauliflower": 1394, "grill": 1395, "taco": 1396, "power": 1397, "line": 1398, "lunch": 1399, "straight": 1400, "sparse": 1401, "grinder": 1402, "path": 1403, "childs": 1404, "puppy": 1405, "strawberry": 1406, "asian": 1407, "pasta": 1408, "guitar": 1409, "bull": 1410, "exiting": 1411, "eat": 1412, "high": 1413, "flies": 1414, "stuck": 1415, "flour": 1416, "fried": 1417, "smoothies": 1418, "outside": 1419, "purses": 1420, "jars": 1421, "jockey": 1422, "pond": 1423, "twig": 1424, "hairbrushes": 1425, "toaster": 1426, "powerlines": 1427, "lines": 1428, "melon": 1429, "helping": 1430, "serving": 1431, "mountains": 1432, "dinosaur": 1433, "decoration": 1434, "paintings": 1435, "butterfly": 1436, "display": 1437, "glowing": 1438, "bakers": 1439, "shoppers": 1440, "batteries": 1441, "extinguishers": 1442, "carts": 1443, "bandages": 1444, "ceramic": 1445, "canisters": 1446, "hamburger": 1447, "weapon": 1448, "frog": 1449, "visitors": 1450, "fresh": 1451, "ahead": 1452, "breakfast": 1453, "balcony": 1454, "railroad": 1455, "ostrich": 1456, "meters": 1457, "deck": 1458, "mane": 1459, "bouquet": 1460, "tone": 1461, "visible": 1462, "spray": 1463, "alongside": 1464, "caucasian": 1465, "pushed": 1466, "chalkboard": 1467, "vintage": 1468, "propeller": 1469, "reaching": 1470, "between": 1471, "fan": 1472, "coach": 1473, "seal": 1474, "hotdog": 1475, "officer": 1476, "decorative": 1477, "doughnuts": 1478, "crispy": 1479, "fathers": 1480, "taking": 1481, "wild": 1482, "meter": 1483, "toothpastes": 1484, "incomplete": 1485, "calculator": 1486, "cloud": 1487, "lion": 1488, "bedspread": 1489, "preserver": 1490, "casseroles": 1491, "horn": 1492, "cupcakes": 1493, "beautiful": 1494, "porcelain": 1495, "delicious": 1496, "bedroom": 1497, "chess": 1498, "monster": 1499, "pool": 1500, "appetizers": 1501, "khaki": 1502, "muffins": 1503, "contain": 1504, "castle": 1505, "beak": 1506, "piled": 1507, "shiny": 1508, "eyeglasses": 1509, "pencil": 
1510, "school": 1511, "shark": 1512, "audience": 1513, "toasted": 1514, "hurdle": 1515, "less": 1516, "healthy": 1517, "cookies": 1518, "cross": 1519, "snakes": 1520, "dinosaurs": 1521, "glazed": 1522, "card": 1523, "intersection": 1524, "mixed": 1525, "stares": 1526, "briefcase": 1527, "along": 1528, "chickens": 1529, "donkeys": 1530, "label": 1531, "graffiti": 1532, "stir": 1533, "fry": 1534, "sofas": 1535, "petting": 1536, "falling": 1537, "width": 1538, "carriages": 1539, "jeeps": 1540, "snake": 1541, "statues": 1542, "feeder": 1543, "celery": 1544, "powerline": 1545, "goats": 1546, "letter": 1547, "cords": 1548, "lie": 1549, "hoses": 1550, "dome": 1551, "closet": 1552, "creamy": 1553, "cooker": 1554, "bronze": 1555, "clay": 1556, "height": 1557, "preparing": 1558, "skiers": 1559, "hairy": 1560, "electric": 1561, "streetlight": 1562, "earring": 1563, "cherries": 1564, "monkeys": 1565, "stool": 1566, "ambulance": 1567, "berries": 1568, "back": 1569, "observable": 1570, "shelter": 1571, "pineapples": 1572, "seen": 1573, "tourists": 1574, "sweatshirt": 1575, "ramekin": 1576, "gun": 1577, "match": 1578, "sausages": 1579, "wool": 1580, "cigars": 1581, "pigs": 1582, "plugged": 1583, "into": 1584, "figurines": 1585, "hammers": 1586, "ride": 1587, "comb": 1588, "fish": 1589, "lambs": 1590, "painted": 1591, "drawing": 1592, "rainbow": 1593, "new": 1594, "bushy": 1595, "butterflies": 1596, "skyscrapers": 1597, "towers": 1598, "drum": 1599, "rain": 1600, "mouses": 1601, "stacked": 1602, "mangoes": 1603, "licking": 1604, "lean": 1605, "crosses": 1606, "dugout": 1607, "beef": 1608, "homemade": 1609, "smoke": 1610, "steam": 1611, "watermelon": 1612, "temple": 1613, "pockets": 1614, "market": 1615, "coconuts": 1616, "toward": 1617, "peacock": 1618, "candy": 1619, "asphalt": 1620, "dolphin": 1621, "crowd": 1622, "skate": 1623, "maroon": 1624, "crabs": 1625, "mothers": 1626, "sticky": 1627, "notes": 1628, "cyclist": 1629, "nuts": 1630, "oceans": 1631, "rivers": 1632, "hard": 
1633, "tangerines": 1634, "chopsticks": 1635, "chop": 1636, "sticks": 1637, "hotel": 1638, "footballs": 1639, "fluffy": 1640, "robe": 1641, "low": 1642, "wiis": 1643, "whisk": 1644, "lime": 1645, "garlic": 1646, "slice": 1647, "plantains": 1648, "blocks": 1649, "fishermen": 1650, "athletes": 1651, "heels": 1652, "graze": 1653, "carries": 1654, "frogs": 1655, "dolphins": 1656, "milk": 1657, "swing": 1658, "swings": 1659, "orchid": 1660, "palms": 1661, "lighthouse": 1662, "hedge": 1663, "bunched": 1664, "unhealthy": 1665, "camel": 1666, "article": 1667, "stew": 1668, "sons": 1669, "jockeys": 1670, "his": 1671, "brushing": 1672, "teeth": 1673, "mounted": 1674, "skatepark": 1675, "spiders": 1676, "folded": 1677, "crowns": 1678, "chasing": 1679, "bandage": 1680, "following": 1681, "mousepad": 1682, "military": 1683, "bald": 1684, "taxis": 1685, "croissants": 1686, "motorcyclist": 1687, "smiling": 1688, "wigs": 1689, "selling": 1690, "cucumbers": 1691, "kiwi": 1692, "peach": 1693, "symbol": 1694, "figure": 1695, "surfer": 1696, "crown": 1697, "watches": 1698, "spots": 1699, "ladies": 1700, "remotes": 1701, "garlands": 1702, "holder": 1703, "notebooks": 1704, "dragging": 1705, "burrito": 1706, "binder": 1707, "aluminum": 1708, "eggplants": 1709, "growing": 1710, "grows": 1711, "combs": 1712, "minivan": 1713, "vine": 1714, "porch": 1715, "photographer": 1716, "waste": 1717, "baskets": 1718, "radiators": 1719, "magnet": 1720, "crane": 1721, "runway": 1722, "projectors": 1723, "antenna": 1724, "antelope": 1725, "burner": 1726, "about": 1727, "hit": 1728, "bookshelves": 1729, "processor": 1730, "stairs": 1731, "spread": 1732, "macaroni": 1733, "waffle": 1734, "nut": 1735, "spatulas": 1736, "papaya": 1737, "tacoes": 1738, "reflecting": 1739, "scrambled": 1740, "steak": 1741, "wheelchair": 1742, "sailboats": 1743, "lighthouses": 1744, "crumbs": 1745, "served": 1746, "smelling": 1747, "penguin": 1748, "bunny": 1749, "watermelons": 1750, "blossom": 1751, "packet": 1752, 
"battery": 1753, "spice": 1754, "swimsuit": 1755, "gummy": 1756, "celeries": 1757, "walnut": 1758, "loaves": 1759, "snails": 1760, "papayas": 1761, "police": 1762, "arent": 1763, "tofu": 1764, "points": 1765, "pull": 1766, "unpaved": 1767, "mozzarella": 1768, "gifts": 1769, "broth": 1770, "plush": 1771, "bronwy": 1772, "museum": 1773, "bagel": 1774, "cane": 1775, "silk": 1776, "medicine": 1777, "tablet": 1778, "ethnic": 1779, "group": 1780, "topping": 1781, "beverages": 1782, "knit": 1783, "tossing": 1784, "cranberry": 1785, "projector": 1786, "brass": 1787, "dip": 1788, "artificial": 1789, "handmade": 1790, "lemonade": 1791, "squirrels": 1792, "grape": 1793, "roast": 1794, "flatbreads": 1795, "biker": 1796, "professional": 1797, "leafless": 1798, "typing": 1799, "carried": 1800, "chandelier": 1801, "collars": 1802, "wallpaper": 1803, "carry": 1804, "fountain": 1805, "boiled": 1806, "seasoned": 1807, "grabbing": 1808, "rainy": 1809, "beaches": 1810, "gardens": 1811, "shirts": 1812, "yogurt": 1813, "lush": 1814, "action": 1815, "swans": 1816, "bombs": 1817, "breast": 1818, "company": 1819, "edge": 1820, "evergreen": 1821, "walkway": 1822, "hardwood": 1823, "doorway": 1824, "powdered": 1825, "icing": 1826, "marker": 1827, "pasture": 1828, "sunflower": 1829, "mattresses": 1830, "factory": 1831, "cherry": 1832, "kettles": 1833, "employees": 1834, "sacks": 1835, "hung": 1836, "attached": 1837, "scrub": 1838, "brushes": 1839, "bathing": 1840, "dressers": 1841, "skater": 1842, "floats": 1843, "controllers": 1844, "chefs": 1845, "wolf": 1846, "tight": 1847, "dumplings": 1848, "pretty": 1849, "pandas": 1850, "fake": 1851, "portable": 1852, "granola": 1853, "canoes": 1854, "student": 1855, "cables": 1856, "entrance": 1857, "worn": 1858, "licks": 1859, "rolled": 1860, "shampoo": 1861, "biscuits": 1862, "cuts": 1863, "alert": 1864, "paint": 1865, "grow": 1866, "dishes": 1867, "parrots": 1868, "chili": 1869, "crooked": 1870, "sinks": 1871, "cliff": 1872, "helicopter": 1873, 
"saucers": 1874, "foil": 1875, "electronic": 1876, "splashing": 1877, "alcohol": 1878, "parmesan": 1879, "dolls": 1880, "snowsuit": 1881, "paved": 1882, "poodle": 1883, "cd": 1884, "unopened": 1885, "towards": 1886, "cut": 1887, "crumpled": 1888, "gadgets": 1889, "cheeseburger": 1890, "games": 1891, "toolbox": 1892, "buoy": 1893, "gown": 1894, "read": 1895, "soldier": 1896, "underwear": 1897, "outfits": 1898, "balding": 1899, "approach": 1900, "receipt": 1901, "roadside": 1902, "patterned": 1903, "erase": 1904, "boards": 1905, "stare": 1906, "draperies": 1907, "policemen": 1908, "vendor": 1909, "bracelets": 1910, "towing": 1911, "ottomen": 1912, "roll": 1913, "grouped": 1914, "guiding": 1915, "almonds": 1916, "fans": 1917, "bison": 1918, "ambulances": 1919, "lit": 1920, "ugly": 1921, "rifle": 1922, "strollers": 1923, "radios": 1924, "dotted": 1925, "urinals": 1926, "lipsticks": 1927, "wetsuits": 1928, "worker": 1929, "pear": 1930, "hairbrush": 1931, "customers": 1932, "roasted": 1933, "brownie": 1934, "fashioned": 1935, "jeep": 1936, "father": 1937, "angled": 1938, "shaded": 1939, "biting": 1940, "pigeon": 1941, "condiments": 1942, "floral": 1943, "peaches": 1944, "chandeliers": 1945, "pancakes": 1946, "glaze": 1947, "cowboys": 1948, "fly": 1949, "castles": 1950, "classroom": 1951, "parents": 1952, "slicing": 1953, "island": 1954, "kissing": 1955, "juicy": 1956, "cyclists": 1957, "rests": 1958, "cookbook": 1959, "whipped": 1960, "eggplant": 1961, "daughter": 1962, "mom": 1963, "kangaroo": 1964, "peas": 1965, "traveling": 1966, "notepad": 1967, "pork": 1968, "unripe": 1969, "snowboarders": 1970, "stage": 1971, "dinner": 1972, "horns": 1973, "meats": 1974, "copper": 1975, "farmer": 1976, "performer": 1977, "displayed": 1978, "drive": 1979, "tigers": 1980, "drape": 1981, "artichoke": 1982, "armor": 1983, "pianoes": 1984, "pump": 1985, "lips": 1986, "polo": 1987, "vendors": 1988, "cigar": 1989, "hamburgers": 1990, "observing": 1991, "bikini": 1992, "commercial": 1993, 
"avocadoes": 1994, "tortilla": 1995, "making": 1996, "jump": 1997, "spices": 1998, "sharks": 1999, "stump": 2000, "cracker": 2001, "marshmallow": 2002, "sniffing": 2003, "vitamins": 2004, "pajamas": 2005, "shrimp": 2006, "masks": 2007, "seaweed": 2008, "held": 2009, "washing": 2010, "whales": 2011, "cable": 2012, "wines": 2013, "weight": 2014, "rhino": 2015, "coin": 2016, "having": 2017, "meeting": 2018, "work": 2019, "marshmallows": 2020, "peanut": 2021, "raisins": 2022, "chips": 2023, "xbox": 2024, "wedding": 2025, "poodles": 2026, "dragonfly": 2027, "closets": 2028, "lego": 2029, "steamed": 2030, "close": 2031, "bricks": 2032, "bulb": 2033, "lizards": 2034, "opener": 2035, "limes": 2036, "salmon": 2037, "chopstick": 2038, "parent": 2039, "face": 2040, "they": 2041, "guacamole": 2042, "draped": 2043, "buoys": 2044, "drums": 2045, "trumpets": 2046, "blind": 2047, "parks": 2048, "fixture": 2049, "massive": 2050, "leaving": 2051, "raincoat": 2052, "weeds": 2053, "honey": 2054, "antelopes": 2055, "flakes": 2056, "arched": 2057, "art": 2058, "background": 2059, "eagle": 2060, "crystal": 2061, "ipods": 2062, "farmers": 2063, "grater": 2064, "cotton": 2065, "ramekins": 2066, "nutella": 2067, "burgundy": 2068, "daisy": 2069, "earphones": 2070, "flat": 2071, "dough": 2072, "octopus": 2073, "mangos": 2074, "hippoes": 2075, "shopper": 2076, "dense": 2077, "opening": 2078, "woven": 2079, "designed": 2080, "wallets": 2081, "industrial": 2082, "costume": 2083, "geese": 2084, "puffy": 2085, "straw": 2086, "satellite": 2087, "dvds": 2088, "cabs": 2089, "upside": 2090, "alligators": 2091, "hillside": 2092, "plain": 2093, "connected": 2094, "waffles": 2095, "owls": 2096, "rolls": 2097, "gift": 2098, "irregular": 2099, "grazes": 2100, "gentleman": 2101, "oil": 2102, "warm": 2103, "pointing": 2104, "palaces": 2105, "pumpkin": 2106, "blurry": 2107, "register": 2108, "those": 2109, "champagne": 2110, "blackberries": 2111, "digital": 2112, "chalkboards": 2113, "dragons": 2114, 
"puppies": 2115, "ships": 2116, "diaper": 2117, "octopodes": 2118, "cinnamon": 2119, "adjusting": 2120, "crowded": 2121, "brownies": 2122, "cluttered": 2123, "shining": 2124, "alligator": 2125, "terminal": 2126, "dragonflies": 2127, "zucchini": 2128, "oak": 2129, "hammer": 2130, "collared": 2131, "run": 2132, "glossy": 2133, "gourds": 2134, "approaching": 2135, "reflective": 2136, "manhole": 2137, "router": 2138, "bite": 2139, "flops": 2140, "bent": 2141, "boar": 2142, "pistachios": 2143, "staplers": 2144, "uniforms": 2145, "popcorn": 2146, "athlete": 2147, "sushi": 2148, "dream": 2149, "herd": 2150, "mesh": 2151, "wagons": 2152, "shuttles": 2153, "trying": 2154, "catch": 2155, "tablets": 2156, "leave": 2157, "loaf": 2158, "mango": 2159, "rotten": 2160, "football": 2161, "cabins": 2162, "come": 2163, "owl": 2164, "powder": 2165, "hits": 2166, "utility": 2167, "enclosing": 2168, "gadget": 2169, "zucchinis": 2170, "tractors": 2171, "curled": 2172, "reach": 2173, "pocket": 2174, "goal": 2175, "kisses": 2176, "magnets": 2177, "faces": 2178, "wooded": 2179, "town": 2180, "bamboo": 2181, "pastel": 2182, "raspberry": 2183, "parrot": 2184, "mooses": 2185, "beets": 2186, "edged": 2187, "vertical": 2188, "bunnies": 2189, "smoking": 2190, "lead": 2191, "herb": 2192, "climbing": 2193, "mixing": 2194, "hummus": 2195, "flatbread": 2196, "bedding": 2197, "wildflowers": 2198, "scattered": 2199, "shredded": 2200, "bunch": 2201, "whisks": 2202, "blank": 2203, "sunglasses": 2204, "framed": 2205, "blenders": 2206, "race": 2207, "moves": 2208, "raisin": 2209, "bone": 2210, "hilltop": 2211, "talks": 2212, "pea": 2213, "bulls": 2214, "shapes": 2215, "upholstered": 2216, "turkey": 2217, "sleep": 2218, "pale": 2219, "wait": 2220, "mousess": 2221, "carton": 2222, "browned": 2223, "lobster": 2224, "placed": 2225, "pushes": 2226, "cactus": 2227, "son": 2228, "eiffel": 2229, "roses": 2230, "swimmer": 2231, "smoothie": 2232, "block": 2233, "flowered": 2234, "tortillas": 2235, "picking": 2236, 
"fog": 2237, "mailboxes": 2238, "ethnicity": 2239, "gorilla": 2240, "harbors": 2241, "mall": 2242, "boulders": 2243, "counters": 2244, "spiky": 2245, "nike": 2246, "dips": 2247, "garment": 2248, "meadow": 2249, "notepads": 2250, "packed": 2251, "grapefruits": 2252, "bathrooms": 2253, "horizontal": 2254, "nests": 2255, "dryers": 2256, "tunnel": 2257, "follows": 2258, "photographers": 2259, "cutters": 2260, "scaffolding": 2261, "cellphones": 2262, "movies": 2263, "ginger": 2264, "pilots": 2265, "minivans": 2266, "flavor": 2267, "day": 2268, "eagles": 2269, "heaters": 2270, "obstacle": 2271, "clip": 2272, "reflect": 2273, "chopped": 2274, "seals": 2275, "pencils": 2276, "seagulls": 2277, "lab": 2278, "babys": 2279, "peak": 2280, "pillowcases": 2281, "covers": 2282, "drapery": 2283, "spear": 2284, "hearts": 2285, "cards": 2286, "chained": 2287, "earrings": 2288, "bug": 2289, "serve": 2290, "fall": 2291, "toothpicks": 2292, "calves": 2293, "squatting": 2294, "uneven": 2295, "waiter": 2296, "cappuccino": 2297, "centers": 2298, "snoopy": 2299, "across": 2300, "aliens": 2301, "sticking": 2302, "surround": 2303, "messy": 2304, "switch": 2305, "melons": 2306, "package": 2307, "sells": 2308, "double": 2309, "decker": 2310, "elmo": 2311, "stained": 2312, "dull": 2313, "lily": 2314, "thermometer": 2315, "unhappy": 2316, "toothpaste": 2317, "waving": 2318, "backyard": 2319, "climbs": 2320, "drawings": 2321, "climb": 2322, "pita": 2323, "peel": 2324, "cleaning": 2325, "twigs": 2326, "employee": 2327, "muddy": 2328, "lounge": 2329, "push": 2330, "feta": 2331, "fuzzy": 2332, "protective": 2333, "jewelry": 2334, "halved": 2335, "churches": 2336, "hotels": 2337, "volleyball": 2338, "vinegar": 2339, "garnish": 2340, "locomotive": 2341, "unlit": 2342, "away": 2343, "shirtless": 2344, "graters": 2345, "family": 2346, "bites": 2347, "tires": 2348, "atop": 2349, "logs": 2350, "fancy": 2351, "wok": 2352, "mitt": 2353, "padded": 2354, "avocados": 2355, "bedspreads": 2356, "crusty": 2357, 
"snack": 2358, "wired": 2359, "decorations": 2360, "christmas": 2361, "help": 2362, "helps": 2363, "support": 2364, "kangaroos": 2365, "sniffs": 2366, "sniff": 2367, "nest": 2368, "cheetahs": 2369, "going": 2370, "hallways": 2371, "stores": 2372, "kiwis": 2373, "kiosk": 2374, "their": 2375, "surrounds": 2376, "tilted": 2377, "vanilla": 2378, "leopard": 2379, "approaches": 2380, "overhead": 2381, "ruffled": 2382, "storage": 2383, "necklaces": 2384, "coral": 2385, "guns": 2386, "swords": 2387, "things": 2388, "mayonnaise": 2389, "turtle": 2390, "steep": 2391, "eaten": 2392, "skin": 2393, "pies": 2394, "textured": 2395, "chains": 2396, "comes": 2397, "pouring": 2398, "chickpeas": 2399, "wardrobes": 2400, "buying": 2401, "stack": 2402, "touch": 2403, "ravioli": 2404, "cathedral": 2405, "assorted": 2406, "seasonings": 2407, "pecans": 2408, "lizard": 2409, "appliances": 2410, "blooming": 2411, "snail": 2412, "pretzels": 2413, "trailers": 2414, "coaches": 2415, "bubbles": 2416, "conditioners": 2417, "boarding": 2418, "getting": 2419, "skaters": 2420, "burnt": 2421, "suits": 2422, "drainer": 2423, "consoles": 2424, "funny": 2425, "waitress": 2426, "lobsters": 2427, "palace": 2428, "packages": 2429, "tags": 2430, "gorillas": 2431, "taller": 2432, "litter": 2433, "observes": 2434, "pillars": 2435, "damaged": 2436, "pretzel": 2437, "pets": 2438, "peoples": 2439, "point": 2440, "crocodile": 2441, "float": 2442, "salon": 2443, "potted": 2444, "cockpit": 2445, "mustache": 2446, "foreign": 2447, "buy": 2448, "sealed": 2449, "washes": 2450, "strong": 2451, "mature": 2452, "kiss": 2453, "rusty": 2454, "pudding": 2455, "decorating": 2456, "beyond": 2457, "lined": 2458, "pet": 2459, "rest": 2460, "planes": 2461, "gravy": 2462, "loose": 2463, "stadium": 2464, "trumpet": 2465, "ornate": 2466, "toiletries": 2467, "sharp": 2468, "price": 2469, "laughing": 2470, "performers": 2471, "bomb": 2472, "sail": 2473, "condiment": 2474, "birthday": 2475, "powerful": 2476, "followed": 2477, 
"blossoms": 2478, "shorter": 2479, "kimono": 2480, "camels": 2481, "cheesecakes": 2482, "entrees": 2483, "catches": 2484, "larger": 2485, "picks": 2486, "hug": 2487, "dusty": 2488, "shaggy": 2489, "gentlemen": 2490, "homes": 2491, "deers": 2492, "chargers": 2493, "shops": 2494, "heel": 2495, "casserole": 2496, "figures": 2497, "gooses": 2498, "cabbages": 2499, "seeds": 2500, "elderly": 2501, "cathedrals": 2502, "baker": 2503, "anchovies": 2504, "reads": 2505, "coarse": 2506, "tuna": 2507, "flamingo": 2508, "forward": 2509, "cylindrical": 2510, "ornamental": 2511, "follow": 2512, "talk": 2513, "greasy": 2514, "seafood": 2515, "policemans": 2516, "auditorium": 2517, "hugs": 2518, "texture": 2519, "patchy": 2520, "beet": 2521, "blowing": 2522, "skillets": 2523, "opaque": 2524, "rooftop": 2525, "hangar": 2526, "diced": 2527, "abandoned": 2528, "formal": 2529, "daughters": 2530, "locomotives": 2531, "translucent": 2532, "transparent": 2533, "immature": 2534, "ivory": 2535, "team": 2536, "temples": 2537, "frosted": 2538, "whale": 2539, "rolling": 2540, "pins": 2541, "plier": 2542, "burritoes": 2543, "meatballs": 2544, "oversized": 2545, "keypad": 2546, "oatmeal": 2547, "guests": 2548, "chews": 2549, "rifles": 2550, "neat": 2551, "pumpkins": 2552, "backwards": 2553, "milkshakes": 2554, "turtles": 2555, "milkshake": 2556, "wireless": 2557, "waterfall": 2558, "paneled": 2559, "faded": 2560, "scarce": 2561, "abundant": 2562, "shaking": 2563, "hands": 2564, "barns": 2565, "courtyard": 2566, "smell": 2567, "smells": 2568, "appetizer": 2569, "farms": 2570, "sleeps": 2571, "wrinkly": 2572, "moose": 2573, "throw": 2574, "cook": 2575, "garages": 2576, "lots": 2577, "roadway": 2578, "photographing": 2579, "raised": 2580, "goes": 2581, "crumbled": 2582, "tying": 2583, "moustache": 2584, "muscular": 2585, "tattoos": 2586, "balance": 2587, "balances": 2588, "pantry": 2589, "accessory": 2590, "exits": 2591, "turquoise": 2592, "pomegranates": 2593, "handbags": 2594, "characters": 2595, 
"pointy": 2596, "shaving": 2597, "sturdy": 2598, "touches": 2599, "amusement": 2600, "dresses": 2601, "fireplaces": 2602, "supermarket": 2603, "spraying": 2604, "chases": 2605, "yolk": 2606, "legs": 2607, "illuminated": 2608, "cleans": 2609, "tiger": 2610, "processors": 2611, "docked": 2612, "sliding": 2613, "observe": 2614, "regular": 2615, "elevated": 2616, "deserts": 2617, "burgers": 2618, "rustic": 2619, "bubble": 2620, "enter": 2621, "entree": 2622, "neon": 2623, "legoes": 2624, "seed": 2625, "shadowed": 2626, "shaped": 2627, "wading": 2628, "exit": 2629, "herding": 2630, "genders": 2631, "swims": 2632, "pistachio": 2633, "enters": 2634, "pouches": 2635, "busy": 2636, "instruments": 2637, "fishing": 2638, "smiles": 2639, "fine": 2640, "oreo": 2641, "weed": 2642, "wash": 2643, "longer": 2644, "elevator": 2645, "frozen": 2646, "bee": 2647, "breaking": 2648, "ornament": 2649, "mexican": 2650, "miniature": 2651, "works": 2652, "cheetah": 2653, "factories": 2654, "cheeseburgers": 2655, "grated": 2656, "vinyl": 2657, "unoccupied": 2658, "rippled": 2659, "burning": 2660, "openers": 2661, "theater": 2662, "go": 2663, "laces": 2664, "gym": 2665, "fenced": 2666, "touchpad": 2667, "try": 2668, "pitchers": 2669, "tropical": 2670, "serves": 2671, "engineer": 2672, "tabby": 2673, "written": 2674, "iced": 2675, "bedrooms": 2676, "did": 2677, "forested": 2678, "jagged": 2679, "apartments": 2680, "spears": 2681, "guide": 2682, "beads": 2683, "weathered": 2684, "groceries": 2685, "move": 2686, "curious": 2687, "drawn": 2688, "chase": 2689, "sell": 2690, "quilted": 2691, "goods": 2692, "elmoes": 2693, "zoos": 2694, "sunflowers": 2695, "toss": 2696, "bathtubs": 2697, "buys": 2698, "sewn": 2699, "crisp": 2700, "jumps": 2701, "poses": 2702, "clumped": 2703, "arm": 2704, "frying": 2705, "blood": 2706, "visitor": 2707, "grabs": 2708, "smile": 2709, "cast": 2710, "shadow": 2711, "polished": 2712, "chubby": 2713, "calico": 2714, "being": 2715, "oriental": 2716, "inflatable": 2717, 
"vacua": 2718, "adjusts": 2719, "adjust": 2720, "oblong": 2721, "steaks": 2722, "strings": 2723, "floppy": 2724, "pills": 2725, "restroom": 2726, "lilies": 2727, "logoes": 2728, "swamp": 2729, "pick": 2730, "sleepy": 2731, "basketball": 2732, "pin": 2733, "herds": 2734, "chipped": 2735, "supplies": 2736, "hidden": 2737, "caramel": 2738, "tiled": 2739, "fields": 2740, "waits": 2741, "lower": 2742, "tangled": 2743, "torn": 2744, "feathered": 2745, "disposable": 2746, "lick": 2747, "twisted": 2748, "pose": 2749, "upwards": 2750, "sheer": 2751, "airports": 2752, "stations": 2753, "suspended": 2754, "barren": 2755, "cracked": 2756, "higher": 2757, "them": 2758, "inflated": 2759, "prepares": 2760, "half": 2761, "speckled": 2762, "pesto": 2763, "trimmed": 2764, "overgrown": 2765, "upward": 2766, "sweaters": 2767, "vast": 2768, "bugs": 2769, "cushioned": 2770, "pours": 2771, "carved": 2772, "wades": 2773, "splashes": 2774, "chew": 2775, "mowed": 2776, "neck": 2777, "buss": 2778, "downward": 2779, "skates": 2780, "rugged": 2781, "fudge": 2782, "reaches": 2783, "ostrichs": 2784, "were": 2785, "reflects": 2786, "gourd": 2787, "slanted": 2788, "sconce": 2789, "rippling": 2790, "cafeteria": 2791, "cactuss": 2792, "wispy": 2793, "headphone": 2794, "showers": 2795, "leads": 2796, "shaved": 2797, "curvy": 2798, "buffet": 2799, "ponds": 2800, "forests": 2801, "woks": 2802, "markers": 2803, "meatballss": 2804, "antennas": 2805, "lobby": 2806, "sideways": 2807, "multi": 2808, "tow": 2809, "grasss": 2810, "orchard": 2811, "elongated": 2812, "cordless": 2813, "fluorescent": 2814, "upper": 2815, "chased": 2816, "erasers": 2817, "hollow": 2818, "bigger": 2819, "hibiscus": 2820, "mint": 2821, "outlets": 2822, "winter": 2823, "typical": 2824, "cooks": 2825, "helicopters": 2826, "sculpted": 2827, "offices": 2828, "shines": 2829, "restaurants": 2830, "turbine": 2831, "gets": 2832, "fishs": 2833, "packaged": 2834, "beetle": 2835, "analog": 2836, "downwards": 2837, "deciduous": 2838, 
"patched": 2839, "prepare": 2840, "lighted": 2841, "spiral": 2842, "flamingos": 2843, "photographs": 2844, "ornaments": 2845, "melting": 2846, "smaller": 2847, "cricket": 2848, "waitresses": 2849, "outdoor": 2850, "aircrafts": 2851, "pour": 2852, "gentlemans": 2853, "complete": 2854, "foot": 2855, "decorates": 2856, "falls": 2857, "menus": 2858, "vacant": 2859, "tosses": 2860, "crossed": 2861, "tries": 2862, "pokemon": 2863, "pikachu": 2864, "wave": 2865, "manicured": 2866, "led": 2867, "simple": 2868, "mouth": 2869, "pub": 2870, "attic": 2871, "batting": 2872, "blow": 2873, "blows": 2874, "orchids": 2875, "music": 2876, "hoof": 2877, "directional": 2878, "barbed": 2879, "calfs": 2880, "shingled": 2881, "corded": 2882, "sporting": 2883, "equipment": 2884, "multicolored": 2885, "wade": 2886, "within": 2887, "weapons": 2888, "pitas": 2889, "rusted": 2890, "marinas": 2891, "cities": 2892, "villages": 2893, "beaded": 2894, "intricate": 2895, "citrus": 2896, "braided": 2897, "carpeted": 2898, "breakable": 2899, "shine": 2900, "domed": 2901, "bunches": 2902, "urban": 2903, "guides": 2904, "misty": 2905, "winding": 2906, "roman": 2907, "womens": 2908, "exterior": 2909, "recessed": 2910, "hazy": 2911, "dangling": 2912, "pikachus": 2913, "libraries": 2914, "decorate": 2915, "foamy": 2916, "abstract": 2917, "tongue": 2918, "spinning": 2919, "vibrant": 2920, "checked": 2921, "knotted": 2922, "crouched": 2923, "diamond": 2924, "paw": 2925, "grab": 2926, "rimmed": 2927, "got": 2928, "sold": 2929, "piercing": 2930, "pokemons": 2931, "peeling": 2932}, 29]
================================================
FILE: openvqa/datasets/gqa/eval/gqa_eval.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
from collections import defaultdict
from tqdm import tqdm
import os.path
import glob
import json
class GQAEval:
    """GQA evaluator (adapted from the official GQA evaluation script).

    Loads a question file, an optional choices file and a prediction file,
    then computes accuracy, validity, plausibility, consistency and answer
    distribution metrics over the balanced subset.  Formatted result lines
    are exposed through ``get_str_result()``.
    """

    def __init__(self, __C, result_eval_file, ques_file_path, choices_path=None, EVAL_CONSISTENCY=False):
        # __C: global config object (only used by the commented-out path code below).
        # result_eval_file: JSON list of {"questionId", "prediction"} records.
        # ques_file_path: GQA question JSON, mapping qid -> question record.
        # choices_path: optional qid -> {"valid", "plausible"} choice lists;
        #               validity/plausibility are only scored when provided.
        # EVAL_CONSISTENCY: when True, also scores entailed-question consistency.
        ##### Files Loading
        ##########################################################################################
        # self.question_path = __C.QUESTION_PATH[__C.SPLIT[__C.RUN_MODE]]
        # self.val_choices_path = __C.EVAL_PATH['val_choices']
        # self.prediction_path = __C.EVAL_PATH['tmp'] + 'result_run_' + __C.VERSION + '.json'

        # # Load scene graphs
        # print("Loading scene graphs...")
        # scenes = self.loadFile(args.scenes.format(tier=args.tier))

        # Load questions
        print("Loading questions...")
        questions = self.loadFile(ques_file_path)

        # Load choices
        choices = None
        if choices_path is not None:
            print("Loading choices...")
            choices = self.loadFile(choices_path)

        # Load predictions and turn them into a dictionary keyed by question id
        print("Loading predictions...")
        self.predictions = self.loadFile(result_eval_file)
        self.predictions = {p["questionId"]: p["prediction"] for p in self.predictions}

        # Make sure all question have predictions (balanced questions always,
        # every question when consistency is evaluated)
        for qid in questions:
            if (qid not in self.predictions) and (EVAL_CONSISTENCY or questions[qid]["isBalanced"]):
                print("no prediction for question {}. Please add prediction for all questions.".format(qid))
                raise Exception("missing predictions")

        # Per-metric accumulators; list-valued entries are averaged at the end.
        self.scores = {
            "accuracy": [],  # list of accuracies per question (1 if correct else 0). Will be averaged ultimately.
            "binary": [],  # list of accuracies per a binary question (1 if correct else 0). Will be averaged ultimately.
            "open": [],  # list of accuracies per an open question (1 if correct else 0). Will be averaged ultimately.
            "validity": [],  # list of validity per question (1 if valid else 0).
            "plausibility": [],  # list of plausibility per question (1 if plausible else 0).
            "consistency": [],  # list of consistency scores for entailed questions.
            "accuracyPerStructuralType": defaultdict(list),  # list of question accuracies for each structural type (e.g. compare, logic questions).
            "accuracyPerSemanticType": defaultdict(list),  # list of question accuracies for each semantic type (e.g. questions about an object, an attribute, a relation).
            "accuracyPerLength": defaultdict(list),  # list of question accuracies per question's word number.
            "accuracyPerSteps": defaultdict(list),  # list of question accuracies per question's reasoning length (steps number).
            "grounding": []  # list of grounding scores for each question.
        }

        # Initialize golden and predicted histograms per each question group. Used to compute the distribution metric.
        self.dist = {
            "gold": defaultdict(lambda: defaultdict(int)),
            "predicted": defaultdict(lambda: defaultdict(int))
        }

        ##### Main score computation
        ##########################################################################################

        # Loop over the questions and compute metrics
        for qid, question in tqdm(questions.items()):
            gold = question["answer"]
            predicted = self.predictions[qid]
            # self.correct is read later by updateConsistency() for this qid.
            self.correct = (predicted == gold)
            score = self.toScore(self.correct)

            wordsNum = self.getWordsNum(question)
            stepsNum = self.getStepsNum(question)

            # Compute scores over the balanced dataset (more robust against cheating by making educated guesses)
            if question["isBalanced"]:
                # Update accuracy
                self.scores["accuracy"].append(score)
                self.scores["accuracyPerLength"][wordsNum].append(score)
                self.scores["accuracyPerSteps"][stepsNum].append(score)
                self.scores["accuracyPerStructuralType"][question["types"]["structural"]].append(score)
                self.scores["accuracyPerSemanticType"][question["types"]["semantic"]].append(score)
                # "query"-type questions are open-ended; everything else is binary.
                answerType = "open" if question["types"]["structural"] == "query" else "binary"
                self.scores[answerType].append(score)

                if choices_path is not None:
                    # Update validity score
                    valid = self.belongs(predicted, choices[qid]["valid"], question)
                    self.scores["validity"].append(self.toScore(valid))

                    # Update plausibility score
                    plausible = self.belongs(predicted, choices[qid]["plausible"], question)
                    self.scores["plausibility"].append(self.toScore(plausible))

                # Update histograms for gold and predicted answers
                globalGroup = question["groups"]["global"]
                if globalGroup is not None:
                    self.dist["gold"][globalGroup][gold] += 1
                    self.dist["predicted"][globalGroup][predicted] += 1

                if EVAL_CONSISTENCY:
                    # Compute consistency (for entailed questions)
                    self.updateConsistency(qid, question, questions)

        # Compute distribution score (chi-square, scaled down by 100)
        self.scores["distribution"] = self.chiSquare(self.dist["gold"], self.dist["predicted"]) / 100

        # Average scores over all questions (in the balanced dataset) and print scores
        metrics = [
            "binary",
            "open",
            "accuracy",
            "consistency",
            "validity",
            "plausibility",
            "grounding",
            "distribution"
        ]

        detailedMetrics = [
            ("accuracyPerStructuralType", "Accuracy / structural type"),
            ("accuracyPerSemanticType", "Accuracy / semantic type"),
            ("accuracyPerSteps", "Accuracy / steps number"),
            ("accuracyPerLength", "Accuracy / words number")
        ]

        subMetrics = {
            "attr": "attribute",
            "cat": "category",
            "global": "scene",
            "obj": "object",
            "rel": "relation"
        }

        # average: scalar metrics become percentages; detailed metrics become
        # (percentage, sample count) tuples per sub-key.
        for k in metrics:
            if isinstance(self.scores[k], list):
                self.scores[k] = self.avg(self.scores[k]) * 100

        for k, _ in detailedMetrics:
            for t in self.scores[k]:
                self.scores[k][t] = self.avg(self.scores[k][t]) * 100, len(self.scores[k][t])

        self.result_string = []
        self.detail_result_string = []

        # print
        # print("")
        for m in metrics:
            # skip grounding and consistency scores if not requested
            if m == "grounding":
                continue
            if m == "consistency" and not EVAL_CONSISTENCY:
                continue
            if m == "validity" and choices_path is None:
                continue
            if m == "plausibility" and choices_path is None:
                continue

            self.result_string.append("{title}: {score:.2f}{suffix}".format(title=m.capitalize(), score=self.scores[m],
                                                                            suffix=" (lower is better)" if m == "distribution" else "%"))
            # print score
            # print("{title}: {score:.2f}{suffix}".format(title=m.capitalize(), score=self.scores[m],
            #                                             suffix=" (lower is better)" if m == "distribution" else "%"))

        for m, mPrintName in detailedMetrics:
            # print("")
            # self.detail_result_string.append('\n')

            # print metric title
            # print("{}:".format(mPrintName))
            self.detail_result_string.append("{}:".format(mPrintName))

            for t in sorted(list(self.scores[m].keys())):
                # set sub-metric title
                tName = t
                # NOTE(review): `k` below is a leftover from the averaging loops
                # above (its final value is "accuracyPerLength", whose entry is a
                # dict, not a list), so this condition never fires and subMetrics
                # titles are never capitalized. The same quirk exists in the
                # official GQA eval script — confirm intent before changing.
                if isinstance(self.scores[k], list):
                    tName = subMetrics.get(t, t).capitalize()

                self.detail_result_string.append(" {title}: {score:.2f}{suffix} ({amount} questions)".format(title=tName,
                                                                                                             score=self.scores[m][t][0], suffix="%",
                                                                                                             amount=self.scores[m][t][1]))
                # # print score
                # print("  {title}: {score:.2f}{suffix} ({amount} questions)".format(title=tName,
                #                                                                    score=self.scores[m][t][0], suffix="%",
                #                                                                    amount=self.scores[m][t][1]))
def get_str_result(self):
    """Return the (summary lines, detailed lines) built during __init__."""
    summary = self.result_string
    details = self.detail_result_string
    return summary, details
def loadFile(self, name):
    """Load a JSON file, or a same-named directory of JSON chunks.

    If `name` exists, load it as a single JSON file. Otherwise, for a file
    too big to store in one piece, look for a directory named after `name`
    without its extension and merge every `<base>_*.<ext>` chunk inside it.

    Raises:
        Exception: when neither the file nor the chunk directory exists.

    Note: the original built the chunk glob as '{dir}/{dir}_*.{ext}' using
    the FULL path twice and split on the FIRST dot, so any nested path
    (e.g. 'data/val.json') silently matched nothing. os.path.splitext /
    basename make chunk loading path-safe.
    """
    # load standard json file
    if os.path.isfile(name):
        with open(name) as file:
            data = json.load(file)
        return data

    # load file chunks if too big
    root, ext = os.path.splitext(name)
    if not os.path.isdir(root):
        raise Exception("Can't find {}".format(name))

    data = {}
    pattern = '{dir}/{base}_*{ext}'.format(dir=root, base=os.path.basename(root), ext=ext)
    for chunk in glob.glob(pattern):
        with open(chunk) as file:
            data.update(json.load(file))
    return data
# bool to float
def toScore(self, b):
    """Map any truthy value to 1.0 and any falsy value to 0.0."""
    return float(bool(b))
# Compute average of a list
def avg(self, l):
    """Arithmetic mean of `l`; returns 0 for an empty list."""
    if not l:
        return 0
    return float(sum(l)) / len(l)
def wavg(self, l, w):
    """Weighted average of `l` with weights `w`; None when all weights are 0."""
    total = sum(w)
    if total == 0:
        return None
    weighted = sum(value * weight for value, weight in zip(l, w))
    return float(weighted) / total
##### Question lengths - words numbers and reasoning steps number
##########################################################################################

# Compute question length (words number)
def getWordsNum(self, question):
    """Number of whitespace-separated tokens in the question text."""
    words = question["question"].split()
    return len(words)
# Compute number of reasoning steps (excluding the final "querying" step which doesn't increase effective reasoning length)
def getStepsNum(self, question):
    """Count semantic-program steps, skipping trivial terminal operations.

    A step is skipped when its "operation: argument" string contains any of
    "exist", "query: name" or "choose name".
    """
    skipped = ["exist", "query: name", "choose name"]
    steps = 0
    for c in question["semantic"]:
        description = "{}: {}".format(c["operation"], c["argument"])
        if not any(marker in description for marker in skipped):
            steps += 1
    return steps
# ##### Functions for question annotations
# ##########################################################################################
#
# # Utility function for converting question annotations string keys to slices
# def toSlice(self, strSlice):
# sliceLims = (int(n) for n in strSlice.split(':'))
# return apply(slice, sliceLims)
#
# # Utility function for converting question annotations string keys to indexes list:
# # "1" => [0]
# # "1:3" => [1, 2]
# # "4:9:2" => [4, 6, 8]
# def intsFromSlice(self, strSlice):
# slice_obj = get_slice_obj(slicearg)
# return (range(slice_obj.start or 0, slice_obj.stop or -1, slice_obj.step or 1))
##### Functions for validity and plausibility
##########################################################################################

def belongs(self, element, group, question):
    """Membership test used for validity/plausibility scoring.

    Questions whose detailed type mentions "Common" are normalized: the
    answer must be one of the three common attributes instead of the
    per-question choice list.
    """
    if "Common" in question["types"]["detailed"]:
        return element in ["color", "material", "shape"]
    return element in group
##### Functions for consistency scores (for entailed questions ("inferred"))
##########################################################################################

def updateConsistency(self, questionId, question, questions):
    """Append the mean correctness of this question's entailed siblings.

    Only runs when the current question itself was answered correctly
    (self.correct, set by __init__'s main loop) and it entails at least
    one question other than itself.
    """
    entailed = [eid for eid in question["entailed"] if eid != questionId]
    if not (self.correct and entailed):
        return
    perEntailed = [
        self.toScore(self.predictions[eid] == questions[eid]["answer"])
        for eid in entailed
    ]
    self.scores["consistency"].append(self.avg(perEntailed))
##### Functions for distribution score
##########################################################################################

# Compute chi square statistic of gold distribution vs predicted distribution,
# averaged over all question groups
def chiSquare(self, goldDist, predictedDist):
    """Weighted-average chi-square of predicted vs gold answer histograms.

    Each group's chi-square statistic is weighted by its gold answer count.
    Returns 0 when `goldDist` is empty (the original raised
    ZeroDivisionError in that case).
    """
    sumScore, sumOverall = 0, 0

    for group in goldDist:
        score, overall = 0, 0
        for ans in goldDist[group]:
            # e >= 1 always: gold counts exist only for observed answers.
            e = goldDist[group][ans]
            o = predictedDist[group].get(ans, 0)
            score += ((float(o - e) ** 2) / e)
            overall += goldDist[group][ans]

        sumScore += score * overall
        sumOverall += overall

    # Guard: no questions carried a global group -> nothing to compare.
    if sumOverall == 0:
        return 0

    avgScore = float(sumScore) / sumOverall
    return avgScore
================================================
FILE: openvqa/datasets/gqa/eval/result_eval.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
from openvqa.datasets.gqa.eval.gqa_eval import GQAEval
import json, pickle
import numpy as np
def eval(__C, dataset, ans_ix_list, pred_list, result_eval_file, ensemble_file, log_file, valid=False):
    """Dump GQA predictions to disk and (optionally) score and log them.

    Args:
        __C: global config (TEST_SAVE_PRED, RAW_PATH, SPLIT, DATASET are read).
        dataset: loaded DataSet providing qid_list, ans_size and ix_to_ans.
        ans_ix_list: predicted answer index per question (parallel to qid_list).
        pred_list: raw prediction vectors; saved for ensembling when requested.
        result_eval_file: output path prefix; '.json' is appended here.
        ensemble_file: pickle path for the raw prediction vectors.
        log_file: text log appended with the evaluation summary.
        valid: when True, run GQAEval against the validation annotations.

    All files are opened via context managers so handles are closed even if
    a dump fails (the original leaked them / closed only on success).
    """
    result_eval_file = result_eval_file + '.json'

    qid_list = [qid for qid in dataset.qid_list]
    ans_size = dataset.ans_size

    # Map each predicted answer index back to its answer string, keyed by qid.
    result = [{
        'questionId': qid_list[ix],
        'prediction': dataset.ix_to_ans[str(ans_ix_list[ix])],
    } for ix in range(len(qid_list))]

    print('Save the result to file: {}'.format(result_eval_file))
    with open(result_eval_file, 'w') as result_f:
        json.dump(result, result_f)

    if __C.TEST_SAVE_PRED:
        print('Save the prediction vector to file: {}'.format(ensemble_file))

        pred_list = np.array(pred_list).reshape(-1, ans_size)
        result_pred = [{
            'pred': pred_list[qix],
            'qid': int(qid_list[qix])
        } for qix in range(len(qid_list))]

        with open(ensemble_file, 'wb+') as ensemble_f:
            pickle.dump(result_pred, ensemble_f, protocol=-1)

    if valid:
        # create vqa object and vqaRes object
        ques_file_path = __C.RAW_PATH[__C.DATASET][__C.SPLIT['val']]

        choices_path = None
        if __C.SPLIT['val'] + '_choices' in __C.RAW_PATH[__C.DATASET]:
            choices_path = __C.RAW_PATH[__C.DATASET][__C.SPLIT['val'] + '_choices']

        eval_gqa = GQAEval(__C, result_eval_file, ques_file_path, choices_path, EVAL_CONSISTENCY=False)
        result_string, detail_result_string = eval_gqa.get_str_result()

        print('Write to log file: {}'.format(log_file))
        with open(log_file, 'a+') as logfile:
            for result_string_ in result_string:
                logfile.write(result_string_)
                logfile.write('\n')
                print(result_string_)

            for detail_result_string_ in detail_result_string:
                logfile.write(detail_result_string_)
                logfile.write("\n")

            logfile.write('\n')
================================================
FILE: openvqa/datasets/gqa/gqa_loader.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
import numpy as np
import glob, json, re, en_vectors_web_lg
from openvqa.core.base_dataset import BaseDataSet
from openvqa.utils.ans_punct import prep_ans
class DataSet(BaseDataSet):
    """GQA dataset loader.

    Reads the raw question JSON files for the configured splits, indexes the
    pre-extracted region/grid feature files by image id, and loads the
    prebuilt token/answer vocabularies from dicts.json.
    """

    def __init__(self, __C):
        super(DataSet, self).__init__()
        # Keep the global config for RUN_MODE / FEAT_SIZE lookups later.
        self.__C = __C

        # --------------------------
        # ---- Raw data loading ----
        # --------------------------

        # Pre-read every canonical split; custom splits are loaded on demand below.
        # NOTE(review): this loads all four splits up front even though only the
        # splits in SPLIT[RUN_MODE] are used — potentially a lot of wasted I/O.
        ques_dict_preread = {
            'train': json.load(open(__C.RAW_PATH[__C.DATASET]['train'], 'r')),
            'val': json.load(open(__C.RAW_PATH[__C.DATASET]['val'], 'r')),
            'testdev': json.load(open(__C.RAW_PATH[__C.DATASET]['testdev'], 'r')),
            'test': json.load(open(__C.RAW_PATH[__C.DATASET]['test'], 'r')),
        }

        # Loading all image paths
        frcn_feat_path_list = glob.glob(__C.FEATS_PATH[__C.DATASET]['default-frcn'] + '/*.npz')
        grid_feat_path_list = glob.glob(__C.FEATS_PATH[__C.DATASET]['default-grid'] + '/*.npz')

        # Loading question word list
        # stat_ques_dict = {
        #     **ques_dict_preread['train'],
        #     **ques_dict_preread['val'],
        #     **ques_dict_preread['testdev'],
        #     **ques_dict_preread['test'],
        # }

        # Loading answer word list
        # stat_ans_dict = {
        #     **ques_dict_preread['train'],
        #     **ques_dict_preread['val'],
        #     **ques_dict_preread['testdev'],
        # }

        # Loading question and answer list: merge every split named in
        # SPLIT[RUN_MODE] ('+'-separated) into one qid -> record dict.
        self.ques_dict = {}
        split_list = __C.SPLIT[__C.RUN_MODE].split('+')
        for split in split_list:
            if split in ques_dict_preread:
                self.ques_dict = {
                    **self.ques_dict,
                    **ques_dict_preread[split],
                }
            else:
                self.ques_dict = {
                    **self.ques_dict,
                    **json.load(open(__C.RAW_PATH[__C.DATASET][split], 'r')),
                }

        # Define run data size
        self.data_size = self.ques_dict.__len__()
        print(' ========== Dataset size:', self.data_size)

        # ------------------------
        # ---- Data statistic ----
        # ------------------------

        # {image id} -> {image feature absolutely path}
        self.iid_to_frcn_feat_path = self.img_feat_path_load(frcn_feat_path_list)
        self.iid_to_grid_feat_path = self.img_feat_path_load(grid_feat_path_list)

        # Loading dict: question dict -> question list
        self.qid_list = list(self.ques_dict.keys())

        # Tokenize: vocabulary and (optionally) GloVe embeddings from dicts.json
        self.token_to_ix, self.pretrained_emb, max_token = self.tokenize('openvqa/datasets/gqa/dicts.json', __C.USE_GLOVE)
        self.token_size = self.token_to_ix.__len__()
        print(' ========== Question token vocab size:', self.token_size)

        # NOTE(review): the -1 sentinel makes this always take the dict-file
        # value; presumably a hook for a config-driven override — TODO confirm.
        self.max_token = -1
        if self.max_token == -1:
            self.max_token = max_token
        print('Max token length:', max_token, 'Trimmed to:', self.max_token)

        # Answers statistic
        self.ans_to_ix, self.ix_to_ans = self.ans_stat('openvqa/datasets/gqa/dicts.json')
        self.ans_size = self.ans_to_ix.__len__()
        print(' ========== Answer token vocab size:', self.ans_size)
        print('Finished!')
        print('')
def img_feat_path_load(self, path_list):
    """Build a {image id -> feature file path} mapping.

    The image id is the file name up to the first '.'
    (e.g. 'feats/2375429.npz' -> '2375429').

    The original used `enumerate` but never read the index; the loop is
    simplified accordingly.
    """
    iid_to_path = {}
    for path in path_list:
        iid = path.split('/')[-1].split('.')[0]
        iid_to_path[iid] = path
    return iid_to_path
# def tokenize(self, stat_ques_dict, use_glove):
# token_to_ix = {
# 'PAD': 0,
# 'UNK': 1,
# 'CLS': 2,
# }
#
# spacy_tool = None
# pretrained_emb = []
# if use_glove:
# spacy_tool = en_vectors_web_lg.load()
# pretrained_emb.append(spacy_tool('PAD').vector)
# pretrained_emb.append(spacy_tool('UNK').vector)
# pretrained_emb.append(spacy_tool('CLS').vector)
#
# max_token = 0
# for qid in stat_ques_dict:
# ques = stat_ques_dict[qid]['question']
# words = re.sub(
# r"([.,'!?\"()*#:;])",
# '',
# ques.lower()
# ).replace('-', ' ').replace('/', ' ').split()
#
# if len(words) > max_token:
# max_token = len(words)
#
# for word in words:
# if word not in token_to_ix:
# token_to_ix[word] = len(token_to_ix)
# if use_glove:
# pretrained_emb.append(spacy_tool(word).vector)
#
# pretrained_emb = np.array(pretrained_emb)
#
# return token_to_ix, pretrained_emb, max_token
#
#
# def ans_stat(self, stat_ans_dict):
# ans_to_ix = {}
# ix_to_ans = {}
#
# for qid in stat_ans_dict:
# ans = stat_ans_dict[qid]['answer']
# ans = prep_ans(ans)
#
# if ans not in ans_to_ix:
# ix_to_ans[ans_to_ix.__len__()] = ans
# ans_to_ix[ans] = ans_to_ix.__len__()
#
# return ans_to_ix, ix_to_ans
def tokenize(self, json_file, use_glove):
    """Load the prebuilt token vocabulary from `json_file`.

    The dicts file is a JSON list whose items [2:] are
    (token_to_ix, max_token).

    Args:
        json_file: path to dicts.json.
        use_glove: when True, collect a GloVe vector per token via
            en_vectors_web_lg (spaCy model; project dependency).

    Returns:
        token_to_ix: dict mapping token -> index.
        pretrained_emb: np.ndarray of vectors (empty when use_glove is False).
        max_token: int, the longest question length recorded in the file.
    """
    # Close the file deterministically; the original json.load(open(...))
    # relied on GC to release the handle.
    with open(json_file, 'r') as f:
        token_to_ix, max_token = json.load(f)[2:]

    pretrained_emb = []
    if use_glove:
        # Load the spaCy GloVe model once, outside the token loop.
        spacy_tool = en_vectors_web_lg.load()
        pretrained_emb = [spacy_tool(word).vector for word in token_to_ix]
    pretrained_emb = np.array(pretrained_emb)

    return token_to_ix, pretrained_emb, max_token
def ans_stat(self, json_file):
    """Load (ans_to_ix, ix_to_ans) — items [:2] of the dicts JSON file.

    Uses a context manager so the file handle is closed deterministically
    (the original json.load(open(...)) leaked it until GC).
    """
    with open(json_file, 'r') as f:
        ans_to_ix, ix_to_ans = json.load(f)[:2]
    return ans_to_ix, ix_to_ans
# ----------------------------------------------
# ---- Real-Time Processing Implementations ----
# ----------------------------------------------
def load_ques_ans(self, idx):
    """Fetch (question token ids, answer index vector, image id) for sample idx."""
    qid = self.qid_list[idx]
    entry = self.ques_dict[qid]
    iid = entry['imageId']

    ques_ix_iter = self.proc_ques(entry['question'], self.token_to_ix, max_token=self.max_token)

    # Ground-truth answers are only looked up while training; otherwise a
    # zero placeholder is returned.
    ans_iter = np.zeros(1)
    if self.__C.RUN_MODE in ['train']:
        # process answers
        ans_iter = self.proc_ans(entry['answer'], self.ans_to_ix)

    return ques_ix_iter, ans_iter, iid
def load_img_feats(self, idx, iid):
    """Load per-image features for image `iid`.

    Returns (frcn_feat_iter, grid_feat_iter, bbox_feat_iter):
    region features padded to FRCN_FEAT_SIZE[0] rows, raw grid features,
    and normalized boxes padded to BBOX_FEAT_SIZE[0] rows.
    `idx` is unused here; the lookup is by image id.
    """
    # Region (Faster R-CNN) features: one .npz per image with keys
    # 'x', 'bbox', 'height', 'width' (as read below).
    frcn_feat = np.load(self.iid_to_frcn_feat_path[iid])
    frcn_feat_iter = self.proc_img_feat(frcn_feat['x'], img_feat_pad_size=self.__C.FEAT_SIZE['gqa']['FRCN_FEAT_SIZE'][0])

    # Grid features are used as stored — no padding applied.
    grid_feat = np.load(self.iid_to_grid_feat_path[iid])
    grid_feat_iter = grid_feat['x']

    # Boxes are normalized by the image size, then zero-padded to a fixed
    # number of rows so batches can be stacked.
    bbox_feat_iter = self.proc_img_feat(
        self.proc_bbox_feat(
            frcn_feat['bbox'],
            (frcn_feat['height'], frcn_feat['width'])
        ),
        img_feat_pad_size=self.__C.FEAT_SIZE['gqa']['BBOX_FEAT_SIZE'][0]
    )

    return frcn_feat_iter, grid_feat_iter, bbox_feat_iter
# ------------------------------------
# ---- Real-Time Processing Utils ----
# ------------------------------------
def proc_img_feat(self, img_feat, img_feat_pad_size):
    """Truncate or zero-pad `img_feat` along axis 0 to exactly
    `img_feat_pad_size` rows."""
    feat = img_feat[:img_feat_pad_size]
    missing_rows = img_feat_pad_size - feat.shape[0]
    return np.pad(
        feat,
        ((0, missing_rows), (0, 0)),
        mode='constant',
        constant_values=0
    )
def proc_bbox_feat(self, bbox, img_shape):
    """Normalize (x1, y1, x2, y2) boxes by image width/height into [0, 1].

    `img_shape` is (height, width); x-coordinates (columns 0 and 2) are
    divided by the width, y-coordinates (columns 1 and 3) by the height.
    """
    height = float(img_shape[0])
    width = float(img_shape[1])

    bbox_feat = np.zeros((bbox.shape[0], 4), dtype=np.float32)
    bbox_feat[:, 0] = bbox[:, 0] / width
    bbox_feat[:, 1] = bbox[:, 1] / height
    bbox_feat[:, 2] = bbox[:, 2] / width
    bbox_feat[:, 3] = bbox[:, 3] / height
    return bbox_feat
def proc_ques(self, ques, token_to_ix, max_token):
    """Convert a question string to a fixed-length vector of token ids.

    Punctuation is stripped, '-' and '/' become spaces, text is lowercased.
    Unknown words map to token_to_ix['UNK']; output is zero-padded (PAD=0)
    and truncated to `max_token` entries.
    """
    ques_ix = np.zeros(max_token, np.int64)
    cleaned = re.sub(
        r"([.,'!?\"()*#:;])",
        '',
        ques.lower()
    ).replace('-', ' ').replace('/', ' ')

    for ix, word in enumerate(cleaned.split()[:max_token]):
        ques_ix[ix] = token_to_ix.get(word, token_to_ix['UNK'])

    return ques_ix
def proc_ans(self, ans, ans_to_ix):
    # Normalize the answer string via prep_ans, then map it to its
    # class index as a shape-(1,) int64 array.
    return np.array([ans_to_ix[prep_ans(ans)]], dtype=np.int64)
================================================
FILE: openvqa/datasets/vqa/answer_dict.json
================================================
[{"net": 0, "pitcher": 1, "orange": 2, "yes": 3, "white": 4, "skiing": 5, "red": 6, "frisbee": 7, "brushing teeth": 8, "no": 9, "black and white": 10, "skateboard": 11, "1": 12, "blue": 13, "green": 14, "motorcycle": 15, "gray": 16, "2": 17, "purse": 18, "skis": 19, "poles": 20, "surfboard": 21, "dog": 22, "on": 23, "office": 24, "large": 25, "very big": 26, "laptop": 27, "vent": 28, "computer": 29, "black": 30, "bear": 31, "3": 32, "wii": 33, "glasses": 34, "tree": 35, "eating": 36, "log": 37, "5": 38, "raft": 39, "left": 40, "living room": 41, "pink": 42, "right": 43, "railing": 44, "grass": 45, "wire": 46, "10 years": 47, "knife": 48, "cake": 49, "banana": 50, "chef": 51, "vanilla": 52, "4": 53, "outdoor": 54, "mustard": 55, "bun": 56, "clouds": 57, "dock": 58, "brown": 59, "silver": 60, "refrigerator": 61, "square": 62, "teddy": 63, "elm": 64, "stripes": 65, "baseball": 66, "catcher": 67, "beer": 68, "bottom": 69, "north": 70, "nike": 71, "yellow and white": 72, "morning": 73, "elephant": 74, "red and white": 75, "propeller": 76, "tan": 77, "wall": 78, "rolex": 79, "clock": 80, "table": 81, "0": 82, "wood": 83, "christmas": 84, "spinach": 85, "thick": 86, "bag": 87, "leaves": 88, "necklace": 89, "6": 90, "bathroom": 91, "shower": 92, "towel": 93, "solid": 94, "referee": 95, "wilson": 96, "8:00": 97, "e": 98, "24": 99, "hat": 100, "grazing": 101, "sheep": 102, "10": 103, "tag": 104, "spanish": 105, "hot dog": 106, "plate": 107, "lunch": 108, "butter": 109, "peppers": 110, "onions": 111, "very": 112, "mayonnaise": 113, "mayo": 114, "sweet potato": 115, "pig": 116, "sweet": 117, "flowers": 118, "floral": 119, "yellow": 120, "window": 121, "7": 122, "pizza": 123, "car": 124, "": 125, "cargo": 126, "stairs": 127, "abstract": 128, "rug": 129, "baseball cap": 130, "texting": 131, "pole": 132, "crosswalk": 133, "nothing": 134, "urban": 135, "bus": 136, "light": 137, "afternoon": 138, "boat": 139, "cheese": 140, "paper": 141, "real": 142, "sun": 143, "birthday": 144, 
"words": 145, "inside": 146, "shadows": 147, "tomato": 148, "evergreen": 149, "100 feet": 150, "shingles": 151, "trees": 152, "building": 153, "hay": 154, "ski pole": 155, "patterned": 156, "walking": 157, "ice": 158, "laundry": 159, "pepsi": 160, "good": 161, "1:50": 162, "purple": 163, "13": 164, "africa": 165, "teddy bears": 166, "socks": 167, "giraffe": 168, "soccer": 169, "blue and yellow": 170, "zebras": 171, "cupcake": 172, "broccoli": 173, "soldier": 174, "parking lot": 175, "cows": 176, "herding": 177, "on table": 178, "fish": 179, "nightstand": 180, "50": 181, "overcast": 182, "cross": 183, "toaster oven": 184, "tile": 185, "11:55": 186, "red and yellow": 187, "nowhere": 188, "hair dryer": 189, "truck": 190, "11": 191, "people": 192, "rectangle": 193, "hot dogs": 194, "party": 195, "12:55": 196, "apron": 197, "kitchen": 198, "cooking": 199, "ring": 200, "1 way": 201, "stop": 202, "neither": 203, "many": 204, "female": 205, "brushing": 206, "tie": 207, "tennis racket": 208, "knife and fork": 209, "restaurant": 210, "cat": 211, "bed": 212, "sand": 213, "ocean": 214, "cold": 215, "kites": 216, "cumulus": 217, "standing": 218, "male": 219, "star": 220, "tracks": 221, "chocolate": 222, "round": 223, "fork and knife": 224, "yankees": 225, "pictures": 226, "dots": 227, "bird": 228, "parrot": 229, "red white and blue": 230, "man": 231, "metal": 232, "fence": 233, "snowboarding": 234, "pine": 235, "snow": 236, "shorts": 237, "swim": 238, "wine": 239, "brick": 240, "no parking": 241, "children": 242, "beef": 243, "phone": 244, "english": 245, "cell phone": 246, "pink and yellow": 247, "clear": 248, "watermelon": 249, "bedroom": 250, "fork": 251, "cow": 252, "rackets": 253, "tennis rackets": 254, "8": 255, "collar": 256, "tennis": 257, "1950s": 258, "playing tennis": 259, "skirt": 260, "30": 261, "polka dot": 262, "beach": 263, "horse": 264, "grill": 265, "african american": 266, "down": 267, "street": 268, "in air": 269, "sweater": 270, "yellow and blue": 271, 
"park": 272, "backyard": 273, "spectators": 274, "parasailing": 275, "31": 276, "river": 277, "55": 278, "shadow": 279, "winter": 280, "chicken": 281, "tea": 282, "evening": 283, "dusk": 284, "ski resort": 285, "helmet": 286, "penne": 287, "bench": 288, "resting": 289, "elephants": 290, "southwest": 291, "usa": 292, "cars": 293, "town": 294, "bananas": 295, "umbrella": 296, "container": 297, "woman": 298, "on counter": 299, "salad": 300, "striped": 301, "motel": 302, "vertical": 303, "oranges": 304, "hot sauce": 305, "bottle": 306, "juice": 307, "eyes": 308, "ground": 309, "backpack": 310, "black and yellow": 311, "forward": 312, "jackets": 313, "1 on right": 314, "green and yellow": 315, "playing baseball": 316, "riding": 317, "sitting": 318, "carrot": 319, "basket": 320, "seagull": 321, "ski poles": 322, "p": 323, "parking": 324, "street light": 325, "mets": 326, "strap": 327, "bike": 328, "riding bike": 329, "poodle": 330, "shoes": 331, "carpet": 332, "lettuce": 333, "food": 334, "1 foot": 335, "roses": 336, "mountains": 337, "scissors": 338, "camera": 339, "beige": 340, "beard": 341, "cutting": 342, "baby": 343, "tape": 344, "watch": 345, "never": 346, "taking picture": 347, "eggs": 348, "syrup": 349, "sandwich": 350, "water skiing": 351, "microphone": 352, "back": 353, "bears": 354, "donuts": 355, "w": 356, "sky": 357, "double decker": 358, "england": 359, "surfing": 360, "running": 361, "shirt": 362, "barn": 363, "weather vane": 364, "white and blue": 365, "fishing": 366, "bridge": 367, "los angeles": 368, "open": 369, "red sox": 370, "bat": 371, "plane": 372, "white and green": 373, "transportation": 374, "sunny": 375, "bus stop": 376, "city": 377, "brown and white": 378, "bicycle": 379, "crow": 380, "magazines": 381, "daisy": 382, "14": 383, "old": 384, "curtains": 385, "jumped": 386, "snowboard": 387, "dinosaur": 388, "racing": 389, "asphalt": 390, "court": 391, "plastic": 392, "circle": 393, "red and blue": 394, "zebra": 395, "12": 396, "biplane": 397, 
"shallow": 398, "brazil": 399, "logo": 400, "2:20": 401, "electric": 402, "night time": 403, "motion": 404, "toothbrushes": 405, "orange and white": 406, "66": 407, "spoon": 408, "toyota": 409, "tennis shoes": 410, "46": 411, "second": 412, "no 1": 413, "iphone": 414, "friend": 415, "apple": 416, "carnation": 417, "15": 418, "tiger": 419, "glove": 420, "airplane": 421, "bow": 422, "air france": 423, "passengers": 424, "tv": 425, "on building": 426, "3:55": 427, "victorian": 428, "steeple": 429, "happy": 430, "skateboarding": 431, "fruit": 432, "cutting board": 433, "cantaloupe": 434, "kiwi": 435, "sliced": 436, "heart": 437, "water": 438, "rainy": 439, "carrots": 440, "giraffes": 441, "eat": 442, "ramp": 443, "lab": 444, "field": 445, "horizontal": 446, "birds": 447, "home": 448, "shrimp": 449, "12 feet": 450, "girl": 451, "modern": 452, "turtle": 453, "dell": 454, "boots": 455, "sunglasses": 456, "black and orange": 457, "yellow and black": 458, "gloves": 459, "hp": 460, "desk": 461, "both": 462, "sign": 463, "on street": 464, "2000": 465, "cirrus": 466, "to dry": 467, "ceiling": 468, "fluorescent": 469, "up": 470, "9": 471, "boys": 472, "playing soccer": 473, "american": 474, "passenger": 475, "turn": 476, "palm": 477, "no train": 478, "wedding": 479, "branch": 480, "parrots": 481, "air force": 482, "on tracks": 483, "small": 484, "tank": 485, "dirty": 486, "france": 487, "honda": 488, "2.00": 489, "whale": 490, "vase": 491, "flying": 492, "professional": 493, "driving": 494, "tissue": 495, "protest": 496, "corona": 497, "for balance": 498, "twin": 499, "clothes": 500, "t shirt": 501, "window sill": 502, "wild": 503, "noon": 504, "caution": 505, "spring": 506, "raining": 507, "cane": 508, "school": 509, "windsurfing": 510, "parachute": 511, "black and red": 512, "25": 513, "background": 514, "toaster": 515, "planes": 516, "yellow and red": 517, "spatula": 518, "10:10": 519, "ivory": 520, "train": 521, "welcome": 522, "highway": 523, "off": 524, "on track": 525, 
"electricity": 526, "italy": 527, "dinner": 528, "sink": 529, "squares": 530, "5 ft": 531, "parked": 532, "store": 533, "dress": 534, "signs": 535, "meow": 536, "football": 537, "rugby": 538, "stainless steel": 539, "la": 540, "dirt": 541, "blue and white": 542, "klm": 543, "house": 544, "unknown": 545, "ford": 546, "reading": 547, "chair": 548, "mountain": 549, "alive": 550, "water skis": 551, "picture": 552, "parade": 553, "slippers": 554, "trailer": 555, "boating": 556, "holding it": 557, "shade": 558, "cloth": 559, "6:20": 560, "candle": 561, "hose": 562, "hand": 563, "3:25": 564, "on sidewalk": 565, "poster": 566, "downhill": 567, "68": 568, "reflection": 569, "summer": 570, "pickles": 571, "halloween": 572, "bats": 573, "london": 574, "zoo": 575, "surfer": 576, "racket": 577, "flickr": 578, "cutting hair": 579, "strawberries": 580, "mushroom": 581, "teddy bear": 582, "big": 583, "suitcase": 584, "veggie": 585, "pepper": 586, "houses": 587, "70": 588, "toshiba": 589, "triangle": 590, "boxes": 591, "photograph": 592, "smoke": 593, "engine": 594, "camel": 595, "sidewalk": 596, "left 1": 597, "red and green": 598, "4:35": 599, "on couch": 600, "candy": 601, "minnie mouse": 602, "homemade": 603, "mouse": 604, "box": 605, "movie": 606, "45": 607, "strawberry": 608, "fridge": 609, "full": 610, "vegetables": 611, "bright": 612, "play": 613, "remote": 614, "pond": 615, "savannah": 616, "celery": 617, "concrete": 618, "semi": 619, "dump": 620, "scania": 621, "safety": 622, "posing": 623, "fabric": 624, "laying": 625, "couch": 626, "blueberries": 627, "handle": 628, "pipe": 629, "stick": 630, "parmesan": 631, "steak": 632, "chain link": 633, "catch": 634, "barbed wire": 635, "mozzarella": 636, "soda": 637, "fire hydrant": 638, "cat food": 639, "pepperoni": 640, "lot": 641, "licking": 642, "red and black": 643, "clay": 644, "tennis court": 645, "jumping": 646, "potatoes": 647, "toothbrush": 648, "kite": 649, "not at all": 650, "flying kite": 651, "broken": 652, "black 
and silver": 653, "lap": 654, "outside": 655, "44": 656, "delta": 657, "greyhound": 658, "ring finger": 659, "talking on phone": 660, "bad": 661, "kettle": 662, "35": 663, "motorcycles": 664, "produce": 665, "comfort": 666, "steering wheel": 667, "18": 668, "humans": 669, "coffee": 670, "white and brown": 671, "fall": 672, "bread": 673, "cherry": 674, "4:30": 675, "flag": 676, "night": 677, "lamp": 678, "cucumber": 679, "can't see": 680, "porcelain": 681, "oval": 682, "museum": 683, "rain": 684, "sprinkles": 685, "20": 686, "kids": 687, "bracelet": 688, "sneakers": 689, "mask": 690, "mickey mouse": 691, "twins": 692, "very high": 693, "costume": 694, "cabbage": 695, "paint": 696, "lighting": 697, "young": 698, "air conditioner": 699, "wooden": 700, "board": 701, "someone": 702, "beets": 703, "16": 704, "day time": 705, "4 inches": 706, "lights": 707, "ladder": 708, "glass": 709, "ferris wheel": 710, "fries": 711, "steamed": 712, "shepherd": 713, "cotton": 714, "suit": 715, "goatee": 716, "on his head": 717, "print": 718, "happy birthday": 719, "forks": 720, "travel": 721, "maple": 722, "200": 723, "oil": 724, "jeans": 725, "can": 726, "chopsticks": 727, "on wall": 728, "construction": 729, "mack": 730, "36": 731, "chinese": 732, "moped": 733, "festival": 734, "gas": 735, "throwing": 736, "circus": 737, "wires": 738, "not possible": 739, "plates": 740, "sugar": 741, "in": 742, "women's": 743, "door": 744, "no man": 745, "volleyball": 746, "serving": 747, "ponytail": 748, "business": 749, "decoration": 750, "santa": 751, "flat": 752, "barrel": 753, "12:15": 754, "candles": 755, "atv": 756, "free": 757, "hair": 758, "waffle": 759, "ball": 760, "stop sign": 761, "wetsuit": 762, "very deep": 763, "swimsuit": 764, "green and black": 765, "foreground": 766, "stands": 767, "china airlines": 768, "flower": 769, "300": 770, "lobster": 771, "on bench": 772, "plaster": 773, "phones": 774, "sailboat": 775, "apples": 776, "road": 777, "recently": 778, "cones": 779, "cactus": 
780, "rice": 781, "vegetarian": 782, "donut": 783, "ketchup": 784, "police": 785, "mirror": 786, "rock": 787, "meat": 788, "blinds": 789, "cell phones": 790, "china": 791, "rust": 792, "7:25": 793, "stone": 794, "vans": 795, "middle": 796, "eagle": 797, "9:30": 798, "ping pong": 799, "microwave": 800, "gmc": 801, "umbrellas": 802, "wrist": 803, "cuddling": 804, "laughing": 805, "boy": 806, "next to toilet": 807, "tabby": 808, "petting": 809, "south": 810, "40": 811, "name tag": 812, "checkered": 813, "name": 814, "slow": 815, "cardboard": 816, "windows": 817, "croissant": 818, "plain": 819, "cookie": 820, "on ground": 821, "low": 822, "water bottle": 823, "goggles": 824, "turkey": 825, "pull": 826, "shut": 827, "kite flying": 828, "bowl": 829, "smile": 830, "in bowl": 831, "bush": 832, "cloudy": 833, "top left": 834, "skateboarder": 835, "coca cola": 836, "pan": 837, "drinking": 838, "short": 839, "floor": 840, "thanksgiving": 841, "radio": 842, "drink": 843, "on toilet": 844, "bike rack": 845, "bleachers": 846, "train tracks": 847, "horses": 848, "far": 849, "top": 850, "toilet": 851, "in water": 852, "private": 853, "nature": 854, "checkers": 855, "commercial": 856, "stroller": 857, "power": 858, "stuffed animals": 859, "uniforms": 860, "japan": 861, "liquor": 862, "faucet": 863, "green and orange": 864, "corn": 865, "sub": 866, "white and yellow": 867, "mercedes": 868, "in sky": 869, "tarp": 870, "indian": 871, "counter": 872, "multicolored": 873, "polar": 874, "go": 875, "now": 876, "no number": 877, "swimming": 878, "bridle": 879, "cowboy": 880, "union station": 881, "salt and pepper": 882, "olives": 883, "pizza cutter": 884, "british airways": 885, "nighttime": 886, "domestic": 887, "trolley": 888, "australia": 889, "tiles": 890, "pug": 891, "wicker": 892, "british": 893, "us airways express": 894, "burton": 895, "christmas tree": 896, "napkin": 897, "writing": 898, "rocks": 899, "hello kitty": 900, "lacoste": 901, "gold": 902, "fan": 903, "skateboards": 904, 
"day": 905, "on floor": 906, "2008": 907, "dark": 908, "flying kites": 909, "rural": 910, "olympics": 911, "bmw": 912, "34": 913, "factory": 914, "denim": 915, "typing": 916, "for fun": 917, "steel": 918, "watching tv": 919, "chevron": 920, "driver": 921, "baggage claim": 922, "grapes": 923, "f": 924, "angels": 925, "roof": 926, "handlebars": 927, "train station": 928, "public": 929, "oak": 930, "sleeping": 931, "canada": 932, "on runway": 933, "air canada": 934, "on top": 935, "tired": 936, "blonde": 937, "cups": 938, "little": 939, "adidas": 940, "10 feet": 941, "white and gray": 942, "leaf": 943, "fisheye": 944, "forest": 945, "war": 946, "octagon": 947, "raspberry": 948, "helmets": 949, "united states": 950, "29": 951, "noodles": 952, "van": 953, "long": 954, "traveling": 955, "luggage": 956, "airport": 957, "single": 958, "pitching": 959, "dugout": 960, "garbage": 961, "in street": 962, "happiness": 963, "cigarette": 964, "on tower": 965, "antelope": 966, "graffiti": 967, "skating": 968, "on road": 969, "curved": 970, "red light": 971, "washington": 972, "ski lift": 973, "athletics": 974, "brace": 975, "squatting": 976, "catching": 977, "batter": 978, "batting": 979, "game": 980, "towards": 981, "33": 982, "sliding": 983, "makeup": 984, "japanese": 985, "person": 986, "pirates": 987, "plaid": 988, "rose": 989, "daytime": 990, "keyboard": 991, "surfboards": 992, "hummingbird": 993, "ollie": 994, "11:30": 995, "clock tower": 996, "5:55": 997, "san francisco": 998, "stopping": 999, "tags": 1000, "samsung": 1001, "computers": 1002, "cabinets": 1003, "talking": 1004, "cage": 1005, "asparagus": 1006, "5 years": 1007, "hanger": 1008, "adult": 1009, "rabbit": 1010, "empty": 1011, "softball": 1012, "1st": 1013, "playing": 1014, "chairs": 1015, "farm": 1016, "cross country": 1017, "dump truck": 1018, "women": 1019, "snowboarder": 1020, "tall": 1021, "monkey": 1022, "mantle": 1023, "fire": 1024, "books": 1025, "quilt": 1026, "cessna": 1027, "chandelier": 1028, "dunkin 
donuts": 1029, "beans": 1030, "relish": 1031, "no flag": 1032, "parking meter": 1033, "spots": 1034, "ducks": 1035, "sandals": 1036, "doughnut": 1037, "lighthouse": 1038, "yacht": 1039, "german shepherd": 1040, "in middle": 1041, "raw": 1042, "chain": 1043, "2 feet": 1044, "pedestal": 1045, "sauerkraut": 1046, "bagels": 1047, "mutt": 1048, "dog and cat": 1049, "race": 1050, "poor": 1051, "cat and dog": 1052, "station": 1053, "printer": 1054, "daisies": 1055, "front": 1056, "gravel": 1057, "rear": 1058, "grassy": 1059, "pigeons": 1060, "dogs": 1061, "in car": 1062, "life": 1063, "wii remotes": 1064, "suv": 1065, "leather": 1066, "bottom right": 1067, "peace": 1068, "facebook": 1069, "blanket": 1070, "fountain": 1071, "frisbees": 1072, "12:30": 1073, "am": 1074, "scooter": 1075, "going": 1076, "analog": 1077, "america": 1078, "pitbull": 1079, "relaxing": 1080, "paddle boarding": 1081, "white and pink": 1082, "shampoo": 1083, "alps": 1084, "ride": 1085, "side": 1086, "mane": 1087, "on desk": 1088, "on chair": 1089, "2012": 1090, "multi": 1091, "straight": 1092, "big ben": 1093, "closed": 1094, "frosted": 1095, "3 feet": 1096, "waves": 1097, "buoy": 1098, "life vest": 1099, "trash can": 1100, "medium": 1101, "boxer": 1102, "very tall": 1103, "yamaha": 1104, "sunlight": 1105, "hit ball": 1106, "dry": 1107, "coke": 1108, "gym": 1109, "orange and black": 1110, "center": 1111, "rope": 1112, "flip flops": 1113, "4th of july": 1114, "siamese": 1115, "crafts": 1116, "color": 1117, "italian": 1118, "playing frisbee": 1119, "skate park": 1120, "orange juice": 1121, "windowsill": 1122, "corgi": 1123, "thumb": 1124, "peanut butter": 1125, "pie": 1126, "toast": 1127, "no hat": 1128, "benches": 1129, "diamond": 1130, "blender": 1131, "avocado": 1132, "television": 1133, "speakers": 1134, "pony": 1135, "baseball field": 1136, "pavement": 1137, "sydney": 1138, "not there": 1139, "diamonds": 1140, "4 feet": 1141, "goalie": 1142, "soccer ball": 1143, "runway": 1144, "video game": 1145, 
"gaming": 1146, "casual": 1147, "green and white": 1148, "toilet brush": 1149, "working": 1150, "pickup": 1151, "girls": 1152, "remotes": 1153, "pasta": 1154, "hood": 1155, "braves": 1156, "skier": 1157, "motorola": 1158, "17": 1159, "b": 1160, "100": 1161, "diet coke": 1162, "hospital": 1163, "wagon": 1164, "milk": 1165, "ferry": 1166, "rainbow": 1167, "on bed": 1168, "toward": 1169, "1:30": 1170, "19": 1171, "security": 1172, "herself": 1173, "mercedes benz": 1174, "supreme": 1175, "thin": 1176, "platform": 1177, "gray and red": 1178, "thai": 1179, "storage": 1180, "thailand": 1181, "swan": 1182, "peach": 1183, "10:05": 1184, "dome": 1185, "chiquita": 1186, "2:00": 1187, "mountain dew": 1188, "23": 1189, "knives": 1190, "street sign": 1191, "on beach": 1192, "playing wii": 1193, "using laptop": 1194, "stickers": 1195, "yogurt": 1196, "on grass": 1197, "9:50": 1198, "9:45": 1199, "sweat": 1200, "gatorade": 1201, "umpire": 1202, "37": 1203, "transport": 1204, "desktop": 1205, "desserts": 1206, "main": 1207, "boston": 1208, "fell": 1209, "top right": 1210, "case": 1211, "asleep": 1212, "over": 1213, "9:55": 1214, "grapefruit": 1215, "breakfast": 1216, "headphones": 1217, "freight": 1218, "cup": 1219, "sweatband": 1220, "nobody": 1221, "lamps": 1222, "9:25": 1223, "scarf": 1224, "on fridge": 1225, "main st": 1226, "moving": 1227, "confused": 1228, "fresh": 1229, "kiting": 1230, "blue jay": 1231, "flats": 1232, "long time": 1233, "chihuahua": 1234, "ceramic": 1235, "mushrooms": 1236, "on plate": 1237, "human": 1238, "power lines": 1239, "hotel": 1240, "map": 1241, "earring": 1242, "boarding": 1243, "display": 1244, "warm": 1245, "napkins": 1246, "brown and black": 1247, "broom": 1248, "basketball": 1249, "papers": 1250, "holding baby": 1251, "sad": 1252, "kickstand": 1253, "60": 1254, "shoulder": 1255, "sleep": 1256, "footprints": 1257, "tunnel": 1258, "1990": 1259, "hats": 1260, "6 inches": 1261, "ham": 1262, "bacon": 1263, "church": 1264, "53": 1265, "pineapple": 
1266, "at camera": 1267, "red bull": 1268, "pilot": 1269, "tattoo": 1270, "work": 1271, "polar bear": 1272, "taking off": 1273, "website": 1274, "22": 1275, "4:00": 1276, "coffee maker": 1277, "fast": 1278, "fur": 1279, "rubber": 1280, "tongs": 1281, "german": 1282, "germany": 1283, "3 inches": 1284, "toy": 1285, "3:20": 1286, "calm": 1287, "pots": 1288, "balloons": 1289, "fruits": 1290, "9:20": 1291, "drawer": 1292, "oven": 1293, "soup": 1294, "stove": 1295, "heels": 1296, "wind": 1297, "island": 1298, "blood": 1299, "leg": 1300, "theater": 1301, "tennis racquet": 1302, "21": 1303, "gothic": 1304, "2:35": 1305, "wii remote": 1306, "turning": 1307, "20 feet": 1308, "pink and black": 1309, "ears": 1310, "fun": 1311, "wreath": 1312, "to right": 1313, "child": 1314, "fly": 1315, "head": 1316, "drywall": 1317, "shorter": 1318, "pier": 1319, "feeding giraffe": 1320, "in vase": 1321, "burger": 1322, "easter": 1323, "onion": 1324, "uniform": 1325, "remote control": 1326, "guitar": 1327, "time": 1328, "verizon": 1329, "tomatoes": 1330, "ship": 1331, "tulips": 1332, "glaze": 1333, "on suitcase": 1334, "tent": 1335, "1:45": 1336, "market": 1337, "bnsf": 1338, "bandana": 1339, "still": 1340, "don't know": 1341, "piano": 1342, "mouth": 1343, "run": 1344, "sparrow": 1345, "throw": 1346, "lines": 1347, "vest": 1348, "1950": 1349, "jet": 1350, "sepia": 1351, "2015": 1352, "busy": 1353, "lighter": 1354, "dessert": 1355, "bending": 1356, "75": 1357, "finch": 1358, "pastries": 1359, "outdoors": 1360, "bakery": 1361, "clean": 1362, "ipod": 1363, "tablecloth": 1364, "cigarettes": 1365, "looking at phone": 1366, "in front": 1367, "food truck": 1368, "face": 1369, "swinging": 1370, "safari": 1371, "500": 1372, "volkswagen": 1373, "2010": 1374, "shape": 1375, "shelves": 1376, "riding horses": 1377, "2016": 1378, "behind bus": 1379, "towels": 1380, "lemon": 1381, "straw": 1382, "bamboo": 1383, "5 feet": 1384, "hardwood": 1385, "oregon": 1386, "schnauzer": 1387, "organic": 1388, "h": 1389, 
"kid": 1390, "meter": 1391, "61": 1392, "charging": 1393, "bald": 1394, "caucasian": 1395, "man on left": 1396, "stand": 1397, "27": 1398, "dining room": 1399, "sandwiches": 1400, "32": 1401, "apartment": 1402, "tower": 1403, "virgin": 1404, "out": 1405, "white and red": 1406, "2:05": 1407, "i don't know": 1408, "chains": 1409, "legs": 1410, "age": 1411, "goats": 1412, "s": 1413, "congratulations": 1414, "dresser": 1415, "camper": 1416, "half": 1417, "silverware": 1418, "decorative": 1419, "hawaiian": 1420, "petting horse": 1421, "wheel": 1422, "florida": 1423, "reds": 1424, "washington dc": 1425, "moon": 1426, "conference": 1427, "screen": 1428, "controller": 1429, "robin": 1430, "men": 1431, "protection": 1432, "roll": 1433, "harley davidson": 1434, "coal": 1435, "mustache": 1436, "smiling": 1437, "pedestrians": 1438, "88": 1439, "me": 1440, "tray": 1441, "males": 1442, "monitor": 1443, "bell": 1444, "landscape": 1445, "club": 1446, "toothpick": 1447, "seagulls": 1448, "bowtie": 1449, "lake": 1450, "steam": 1451, "surf": 1452, "baseball glove": 1453, "blinders": 1454, "woods": 1455, "stuffed": 1456, "sunbathing": 1457, "shearing": 1458, "dad": 1459, "mixer": 1460, "pot": 1461, "blending": 1462, "identification": 1463, "owl": 1464, "wine glass": 1465, "on bike": 1466, "billabong": 1467, "new york": 1468, "yarn": 1469, "tube": 1470, "tennis ball": 1471, "2:55": 1472, "ice cream": 1473, "chevrolet": 1474, "shirt and tie": 1475, "taking selfie": 1476, "blue and green": 1477, "he isn't": 1478, "cutting cake": 1479, "east": 1480, "setting": 1481, "brewers": 1482, "riding bikes": 1483, "7 eleven": 1484, "stars": 1485, "jockey": 1486, "jacket": 1487, "standing still": 1488, "book": 1489, "gray and white": 1490, "pen": 1491, "red white blue": 1492, "above": 1493, "alaska": 1494, "tongue": 1495, "feathers": 1496, "k": 1497, "camping": 1498, "pasture": 1499, "corner": 1500, "away": 1501, "ski": 1502, "texas": 1503, "fire truck": 1504, "sailboats": 1505, "jump": 1506, 
"walk": 1507, "spray paint": 1508, "loading": 1509, "united": 1510, "1000": 1511, "brushing his teeth": 1512, "roman numerals": 1513, "garlic": 1514, "surprise": 1515, "3rd": 1516, "first": 1517, "side of road": 1518, "dodgers": 1519, "airplanes": 1520, "unsure": 1521, "russian": 1522, "wet": 1523, "skyscraper": 1524, "5 star": 1525, "brushing her teeth": 1526, "blankets": 1527, "natural": 1528, "across street": 1529, "smartphone": 1530, "duck": 1531, "sausage": 1532, "paris": 1533, "newspaper": 1534, "pants": 1535, "spices": 1536, "pillow": 1537, "to left": 1538, "snowboards": 1539, "colgate": 1540, "on elephant": 1541, "string": 1542, "horns": 1543, "2:40": 1544, "men's": 1545, "cobblestone": 1546, "regular": 1547, "staring": 1548, "28": 1549, "barber shop": 1550, "linoleum": 1551, "grind": 1552, "cut": 1553, "x": 1554, "above sink": 1555, "above stove": 1556, "dishes": 1557, "dalmatian": 1558, "watching": 1559, "glazed": 1560, "5:25": 1561, "j": 1562, "messy": 1563, "wallet": 1564, "tuna": 1565, "toasted": 1566, "grilled": 1567, "french": 1568, "green and blue": 1569, "sunflowers": 1570, "to catch frisbee": 1571, "wool": 1572, "sprint": 1573, "no grass": 1574, "cabinet": 1575, "shell": 1576, "foil": 1577, "bottles": 1578, "bar": 1579, "king": 1580, "paper towels": 1581, "friends": 1582, "beagle": 1583, "school bus": 1584, "laptops": 1585, "snowing": 1586, "cement": 1587, "pc": 1588, "accident": 1589, "stuffed animal": 1590, "wakeboard": 1591, "balance": 1592, "in suitcase": 1593, "white and black": 1594, "nikon": 1595, "cleats": 1596, "on sink": 1597, "pool": 1598, "mom": 1599, "downtown": 1600, "asian": 1601, "heater": 1602, "bathing": 1603, "193": 1604, "against wall": 1605, "canopy": 1606, "jungle": 1607, "berries": 1608, "military": 1609, "pickle": 1610, "clams": 1611, "seafood": 1612, "in box": 1613, "boats": 1614, "tables": 1615, "lizard": 1616, "lemonade": 1617, "m": 1618, "soft": 1619, "illinois": 1620, "country": 1621, "for sale": 1622, "arm": 1623, 
"listening": 1624, "curly": 1625, "play tennis": 1626, "hands": 1627, "cereal": 1628, "blue and red": 1629, "robe": 1630, "around neck": 1631, "red and silver": 1632, "soap": 1633, "trains": 1634, "throwing frisbee": 1635, "smoking": 1636, "india": 1637, "headband": 1638, "not very": 1639, "westin": 1640, "serve": 1641, "bicycles": 1642, "can't tell": 1643, "to catch ball": 1644, "visibility": 1645, "ana": 1646, "reins": 1647, "rodeo": 1648, "boot": 1649, "on horse": 1650, "12:35": 1651, "riding motorcycle": 1652, "mexico": 1653, "mother": 1654, "african": 1655, "left and right": 1656, "button": 1657, "earrings": 1658, "blackberry": 1659, "cell": 1660, "10:00": 1661, "harness": 1662, "pillows": 1663, "vegetable": 1664, "tablet": 1665, "fern": 1666, "cats": 1667, "golden retriever": 1668, "goat": 1669, "tractor": 1670, "valentine's day": 1671, "hearts": 1672, "khaki": 1673, "man on right": 1674, "mcdonald's": 1675, "player": 1676, "arriving": 1677, "husky": 1678, "on skateboard": 1679, "vases": 1680, "coat": 1681, "beanie": 1682, "coming": 1683, "granite": 1684, "shopping cart": 1685, "it's raining": 1686, "sports": 1687, "leash": 1688, "balls": 1689, "blurry": 1690, "baseball bat": 1691, "team": 1692, "mango": 1693, "mug": 1694, "eiffel tower": 1695, "worms": 1696, "trash": 1697, "robot": 1698, "show": 1699, "terrier": 1700, "painting": 1701, "rooster": 1702, "42": 1703, "jones": 1704, "state farm": 1705, "balloon": 1706, "trunk": 1707, "coach": 1708, "t": 1709, "playing game": 1710, "fireplace": 1711, "behind clouds": 1712, "uphill": 1713, "motocross": 1714, "sony": 1715, "magazine": 1716, "kitesurfing": 1717, "catching frisbee": 1718, "catch frisbee": 1719, "bud light": 1720, "drive": 1721, "fighting": 1722, "1 on left": 1723, "very old": 1724, "hallway": 1725, "lexus": 1726, "wii controller": 1727, "9:15": 1728, "fast food": 1729, "5:45": 1730, "catholic": 1731, "muffin": 1732, "traffic light": 1733, "band": 1734, "button up": 1735, "grocery": 1736, "shelf": 
1737, "2:25": 1738, "honey": 1739, "plants": 1740, "oars": 1741, "foggy": 1742, "nathan's": 1743, "cord": 1744, "yard": 1745, "48": 1746, "donut shop": 1747, "chimney": 1748, "calico": 1749, "suits": 1750, "sideways": 1751, "animals": 1752, "black and blue": 1753, "bikini": 1754, "photographer": 1755, "700": 1756, "queen": 1757, "1:00": 1758, "12:05": 1759, "horseback riding": 1760, "awake": 1761, "bunny": 1762, "12:00": 1763, "continental": 1764, "flamingo": 1765, "rye": 1766, "family": 1767, "lots": 1768, "owner": 1769, "stew": 1770, "palm tree": 1771, "cruise ship": 1772, "56": 1773, "design": 1774, "ny": 1775, "far right": 1776, "tire": 1777, "younger": 1778, "biking": 1779, "at&t": 1780, "giants": 1781, "marshmallows": 1782, "caramel": 1783, "polo": 1784, "emirates": 1785, "salon": 1786, "focus": 1787, "on motorcycle": 1788, "magnets": 1789, "mat": 1790, "ivy": 1791, "cakes": 1792, "chrome": 1793, "bob": 1794, "asia": 1795, "graduation": 1796, "cauliflower": 1797, "in snow": 1798, "c": 1799, "rough": 1800, "vacation": 1801, "air": 1802, "windy": 1803, "victoria": 1804, "4:45": 1805, "trick": 1806, "coconut": 1807, "labrador": 1808, "on left": 1809, "yellow and green": 1810, "butterfly": 1811, "fake": 1812, "on napkin": 1813, "bricks": 1814, "wine glasses": 1815, "detroit": 1816, "man's": 1817, "parsley": 1818, "art": 1819, "subway": 1820, "wave": 1821, "placemat": 1822, "hydrant": 1823, "sofa": 1824, "pigeon": 1825, "riding elephant": 1826, "all": 1827, "branches": 1828, "plant": 1829, "to eat": 1830, "zucchini": 1831, "feta": 1832, "neon": 1833, "mouse pad": 1834, "cloud": 1835, "toilet paper": 1836, "pumpkin": 1837, "rowing": 1838, "toronto": 1839, "handicap": 1840, "seeds": 1841, "fly kite": 1842, "chicago": 1843, "marble": 1844, "frame": 1845, "150": 1846, "rocky": 1847, "give way": 1848, "sauce": 1849, "it's not": 1850, "control": 1851, "high chair": 1852, "playstation": 1853, "xbox": 1854, "not likely": 1855, "roman": 1856, "land": 1857, "1:35": 1858, 
"lifeguard": 1859, "on pizza": 1860, "size": 1861, "bull": 1862, "dandelions": 1863, "equestrian": 1864, "goose": 1865, "8 feet": 1866, "recessed": 1867, "statue": 1868, "index": 1869, "phillies": 1870, "strike": 1871, "mirrors": 1872, "pointing": 1873, "farmer": 1874, "collie": 1875, "motorbike": 1876, "lanes": 1877, "bikes": 1878, "biker": 1879, "arrows": 1880, "gas station": 1881, "logs": 1882, "smaller": 1883, "desert": 1884, "yield": 1885, "flags": 1886, "stool": 1887, "kitten": 1888, "doll": 1889, "daffodils": 1890, "letters": 1891, "dishwasher": 1892, "first base": 1893, "nuts": 1894, "2013": 1895, "persian": 1896, "swim trunks": 1897, "deep": 1898, "o": 1899, "doubles": 1900, "toothpicks": 1901, "in field": 1902, "wristband": 1903, "wheels": 1904, "baking": 1905, "4:15": 1906, "11:00": 1907, "ear": 1908, "2007": 1909, "51": 1910, "chevy": 1911, "using computer": 1912, "frog": 1913, "storm": 1914, "boogie board": 1915, "hungry": 1916, "by window": 1917, "ambulance": 1918, "pigtails": 1919, "audi": 1920, "microsoft": 1921, "on man": 1922, "cannot tell": 1923, "stained glass": 1924, "hugging": 1925, "laying down": 1926, "3:00": 1927, "taxi": 1928, "pedestrian": 1929, "landing": 1930, "numbers": 1931, "38": 1932, "stones": 1933, "on tree": 1934, "clocks": 1935, "new": 1936, "picnic": 1937, "fog": 1938, "buffalo": 1939, "under armour": 1940, "cocker spaniel": 1941, "orioles": 1942, "no sign": 1943, "telling time": 1944, "bags": 1945, "golden gate": 1946, "cover": 1947, "castle": 1948, "canoe": 1949, "selfie": 1950, "cream": 1951, "floating": 1952, "indoor": 1953, "antique": 1954, "aluminum": 1955, "silver and black": 1956, "cast iron": 1957, "peas": 1958, "sun hat": 1959, "on right": 1960, "swiss": 1961, "flour": 1962, "under sink": 1963, "fashion": 1964, "fedora": 1965, "shells": 1966, "1 hour": 1967, "puppy": 1968, "in stands": 1969, "not here": 1970, "motor": 1971, "thousands": 1972, "120": 1973, "sail": 1974, "butt": 1975, "mexican": 1976, "dead end": 1977, 
"paddle": 1978, "bathing suit": 1979, "shop": 1980, "onion rings": 1981, "boxing": 1982, "birthday cake": 1983, "chalk": 1984, "scenery": 1985, "style": 1986, "nissan": 1987, "sticker": 1988, "on rack": 1989, "1 4": 1990, "woman's": 1991, "surprised": 1992, "north face": 1993, "squash": 1994, "not sure": 1995, "email": 1996, "spotted": 1997, "seat": 1998, "himself": 1999, "circles": 2000, "san diego": 2001, "kia": 2002, "mattress": 2003, "obama": 2004, "lamb": 2005, "american flag": 2006, "climbing": 2007, "skull and crossbones": 2008, "roast beef": 2009, "visor": 2010, "herd": 2011, "double": 2012, "52": 2013, "high": 2014, "stagecoach": 2015, "cart": 2016, "feeding": 2017, "eaten": 2018, "cone": 2019, "11:15": 2020, "smoothie": 2021, "golf": 2022, "colorado": 2023, "electronics": 2024, "5:15": 2025, "bowling": 2026, "players": 2027, "ketchup and mustard": 2028, "styrofoam": 2029, "6 feet": 2030, "hawk": 2031, "cheddar": 2032, "12:28": 2033, "arabic": 2034, "12:25": 2035, "12:10": 2036, "shower curtain": 2037, "army": 2038, "salmon": 2039, "10:40": 2040, "hanging": 2041, "whole": 2042, "behind fence": 2043, "bars": 2044, "moss": 2045, "no dog": 2046, "traffic": 2047, "10:25": 2048, "r": 2049, "countryside": 2050, "machine": 2051, "directions": 2052, "cooked": 2053, "aa": 2054, "6:45": 2055, "4 way": 2056, "stripe": 2057, "brand": 2058, "baseball player": 2059, "bunk": 2060, "coleslaw": 2061, "fishing boat": 2062, "at table": 2063, "europe": 2064, "dead": 2065, "arch": 2066, "scrambled": 2067, "clothing": 2068, "closet": 2069, "egg": 2070, "suitcases": 2071, "indoors": 2072, "coffee pot": 2073, "tires": 2074, "lilies": 2075, "cafe": 2076, "9:35": 2077, "teal": 2078, "toothpaste": 2079, "in background": 2080, "tarmac": 2081, "painted": 2082, "sunset": 2083, "orange and yellow": 2084, "oar": 2085, "peaches": 2086, "zebra and giraffe": 2087, "ladybug": 2088, "20 ft": 2089, "sesame seeds": 2090, "hills": 2091, "2:30": 2092, "stucco": 2093, "tail": 2094, "couple": 2095, 
"kawasaki": 2096, "smooth": 2097, "powdered sugar": 2098, "pedestrian crossing": 2099, "french fries": 2100, "picnic table": 2101, "teeth": 2102, "ribbon": 2103, "saddle": 2104, "15 feet": 2105, "earbuds": 2106, "on train": 2107, "39": 2108, "curb": 2109, "tow": 2110, "shark": 2111, "white and orange": 2112, "6:25": 2113, "gravy": 2114, "fork and spoon": 2115, "pooping": 2116, "curtain": 2117, "lime": 2118, "skull": 2119, "crossing": 2120, "speed limit": 2121, "peacock": 2122, "boredom": 2123, "neck": 2124, "hit": 2125, "dragon": 2126, "tissues": 2127, "basil": 2128, "waving": 2129, "blue team": 2130, "rectangles": 2131, "helicopter": 2132, "mud": 2133, "us": 2134, "balcony": 2135, "red and gray": 2136, "firefighter": 2137, "sunflower": 2138, "wallpaper": 2139, "best buy": 2140, "11:20": 2141, "public market center": 2142, "seattle": 2143, "bookshelf": 2144, "looking": 2145, "1 inch": 2146, "harley": 2147, "urinal": 2148, "cartoon": 2149, "t shirt and jeans": 2150, "navy": 2151, "fedex": 2152, "rays": 2153, "deck": 2154, "coaster": 2155, "1:20": 2156, "50 feet": 2157, "4:20": 2158, "us open": 2159, "looking at camera": 2160, "600": 2161, "national express": 2162, "white house": 2163, "5:00": 2164, "jp morgan": 2165, "palm trees": 2166, "tub": 2167, "pens": 2168, "soldiers": 2169, "2 people": 2170, "animal": 2171, "speaker": 2172, "hamburger": 2173, "spaghetti": 2174, "green beans": 2175, "it isn't": 2176, "10:20": 2177, "buildings": 2178, "on shelf": 2179, "baseball uniform": 2180, "tiled": 2181, "orange and blue": 2182, "90": 2183, "north america": 2184, "arrow": 2185, "news": 2186, "tropicana": 2187, "formal": 2188, "in grass": 2189, "thumbs up": 2190, "clip": 2191, "gate": 2192, "tennis player": 2193, "lilac": 2194, "pastry": 2195, "nose": 2196, "pacifier": 2197, "11:35": 2198, "different teams": 2199, "cardinals": 2200, "exhaust": 2201, "hauling": 2202, "on tray": 2203, "bagel": 2204, "huge": 2205, "out of focus": 2206, "cook": 2207, "wheat": 2208, "photo": 
2209, "ghost": 2210, "sedan": 2211, "qatar": 2212, "zig zag": 2213, "lanyard": 2214, "pink and white": 2215, "sesame": 2216, "space": 2217, "no clock": 2218, "warning": 2219, "snowy": 2220, "tater tots": 2221, "tropical": 2222, "grandfather": 2223, "mac": 2224, "magnet": 2225, "photoshop": 2226, "pajamas": 2227, "350": 2228, "casserole": 2229, "4:55": 2230, "pelican": 2231, "2009": 2232, "clydesdale": 2233, "tow truck": 2234, "belt": 2235, "west": 2236, "omelet": 2237, "heavy": 2238, "crown": 2239, "in corner": 2240, "hexagon": 2241, "mound": 2242, "iris": 2243, "g": 2244, "12:45": 2245, "2:15": 2246, "3:10": 2247, "drawing": 2248, "only": 2249, "little girl": 2250, "washing": 2251, "nokia": 2252, "windsor": 2253, "2 men": 2254, "parmesan cheese": 2255, "on woman": 2256, "freezer": 2257, "icing": 2258, "venice": 2259, "dairy": 2260, "several": 2261, "concentration": 2262, "3:15": 2263, "no smoking": 2264, "kayak": 2265, "frosting": 2266, "jetblue": 2267, "thoroughbred": 2268, "parakeet": 2269, "shoe": 2270, "skeleton": 2271, "britain": 2272, "ties": 2273, "in sink": 2274, "patio": 2275, "bank": 2276, "camouflage": 2277, "privacy": 2278, "bib": 2279, "blue and gray": 2280, "looking out window": 2281, "falling": 2282, "bucket": 2283, "cupcakes": 2284, "throw ball": 2285, "garden": 2286, "almonds": 2287, "ducati": 2288, "ireland": 2289, "plastic wrap": 2290, "starbucks": 2291, "all way": 2292, "bark": 2293, "home plate": 2294, "base": 2295, "dog food": 2296, "toys": 2297, "blue and orange": 2298, "1 in front": 2299, "foot": 2300, "dc": 2301, "california": 2302, "towing": 2303, "cheesecake": 2304, "bushes": 2305, "bow tie": 2306, "millions": 2307, "down street": 2308, "2011": 2309, "police officer": 2310, "windmill": 2311, "taking pictures": 2312, "street name": 2313, "cleaning": 2314, "on pole": 2315, "russia": 2316, "main street": 2317, "catch ball": 2318, "mario": 2319, "pirate": 2320, "track": 2321, "garage": 2322, "7:10": 2323, "they aren't": 2324, "mother and 
child": 2325, "tents": 2326, "fancy": 2327, "tattoos": 2328, "alcohol": 2329, "2:45": 2330, "wheelchair": 2331, "money": 2332, "top hat": 2333, "willow": 2334, "cd": 2335, "brushing hair": 2336, "pancake": 2337, "80": 2338, "listening to music": 2339, "green and red": 2340, "barrier": 2341, "vests": 2342, "hiking": 2343, "tank top": 2344, "lufthansa": 2345, "student": 2346, "menu": 2347, "forehand": 2348, "wii controllers": 2349, "acer": 2350, "wall st": 2351, "hundreds": 2352, "water ski": 2353, "furniture": 2354, "paisley": 2355, "pizza hut": 2356, "baseball game": 2357, "hill": 2358, "prom": 2359, "1 world": 2360, "tiara": 2361, "students": 2362, "information": 2363, "hazy": 2364, "nasa": 2365, "canon": 2366, "bird feeder": 2367, "crane": 2368, "dr pepper": 2369, "logitech": 2370, "2:10": 2371, "all of them": 2372, "utensils": 2373, "telephone": 2374, "converse": 2375, "bone": 2376, "jeep": 2377, "nursing": 2378, "krispy kreme": 2379, "cameraman": 2380, "pee": 2381, "ranch": 2382, "polka dots": 2383, "railroad crossing": 2384, "shirts": 2385, "feeder": 2386, "above toilet": 2387, "unclear": 2388, "below": 2389, "43": 2390, "spoons": 2391, "calendar": 2392, "vaio": 2393, "fox": 2394, "mint": 2395, "after": 2396, "spiderman": 2397, "lg": 2398, "concert": 2399, "on rock": 2400, "fluffy": 2401, "gray and black": 2402, "coats": 2403, "lady": 2404, "dodge": 2405, "easyjet": 2406, "pearl": 2407, "bunt": 2408, "flat screen": 2409, "10:30": 2410, "music": 2411, "polar bears": 2412, "riding horse": 2413, "lift": 2414, "angry": 2415, "cookies": 2416, "3:45": 2417, "buttons": 2418, "hot": 2419, "cute": 2420, "behind": 2421, "dole": 2422, "in motion": 2423, "26": 2424, "pans": 2425, "love": 2426, "winnie pooh": 2427, "pear": 2428, "copyright": 2429, "2 hours": 2430, "snowsuit": 2431, "kissing": 2432, "backhand": 2433, "to get to other side": 2434, "metro": 2435, "swans": 2436, "very fast": 2437, "can't see it": 2438, "nintendo": 2439, "direction": 2440, "waiting": 2441, 
"mohawk": 2442, "st patrick's day": 2443, "rail": 2444, "hoodie": 2445, "feet": 2446, "swirls": 2447, "muffins": 2448, "4:05": 2449, "106": 2450, "10:55": 2451, "coins": 2452, "mitt": 2453, "game controller": 2454, "room": 2455, "adults": 2456, "urinals": 2457, "cameras": 2458, "marker": 2459, "upright": 2460, "brass": 2461, "sled": 2462, "teacher": 2463, "conductor": 2464, "farmers market": 2465, "toiletries": 2466, "blue and black": 2467, "soccer field": 2468, "banana peel": 2469, "sprite": 2470, "doughnuts": 2471, "bank of america": 2472, "on his face": 2473, "heat": 2474, "emergency": 2475, "ski slope": 2476, "hard": 2477, "41": 2478, "6:00": 2479, "in his hand": 2480, "cluttered": 2481, "dog show": 2482, "on boat": 2483, "grizzly": 2484, "drums": 2485, "not": 2486, "in hand": 2487, "easy": 2488, "400": 2489, "under table": 2490, "d": 2491, "hitting ball": 2492, "photography": 2493, "intersection": 2494, "backwards": 2495, "crocs": 2496, "marina": 2497, "chips": 2498, "bible": 2499, "harry potter": 2500, "hawaii": 2501, "fanta": 2502, "half full": 2503, "carriage": 2504, "curious": 2505, "12:50": 2506, "black white": 2507, "geese": 2508, "pork": 2509, "mailbox": 2510, "l": 2511, "sidecar": 2512, "poop": 2513, "wings": 2514, "penguin": 2515, "to see": 2516, "pocket": 2517, "steps": 2518, "cubs": 2519, "junk": 2520, "deer": 2521, "ottoman": 2522, "salt": 2523, "condiments": 2524, "1:55": 2525, "post": 2526, "bulldog": 2527, "notebook": 2528, "no cat": 2529, "champagne": 2530, "jets": 2531, "knee pads": 2532, "throw frisbee": 2533, "drinks": 2534, "leopard": 2535, "taller": 2536, "cooler": 2537, "bundt": 2538, "monday": 2539, "grape": 2540, "wine tasting": 2541, "under": 2542, "baskets": 2543, "santa hat": 2544, "chest": 2545, "sewing": 2546, "on car": 2547, "sony ericsson": 2548, "peeing": 2549, "for photo": 2550, "tour": 2551, "few": 2552, "singapore": 2553, "fireman": 2554, "fire extinguisher": 2555, "wildebeest": 2556, "lemons": 2557, "peanuts": 2558, 
"babies": 2559, "wiimote": 2560, "guitar hero": 2561, "slide": 2562, "stopped": 2563, "library": 2564, "multi colored": 2565, "blue and pink": 2566, "choppy": 2567, "sailing": 2568, "brush": 2569, "grinding": 2570, "jelly": 2571, "dairy queen": 2572, "shaking hands": 2573, "ge": 2574, "tigers": 2575, "tokyo": 2576, "philadelphia": 2577, "ski boots": 2578, "buses": 2579, "11:45": 2580, "collage": 2581, "pink and blue": 2582, "jesus": 2583, "singles": 2584, "iron": 2585, "coffee table": 2586, "2 years": 2587, "don't walk": 2588, "classroom": 2589, "on water": 2590, "potato salad": 2591, "posts": 2592, "harbor": 2593, "residential": 2594, "joshua": 2595, "uk": 2596, "burgers": 2597, "deli": 2598, "kicking": 2599, "lace": 2600, "overalls": 2601, "vehicles": 2602, "ram": 2603, "dancing": 2604, "47": 2605, "shed": 2606, "lid": 2607, "he's not": 2608, "fans": 2609, "amtrak": 2610, "space shuttle": 2611, "ostrich": 2612, "bathtub": 2613, "kneeling": 2614, "2:50": 2615, "mall": 2616, "yellow and orange": 2617, "gazebo": 2618, "wax": 2619, "slow down": 2620, "lays": 2621, "hammer time": 2622, "octopus": 2623, "crib": 2624, "banana split": 2625, "broadway": 2626, "pottery": 2627, "wavy": 2628, "farmers": 2629, "holding phone": 2630, "on phone": 2631, "squirrel": 2632, "wax paper": 2633, "tusks": 2634, "dining": 2635, "packing": 2636, "kangaroo": 2637, "dawn": 2638, "defense": 2639, "powdered": 2640, "thomas": 2641, "budweiser": 2642, "back left": 2643, "stir fry": 2644, "beijing": 2645, "11:10": 2646, "tripod": 2647, "wide": 2648, "slope": 2649, "black and gray": 2650, "planter": 2651, "chili": 2652, "siblings": 2653, "kayaking": 2654, "captivity": 2655, "opaque": 2656, "rack": 2657, "panda": 2658, "doorway": 2659, "wheelie": 2660, "pelicans": 2661, "genetics": 2662, "not in service": 2663, "volvo": 2664, "dachshund": 2665, "v": 2666, "on laptop": 2667, "western": 2668, "gone": 2669, "birthday party": 2670, "parking garage": 2671, "tying tie": 2672, "blueberry": 2673, 
"scale": 2674, "notes": 2675, "train car": 2676, "man made": 2677, "stability": 2678, "lily": 2679, "lying down": 2680, "pacific": 2681, "high heels": 2682, "pare": 2683, "checkerboard": 2684, "partly cloudy": 2685, "cool": 2686, "n": 2687, "toilets": 2688, "tree branch": 2689, "copper": 2690, "cycling": 2691, "5:50": 2692, "870": 2693, "shopping": 2694, "7:05": 2695, "zipper": 2696, "holding umbrella": 2697, "batman": 2698, "lotion": 2699, "1:25": 2700, "black and brown": 2701, "playing video game": 2702, "girl on right": 2703, "legos": 2704, "drinking water": 2705, "burrito": 2706, "plow": 2707, "jet ski": 2708, "spiral": 2709, "ibm": 2710, "tools": 2711, "flashlight": 2712, "cherries": 2713, "maple leaf": 2714, "mountainous": 2715, "under tree": 2716, "vines": 2717, "sushi": 2718, "baker": 2719, "snake": 2720, "globe": 2721, "target": 2722, "john": 2723, "pomeranian": 2724, "tuxedo": 2725, "hockey": 2726, "sleeve": 2727, "leaning": 2728, "wireless": 2729, "11:05": 2730, "compaq": 2731, "do not enter": 2732, "radish": 2733, "1:05": 2734, "dim": 2735, "advertisement": 2736, "movement": 2737, "model": 2738, "hammock": 2739, "swing": 2740, "sheet": 2741, "google": 2742, "boardwalk": 2743, "right 1": 2744, "haircut": 2745, "ankle": 2746, "3:30": 2747, "exit": 2748, "csx": 2749, "tim hortons": 2750, "lego": 2751, "cucumbers": 2752, "angel": 2753, "12:20": 2754, "racquet": 2755, "behind woman": 2756, "potato": 2757, "egg salad": 2758, "controllers": 2759, "recliner": 2760, "upside down": 2761, "mosaic": 2762, "before": 2763, "antenna": 2764, "3:50": 2765, "10:15": 2766, "lion": 2767, "camo": 2768, "fighter": 2769, "silver and red": 2770, "dirt bike": 2771, "playing video games": 2772, "used": 2773, "crates": 2774, "horizontally": 2775, "plunger": 2776, "refrigerators": 2777, "radiator": 2778, "stork": 2779, "in basket": 2780, "cap": 2781, "living": 2782, "married": 2783, "briefcase": 2784, "bottom left": 2785, "30 mph": 2786, "ascending": 2787, "flip phone": 2788, 
"101": 2789, "11:50": 2790, "gun": 2791, "arizona": 2792, "foam": 2793, "serious": 2794, "y": 2795, "close up": 2796, "pancakes": 2797, "heineken": 2798, "paw": 2799, "cnn": 2800, "comforter": 2801, "sheets": 2802, "8:35": 2803, "driveway": 2804, "fair": 2805, "cleaner": 2806, "1 year": 2807, "delivery": 2808, "commuter": 2809, "apple and banana": 2810, "chase": 2811, "72": 2812, "safe": 2813, "trucks": 2814, "trunks": 2815, "spider": 2816, "64": 2817, "slacks": 2818, "meeting": 2819, "7:00": 2820, "skiers": 2821, "shaved": 2822, "carrot cake": 2823, "holding": 2824, "surfers": 2825, "giraffe and zebra": 2826, "7:45": 2827, "mississippi": 2828, "seaweed": 2829, "black and pink": 2830, "horse racing": 2831, "orchid": 2832, "rv": 2833, "tourist": 2834, "above door": 2835, "leaving": 2836, "pitch": 2837, "crest": 2838, "miami": 2839, "asics": 2840, "flood": 2841, "bus station": 2842, "take off": 2843, "amazon": 2844, "practice": 2845, "entering": 2846, "diesel": 2847, "pm": 2848, "wetsuits": 2849, "remodeling": 2850, "porch": 2851, "7:35": 2852, "tie dye": 2853, "baked": 2854, "life jacket": 2855, "cylinder": 2856, "grilled cheese": 2857, "meatballs": 2858, "paddling": 2859, "banana bread": 2860, "monster": 2861, "smiley face": 2862, "not high": 2863, "keys": 2864, "dreadlocks": 2865, "kitchenaid": 2866, "straight ahead": 2867, "badminton": 2868, "long sleeve": 2869, "sheepdog": 2870, "5:18": 2871, "end": 2872, "on shore": 2873, "scratching": 2874, "oriental": 2875, "5:05": 2876, "alligator": 2877, "city bus": 2878, "purple and white": 2879, "10:50": 2880, "each other": 2881, "weeds": 2882, "tinkerbell": 2883, "rottweiler": 2884, "apartments": 2885, "snowflakes": 2886, "stop light": 2887, "sweatshirt": 2888, "shore": 2889, "bidet": 2890, "switzerland": 2891, "stretching": 2892, "tv stand": 2893, "boundaries": 2894, "65": 2895, "bronze": 2896, "jar": 2897, "middle 1": 2898, "54": 2899, "skate": 2900, "easton": 2901, "turn right": 2902, "raspberries": 2903, "singing": 
2904, "on bus": 2905, "carnations": 2906, "descending": 2907, "classic": 2908, "suspenders": 2909, "not long": 2910, "8:50": 2911, "father": 2912, "anniversary": 2913, "hsbc": 2914, "very long": 2915, "space needle": 2916, "skatepark": 2917, "fruit salad": 2918, "kenmore": 2919, "no water": 2920, "8:05": 2921, "db": 2922, "baby's breath": 2923, "shelter": 2924, "1980": 2925, "no left turn": 2926, "washington monument": 2927, "ham and cheese": 2928, "10 inches": 2929, "8:55": 2930, "savory": 2931, "6:35": 2932, "indians": 2933, "9:05": 2934, "fires": 2935, "pipes": 2936, "donkey": 2937, "cds": 2938, "mitsubishi": 2939, "tell time": 2940, "outfield": 2941, "christian": 2942, "puma": 2943, "parking meters": 2944, "cranes": 2945, "flip": 2946, "wine bottle": 2947, "stadium": 2948, "mouthwash": 2949, "heinz": 2950, "distance": 2951, "macaroni": 2952, "on plane": 2953, "triumph": 2954, "more": 2955, "4:50": 2956, "single engine": 2957, "disney": 2958, "on stove": 2959, "shih tzu": 2960, "fried": 2961, "to hit ball": 2962, "in her hand": 2963, "sunrise": 2964, "2nd": 2965, "elmo": 2966, "kite string": 2967, "suzuki": 2968, "traffic lights": 2969, "blt": 2970, "i": 2971, "hitting": 2972, "htc": 2973, "healthy": 2974, "current": 2975, "star alliance": 2976, "stomach": 2977, "watch tv": 2978, "tulip": 2979, "5:10": 2980, "right side": 2981, "4:40": 2982, "ginger": 2983, "on sign": 2984, "cushion": 2985, "5:30": 2986, "learning": 2987, "pencil": 2988, "maroon": 2989, "food processor": 2990, "5:40": 2991, "dog bed": 2992, "michigan": 2993, "close": 2994, "license plate": 2995, "crows": 2996, "right hand": 2997, "normal": 2998, "green and brown": 2999, "1.00": 3000, "000": 3001, "1:40": 3002, "wing": 3003, "american airlines": 3004, "kodak": 3005, "mural": 3006, "sniffing": 3007, "1:15": 3008, "behind bench": 3009, "cardinal": 3010, "no light": 3011, "warmth": 3012, "paved": 3013, "skyscrapers": 3014, "swinging bat": 3015, "watermark": 3016, "in cup": 3017, "pizza box": 3018, 
"dough": 3019, "hiding": 3020, "goal": 3021, "no plate": 3022, "shower head": 3023, "ripe": 3024, "1:10": 3025, "1 in back": 3026, "older": 3027, "nest": 3028, "multiple": 3029, "cinnamon": 3030, "bin": 3031, "new orleans": 3032, "colored": 3033, "enclosure": 3034, "bride": 3035, "on dresser": 3036, "star wars": 3037, "in back": 3038, "triangles": 3039, "over easy": 3040, "cilantro": 3041, "statues": 3042, "sticks": 3043, "formica": 3044, "roundabout": 3045, "bowls": 3046, "ahead": 3047, "years": 3048, "drain": 3049, "veggies": 3050, "no shirt": 3051, "taking photo": 3052, "tugboat": 3053, "broke": 3054, "59": 3055, "cadillac": 3056, "prince": 3057, "left side": 3058, "1 in middle": 3059, "10:45": 3060, "drying": 3061, "11:25": 3062, "silk": 3063, "conference room": 3064, "buoys": 3065, "pockets": 3066, "daffodil": 3067, "6:40": 3068, "walgreens": 3069, "4 ft": 3070, "6:05": 3071, "virgin atlantic": 3072, "12:40": 3073, "digital": 3074, "ups": 3075, "westjet": 3076, "bikers": 3077, "us air force": 3078, "limes": 3079, "comcast": 3080, "dip": 3081, "7:55": 3082, "man in middle": 3083, "bus driver": 3084, "soon": 3085, "futon": 3086, "selling": 3087, "braid": 3088, "mariners": 3089, "wisconsin": 3090, "99": 3091, "citizen": 3092, "broccoli and carrots": 3093, "grocery store": 3094, "us airways": 3095, "49": 3096, "bored": 3097, "red velvet": 3098, "hotel room": 3099, "qantas": 3100, "tam": 3101, "korean air": 3102, "10:35": 3103, "whirlpool": 3104, "coffee cup": 3105, "hilly": 3106, "9:12": 3107, "whipped cream": 3108, "video": 3109, "finger": 3110, "competition": 3111, "hollywood": 3112, "sas": 3113, "backward": 3114, "beads": 3115, "cosmo": 3116, "10:08": 3117, "jal": 3118, "6:30": 3119, "100 year party ct": 3120, "hispanic": 3121, "in cabbage town": 3122, "opponent": 3123, "woodpecker": 3124, "visilab": 3125, "mt airy": 3126, "crosstown": 3127, "freightliner": 3128}, {"0": "net", "1": "pitcher", "2": "orange", "3": "yes", "4": "white", "5": "skiing", "6": "red", 
"7": "frisbee", "8": "brushing teeth", "9": "no", "10": "black and white", "11": "skateboard", "12": "1", "13": "blue", "14": "green", "15": "motorcycle", "16": "gray", "17": "2", "18": "purse", "19": "skis", "20": "poles", "21": "surfboard", "22": "dog", "23": "on", "24": "office", "25": "large", "26": "very big", "27": "laptop", "28": "vent", "29": "computer", "30": "black", "31": "bear", "32": "3", "33": "wii", "34": "glasses", "35": "tree", "36": "eating", "37": "log", "38": "5", "39": "raft", "40": "left", "41": "living room", "42": "pink", "43": "right", "44": "railing", "45": "grass", "46": "wire", "47": "10 years", "48": "knife", "49": "cake", "50": "banana", "51": "chef", "52": "vanilla", "53": "4", "54": "outdoor", "55": "mustard", "56": "bun", "57": "clouds", "58": "dock", "59": "brown", "60": "silver", "61": "refrigerator", "62": "square", "63": "teddy", "64": "elm", "65": "stripes", "66": "baseball", "67": "catcher", "68": "beer", "69": "bottom", "70": "north", "71": "nike", "72": "yellow and white", "73": "morning", "74": "elephant", "75": "red and white", "76": "propeller", "77": "tan", "78": "wall", "79": "rolex", "80": "clock", "81": "table", "82": "0", "83": "wood", "84": "christmas", "85": "spinach", "86": "thick", "87": "bag", "88": "leaves", "89": "necklace", "90": "6", "91": "bathroom", "92": "shower", "93": "towel", "94": "solid", "95": "referee", "96": "wilson", "97": "8:00", "98": "e", "99": "24", "100": "hat", "101": "grazing", "102": "sheep", "103": "10", "104": "tag", "105": "spanish", "106": "hot dog", "107": "plate", "108": "lunch", "109": "butter", "110": "peppers", "111": "onions", "112": "very", "113": "mayonnaise", "114": "mayo", "115": "sweet potato", "116": "pig", "117": "sweet", "118": "flowers", "119": "floral", "120": "yellow", "121": "window", "122": "7", "123": "pizza", "124": "car", "125": "", "126": "cargo", "127": "stairs", "128": "abstract", "129": "rug", "130": "baseball cap", "131": "texting", "132": "pole", "133": 
"crosswalk", "134": "nothing", "135": "urban", "136": "bus", "137": "light", "138": "afternoon", "139": "boat", "140": "cheese", "141": "paper", "142": "real", "143": "sun", "144": "birthday", "145": "words", "146": "inside", "147": "shadows", "148": "tomato", "149": "evergreen", "150": "100 feet", "151": "shingles", "152": "trees", "153": "building", "154": "hay", "155": "ski pole", "156": "patterned", "157": "walking", "158": "ice", "159": "laundry", "160": "pepsi", "161": "good", "162": "1:50", "163": "purple", "164": "13", "165": "africa", "166": "teddy bears", "167": "socks", "168": "giraffe", "169": "soccer", "170": "blue and yellow", "171": "zebras", "172": "cupcake", "173": "broccoli", "174": "soldier", "175": "parking lot", "176": "cows", "177": "herding", "178": "on table", "179": "fish", "180": "nightstand", "181": "50", "182": "overcast", "183": "cross", "184": "toaster oven", "185": "tile", "186": "11:55", "187": "red and yellow", "188": "nowhere", "189": "hair dryer", "190": "truck", "191": "11", "192": "people", "193": "rectangle", "194": "hot dogs", "195": "party", "196": "12:55", "197": "apron", "198": "kitchen", "199": "cooking", "200": "ring", "201": "1 way", "202": "stop", "203": "neither", "204": "many", "205": "female", "206": "brushing", "207": "tie", "208": "tennis racket", "209": "knife and fork", "210": "restaurant", "211": "cat", "212": "bed", "213": "sand", "214": "ocean", "215": "cold", "216": "kites", "217": "cumulus", "218": "standing", "219": "male", "220": "star", "221": "tracks", "222": "chocolate", "223": "round", "224": "fork and knife", "225": "yankees", "226": "pictures", "227": "dots", "228": "bird", "229": "parrot", "230": "red white and blue", "231": "man", "232": "metal", "233": "fence", "234": "snowboarding", "235": "pine", "236": "snow", "237": "shorts", "238": "swim", "239": "wine", "240": "brick", "241": "no parking", "242": "children", "243": "beef", "244": "phone", "245": "english", "246": "cell phone", "247": "pink 
and yellow", "248": "clear", "249": "watermelon", "250": "bedroom", "251": "fork", "252": "cow", "253": "rackets", "254": "tennis rackets", "255": "8", "256": "collar", "257": "tennis", "258": "1950s", "259": "playing tennis", "260": "skirt", "261": "30", "262": "polka dot", "263": "beach", "264": "horse", "265": "grill", "266": "african american", "267": "down", "268": "street", "269": "in air", "270": "sweater", "271": "yellow and blue", "272": "park", "273": "backyard", "274": "spectators", "275": "parasailing", "276": "31", "277": "river", "278": "55", "279": "shadow", "280": "winter", "281": "chicken", "282": "tea", "283": "evening", "284": "dusk", "285": "ski resort", "286": "helmet", "287": "penne", "288": "bench", "289": "resting", "290": "elephants", "291": "southwest", "292": "usa", "293": "cars", "294": "town", "295": "bananas", "296": "umbrella", "297": "container", "298": "woman", "299": "on counter", "300": "salad", "301": "striped", "302": "motel", "303": "vertical", "304": "oranges", "305": "hot sauce", "306": "bottle", "307": "juice", "308": "eyes", "309": "ground", "310": "backpack", "311": "black and yellow", "312": "forward", "313": "jackets", "314": "1 on right", "315": "green and yellow", "316": "playing baseball", "317": "riding", "318": "sitting", "319": "carrot", "320": "basket", "321": "seagull", "322": "ski poles", "323": "p", "324": "parking", "325": "street light", "326": "mets", "327": "strap", "328": "bike", "329": "riding bike", "330": "poodle", "331": "shoes", "332": "carpet", "333": "lettuce", "334": "food", "335": "1 foot", "336": "roses", "337": "mountains", "338": "scissors", "339": "camera", "340": "beige", "341": "beard", "342": "cutting", "343": "baby", "344": "tape", "345": "watch", "346": "never", "347": "taking picture", "348": "eggs", "349": "syrup", "350": "sandwich", "351": "water skiing", "352": "microphone", "353": "back", "354": "bears", "355": "donuts", "356": "w", "357": "sky", "358": "double decker", "359": 
"england", "360": "surfing", "361": "running", "362": "shirt", "363": "barn", "364": "weather vane", "365": "white and blue", "366": "fishing", "367": "bridge", "368": "los angeles", "369": "open", "370": "red sox", "371": "bat", "372": "plane", "373": "white and green", "374": "transportation", "375": "sunny", "376": "bus stop", "377": "city", "378": "brown and white", "379": "bicycle", "380": "crow", "381": "magazines", "382": "daisy", "383": "14", "384": "old", "385": "curtains", "386": "jumped", "387": "snowboard", "388": "dinosaur", "389": "racing", "390": "asphalt", "391": "court", "392": "plastic", "393": "circle", "394": "red and blue", "395": "zebra", "396": "12", "397": "biplane", "398": "shallow", "399": "brazil", "400": "logo", "401": "2:20", "402": "electric", "403": "night time", "404": "motion", "405": "toothbrushes", "406": "orange and white", "407": "66", "408": "spoon", "409": "toyota", "410": "tennis shoes", "411": "46", "412": "second", "413": "no 1", "414": "iphone", "415": "friend", "416": "apple", "417": "carnation", "418": "15", "419": "tiger", "420": "glove", "421": "airplane", "422": "bow", "423": "air france", "424": "passengers", "425": "tv", "426": "on building", "427": "3:55", "428": "victorian", "429": "steeple", "430": "happy", "431": "skateboarding", "432": "fruit", "433": "cutting board", "434": "cantaloupe", "435": "kiwi", "436": "sliced", "437": "heart", "438": "water", "439": "rainy", "440": "carrots", "441": "giraffes", "442": "eat", "443": "ramp", "444": "lab", "445": "field", "446": "horizontal", "447": "birds", "448": "home", "449": "shrimp", "450": "12 feet", "451": "girl", "452": "modern", "453": "turtle", "454": "dell", "455": "boots", "456": "sunglasses", "457": "black and orange", "458": "yellow and black", "459": "gloves", "460": "hp", "461": "desk", "462": "both", "463": "sign", "464": "on street", "465": "2000", "466": "cirrus", "467": "to dry", "468": "ceiling", "469": "fluorescent", "470": "up", "471": "9", "472": 
"boys", "473": "playing soccer", "474": "american", "475": "passenger", "476": "turn", "477": "palm", "478": "no train", "479": "wedding", "480": "branch", "481": "parrots", "482": "air force", "483": "on tracks", "484": "small", "485": "tank", "486": "dirty", "487": "france", "488": "honda", "489": "2.00", "490": "whale", "491": "vase", "492": "flying", "493": "professional", "494": "driving", "495": "tissue", "496": "protest", "497": "corona", "498": "for balance", "499": "twin", "500": "clothes", "501": "t shirt", "502": "window sill", "503": "wild", "504": "noon", "505": "caution", "506": "spring", "507": "raining", "508": "cane", "509": "school", "510": "windsurfing", "511": "parachute", "512": "black and red", "513": "25", "514": "background", "515": "toaster", "516": "planes", "517": "yellow and red", "518": "spatula", "519": "10:10", "520": "ivory", "521": "train", "522": "welcome", "523": "highway", "524": "off", "525": "on track", "526": "electricity", "527": "italy", "528": "dinner", "529": "sink", "530": "squares", "531": "5 ft", "532": "parked", "533": "store", "534": "dress", "535": "signs", "536": "meow", "537": "football", "538": "rugby", "539": "stainless steel", "540": "la", "541": "dirt", "542": "blue and white", "543": "klm", "544": "house", "545": "unknown", "546": "ford", "547": "reading", "548": "chair", "549": "mountain", "550": "alive", "551": "water skis", "552": "picture", "553": "parade", "554": "slippers", "555": "trailer", "556": "boating", "557": "holding it", "558": "shade", "559": "cloth", "560": "6:20", "561": "candle", "562": "hose", "563": "hand", "564": "3:25", "565": "on sidewalk", "566": "poster", "567": "downhill", "568": "68", "569": "reflection", "570": "summer", "571": "pickles", "572": "halloween", "573": "bats", "574": "london", "575": "zoo", "576": "surfer", "577": "racket", "578": "flickr", "579": "cutting hair", "580": "strawberries", "581": "mushroom", "582": "teddy bear", "583": "big", "584": "suitcase", "585": 
"veggie", "586": "pepper", "587": "houses", "588": "70", "589": "toshiba", "590": "triangle", "591": "boxes", "592": "photograph", "593": "smoke", "594": "engine", "595": "camel", "596": "sidewalk", "597": "left 1", "598": "red and green", "599": "4:35", "600": "on couch", "601": "candy", "602": "minnie mouse", "603": "homemade", "604": "mouse", "605": "box", "606": "movie", "607": "45", "608": "strawberry", "609": "fridge", "610": "full", "611": "vegetables", "612": "bright", "613": "play", "614": "remote", "615": "pond", "616": "savannah", "617": "celery", "618": "concrete", "619": "semi", "620": "dump", "621": "scania", "622": "safety", "623": "posing", "624": "fabric", "625": "laying", "626": "couch", "627": "blueberries", "628": "handle", "629": "pipe", "630": "stick", "631": "parmesan", "632": "steak", "633": "chain link", "634": "catch", "635": "barbed wire", "636": "mozzarella", "637": "soda", "638": "fire hydrant", "639": "cat food", "640": "pepperoni", "641": "lot", "642": "licking", "643": "red and black", "644": "clay", "645": "tennis court", "646": "jumping", "647": "potatoes", "648": "toothbrush", "649": "kite", "650": "not at all", "651": "flying kite", "652": "broken", "653": "black and silver", "654": "lap", "655": "outside", "656": "44", "657": "delta", "658": "greyhound", "659": "ring finger", "660": "talking on phone", "661": "bad", "662": "kettle", "663": "35", "664": "motorcycles", "665": "produce", "666": "comfort", "667": "steering wheel", "668": "18", "669": "humans", "670": "coffee", "671": "white and brown", "672": "fall", "673": "bread", "674": "cherry", "675": "4:30", "676": "flag", "677": "night", "678": "lamp", "679": "cucumber", "680": "can't see", "681": "porcelain", "682": "oval", "683": "museum", "684": "rain", "685": "sprinkles", "686": "20", "687": "kids", "688": "bracelet", "689": "sneakers", "690": "mask", "691": "mickey mouse", "692": "twins", "693": "very high", "694": "costume", "695": "cabbage", "696": "paint", "697": 
"lighting", "698": "young", "699": "air conditioner", "700": "wooden", "701": "board", "702": "someone", "703": "beets", "704": "16", "705": "day time", "706": "4 inches", "707": "lights", "708": "ladder", "709": "glass", "710": "ferris wheel", "711": "fries", "712": "steamed", "713": "shepherd", "714": "cotton", "715": "suit", "716": "goatee", "717": "on his head", "718": "print", "719": "happy birthday", "720": "forks", "721": "travel", "722": "maple", "723": "200", "724": "oil", "725": "jeans", "726": "can", "727": "chopsticks", "728": "on wall", "729": "construction", "730": "mack", "731": "36", "732": "chinese", "733": "moped", "734": "festival", "735": "gas", "736": "throwing", "737": "circus", "738": "wires", "739": "not possible", "740": "plates", "741": "sugar", "742": "in", "743": "women's", "744": "door", "745": "no man", "746": "volleyball", "747": "serving", "748": "ponytail", "749": "business", "750": "decoration", "751": "santa", "752": "flat", "753": "barrel", "754": "12:15", "755": "candles", "756": "atv", "757": "free", "758": "hair", "759": "waffle", "760": "ball", "761": "stop sign", "762": "wetsuit", "763": "very deep", "764": "swimsuit", "765": "green and black", "766": "foreground", "767": "stands", "768": "china airlines", "769": "flower", "770": "300", "771": "lobster", "772": "on bench", "773": "plaster", "774": "phones", "775": "sailboat", "776": "apples", "777": "road", "778": "recently", "779": "cones", "780": "cactus", "781": "rice", "782": "vegetarian", "783": "donut", "784": "ketchup", "785": "police", "786": "mirror", "787": "rock", "788": "meat", "789": "blinds", "790": "cell phones", "791": "china", "792": "rust", "793": "7:25", "794": "stone", "795": "vans", "796": "middle", "797": "eagle", "798": "9:30", "799": "ping pong", "800": "microwave", "801": "gmc", "802": "umbrellas", "803": "wrist", "804": "cuddling", "805": "laughing", "806": "boy", "807": "next to toilet", "808": "tabby", "809": "petting", "810": "south", "811": 
"40", "812": "name tag", "813": "checkered", "814": "name", "815": "slow", "816": "cardboard", "817": "windows", "818": "croissant", "819": "plain", "820": "cookie", "821": "on ground", "822": "low", "823": "water bottle", "824": "goggles", "825": "turkey", "826": "pull", "827": "shut", "828": "kite flying", "829": "bowl", "830": "smile", "831": "in bowl", "832": "bush", "833": "cloudy", "834": "top left", "835": "skateboarder", "836": "coca cola", "837": "pan", "838": "drinking", "839": "short", "840": "floor", "841": "thanksgiving", "842": "radio", "843": "drink", "844": "on toilet", "845": "bike rack", "846": "bleachers", "847": "train tracks", "848": "horses", "849": "far", "850": "top", "851": "toilet", "852": "in water", "853": "private", "854": "nature", "855": "checkers", "856": "commercial", "857": "stroller", "858": "power", "859": "stuffed animals", "860": "uniforms", "861": "japan", "862": "liquor", "863": "faucet", "864": "green and orange", "865": "corn", "866": "sub", "867": "white and yellow", "868": "mercedes", "869": "in sky", "870": "tarp", "871": "indian", "872": "counter", "873": "multicolored", "874": "polar", "875": "go", "876": "now", "877": "no number", "878": "swimming", "879": "bridle", "880": "cowboy", "881": "union station", "882": "salt and pepper", "883": "olives", "884": "pizza cutter", "885": "british airways", "886": "nighttime", "887": "domestic", "888": "trolley", "889": "australia", "890": "tiles", "891": "pug", "892": "wicker", "893": "british", "894": "us airways express", "895": "burton", "896": "christmas tree", "897": "napkin", "898": "writing", "899": "rocks", "900": "hello kitty", "901": "lacoste", "902": "gold", "903": "fan", "904": "skateboards", "905": "day", "906": "on floor", "907": "2008", "908": "dark", "909": "flying kites", "910": "rural", "911": "olympics", "912": "bmw", "913": "34", "914": "factory", "915": "denim", "916": "typing", "917": "for fun", "918": "steel", "919": "watching tv", "920": "chevron", 
"921": "driver", "922": "baggage claim", "923": "grapes", "924": "f", "925": "angels", "926": "roof", "927": "handlebars", "928": "train station", "929": "public", "930": "oak", "931": "sleeping", "932": "canada", "933": "on runway", "934": "air canada", "935": "on top", "936": "tired", "937": "blonde", "938": "cups", "939": "little", "940": "adidas", "941": "10 feet", "942": "white and gray", "943": "leaf", "944": "fisheye", "945": "forest", "946": "war", "947": "octagon", "948": "raspberry", "949": "helmets", "950": "united states", "951": "29", "952": "noodles", "953": "van", "954": "long", "955": "traveling", "956": "luggage", "957": "airport", "958": "single", "959": "pitching", "960": "dugout", "961": "garbage", "962": "in street", "963": "happiness", "964": "cigarette", "965": "on tower", "966": "antelope", "967": "graffiti", "968": "skating", "969": "on road", "970": "curved", "971": "red light", "972": "washington", "973": "ski lift", "974": "athletics", "975": "brace", "976": "squatting", "977": "catching", "978": "batter", "979": "batting", "980": "game", "981": "towards", "982": "33", "983": "sliding", "984": "makeup", "985": "japanese", "986": "person", "987": "pirates", "988": "plaid", "989": "rose", "990": "daytime", "991": "keyboard", "992": "surfboards", "993": "hummingbird", "994": "ollie", "995": "11:30", "996": "clock tower", "997": "5:55", "998": "san francisco", "999": "stopping", "1000": "tags", "1001": "samsung", "1002": "computers", "1003": "cabinets", "1004": "talking", "1005": "cage", "1006": "asparagus", "1007": "5 years", "1008": "hanger", "1009": "adult", "1010": "rabbit", "1011": "empty", "1012": "softball", "1013": "1st", "1014": "playing", "1015": "chairs", "1016": "farm", "1017": "cross country", "1018": "dump truck", "1019": "women", "1020": "snowboarder", "1021": "tall", "1022": "monkey", "1023": "mantle", "1024": "fire", "1025": "books", "1026": "quilt", "1027": "cessna", "1028": "chandelier", "1029": "dunkin donuts", "1030": 
"beans", "1031": "relish", "1032": "no flag", "1033": "parking meter", "1034": "spots", "1035": "ducks", "1036": "sandals", "1037": "doughnut", "1038": "lighthouse", "1039": "yacht", "1040": "german shepherd", "1041": "in middle", "1042": "raw", "1043": "chain", "1044": "2 feet", "1045": "pedestal", "1046": "sauerkraut", "1047": "bagels", "1048": "mutt", "1049": "dog and cat", "1050": "race", "1051": "poor", "1052": "cat and dog", "1053": "station", "1054": "printer", "1055": "daisies", "1056": "front", "1057": "gravel", "1058": "rear", "1059": "grassy", "1060": "pigeons", "1061": "dogs", "1062": "in car", "1063": "life", "1064": "wii remotes", "1065": "suv", "1066": "leather", "1067": "bottom right", "1068": "peace", "1069": "facebook", "1070": "blanket", "1071": "fountain", "1072": "frisbees", "1073": "12:30", "1074": "am", "1075": "scooter", "1076": "going", "1077": "analog", "1078": "america", "1079": "pitbull", "1080": "relaxing", "1081": "paddle boarding", "1082": "white and pink", "1083": "shampoo", "1084": "alps", "1085": "ride", "1086": "side", "1087": "mane", "1088": "on desk", "1089": "on chair", "1090": "2012", "1091": "multi", "1092": "straight", "1093": "big ben", "1094": "closed", "1095": "frosted", "1096": "3 feet", "1097": "waves", "1098": "buoy", "1099": "life vest", "1100": "trash can", "1101": "medium", "1102": "boxer", "1103": "very tall", "1104": "yamaha", "1105": "sunlight", "1106": "hit ball", "1107": "dry", "1108": "coke", "1109": "gym", "1110": "orange and black", "1111": "center", "1112": "rope", "1113": "flip flops", "1114": "4th of july", "1115": "siamese", "1116": "crafts", "1117": "color", "1118": "italian", "1119": "playing frisbee", "1120": "skate park", "1121": "orange juice", "1122": "windowsill", "1123": "corgi", "1124": "thumb", "1125": "peanut butter", "1126": "pie", "1127": "toast", "1128": "no hat", "1129": "benches", "1130": "diamond", "1131": "blender", "1132": "avocado", "1133": "television", "1134": "speakers", "1135": 
"pony", "1136": "baseball field", "1137": "pavement", "1138": "sydney", "1139": "not there", "1140": "diamonds", "1141": "4 feet", "1142": "goalie", "1143": "soccer ball", "1144": "runway", "1145": "video game", "1146": "gaming", "1147": "casual", "1148": "green and white", "1149": "toilet brush", "1150": "working", "1151": "pickup", "1152": "girls", "1153": "remotes", "1154": "pasta", "1155": "hood", "1156": "braves", "1157": "skier", "1158": "motorola", "1159": "17", "1160": "b", "1161": "100", "1162": "diet coke", "1163": "hospital", "1164": "wagon", "1165": "milk", "1166": "ferry", "1167": "rainbow", "1168": "on bed", "1169": "toward", "1170": "1:30", "1171": "19", "1172": "security", "1173": "herself", "1174": "mercedes benz", "1175": "supreme", "1176": "thin", "1177": "platform", "1178": "gray and red", "1179": "thai", "1180": "storage", "1181": "thailand", "1182": "swan", "1183": "peach", "1184": "10:05", "1185": "dome", "1186": "chiquita", "1187": "2:00", "1188": "mountain dew", "1189": "23", "1190": "knives", "1191": "street sign", "1192": "on beach", "1193": "playing wii", "1194": "using laptop", "1195": "stickers", "1196": "yogurt", "1197": "on grass", "1198": "9:50", "1199": "9:45", "1200": "sweat", "1201": "gatorade", "1202": "umpire", "1203": "37", "1204": "transport", "1205": "desktop", "1206": "desserts", "1207": "main", "1208": "boston", "1209": "fell", "1210": "top right", "1211": "case", "1212": "asleep", "1213": "over", "1214": "9:55", "1215": "grapefruit", "1216": "breakfast", "1217": "headphones", "1218": "freight", "1219": "cup", "1220": "sweatband", "1221": "nobody", "1222": "lamps", "1223": "9:25", "1224": "scarf", "1225": "on fridge", "1226": "main st", "1227": "moving", "1228": "confused", "1229": "fresh", "1230": "kiting", "1231": "blue jay", "1232": "flats", "1233": "long time", "1234": "chihuahua", "1235": "ceramic", "1236": "mushrooms", "1237": "on plate", "1238": "human", "1239": "power lines", "1240": "hotel", "1241": "map", "1242": 
"earring", "1243": "boarding", "1244": "display", "1245": "warm", "1246": "napkins", "1247": "brown and black", "1248": "broom", "1249": "basketball", "1250": "papers", "1251": "holding baby", "1252": "sad", "1253": "kickstand", "1254": "60", "1255": "shoulder", "1256": "sleep", "1257": "footprints", "1258": "tunnel", "1259": "1990", "1260": "hats", "1261": "6 inches", "1262": "ham", "1263": "bacon", "1264": "church", "1265": "53", "1266": "pineapple", "1267": "at camera", "1268": "red bull", "1269": "pilot", "1270": "tattoo", "1271": "work", "1272": "polar bear", "1273": "taking off", "1274": "website", "1275": "22", "1276": "4:00", "1277": "coffee maker", "1278": "fast", "1279": "fur", "1280": "rubber", "1281": "tongs", "1282": "german", "1283": "germany", "1284": "3 inches", "1285": "toy", "1286": "3:20", "1287": "calm", "1288": "pots", "1289": "balloons", "1290": "fruits", "1291": "9:20", "1292": "drawer", "1293": "oven", "1294": "soup", "1295": "stove", "1296": "heels", "1297": "wind", "1298": "island", "1299": "blood", "1300": "leg", "1301": "theater", "1302": "tennis racquet", "1303": "21", "1304": "gothic", "1305": "2:35", "1306": "wii remote", "1307": "turning", "1308": "20 feet", "1309": "pink and black", "1310": "ears", "1311": "fun", "1312": "wreath", "1313": "to right", "1314": "child", "1315": "fly", "1316": "head", "1317": "drywall", "1318": "shorter", "1319": "pier", "1320": "feeding giraffe", "1321": "in vase", "1322": "burger", "1323": "easter", "1324": "onion", "1325": "uniform", "1326": "remote control", "1327": "guitar", "1328": "time", "1329": "verizon", "1330": "tomatoes", "1331": "ship", "1332": "tulips", "1333": "glaze", "1334": "on suitcase", "1335": "tent", "1336": "1:45", "1337": "market", "1338": "bnsf", "1339": "bandana", "1340": "still", "1341": "don't know", "1342": "piano", "1343": "mouth", "1344": "run", "1345": "sparrow", "1346": "throw", "1347": "lines", "1348": "vest", "1349": "1950", "1350": "jet", "1351": "sepia", "1352": 
"2015", "1353": "busy", "1354": "lighter", "1355": "dessert", "1356": "bending", "1357": "75", "1358": "finch", "1359": "pastries", "1360": "outdoors", "1361": "bakery", "1362": "clean", "1363": "ipod", "1364": "tablecloth", "1365": "cigarettes", "1366": "looking at phone", "1367": "in front", "1368": "food truck", "1369": "face", "1370": "swinging", "1371": "safari", "1372": "500", "1373": "volkswagen", "1374": "2010", "1375": "shape", "1376": "shelves", "1377": "riding horses", "1378": "2016", "1379": "behind bus", "1380": "towels", "1381": "lemon", "1382": "straw", "1383": "bamboo", "1384": "5 feet", "1385": "hardwood", "1386": "oregon", "1387": "schnauzer", "1388": "organic", "1389": "h", "1390": "kid", "1391": "meter", "1392": "61", "1393": "charging", "1394": "bald", "1395": "caucasian", "1396": "man on left", "1397": "stand", "1398": "27", "1399": "dining room", "1400": "sandwiches", "1401": "32", "1402": "apartment", "1403": "tower", "1404": "virgin", "1405": "out", "1406": "white and red", "1407": "2:05", "1408": "i don't know", "1409": "chains", "1410": "legs", "1411": "age", "1412": "goats", "1413": "s", "1414": "congratulations", "1415": "dresser", "1416": "camper", "1417": "half", "1418": "silverware", "1419": "decorative", "1420": "hawaiian", "1421": "petting horse", "1422": "wheel", "1423": "florida", "1424": "reds", "1425": "washington dc", "1426": "moon", "1427": "conference", "1428": "screen", "1429": "controller", "1430": "robin", "1431": "men", "1432": "protection", "1433": "roll", "1434": "harley davidson", "1435": "coal", "1436": "mustache", "1437": "smiling", "1438": "pedestrians", "1439": "88", "1440": "me", "1441": "tray", "1442": "males", "1443": "monitor", "1444": "bell", "1445": "landscape", "1446": "club", "1447": "toothpick", "1448": "seagulls", "1449": "bowtie", "1450": "lake", "1451": "steam", "1452": "surf", "1453": "baseball glove", "1454": "blinders", "1455": "woods", "1456": "stuffed", "1457": "sunbathing", "1458": "shearing", 
"1459": "dad", "1460": "mixer", "1461": "pot", "1462": "blending", "1463": "identification", "1464": "owl", "1465": "wine glass", "1466": "on bike", "1467": "billabong", "1468": "new york", "1469": "yarn", "1470": "tube", "1471": "tennis ball", "1472": "2:55", "1473": "ice cream", "1474": "chevrolet", "1475": "shirt and tie", "1476": "taking selfie", "1477": "blue and green", "1478": "he isn't", "1479": "cutting cake", "1480": "east", "1481": "setting", "1482": "brewers", "1483": "riding bikes", "1484": "7 eleven", "1485": "stars", "1486": "jockey", "1487": "jacket", "1488": "standing still", "1489": "book", "1490": "gray and white", "1491": "pen", "1492": "red white blue", "1493": "above", "1494": "alaska", "1495": "tongue", "1496": "feathers", "1497": "k", "1498": "camping", "1499": "pasture", "1500": "corner", "1501": "away", "1502": "ski", "1503": "texas", "1504": "fire truck", "1505": "sailboats", "1506": "jump", "1507": "walk", "1508": "spray paint", "1509": "loading", "1510": "united", "1511": "1000", "1512": "brushing his teeth", "1513": "roman numerals", "1514": "garlic", "1515": "surprise", "1516": "3rd", "1517": "first", "1518": "side of road", "1519": "dodgers", "1520": "airplanes", "1521": "unsure", "1522": "russian", "1523": "wet", "1524": "skyscraper", "1525": "5 star", "1526": "brushing her teeth", "1527": "blankets", "1528": "natural", "1529": "across street", "1530": "smartphone", "1531": "duck", "1532": "sausage", "1533": "paris", "1534": "newspaper", "1535": "pants", "1536": "spices", "1537": "pillow", "1538": "to left", "1539": "snowboards", "1540": "colgate", "1541": "on elephant", "1542": "string", "1543": "horns", "1544": "2:40", "1545": "men's", "1546": "cobblestone", "1547": "regular", "1548": "staring", "1549": "28", "1550": "barber shop", "1551": "linoleum", "1552": "grind", "1553": "cut", "1554": "x", "1555": "above sink", "1556": "above stove", "1557": "dishes", "1558": "dalmatian", "1559": "watching", "1560": "glazed", "1561": "5:25", 
"1562": "j", "1563": "messy", "1564": "wallet", "1565": "tuna", "1566": "toasted", "1567": "grilled", "1568": "french", "1569": "green and blue", "1570": "sunflowers", "1571": "to catch frisbee", "1572": "wool", "1573": "sprint", "1574": "no grass", "1575": "cabinet", "1576": "shell", "1577": "foil", "1578": "bottles", "1579": "bar", "1580": "king", "1581": "paper towels", "1582": "friends", "1583": "beagle", "1584": "school bus", "1585": "laptops", "1586": "snowing", "1587": "cement", "1588": "pc", "1589": "accident", "1590": "stuffed animal", "1591": "wakeboard", "1592": "balance", "1593": "in suitcase", "1594": "white and black", "1595": "nikon", "1596": "cleats", "1597": "on sink", "1598": "pool", "1599": "mom", "1600": "downtown", "1601": "asian", "1602": "heater", "1603": "bathing", "1604": "193", "1605": "against wall", "1606": "canopy", "1607": "jungle", "1608": "berries", "1609": "military", "1610": "pickle", "1611": "clams", "1612": "seafood", "1613": "in box", "1614": "boats", "1615": "tables", "1616": "lizard", "1617": "lemonade", "1618": "m", "1619": "soft", "1620": "illinois", "1621": "country", "1622": "for sale", "1623": "arm", "1624": "listening", "1625": "curly", "1626": "play tennis", "1627": "hands", "1628": "cereal", "1629": "blue and red", "1630": "robe", "1631": "around neck", "1632": "red and silver", "1633": "soap", "1634": "trains", "1635": "throwing frisbee", "1636": "smoking", "1637": "india", "1638": "headband", "1639": "not very", "1640": "westin", "1641": "serve", "1642": "bicycles", "1643": "can't tell", "1644": "to catch ball", "1645": "visibility", "1646": "ana", "1647": "reins", "1648": "rodeo", "1649": "boot", "1650": "on horse", "1651": "12:35", "1652": "riding motorcycle", "1653": "mexico", "1654": "mother", "1655": "african", "1656": "left and right", "1657": "button", "1658": "earrings", "1659": "blackberry", "1660": "cell", "1661": "10:00", "1662": "harness", "1663": "pillows", "1664": "vegetable", "1665": "tablet", "1666": 
"fern", "1667": "cats", "1668": "golden retriever", "1669": "goat", "1670": "tractor", "1671": "valentine's day", "1672": "hearts", "1673": "khaki", "1674": "man on right", "1675": "mcdonald's", "1676": "player", "1677": "arriving", "1678": "husky", "1679": "on skateboard", "1680": "vases", "1681": "coat", "1682": "beanie", "1683": "coming", "1684": "granite", "1685": "shopping cart", "1686": "it's raining", "1687": "sports", "1688": "leash", "1689": "balls", "1690": "blurry", "1691": "baseball bat", "1692": "team", "1693": "mango", "1694": "mug", "1695": "eiffel tower", "1696": "worms", "1697": "trash", "1698": "robot", "1699": "show", "1700": "terrier", "1701": "painting", "1702": "rooster", "1703": "42", "1704": "jones", "1705": "state farm", "1706": "balloon", "1707": "trunk", "1708": "coach", "1709": "t", "1710": "playing game", "1711": "fireplace", "1712": "behind clouds", "1713": "uphill", "1714": "motocross", "1715": "sony", "1716": "magazine", "1717": "kitesurfing", "1718": "catching frisbee", "1719": "catch frisbee", "1720": "bud light", "1721": "drive", "1722": "fighting", "1723": "1 on left", "1724": "very old", "1725": "hallway", "1726": "lexus", "1727": "wii controller", "1728": "9:15", "1729": "fast food", "1730": "5:45", "1731": "catholic", "1732": "muffin", "1733": "traffic light", "1734": "band", "1735": "button up", "1736": "grocery", "1737": "shelf", "1738": "2:25", "1739": "honey", "1740": "plants", "1741": "oars", "1742": "foggy", "1743": "nathan's", "1744": "cord", "1745": "yard", "1746": "48", "1747": "donut shop", "1748": "chimney", "1749": "calico", "1750": "suits", "1751": "sideways", "1752": "animals", "1753": "black and blue", "1754": "bikini", "1755": "photographer", "1756": "700", "1757": "queen", "1758": "1:00", "1759": "12:05", "1760": "horseback riding", "1761": "awake", "1762": "bunny", "1763": "12:00", "1764": "continental", "1765": "flamingo", "1766": "rye", "1767": "family", "1768": "lots", "1769": "owner", "1770": "stew", 
"1771": "palm tree", "1772": "cruise ship", "1773": "56", "1774": "design", "1775": "ny", "1776": "far right", "1777": "tire", "1778": "younger", "1779": "biking", "1780": "at&t", "1781": "giants", "1782": "marshmallows", "1783": "caramel", "1784": "polo", "1785": "emirates", "1786": "salon", "1787": "focus", "1788": "on motorcycle", "1789": "magnets", "1790": "mat", "1791": "ivy", "1792": "cakes", "1793": "chrome", "1794": "bob", "1795": "asia", "1796": "graduation", "1797": "cauliflower", "1798": "in snow", "1799": "c", "1800": "rough", "1801": "vacation", "1802": "air", "1803": "windy", "1804": "victoria", "1805": "4:45", "1806": "trick", "1807": "coconut", "1808": "labrador", "1809": "on left", "1810": "yellow and green", "1811": "butterfly", "1812": "fake", "1813": "on napkin", "1814": "bricks", "1815": "wine glasses", "1816": "detroit", "1817": "man's", "1818": "parsley", "1819": "art", "1820": "subway", "1821": "wave", "1822": "placemat", "1823": "hydrant", "1824": "sofa", "1825": "pigeon", "1826": "riding elephant", "1827": "all", "1828": "branches", "1829": "plant", "1830": "to eat", "1831": "zucchini", "1832": "feta", "1833": "neon", "1834": "mouse pad", "1835": "cloud", "1836": "toilet paper", "1837": "pumpkin", "1838": "rowing", "1839": "toronto", "1840": "handicap", "1841": "seeds", "1842": "fly kite", "1843": "chicago", "1844": "marble", "1845": "frame", "1846": "150", "1847": "rocky", "1848": "give way", "1849": "sauce", "1850": "it's not", "1851": "control", "1852": "high chair", "1853": "playstation", "1854": "xbox", "1855": "not likely", "1856": "roman", "1857": "land", "1858": "1:35", "1859": "lifeguard", "1860": "on pizza", "1861": "size", "1862": "bull", "1863": "dandelions", "1864": "equestrian", "1865": "goose", "1866": "8 feet", "1867": "recessed", "1868": "statue", "1869": "index", "1870": "phillies", "1871": "strike", "1872": "mirrors", "1873": "pointing", "1874": "farmer", "1875": "collie", "1876": "motorbike", "1877": "lanes", "1878": 
"bikes", "1879": "biker", "1880": "arrows", "1881": "gas station", "1882": "logs", "1883": "smaller", "1884": "desert", "1885": "yield", "1886": "flags", "1887": "stool", "1888": "kitten", "1889": "doll", "1890": "daffodils", "1891": "letters", "1892": "dishwasher", "1893": "first base", "1894": "nuts", "1895": "2013", "1896": "persian", "1897": "swim trunks", "1898": "deep", "1899": "o", "1900": "doubles", "1901": "toothpicks", "1902": "in field", "1903": "wristband", "1904": "wheels", "1905": "baking", "1906": "4:15", "1907": "11:00", "1908": "ear", "1909": "2007", "1910": "51", "1911": "chevy", "1912": "using computer", "1913": "frog", "1914": "storm", "1915": "boogie board", "1916": "hungry", "1917": "by window", "1918": "ambulance", "1919": "pigtails", "1920": "audi", "1921": "microsoft", "1922": "on man", "1923": "cannot tell", "1924": "stained glass", "1925": "hugging", "1926": "laying down", "1927": "3:00", "1928": "taxi", "1929": "pedestrian", "1930": "landing", "1931": "numbers", "1932": "38", "1933": "stones", "1934": "on tree", "1935": "clocks", "1936": "new", "1937": "picnic", "1938": "fog", "1939": "buffalo", "1940": "under armour", "1941": "cocker spaniel", "1942": "orioles", "1943": "no sign", "1944": "telling time", "1945": "bags", "1946": "golden gate", "1947": "cover", "1948": "castle", "1949": "canoe", "1950": "selfie", "1951": "cream", "1952": "floating", "1953": "indoor", "1954": "antique", "1955": "aluminum", "1956": "silver and black", "1957": "cast iron", "1958": "peas", "1959": "sun hat", "1960": "on right", "1961": "swiss", "1962": "flour", "1963": "under sink", "1964": "fashion", "1965": "fedora", "1966": "shells", "1967": "1 hour", "1968": "puppy", "1969": "in stands", "1970": "not here", "1971": "motor", "1972": "thousands", "1973": "120", "1974": "sail", "1975": "butt", "1976": "mexican", "1977": "dead end", "1978": "paddle", "1979": "bathing suit", "1980": "shop", "1981": "onion rings", "1982": "boxing", "1983": "birthday cake", 
"1984": "chalk", "1985": "scenery", "1986": "style", "1987": "nissan", "1988": "sticker", "1989": "on rack", "1990": "1 4", "1991": "woman's", "1992": "surprised", "1993": "north face", "1994": "squash", "1995": "not sure", "1996": "email", "1997": "spotted", "1998": "seat", "1999": "himself", "2000": "circles", "2001": "san diego", "2002": "kia", "2003": "mattress", "2004": "obama", "2005": "lamb", "2006": "american flag", "2007": "climbing", "2008": "skull and crossbones", "2009": "roast beef", "2010": "visor", "2011": "herd", "2012": "double", "2013": "52", "2014": "high", "2015": "stagecoach", "2016": "cart", "2017": "feeding", "2018": "eaten", "2019": "cone", "2020": "11:15", "2021": "smoothie", "2022": "golf", "2023": "colorado", "2024": "electronics", "2025": "5:15", "2026": "bowling", "2027": "players", "2028": "ketchup and mustard", "2029": "styrofoam", "2030": "6 feet", "2031": "hawk", "2032": "cheddar", "2033": "12:28", "2034": "arabic", "2035": "12:25", "2036": "12:10", "2037": "shower curtain", "2038": "army", "2039": "salmon", "2040": "10:40", "2041": "hanging", "2042": "whole", "2043": "behind fence", "2044": "bars", "2045": "moss", "2046": "no dog", "2047": "traffic", "2048": "10:25", "2049": "r", "2050": "countryside", "2051": "machine", "2052": "directions", "2053": "cooked", "2054": "aa", "2055": "6:45", "2056": "4 way", "2057": "stripe", "2058": "brand", "2059": "baseball player", "2060": "bunk", "2061": "coleslaw", "2062": "fishing boat", "2063": "at table", "2064": "europe", "2065": "dead", "2066": "arch", "2067": "scrambled", "2068": "clothing", "2069": "closet", "2070": "egg", "2071": "suitcases", "2072": "indoors", "2073": "coffee pot", "2074": "tires", "2075": "lilies", "2076": "cafe", "2077": "9:35", "2078": "teal", "2079": "toothpaste", "2080": "in background", "2081": "tarmac", "2082": "painted", "2083": "sunset", "2084": "orange and yellow", "2085": "oar", "2086": "peaches", "2087": "zebra and giraffe", "2088": "ladybug", "2089": "20 
ft", "2090": "sesame seeds", "2091": "hills", "2092": "2:30", "2093": "stucco", "2094": "tail", "2095": "couple", "2096": "kawasaki", "2097": "smooth", "2098": "powdered sugar", "2099": "pedestrian crossing", "2100": "french fries", "2101": "picnic table", "2102": "teeth", "2103": "ribbon", "2104": "saddle", "2105": "15 feet", "2106": "earbuds", "2107": "on train", "2108": "39", "2109": "curb", "2110": "tow", "2111": "shark", "2112": "white and orange", "2113": "6:25", "2114": "gravy", "2115": "fork and spoon", "2116": "pooping", "2117": "curtain", "2118": "lime", "2119": "skull", "2120": "crossing", "2121": "speed limit", "2122": "peacock", "2123": "boredom", "2124": "neck", "2125": "hit", "2126": "dragon", "2127": "tissues", "2128": "basil", "2129": "waving", "2130": "blue team", "2131": "rectangles", "2132": "helicopter", "2133": "mud", "2134": "us", "2135": "balcony", "2136": "red and gray", "2137": "firefighter", "2138": "sunflower", "2139": "wallpaper", "2140": "best buy", "2141": "11:20", "2142": "public market center", "2143": "seattle", "2144": "bookshelf", "2145": "looking", "2146": "1 inch", "2147": "harley", "2148": "urinal", "2149": "cartoon", "2150": "t shirt and jeans", "2151": "navy", "2152": "fedex", "2153": "rays", "2154": "deck", "2155": "coaster", "2156": "1:20", "2157": "50 feet", "2158": "4:20", "2159": "us open", "2160": "looking at camera", "2161": "600", "2162": "national express", "2163": "white house", "2164": "5:00", "2165": "jp morgan", "2166": "palm trees", "2167": "tub", "2168": "pens", "2169": "soldiers", "2170": "2 people", "2171": "animal", "2172": "speaker", "2173": "hamburger", "2174": "spaghetti", "2175": "green beans", "2176": "it isn't", "2177": "10:20", "2178": "buildings", "2179": "on shelf", "2180": "baseball uniform", "2181": "tiled", "2182": "orange and blue", "2183": "90", "2184": "north america", "2185": "arrow", "2186": "news", "2187": "tropicana", "2188": "formal", "2189": "in grass", "2190": "thumbs up", "2191": 
"clip", "2192": "gate", "2193": "tennis player", "2194": "lilac", "2195": "pastry", "2196": "nose", "2197": "pacifier", "2198": "11:35", "2199": "different teams", "2200": "cardinals", "2201": "exhaust", "2202": "hauling", "2203": "on tray", "2204": "bagel", "2205": "huge", "2206": "out of focus", "2207": "cook", "2208": "wheat", "2209": "photo", "2210": "ghost", "2211": "sedan", "2212": "qatar", "2213": "zig zag", "2214": "lanyard", "2215": "pink and white", "2216": "sesame", "2217": "space", "2218": "no clock", "2219": "warning", "2220": "snowy", "2221": "tater tots", "2222": "tropical", "2223": "grandfather", "2224": "mac", "2225": "magnet", "2226": "photoshop", "2227": "pajamas", "2228": "350", "2229": "casserole", "2230": "4:55", "2231": "pelican", "2232": "2009", "2233": "clydesdale", "2234": "tow truck", "2235": "belt", "2236": "west", "2237": "omelet", "2238": "heavy", "2239": "crown", "2240": "in corner", "2241": "hexagon", "2242": "mound", "2243": "iris", "2244": "g", "2245": "12:45", "2246": "2:15", "2247": "3:10", "2248": "drawing", "2249": "only", "2250": "little girl", "2251": "washing", "2252": "nokia", "2253": "windsor", "2254": "2 men", "2255": "parmesan cheese", "2256": "on woman", "2257": "freezer", "2258": "icing", "2259": "venice", "2260": "dairy", "2261": "several", "2262": "concentration", "2263": "3:15", "2264": "no smoking", "2265": "kayak", "2266": "frosting", "2267": "jetblue", "2268": "thoroughbred", "2269": "parakeet", "2270": "shoe", "2271": "skeleton", "2272": "britain", "2273": "ties", "2274": "in sink", "2275": "patio", "2276": "bank", "2277": "camouflage", "2278": "privacy", "2279": "bib", "2280": "blue and gray", "2281": "looking out window", "2282": "falling", "2283": "bucket", "2284": "cupcakes", "2285": "throw ball", "2286": "garden", "2287": "almonds", "2288": "ducati", "2289": "ireland", "2290": "plastic wrap", "2291": "starbucks", "2292": "all way", "2293": "bark", "2294": "home plate", "2295": "base", "2296": "dog food", 
"2297": "toys", "2298": "blue and orange", "2299": "1 in front", "2300": "foot", "2301": "dc", "2302": "california", "2303": "towing", "2304": "cheesecake", "2305": "bushes", "2306": "bow tie", "2307": "millions", "2308": "down street", "2309": "2011", "2310": "police officer", "2311": "windmill", "2312": "taking pictures", "2313": "street name", "2314": "cleaning", "2315": "on pole", "2316": "russia", "2317": "main street", "2318": "catch ball", "2319": "mario", "2320": "pirate", "2321": "track", "2322": "garage", "2323": "7:10", "2324": "they aren't", "2325": "mother and child", "2326": "tents", "2327": "fancy", "2328": "tattoos", "2329": "alcohol", "2330": "2:45", "2331": "wheelchair", "2332": "money", "2333": "top hat", "2334": "willow", "2335": "cd", "2336": "brushing hair", "2337": "pancake", "2338": "80", "2339": "listening to music", "2340": "green and red", "2341": "barrier", "2342": "vests", "2343": "hiking", "2344": "tank top", "2345": "lufthansa", "2346": "student", "2347": "menu", "2348": "forehand", "2349": "wii controllers", "2350": "acer", "2351": "wall st", "2352": "hundreds", "2353": "water ski", "2354": "furniture", "2355": "paisley", "2356": "pizza hut", "2357": "baseball game", "2358": "hill", "2359": "prom", "2360": "1 world", "2361": "tiara", "2362": "students", "2363": "information", "2364": "hazy", "2365": "nasa", "2366": "canon", "2367": "bird feeder", "2368": "crane", "2369": "dr pepper", "2370": "logitech", "2371": "2:10", "2372": "all of them", "2373": "utensils", "2374": "telephone", "2375": "converse", "2376": "bone", "2377": "jeep", "2378": "nursing", "2379": "krispy kreme", "2380": "cameraman", "2381": "pee", "2382": "ranch", "2383": "polka dots", "2384": "railroad crossing", "2385": "shirts", "2386": "feeder", "2387": "above toilet", "2388": "unclear", "2389": "below", "2390": "43", "2391": "spoons", "2392": "calendar", "2393": "vaio", "2394": "fox", "2395": "mint", "2396": "after", "2397": "spiderman", "2398": "lg", "2399": 
"concert", "2400": "on rock", "2401": "fluffy", "2402": "gray and black", "2403": "coats", "2404": "lady", "2405": "dodge", "2406": "easyjet", "2407": "pearl", "2408": "bunt", "2409": "flat screen", "2410": "10:30", "2411": "music", "2412": "polar bears", "2413": "riding horse", "2414": "lift", "2415": "angry", "2416": "cookies", "2417": "3:45", "2418": "buttons", "2419": "hot", "2420": "cute", "2421": "behind", "2422": "dole", "2423": "in motion", "2424": "26", "2425": "pans", "2426": "love", "2427": "winnie pooh", "2428": "pear", "2429": "copyright", "2430": "2 hours", "2431": "snowsuit", "2432": "kissing", "2433": "backhand", "2434": "to get to other side", "2435": "metro", "2436": "swans", "2437": "very fast", "2438": "can't see it", "2439": "nintendo", "2440": "direction", "2441": "waiting", "2442": "mohawk", "2443": "st patrick's day", "2444": "rail", "2445": "hoodie", "2446": "feet", "2447": "swirls", "2448": "muffins", "2449": "4:05", "2450": "106", "2451": "10:55", "2452": "coins", "2453": "mitt", "2454": "game controller", "2455": "room", "2456": "adults", "2457": "urinals", "2458": "cameras", "2459": "marker", "2460": "upright", "2461": "brass", "2462": "sled", "2463": "teacher", "2464": "conductor", "2465": "farmers market", "2466": "toiletries", "2467": "blue and black", "2468": "soccer field", "2469": "banana peel", "2470": "sprite", "2471": "doughnuts", "2472": "bank of america", "2473": "on his face", "2474": "heat", "2475": "emergency", "2476": "ski slope", "2477": "hard", "2478": "41", "2479": "6:00", "2480": "in his hand", "2481": "cluttered", "2482": "dog show", "2483": "on boat", "2484": "grizzly", "2485": "drums", "2486": "not", "2487": "in hand", "2488": "easy", "2489": "400", "2490": "under table", "2491": "d", "2492": "hitting ball", "2493": "photography", "2494": "intersection", "2495": "backwards", "2496": "crocs", "2497": "marina", "2498": "chips", "2499": "bible", "2500": "harry potter", "2501": "hawaii", "2502": "fanta", "2503": "half 
full", "2504": "carriage", "2505": "curious", "2506": "12:50", "2507": "black white", "2508": "geese", "2509": "pork", "2510": "mailbox", "2511": "l", "2512": "sidecar", "2513": "poop", "2514": "wings", "2515": "penguin", "2516": "to see", "2517": "pocket", "2518": "steps", "2519": "cubs", "2520": "junk", "2521": "deer", "2522": "ottoman", "2523": "salt", "2524": "condiments", "2525": "1:55", "2526": "post", "2527": "bulldog", "2528": "notebook", "2529": "no cat", "2530": "champagne", "2531": "jets", "2532": "knee pads", "2533": "throw frisbee", "2534": "drinks", "2535": "leopard", "2536": "taller", "2537": "cooler", "2538": "bundt", "2539": "monday", "2540": "grape", "2541": "wine tasting", "2542": "under", "2543": "baskets", "2544": "santa hat", "2545": "chest", "2546": "sewing", "2547": "on car", "2548": "sony ericsson", "2549": "peeing", "2550": "for photo", "2551": "tour", "2552": "few", "2553": "singapore", "2554": "fireman", "2555": "fire extinguisher", "2556": "wildebeest", "2557": "lemons", "2558": "peanuts", "2559": "babies", "2560": "wiimote", "2561": "guitar hero", "2562": "slide", "2563": "stopped", "2564": "library", "2565": "multi colored", "2566": "blue and pink", "2567": "choppy", "2568": "sailing", "2569": "brush", "2570": "grinding", "2571": "jelly", "2572": "dairy queen", "2573": "shaking hands", "2574": "ge", "2575": "tigers", "2576": "tokyo", "2577": "philadelphia", "2578": "ski boots", "2579": "buses", "2580": "11:45", "2581": "collage", "2582": "pink and blue", "2583": "jesus", "2584": "singles", "2585": "iron", "2586": "coffee table", "2587": "2 years", "2588": "don't walk", "2589": "classroom", "2590": "on water", "2591": "potato salad", "2592": "posts", "2593": "harbor", "2594": "residential", "2595": "joshua", "2596": "uk", "2597": "burgers", "2598": "deli", "2599": "kicking", "2600": "lace", "2601": "overalls", "2602": "vehicles", "2603": "ram", "2604": "dancing", "2605": "47", "2606": "shed", "2607": "lid", "2608": "he's not", "2609": 
"fans", "2610": "amtrak", "2611": "space shuttle", "2612": "ostrich", "2613": "bathtub", "2614": "kneeling", "2615": "2:50", "2616": "mall", "2617": "yellow and orange", "2618": "gazebo", "2619": "wax", "2620": "slow down", "2621": "lays", "2622": "hammer time", "2623": "octopus", "2624": "crib", "2625": "banana split", "2626": "broadway", "2627": "pottery", "2628": "wavy", "2629": "farmers", "2630": "holding phone", "2631": "on phone", "2632": "squirrel", "2633": "wax paper", "2634": "tusks", "2635": "dining", "2636": "packing", "2637": "kangaroo", "2638": "dawn", "2639": "defense", "2640": "powdered", "2641": "thomas", "2642": "budweiser", "2643": "back left", "2644": "stir fry", "2645": "beijing", "2646": "11:10", "2647": "tripod", "2648": "wide", "2649": "slope", "2650": "black and gray", "2651": "planter", "2652": "chili", "2653": "siblings", "2654": "kayaking", "2655": "captivity", "2656": "opaque", "2657": "rack", "2658": "panda", "2659": "doorway", "2660": "wheelie", "2661": "pelicans", "2662": "genetics", "2663": "not in service", "2664": "volvo", "2665": "dachshund", "2666": "v", "2667": "on laptop", "2668": "western", "2669": "gone", "2670": "birthday party", "2671": "parking garage", "2672": "tying tie", "2673": "blueberry", "2674": "scale", "2675": "notes", "2676": "train car", "2677": "man made", "2678": "stability", "2679": "lily", "2680": "lying down", "2681": "pacific", "2682": "high heels", "2683": "pare", "2684": "checkerboard", "2685": "partly cloudy", "2686": "cool", "2687": "n", "2688": "toilets", "2689": "tree branch", "2690": "copper", "2691": "cycling", "2692": "5:50", "2693": "870", "2694": "shopping", "2695": "7:05", "2696": "zipper", "2697": "holding umbrella", "2698": "batman", "2699": "lotion", "2700": "1:25", "2701": "black and brown", "2702": "playing video game", "2703": "girl on right", "2704": "legos", "2705": "drinking water", "2706": "burrito", "2707": "plow", "2708": "jet ski", "2709": "spiral", "2710": "ibm", "2711": "tools", 
"2712": "flashlight", "2713": "cherries", "2714": "maple leaf", "2715": "mountainous", "2716": "under tree", "2717": "vines", "2718": "sushi", "2719": "baker", "2720": "snake", "2721": "globe", "2722": "target", "2723": "john", "2724": "pomeranian", "2725": "tuxedo", "2726": "hockey", "2727": "sleeve", "2728": "leaning", "2729": "wireless", "2730": "11:05", "2731": "compaq", "2732": "do not enter", "2733": "radish", "2734": "1:05", "2735": "dim", "2736": "advertisement", "2737": "movement", "2738": "model", "2739": "hammock", "2740": "swing", "2741": "sheet", "2742": "google", "2743": "boardwalk", "2744": "right 1", "2745": "haircut", "2746": "ankle", "2747": "3:30", "2748": "exit", "2749": "csx", "2750": "tim hortons", "2751": "lego", "2752": "cucumbers", "2753": "angel", "2754": "12:20", "2755": "racquet", "2756": "behind woman", "2757": "potato", "2758": "egg salad", "2759": "controllers", "2760": "recliner", "2761": "upside down", "2762": "mosaic", "2763": "before", "2764": "antenna", "2765": "3:50", "2766": "10:15", "2767": "lion", "2768": "camo", "2769": "fighter", "2770": "silver and red", "2771": "dirt bike", "2772": "playing video games", "2773": "used", "2774": "crates", "2775": "horizontally", "2776": "plunger", "2777": "refrigerators", "2778": "radiator", "2779": "stork", "2780": "in basket", "2781": "cap", "2782": "living", "2783": "married", "2784": "briefcase", "2785": "bottom left", "2786": "30 mph", "2787": "ascending", "2788": "flip phone", "2789": "101", "2790": "11:50", "2791": "gun", "2792": "arizona", "2793": "foam", "2794": "serious", "2795": "y", "2796": "close up", "2797": "pancakes", "2798": "heineken", "2799": "paw", "2800": "cnn", "2801": "comforter", "2802": "sheets", "2803": "8:35", "2804": "driveway", "2805": "fair", "2806": "cleaner", "2807": "1 year", "2808": "delivery", "2809": "commuter", "2810": "apple and banana", "2811": "chase", "2812": "72", "2813": "safe", "2814": "trucks", "2815": "trunks", "2816": "spider", "2817": "64", 
"2818": "slacks", "2819": "meeting", "2820": "7:00", "2821": "skiers", "2822": "shaved", "2823": "carrot cake", "2824": "holding", "2825": "surfers", "2826": "giraffe and zebra", "2827": "7:45", "2828": "mississippi", "2829": "seaweed", "2830": "black and pink", "2831": "horse racing", "2832": "orchid", "2833": "rv", "2834": "tourist", "2835": "above door", "2836": "leaving", "2837": "pitch", "2838": "crest", "2839": "miami", "2840": "asics", "2841": "flood", "2842": "bus station", "2843": "take off", "2844": "amazon", "2845": "practice", "2846": "entering", "2847": "diesel", "2848": "pm", "2849": "wetsuits", "2850": "remodeling", "2851": "porch", "2852": "7:35", "2853": "tie dye", "2854": "baked", "2855": "life jacket", "2856": "cylinder", "2857": "grilled cheese", "2858": "meatballs", "2859": "paddling", "2860": "banana bread", "2861": "monster", "2862": "smiley face", "2863": "not high", "2864": "keys", "2865": "dreadlocks", "2866": "kitchenaid", "2867": "straight ahead", "2868": "badminton", "2869": "long sleeve", "2870": "sheepdog", "2871": "5:18", "2872": "end", "2873": "on shore", "2874": "scratching", "2875": "oriental", "2876": "5:05", "2877": "alligator", "2878": "city bus", "2879": "purple and white", "2880": "10:50", "2881": "each other", "2882": "weeds", "2883": "tinkerbell", "2884": "rottweiler", "2885": "apartments", "2886": "snowflakes", "2887": "stop light", "2888": "sweatshirt", "2889": "shore", "2890": "bidet", "2891": "switzerland", "2892": "stretching", "2893": "tv stand", "2894": "boundaries", "2895": "65", "2896": "bronze", "2897": "jar", "2898": "middle 1", "2899": "54", "2900": "skate", "2901": "easton", "2902": "turn right", "2903": "raspberries", "2904": "singing", "2905": "on bus", "2906": "carnations", "2907": "descending", "2908": "classic", "2909": "suspenders", "2910": "not long", "2911": "8:50", "2912": "father", "2913": "anniversary", "2914": "hsbc", "2915": "very long", "2916": "space needle", "2917": "skatepark", "2918": "fruit 
salad", "2919": "kenmore", "2920": "no water", "2921": "8:05", "2922": "db", "2923": "baby's breath", "2924": "shelter", "2925": "1980", "2926": "no left turn", "2927": "washington monument", "2928": "ham and cheese", "2929": "10 inches", "2930": "8:55", "2931": "savory", "2932": "6:35", "2933": "indians", "2934": "9:05", "2935": "fires", "2936": "pipes", "2937": "donkey", "2938": "cds", "2939": "mitsubishi", "2940": "tell time", "2941": "outfield", "2942": "christian", "2943": "puma", "2944": "parking meters", "2945": "cranes", "2946": "flip", "2947": "wine bottle", "2948": "stadium", "2949": "mouthwash", "2950": "heinz", "2951": "distance", "2952": "macaroni", "2953": "on plane", "2954": "triumph", "2955": "more", "2956": "4:50", "2957": "single engine", "2958": "disney", "2959": "on stove", "2960": "shih tzu", "2961": "fried", "2962": "to hit ball", "2963": "in her hand", "2964": "sunrise", "2965": "2nd", "2966": "elmo", "2967": "kite string", "2968": "suzuki", "2969": "traffic lights", "2970": "blt", "2971": "i", "2972": "hitting", "2973": "htc", "2974": "healthy", "2975": "current", "2976": "star alliance", "2977": "stomach", "2978": "watch tv", "2979": "tulip", "2980": "5:10", "2981": "right side", "2982": "4:40", "2983": "ginger", "2984": "on sign", "2985": "cushion", "2986": "5:30", "2987": "learning", "2988": "pencil", "2989": "maroon", "2990": "food processor", "2991": "5:40", "2992": "dog bed", "2993": "michigan", "2994": "close", "2995": "license plate", "2996": "crows", "2997": "right hand", "2998": "normal", "2999": "green and brown", "3000": "1.00", "3001": "000", "3002": "1:40", "3003": "wing", "3004": "american airlines", "3005": "kodak", "3006": "mural", "3007": "sniffing", "3008": "1:15", "3009": "behind bench", "3010": "cardinal", "3011": "no light", "3012": "warmth", "3013": "paved", "3014": "skyscrapers", "3015": "swinging bat", "3016": "watermark", "3017": "in cup", "3018": "pizza box", "3019": "dough", "3020": "hiding", "3021": "goal", 
"3022": "no plate", "3023": "shower head", "3024": "ripe", "3025": "1:10", "3026": "1 in back", "3027": "older", "3028": "nest", "3029": "multiple", "3030": "cinnamon", "3031": "bin", "3032": "new orleans", "3033": "colored", "3034": "enclosure", "3035": "bride", "3036": "on dresser", "3037": "star wars", "3038": "in back", "3039": "triangles", "3040": "over easy", "3041": "cilantro", "3042": "statues", "3043": "sticks", "3044": "formica", "3045": "roundabout", "3046": "bowls", "3047": "ahead", "3048": "years", "3049": "drain", "3050": "veggies", "3051": "no shirt", "3052": "taking photo", "3053": "tugboat", "3054": "broke", "3055": "59", "3056": "cadillac", "3057": "prince", "3058": "left side", "3059": "1 in middle", "3060": "10:45", "3061": "drying", "3062": "11:25", "3063": "silk", "3064": "conference room", "3065": "buoys", "3066": "pockets", "3067": "daffodil", "3068": "6:40", "3069": "walgreens", "3070": "4 ft", "3071": "6:05", "3072": "virgin atlantic", "3073": "12:40", "3074": "digital", "3075": "ups", "3076": "westjet", "3077": "bikers", "3078": "us air force", "3079": "limes", "3080": "comcast", "3081": "dip", "3082": "7:55", "3083": "man in middle", "3084": "bus driver", "3085": "soon", "3086": "futon", "3087": "selling", "3088": "braid", "3089": "mariners", "3090": "wisconsin", "3091": "99", "3092": "citizen", "3093": "broccoli and carrots", "3094": "grocery store", "3095": "us airways", "3096": "49", "3097": "bored", "3098": "red velvet", "3099": "hotel room", "3100": "qantas", "3101": "tam", "3102": "korean air", "3103": "10:35", "3104": "whirlpool", "3105": "coffee cup", "3106": "hilly", "3107": "9:12", "3108": "whipped cream", "3109": "video", "3110": "finger", "3111": "competition", "3112": "hollywood", "3113": "sas", "3114": "backward", "3115": "beads", "3116": "cosmo", "3117": "10:08", "3118": "jal", "3119": "6:30", "3120": "100 year party ct", "3121": "hispanic", "3122": "in cabbage town", "3123": "opponent", "3124": "woodpecker", "3125": 
"visilab", "3126": "mt airy", "3127": "crosstown", "3128": "freightliner"}]
================================================
FILE: openvqa/datasets/vqa/eval/result_eval.py
================================================
from openvqa.datasets.vqa.eval.vqa import VQA
from openvqa.datasets.vqa.eval.vqaEval import VQAEval
import json, pickle
import numpy as np
def eval(__C, dataset, ans_ix_list, pred_list, result_eval_file, ensemble_file, log_file, valid=False):
    """
    Dump predicted answers to a JSON result file and, optionally, score them
    with the official VQA evaluator.

    :param __C: global config; must provide TEST_SAVE_PRED, RAW_PATH, DATASET, SPLIT
    :param dataset: loaded dataset; must provide ques_list, ans_size, ix_to_ans
    :param ans_ix_list: predicted answer index per question (aligned with ques_list)
    :param pred_list: raw prediction vectors (only used when __C.TEST_SAVE_PRED)
    :param result_eval_file: output path prefix; '.json' is appended here
    :param ensemble_file: pickle path for the raw prediction vectors
    :param log_file: text log appended with accuracy numbers when valid=True
    :param valid: when True, run the official VQA accuracy evaluation
    """
    result_eval_file = result_eval_file + '.json'

    qid_list = [ques['question_id'] for ques in dataset.ques_list]
    ans_size = dataset.ans_size

    # Map each predicted answer index back to its answer string.
    result = [{
        'answer': dataset.ix_to_ans[str(ans_ix_list[qix])],
        'question_id': int(qid_list[qix])
    } for qix in range(len(qid_list))]

    print('Save the result to file: {}'.format(result_eval_file))
    # 'with' guarantees the handle is closed even if json.dump raises
    # (the original open() relied on garbage collection).
    with open(result_eval_file, 'w') as f:
        json.dump(result, f)

    if __C.TEST_SAVE_PRED:
        print('Save the prediction vector to file: {}'.format(ensemble_file))

        pred_list = np.array(pred_list).reshape(-1, ans_size)
        result_pred = [{
            'pred': pred_list[qix],
            'qid': int(qid_list[qix])
        } for qix in range(len(qid_list))]

        with open(ensemble_file, 'wb+') as f:
            pickle.dump(result_pred, f, protocol=-1)

    if valid:
        # create vqa object and vqaRes object
        ques_file_path = __C.RAW_PATH[__C.DATASET][__C.SPLIT['val']]
        ans_file_path = __C.RAW_PATH[__C.DATASET][__C.SPLIT['val'] + '-anno']

        vqa = VQA(ans_file_path, ques_file_path)
        vqaRes = vqa.loadRes(result_eval_file, ques_file_path)

        # create vqaEval object by taking vqa and vqaRes
        # n is precision of accuracy (number of places after decimal), default is 2
        vqaEval = VQAEval(vqa, vqaRes, n=2)

        # evaluate results
        """
        If you have a list of question ids on which you would like to evaluate your results, pass it as a list to below function
        By default it uses all the question ids in annotation file
        """
        vqaEval.evaluate()

        # print accuracies
        print("\n")
        print("Overall Accuracy is: %.02f\n" % (vqaEval.accuracy['overall']))
        print("Per Answer Type Accuracy is the following:")
        for ansType in vqaEval.accuracy['perAnswerType']:
            print("%s : %.02f" % (ansType, vqaEval.accuracy['perAnswerType'][ansType]))
        print("\n")

        print('Write to log file: {}'.format(log_file))
        # Append-mode log; closed deterministically via the context manager.
        with open(log_file, 'a+') as logfile:
            logfile.write("Overall Accuracy is: %.02f\n" % (vqaEval.accuracy['overall']))
            for ansType in vqaEval.accuracy['perAnswerType']:
                logfile.write("%s : %.02f " % (ansType, vqaEval.accuracy['perAnswerType'][ansType]))
            logfile.write("\n\n")
================================================
FILE: openvqa/datasets/vqa/eval/vqa.py
================================================
__author__ = 'aagrawal'
__version__ = '0.9'
# Interface for accessing the VQA dataset.
# This code is based on the code written by Tsung-Yi Lin for MSCOCO Python API available at the following link:
# (https://github.com/pdollar/coco/blob/master/PythonAPI/pycocotools/coco.py).
# The following functions are defined:
# VQA - VQA class that loads VQA annotation file and prepares data structures.
# getQuesIds - Get question ids that satisfy given filter conditions.
# getImgIds - Get image ids that satisfy given filter conditions.
# loadQA - Load questions and answers with the specified question ids.
# showQA - Display the specified questions and answers.
# loadRes - Load result file and create result object.
# Help on each function can be accessed by: "help(COCO.function)"
import json
import datetime
import copy
class VQA:
    """
    Helper class for reading the VQA annotation/question files and indexing
    them by question id and image id; also loads result files for evaluation.

    Based on the MSCOCO Python API written by Tsung-Yi Lin
    (https://github.com/pdollar/coco/blob/master/PythonAPI/pycocotools/coco.py).
    """

    def __init__(self, annotation_file=None, question_file=None):
        """
        Constructor of VQA helper class for reading and visualizing questions and answers.
        :param annotation_file (str): location of VQA annotation file
        :param question_file (str): location of VQA question file
        :return:
        """
        # Index structures; left empty when no files are given (see loadRes).
        self.dataset = {}
        self.questions = {}
        self.qa = {}        # question_id -> annotation dict
        self.qqa = {}       # question_id -> question dict
        self.imgToQA = {}   # image_id -> list of annotation dicts
        if annotation_file is not None and question_file is not None:
            print('loading VQA annotations and questions into memory...')
            time_t = datetime.datetime.utcnow()
            # Context managers close the file handles deterministically
            # (the original bare open() relied on garbage collection).
            with open(annotation_file, 'r') as f:
                self.dataset = json.load(f)
            with open(question_file, 'r') as f:
                self.questions = json.load(f)
            print(datetime.datetime.utcnow() - time_t)
            self.createIndex()

    def createIndex(self):
        """Build the qa / qqa / imgToQA lookup tables from the loaded data."""
        print('creating index...')
        imgToQA = {ann['image_id']: [] for ann in self.dataset['annotations']}
        qa = {ann['question_id']: [] for ann in self.dataset['annotations']}
        qqa = {ann['question_id']: [] for ann in self.dataset['annotations']}
        for ann in self.dataset['annotations']:
            imgToQA[ann['image_id']] += [ann]
            qa[ann['question_id']] = ann
        for ques in self.questions['questions']:
            qqa[ques['question_id']] = ques
        print('index created!')

        # create class members
        self.qa = qa
        self.qqa = qqa
        self.imgToQA = imgToQA

    def info(self):
        """
        Print information about the VQA annotation file.
        :return:
        """
        for key, value in self.dataset['info'].items():
            print('%s: %s' % (key, value))

    def getQuesIds(self, imgIds=[], quesTypes=[], ansTypes=[]):
        """
        Get question ids that satisfy given filter conditions. default skips that filter
        :param imgIds (int array) : get question ids for given imgs
               quesTypes (str array) : get question ids for given question types
               ansTypes (str array) : get question ids for given answer types
        :return: ids (int array) : integer array of question ids
        """
        # Accept scalars as well as lists for every filter.
        imgIds = imgIds if type(imgIds) == list else [imgIds]
        quesTypes = quesTypes if type(quesTypes) == list else [quesTypes]
        ansTypes = ansTypes if type(ansTypes) == list else [ansTypes]

        if len(imgIds) == len(quesTypes) == len(ansTypes) == 0:
            anns = self.dataset['annotations']
        else:
            if not len(imgIds) == 0:
                # imgToQA values are lists, so flattening with sum() is valid here.
                anns = sum([self.imgToQA[imgId] for imgId in imgIds if imgId in self.imgToQA], [])
            else:
                anns = self.dataset['annotations']
            anns = anns if len(quesTypes) == 0 else [ann for ann in anns if ann['question_type'] in quesTypes]
            anns = anns if len(ansTypes) == 0 else [ann for ann in anns if ann['answer_type'] in ansTypes]
        ids = [ann['question_id'] for ann in anns]
        return ids

    def getImgIds(self, quesIds=[], quesTypes=[], ansTypes=[]):
        """
        Get image ids that satisfy given filter conditions. default skips that filter
        :param quesIds (int array) : get image ids for given question ids
               quesTypes (str array) : get image ids for given question types
               ansTypes (str array) : get image ids for given answer types
        :return: ids (int array) : integer array of image ids
        """
        quesIds = quesIds if type(quesIds) == list else [quesIds]
        quesTypes = quesTypes if type(quesTypes) == list else [quesTypes]
        ansTypes = ansTypes if type(ansTypes) == list else [ansTypes]

        if len(quesIds) == len(quesTypes) == len(ansTypes) == 0:
            anns = self.dataset['annotations']
        else:
            if not len(quesIds) == 0:
                # BUG FIX: self.qa maps each question id to a single annotation
                # dict (see createIndex), so the previous
                # sum([self.qa[quesId] ...], []) tried to concatenate dicts and
                # raised TypeError for any non-empty quesIds. Collect the
                # annotation dicts into a list directly instead.
                anns = [self.qa[quesId] for quesId in quesIds if quesId in self.qa]
            else:
                anns = self.dataset['annotations']
            anns = anns if len(quesTypes) == 0 else [ann for ann in anns if ann['question_type'] in quesTypes]
            anns = anns if len(ansTypes) == 0 else [ann for ann in anns if ann['answer_type'] in ansTypes]
        ids = [ann['image_id'] for ann in anns]
        return ids

    def loadQA(self, ids=[]):
        """
        Load questions and answers with the specified question ids.
        :param ids (int array) : integer ids specifying question ids
        :return: qa (object array) : loaded qa objects
        """
        if type(ids) == list:
            return [self.qa[id] for id in ids]
        elif type(ids) == int:
            return [self.qa[ids]]

    def showQA(self, anns):
        """
        Display the specified annotations.
        :param anns (array of object): annotations to display
        :return: None
        """
        if len(anns) == 0:
            return 0
        for ann in anns:
            quesId = ann['question_id']
            print("Question: %s" % (self.qqa[quesId]['question']))
            for ans in ann['answers']:
                print("Answer %d: %s" % (ans['answer_id'], ans['answer']))

    def loadRes(self, resFile, quesFile):
        """
        Load result file and return a result object.
        :param resFile (str) : file name of result file
        :param quesFile (str) : file name of question file
        :return: res (obj) : result api object
        """
        res = VQA()
        with open(quesFile) as f:
            res.questions = json.load(f)
        # Carry the dataset-level metadata over from this object's questions.
        res.dataset['info'] = copy.deepcopy(self.questions['info'])
        res.dataset['task_type'] = copy.deepcopy(self.questions['task_type'])
        res.dataset['data_type'] = copy.deepcopy(self.questions['data_type'])
        res.dataset['data_subtype'] = copy.deepcopy(self.questions['data_subtype'])
        res.dataset['license'] = copy.deepcopy(self.questions['license'])

        print('Loading and preparing results... ')
        time_t = datetime.datetime.utcnow()
        with open(resFile) as f:
            anns = json.load(f)
        assert type(anns) == list, 'results is not an array of objects'
        annsQuesIds = [ann['question_id'] for ann in anns]
        # Results must cover exactly the question ids of this annotation set.
        assert set(annsQuesIds) == set(self.getQuesIds()), \
            'Results do not correspond to current VQA set. Either the results do not have predictions for all question ids in annotation file or there is atleast one question id that does not belong to the question ids in the annotation file.'
        for ann in anns:
            quesId = ann['question_id']
            if res.dataset['task_type'] == 'Multiple Choice':
                assert ann['answer'] in self.qqa[quesId][
                    'multiple_choices'], 'predicted answer is not one of the multiple choices'
            # Copy ground-truth metadata onto the predicted annotation so it
            # can be indexed and typed like a real annotation.
            qaAnn = self.qa[quesId]
            ann['image_id'] = qaAnn['image_id']
            ann['question_type'] = qaAnn['question_type']
            ann['answer_type'] = qaAnn['answer_type']
        print('DONE (t=%0.2fs)' % ((datetime.datetime.utcnow() - time_t).total_seconds()))

        res.dataset['annotations'] = anns
        res.createIndex()
        return res
================================================
FILE: openvqa/datasets/vqa/eval/vqaEval.py
================================================
# coding=utf-8
__author__='aagrawal'
# This code is based on the code written by Tsung-Yi Lin for MSCOCO Python API available at the following link:
# (https://github.com/tylin/coco-caption/blob/master/pycocoevalcap/eval.py).
import sys
import re
class VQAEval:
    """
    Official VQA accuracy evaluator.

    For every question, the predicted answer is normalized (punctuation,
    digits, articles, contractions) and scored against the human answers with
    min(#matching answers / 3, 1), averaged leave-one-out over annotators.
    Results are aggregated overall, per question type, and per answer type.

    Several oddities below (regex typo, re.UNICODE passed into re.sub's count
    slot, in-place mutation of ground-truth answers) are reproduced from the
    official evaluation script; changing them would change reported scores.
    """
    def __init__(self, vqa, vqaRes, n=2):
        # n is the number of decimal places used when rounding accuracies.
        self.n = n
        self.accuracy = {}        # filled by setAccuracy() after evaluate()
        self.evalQA = {}          # question_id -> accuracy percentage
        self.evalQuesType = {}    # question_type -> {question_id -> accuracy}
        self.evalAnsType = {}     # answer_type -> {question_id -> accuracy}
        self.vqa = vqa            # ground-truth VQA object
        self.vqaRes = vqaRes      # result VQA object (from VQA.loadRes)
        self.params = {'question_id': vqa.getQuesIds()}
        # Contraction table: answers written without apostrophes are mapped to
        # their contracted forms before comparison.
        self.contractions = {"aint": "ain't", "arent": "aren't", "cant": "can't", "couldve": "could've", "couldnt": "couldn't",
                             "couldn'tve": "couldn't've", "couldnt've": "couldn't've", "didnt": "didn't", "doesnt": "doesn't", "dont": "don't", "hadnt": "hadn't",
                             "hadnt've": "hadn't've", "hadn'tve": "hadn't've", "hasnt": "hasn't", "havent": "haven't", "hed": "he'd", "hed've": "he'd've",
                             "he'dve": "he'd've", "hes": "he's", "howd": "how'd", "howll": "how'll", "hows": "how's", "Id've": "I'd've", "I'dve": "I'd've",
                             "Im": "I'm", "Ive": "I've", "isnt": "isn't", "itd": "it'd", "itd've": "it'd've", "it'dve": "it'd've", "itll": "it'll", "let's": "let's",
                             "maam": "ma'am", "mightnt": "mightn't", "mightnt've": "mightn't've", "mightn'tve": "mightn't've", "mightve": "might've",
                             "mustnt": "mustn't", "mustve": "must've", "neednt": "needn't", "notve": "not've", "oclock": "o'clock", "oughtnt": "oughtn't",
                             "ow's'at": "'ow's'at", "'ows'at": "'ow's'at", "'ow'sat": "'ow's'at", "shant": "shan't", "shed've": "she'd've", "she'dve": "she'd've",
                             "she's": "she's", "shouldve": "should've", "shouldnt": "shouldn't", "shouldnt've": "shouldn't've", "shouldn'tve": "shouldn't've",
                             "somebody'd": "somebodyd", "somebodyd've": "somebody'd've", "somebody'dve": "somebody'd've", "somebodyll": "somebody'll",
                             "somebodys": "somebody's", "someoned": "someone'd", "someoned've": "someone'd've", "someone'dve": "someone'd've",
                             "someonell": "someone'll", "someones": "someone's", "somethingd": "something'd", "somethingd've": "something'd've",
                             "something'dve": "something'd've", "somethingll": "something'll", "thats": "that's", "thered": "there'd", "thered've": "there'd've",
                             "there'dve": "there'd've", "therere": "there're", "theres": "there's", "theyd": "they'd", "theyd've": "they'd've",
                             "they'dve": "they'd've", "theyll": "they'll", "theyre": "they're", "theyve": "they've", "twas": "'twas", "wasnt": "wasn't",
                             "wed've": "we'd've", "we'dve": "we'd've", "weve": "we've", "werent": "weren't", "whatll": "what'll", "whatre": "what're",
                             "whats": "what's", "whatve": "what've", "whens": "when's", "whered": "where'd", "wheres": "where's", "whereve": "where've",
                             "whod": "who'd", "whod've": "who'd've", "who'dve": "who'd've", "wholl": "who'll", "whos": "who's", "whove": "who've", "whyll": "why'll",
                             "whyre": "why're", "whys": "why's", "wont": "won't", "wouldve": "would've", "wouldnt": "wouldn't", "wouldnt've": "wouldn't've",
                             "wouldn'tve": "wouldn't've", "yall": "y'all", "yall'll": "y'all'll", "y'allll": "y'all'll", "yall'd've": "y'all'd've",
                             "y'alld've": "y'all'd've", "y'all'dve": "y'all'd've", "youd": "you'd", "youd've": "you'd've", "you'dve": "you'd've",
                             "youll": "you'll", "youre": "you're", "youve": "you've"}
        # Spelled-out small numbers are mapped to digits before comparison.
        self.manualMap = { 'none': '0',
                           'zero': '0',
                           'one': '1',
                           'two': '2',
                           'three': '3',
                           'four': '4',
                           'five': '5',
                           'six': '6',
                           'seven': '7',
                           'eight': '8',
                           'nine': '9',
                           'ten': '10'
                         }
        # Articles are dropped from answers before comparison.
        self.articles = ['a',
                         'an',
                         'the'
                        ]
        # NOTE(review): "(?!<=\d)" looks like a typo for the lookbehind
        # "(?<!\d)", but it is kept verbatim to stay consistent with the
        # official VQA evaluation code -- confirm before changing.
        self.periodStrip = re.compile("(?!<=\d)(\.)(?!\d)")
        # Matches a comma sandwiched between digits (e.g. "1,000").
        self.commaStrip = re.compile("(\d)(,)(\d)")
        # Punctuation handled by processPunctuation.
        self.punct = [';', r"/", '[', ']', '"', '{', '}',
                      '(', ')', '=', '+', '\\', '_', '-',
                      '>', '<', '@', '`', ',', '?', '!']

    def evaluate(self, quesIds=None):
        """
        Score the loaded results. Defaults to every question id in the
        annotation file when quesIds is None; populates self.accuracy,
        self.evalQA, self.evalQuesType and self.evalAnsType.
        """
        if quesIds == None:
            quesIds = [quesId for quesId in self.params['question_id']]
        gts = {}   # question_id -> ground-truth annotation
        res = {}   # question_id -> predicted result annotation
        for quesId in quesIds:
            gts[quesId] = self.vqa.qa[quesId]
            res[quesId] = self.vqaRes.qa[quesId]

        # =================================================
        # Compute accuracy
        # =================================================
        accQA = []
        accQuesType = {}
        accAnsType = {}
        print ("computing accuracy")
        step = 0
        for quesId in quesIds:
            # Normalize the predicted answer: collapse whitespace, then strip
            # punctuation, digits-as-words and articles.
            resAns = res[quesId]['answer']
            resAns = resAns.replace('\n', ' ')
            resAns = resAns.replace('\t', ' ')
            resAns = resAns.strip()
            resAns = self.processPunctuation(resAns)
            resAns = self.processDigitArticle(resAns)
            gtAcc = []
            gtAnswers = [ans['answer'] for ans in gts[quesId]['answers']]
            # Quirk of the official evaluator: ground-truth answers are
            # punctuation-normalized (in place) only when annotators disagree.
            if len(set(gtAnswers)) > 1:
                for ansDic in gts[quesId]['answers']:
                    ansDic['answer'] = self.processPunctuation(ansDic['answer'])
            for gtAnsDatum in gts[quesId]['answers']:
                # Leave-one-out consensus: score against the remaining
                # annotators; 3 or more matches gives full credit.
                otherGTAns = [item for item in gts[quesId]['answers'] if item!=gtAnsDatum]
                matchingAns = [item for item in otherGTAns if item['answer']==resAns]
                acc = min(1, float(len(matchingAns))/3)
                gtAcc.append(acc)
            quesType = gts[quesId]['question_type']
            ansType = gts[quesId]['answer_type']
            avgGTAcc = float(sum(gtAcc))/len(gtAcc)
            accQA.append(avgGTAcc)
            if quesType not in accQuesType:
                accQuesType[quesType] = []
            accQuesType[quesType].append(avgGTAcc)
            if ansType not in accAnsType:
                accAnsType[ansType] = []
            accAnsType[ansType].append(avgGTAcc)
            self.setEvalQA(quesId, avgGTAcc)
            self.setEvalQuesType(quesId, quesType, avgGTAcc)
            self.setEvalAnsType(quesId, ansType, avgGTAcc)
            # Progress bar, refreshed every 100 questions.
            if step%100 == 0:
                self.updateProgress(step/float(len(quesIds)))
            step = step + 1
        self.setAccuracy(accQA, accQuesType, accAnsType)
        print ("Done computing accuracy")

    def processPunctuation(self, inText):
        """Strip or space-replace punctuation, mirroring the official script."""
        outText = inText
        for p in self.punct:
            # Remove the character entirely when it is adjacent to a space in
            # the input (or the input contains a digit,digit pattern);
            # otherwise replace it with a space.
            if (p + ' ' in inText or ' ' + p in inText) or (re.search(self.commaStrip, inText) != None):
                outText = outText.replace(p, '')
            else:
                outText = outText.replace(p, ' ')
        # NOTE(review): re.UNICODE is passed positionally here, which lands in
        # the 'count' parameter of re.sub rather than 'flags'; preserved
        # verbatim for parity with the official evaluator -- confirm before
        # changing.
        outText = self.periodStrip.sub("",
                                       outText,
                                       re.UNICODE)
        return outText

    def processDigitArticle(self, inText):
        """Map number words to digits, drop articles, expand contractions."""
        outText = []
        tempText = inText.lower().split()
        for word in tempText:
            # NOTE(review): setdefault also inserts unseen words into
            # manualMap (mutates it); kept as in the official script.
            word = self.manualMap.setdefault(word, word)
            if word not in self.articles:
                outText.append(word)
            else:
                pass
        for wordId, word in enumerate(outText):
            if word in self.contractions:
                outText[wordId] = self.contractions[word]
        outText = ' '.join(outText)
        return outText

    def setAccuracy(self, accQA, accQuesType, accAnsType):
        # Aggregate the per-question accuracies into percentages rounded to
        # self.n decimal places.
        self.accuracy['overall'] = round(100*float(sum(accQA))/len(accQA), self.n)
        self.accuracy['perQuestionType'] = {quesType: round(100*float(sum(accQuesType[quesType]))/len(accQuesType[quesType]), self.n) for quesType in accQuesType}
        self.accuracy['perAnswerType'] = {ansType: round(100*float(sum(accAnsType[ansType]))/len(accAnsType[ansType]), self.n) for ansType in accAnsType}

    def setEvalQA(self, quesId, acc):
        # Record the per-question accuracy as a percentage.
        self.evalQA[quesId] = round(100*acc, self.n)

    def setEvalQuesType(self, quesId, quesType, acc):
        # Record the per-question accuracy grouped by question type.
        if quesType not in self.evalQuesType:
            self.evalQuesType[quesType] = {}
        self.evalQuesType[quesType][quesId] = round(100*acc, self.n)

    def setEvalAnsType(self, quesId, ansType, acc):
        # Record the per-question accuracy grouped by answer type.
        if ansType not in self.evalAnsType:
            self.evalAnsType[ansType] = {}
        self.evalAnsType[ansType][quesId] = round(100*acc, self.n)

    def updateProgress(self, progress):
        """Render a 20-slot textual progress bar to stdout (in-place via \\r)."""
        barLength = 20
        status = ""
        if isinstance(progress, int):
            progress = float(progress)
        if not isinstance(progress, float):
            progress = 0
            status = "error: progress var must be float\r\n"
        if progress < 0:
            progress = 0
            status = "Halt...\r\n"
        if progress >= 1:
            progress = 1
            status = "Done...\r\n"
        block = int(round(barLength*progress))
        text = "\rFinished Percent: [{0}] {1}% {2}".format( "#"*block + "-"*(barLength-block), int(progress*100), status)
        sys.stdout.write(text)
        sys.stdout.flush()
================================================
FILE: openvqa/datasets/vqa/vqa_loader.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
import numpy as np
import glob, json, re, en_vectors_web_lg
from openvqa.core.base_dataset import BaseDataSet
from openvqa.utils.ans_punct import prep_ans
class DataSet(BaseDataSet):
def __init__(self, __C):
    """
    Load the VQA dataset: image feature file paths, question/answer lists
    for the active run splits, the question-word vocabulary (with optional
    GloVe vectors), and the fixed answer vocabulary.

    :param __C: global config; reads FEATS_PATH, RAW_PATH, DATASET, SPLIT,
                RUN_MODE and USE_GLOVE
    """
    super(DataSet, self).__init__()
    self.__C = __C

    # --------------------------
    # ---- Raw data loading ----
    # --------------------------

    # Loading all image paths
    frcn_feat_path_list = \
        glob.glob(__C.FEATS_PATH[__C.DATASET]['train'] + '/*.npz') + \
        glob.glob(__C.FEATS_PATH[__C.DATASET]['val'] + '/*.npz') + \
        glob.glob(__C.FEATS_PATH[__C.DATASET]['test'] + '/*.npz')

    # Loading question word list from every split, including 'vg'
    # (presumably Visual Genome -- confirm against path_cfgs), so the
    # vocabulary covers all questions seen at train or test time.
    stat_ques_list = \
        json.load(open(__C.RAW_PATH[__C.DATASET]['train'], 'r'))['questions'] + \
        json.load(open(__C.RAW_PATH[__C.DATASET]['val'], 'r'))['questions'] + \
        json.load(open(__C.RAW_PATH[__C.DATASET]['test'], 'r'))['questions'] + \
        json.load(open(__C.RAW_PATH[__C.DATASET]['vg'], 'r'))['questions']

    # Loading answer word list
    # stat_ans_list = \
    #     json.load(open(__C.RAW_PATH[__C.DATASET]['train-anno'], 'r'))['annotations'] + \
    #     json.load(open(__C.RAW_PATH[__C.DATASET]['val-anno'], 'r'))['annotations']

    # Loading question and answer list for the active run splits only
    self.ques_list = []
    self.ans_list = []

    split_list = __C.SPLIT[__C.RUN_MODE].split('+')
    for split in split_list:
        self.ques_list += json.load(open(__C.RAW_PATH[__C.DATASET][split], 'r'))['questions']
        if __C.RUN_MODE in ['train']:
            # Answer annotations are only loaded for training.
            self.ans_list += json.load(open(__C.RAW_PATH[__C.DATASET][split + '-anno'], 'r'))['annotations']

    # Define run data size: training iterates over answers, other modes
    # iterate over questions.
    if __C.RUN_MODE in ['train']:
        self.data_size = self.ans_list.__len__()
    else:
        self.data_size = self.ques_list.__len__()
    print(' ========== Dataset size:', self.data_size)

    # ------------------------
    # ---- Data statistic ----
    # ------------------------

    # {image id} -> {image feature absolutely path}
    self.iid_to_frcn_feat_path = self.img_feat_path_load(frcn_feat_path_list)

    # {question id} -> {question}
    self.qid_to_ques = self.ques_load(self.ques_list)

    # Tokenize
    self.token_to_ix, self.pretrained_emb = self.tokenize(stat_ques_list, __C.USE_GLOVE)
    self.token_size = self.token_to_ix.__len__()
    print(' ========== Question token vocab size:', self.token_size)

    # Answers statistic: the answer vocabulary is loaded from a fixed file
    # rather than rebuilt from annotations (see the commented alternative).
    self.ans_to_ix, self.ix_to_ans = self.ans_stat('openvqa/datasets/vqa/answer_dict.json')
    # self.ans_to_ix, self.ix_to_ans = self.ans_stat(stat_ans_list, ans_freq=8)
    self.ans_size = self.ans_to_ix.__len__()
    print(' ========== Answer token vocab size (occur more than {} times):'.format(8), self.ans_size)
    print('Finished!')
    print('')
def img_feat_path_load(self, path_list):
    """
    Build the {image id -> feature file path} lookup table.

    The image id is taken from the feature file's basename: the last
    '_'-separated token before the extension, with leading zeros stripped
    (e.g. 'COCO_train2014_000000000001.npz' -> '1').

    :param path_list: list of paths to .npz feature files
    :return: dict mapping str(image id) to its feature file path
    """
    import os  # local import: only needed for path parsing here

    iid_to_path = {}
    for path in path_list:
        # os.path.basename instead of splitting on '/' so the parsing also
        # works with OS-specific path separators.
        fname = os.path.basename(path)
        iid = str(int(fname.split('_')[-1].split('.')[0]))
        iid_to_path[iid] = path
    return iid_to_path
def ques_load(self, ques_list):
    """
    Index every question entry by its (stringified) question id.

    :param ques_list: list of question dicts, each with a 'question_id' key
    :return: dict mapping str(question_id) to its question dict
    """
    return {str(entry['question_id']): entry for entry in ques_list}
def tokenize(self, stat_ques_list, use_glove):
    """
    Build the question-word vocabulary and, optionally, matching GloVe
    embedding vectors.

    :param stat_ques_list: question dicts whose 'question' text is scanned
    :param use_glove: when True, collect an embedding vector per token via
                      the en_vectors_web_lg spaCy model
    :return: (token_to_ix, pretrained_emb) where token_to_ix starts with the
             special tokens PAD=0, UNK=1, CLS=2 and pretrained_emb is a numpy
             array (empty when use_glove is False)
    """
    vocab = {
        'PAD': 0,
        'UNK': 1,
        'CLS': 2,
    }
    embeddings = []
    nlp = None
    if use_glove:
        nlp = en_vectors_web_lg.load()
        # Embeddings for the three special tokens come first, matching the
        # vocabulary order above.
        for special in ('PAD', 'UNK', 'CLS'):
            embeddings.append(nlp(special).vector)

    for entry in stat_ques_list:
        # Drop punctuation, split hyphen/slash compounds, lowercase.
        cleaned = re.sub(
            r"([.,'!?\"()*#:;])",
            '',
            entry['question'].lower()
        ).replace('-', ' ').replace('/', ' ')
        for token in cleaned.split():
            if token not in vocab:
                vocab[token] = len(vocab)
                if use_glove:
                    embeddings.append(nlp(token).vector)

    return vocab, np.array(embeddings)
# def ans_stat(self, stat_ans_list, ans_freq):
# ans_to_ix = {}
# ix_to_ans = {}
# ans_freq_dict = {}
#
# for ans in stat_ans_list:
# ans_proc = prep_ans(ans['multiple_choice_answer'])
# if ans_proc not in ans_freq_dict:
# ans_freq_dict[ans_proc] = 1
# else:
# ans_freq_dict[ans_proc] += 1
#
# ans_freq_filter = ans_freq_dict.copy()
# for ans in ans_freq_dict:
# if ans_freq_dict[ans] <= ans_freq:
# ans_freq_filter.pop(ans)
#
# for ans in ans_freq_filter:
# ix_to_ans[ans_to_ix.__len__()] = ans
# ans_to_ix[ans] = ans_to_ix.__len__()
#
# return ans_to_ix, ix_to_ans
def ans_stat(self, json_file):
    """
    Load the fixed answer vocabulary from disk.

    :param json_file: path to a JSON file containing the two-element list
                      [ans_to_ix, ix_to_ans]
    :return: (ans_to_ix, ix_to_ans)
    """
    # 'with' closes the file handle deterministically (the original bare
    # open() relied on garbage collection to close it).
    with open(json_file, 'r') as f:
        ans_to_ix, ix_to_ans = json.load(f)
    return ans_to_ix, ix_to_ans
# ----------------------------------------------
# ---- Real-Time Processing Implementations ----
# ----------------------------------------------
def load_ques_ans(self, idx):
    """
    Fetch the question token ids, answer target, and image id for sample idx.

    Training mode indexes the answer list and builds a real answer target;
    every other mode indexes the question list and returns a dummy answer.

    :param idx: sample index into the active split
    :return: (ques_ix_iter, ans_iter, iid)
    """
    if self.__C.RUN_MODE not in ['train']:
        # Evaluation path: no answer annotation is available.
        entry = self.ques_list[idx]
        tokens = self.proc_ques(entry, self.token_to_ix, max_token=14)
        return tokens, np.zeros(1), str(entry['image_id'])

    # Training path: look the question up via the answer's question id.
    ans_entry = self.ans_list[idx]
    ques_entry = self.qid_to_ques[str(ans_entry['question_id'])]
    tokens = self.proc_ques(ques_entry, self.token_to_ix, max_token=14)
    target = self.proc_ans(ans_entry, self.ans_to_ix)
    return tokens, target, str(ans_entry['image_id'])
def load_img_feats(self, idx, iid):
    """Load and pad the pre-extracted visual features for image id `iid`.

    Reads the image's .npz file (keys 'x', 'bbox', 'image_h', 'image_w'),
    pads region features and (optionally normalized) bbox features to the
    fixed sizes in __C.FEAT_SIZE['vqa'], and returns a dummy grid feature
    (the VQA pipeline uses region features only).
    """
    frcn_feat = np.load(self.iid_to_frcn_feat_path[iid])
    # 'x' is stored transposed; flip to (num_boxes, feat_dim) so padding
    # below (along axis 0) pads the box dimension.
    frcn_feat_x = frcn_feat['x'].transpose((1, 0))
    frcn_feat_iter = self.proc_img_feat(frcn_feat_x, img_feat_pad_size=self.__C.FEAT_SIZE['vqa']['FRCN_FEAT_SIZE'][0])
    bbox_feat_iter = self.proc_img_feat(
        self.proc_bbox_feat(
            frcn_feat['bbox'],
            (frcn_feat['image_h'], frcn_feat['image_w'])
        ),
        img_feat_pad_size=self.__C.FEAT_SIZE['vqa']['BBOX_FEAT_SIZE'][0]
    )
    # Placeholder — no grid features for this dataset path.
    grid_feat_iter = np.zeros(1)
    return frcn_feat_iter, grid_feat_iter, bbox_feat_iter
# ------------------------------------
# ---- Real-Time Processing Utils ----
# ------------------------------------
def proc_img_feat(self, img_feat, img_feat_pad_size):
    """Truncate or zero-pad `img_feat` along axis 0 to exactly
    `img_feat_pad_size` rows; feature columns are untouched."""
    # Slicing is a no-op when there are fewer rows than the target size.
    feat = img_feat[:img_feat_pad_size]
    missing = img_feat_pad_size - feat.shape[0]
    return np.pad(
        feat,
        ((0, missing), (0, 0)),
        mode='constant',
        constant_values=0
    )
def proc_bbox_feat(self, bbox, img_shape):
    """Optionally normalize (x1, y1, x2, y2) boxes into [0, 1].

    `img_shape` is (height, width): x-coordinates are divided by the
    width, y-coordinates by the height. When __C.BBOX_NORMALIZE is False
    the raw boxes are returned unchanged.
    """
    if self.__C.BBOX_NORMALIZE:
        bbox_nm = np.zeros((bbox.shape[0], 4), dtype=np.float32)
        bbox_nm[:, 0] = bbox[:, 0] / float(img_shape[1])
        bbox_nm[:, 1] = bbox[:, 1] / float(img_shape[0])
        bbox_nm[:, 2] = bbox[:, 2] / float(img_shape[1])
        bbox_nm[:, 3] = bbox[:, 3] / float(img_shape[0])
        return bbox_nm
    # Disabled 5th "relative area" channel, kept for reference:
    # bbox_feat[:, 4] = (bbox[:, 2] - bbox[:, 0]) * (bbox[:, 3] - bbox[:, 1]) / float(img_shape[0] * img_shape[1])
    return bbox
def proc_ques(self, ques, token_to_ix, max_token):
    """Convert a question string into a fixed-length int64 id array.

    Unknown words map to the UNK index; questions shorter than
    `max_token` are zero-padded (PAD index is 0), longer ones truncated.
    """
    ques_ix = np.zeros(max_token, np.int64)
    cleaned = re.sub(
        r"([.,'!?\"()*#:;])",
        '',
        ques['question'].lower()
    )
    tokens = cleaned.replace('-', ' ').replace('/', ' ').split()
    for ix, word in enumerate(tokens[:max_token]):
        ques_ix[ix] = token_to_ix.get(word, token_to_ix['UNK'])
    return ques_ix
def get_score(self, occur):
    """VQA soft accuracy for an answer given `occur` annotator votes:
    0 -> 0.0, 1 -> 0.3, 2 -> 0.6, 3 -> 0.9, anything else -> 1.0."""
    return {0: .0, 1: .3, 2: .6, 3: .9}.get(occur, 1.)
def proc_ans(self, ans, ans_to_ix):
    """Build a soft target vector over the answer vocabulary for one sample.

    Tallies how many annotator answers normalize (via prep_ans) to each
    answer string, then writes a score per in-vocabulary answer:
    count / 10 for the KLD loss (10 annotators assumed — TODO confirm),
    or the stepped VQA accuracy score otherwise. Out-of-vocabulary
    answers are silently dropped.
    """
    ans_score = np.zeros(ans_to_ix.__len__(), np.float32)
    ans_prob_dict = {}
    # Count occurrences of each normalized answer string.
    for ans_ in ans['answers']:
        ans_proc = prep_ans(ans_['answer'])
        if ans_proc not in ans_prob_dict:
            ans_prob_dict[ans_proc] = 1
        else:
            ans_prob_dict[ans_proc] += 1
    if self.__C.LOSS_FUNC in ['kld']:
        # KLD target: empirical answer distribution.
        for ans_ in ans_prob_dict:
            if ans_ in ans_to_ix:
                ans_score[ans_to_ix[ans_]] = ans_prob_dict[ans_] / 10.
    else:
        # BCE-style target: stepped accuracy (0 / .3 / .6 / .9 / 1).
        for ans_ in ans_prob_dict:
            if ans_ in ans_to_ix:
                ans_score[ans_to_ix[ans_]] = self.get_score(ans_prob_dict[ans_])
    return ans_score
================================================
FILE: openvqa/models/ban/adapter.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Zhenwei Shao https://github.com/ParadoxZW
# --------------------------------------------------------
import torch.nn as nn
import torch
from openvqa.core.base_dataset import BaseAdapter
from openvqa.utils.make_mask import make_mask
class Adapter(BaseAdapter):
    """Feature adapter for the BAN model.

    BaseAdapter presumably invokes the `<dataset>_init` / `<dataset>_forward`
    pair matching the active dataset — TODO confirm against
    openvqa/core/base_dataset.py.
    """

    def __init__(self, __C):
        super(Adapter, self).__init__(__C)
        self.__C = __C

    def vqa_init(self, __C):
        # VQA: BAN consumes raw region features (no projection here).
        pass
        #self.frcn_linear = nn.Linear(__C.FEAT_SIZE['vqa']['FRCN_FEAT_SIZE'][1], __C.HIDDEN_SIZE)

    def gqa_init(self, __C):
        # GQA: optionally embed the 5-d bbox features and concatenate them
        # onto the region features before projecting to HIDDEN_SIZE.
        imgfeat_linear_size = __C.FEAT_SIZE['gqa']['FRCN_FEAT_SIZE'][1]
        if __C.USE_BBOX_FEAT:
            self.bbox_linear = nn.Linear(5, __C.BBOXFEAT_EMB_SIZE)
            imgfeat_linear_size += __C.BBOXFEAT_EMB_SIZE
        self.frcn_linear = nn.Linear(imgfeat_linear_size, __C.HIDDEN_SIZE)
        if __C.USE_AUX_FEAT:
            # NOTE(review): grid_linear is created here but gqa_forward
            # never uses grid features — confirm whether this is intended.
            self.grid_linear = nn.Linear(
                __C.FEAT_SIZE['gqa']['GRID_FEAT_SIZE'][1], __C.HIDDEN_SIZE)

    def clevr_init(self, __C):
        self.grid_linear = nn.Linear(__C.FEAT_SIZE['clevr']['GRID_FEAT_SIZE'][1], __C.HIDDEN_SIZE)

    def vqa_forward(self, feat_dict):
        frcn_feat = feat_dict['FRCN_FEAT']
        bbox_feat = feat_dict['BBOX_FEAT']
        # Mask marks all-zero (padded) regions.
        img_feat_mask = make_mask(frcn_feat)
        # img_feat = self.frcn_linear(frcn_feat)
        return frcn_feat, img_feat_mask

    def gqa_forward(self, feat_dict):
        frcn_feat = feat_dict['FRCN_FEAT']
        bbox_feat = feat_dict['BBOX_FEAT']
        grid_feat = feat_dict['GRID_FEAT']
        img_feat_mask = make_mask(frcn_feat)
        if self.__C.USE_BBOX_FEAT:
            bbox_feat = self.bbox_linear(bbox_feat)
            frcn_feat = torch.cat((frcn_feat, bbox_feat), dim=-1)
        img_feat = self.frcn_linear(frcn_feat)
        return img_feat, img_feat_mask

    def clevr_forward(self, feat_dict):
        grid_feat = feat_dict['GRID_FEAT']
        img_feat_mask = make_mask(grid_feat)
        img_feat = self.grid_linear(grid_feat)
        return img_feat, img_feat_mask
================================================
FILE: openvqa/models/ban/ban.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Zhenwei Shao https://github.com/ParadoxZW
# Based on the implementation of the paper "Bilinear Attention Networks", NeurIPS 2018 (https://github.com/jnhwkim/ban-vqa)
# --------------------------------------------------------
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.weight_norm import weight_norm
import torch, math
# ------------------------------
# ----- Weight Normal MLP ------
# ------------------------------
class MLP(nn.Module):
    """
    Simple class for non-linear fully connect network

    Builds [Dropout?] -> weight-normed Linear -> activation for each
    consecutive pair in `dims`. Dropout is added only when dropout_r > 0,
    and the activation (any nn.* class named by `act`) only when `act`
    is a non-empty string.
    """

    def __init__(self, dims, act='ReLU', dropout_r=0.0):
        super(MLP, self).__init__()
        layers = []
        for in_dim, out_dim in zip(dims[:-1], dims[1:]):
            if dropout_r > 0:
                layers.append(nn.Dropout(dropout_r))
            layers.append(weight_norm(nn.Linear(in_dim, out_dim), dim=None))
            if act != '':
                layers.append(getattr(nn, act)())
        self.mlp = nn.Sequential(*layers)

    def forward(self, x):
        return self.mlp(x)
# ------------------------------
# ------ Bilinear Connect ------
# ------------------------------
class BC(nn.Module):
    """
    Simple class for non-linear bilinear connect network

    Low-rank bilinear pooling between image features `v` and question
    features `q`. Two mutually exclusive modes:
      * atten=True  — attention-logit module: defines h_mat/h_bias/dropout
        and is used via `forward`.
      * atten=False — glimpse-fusion module: defines the avg-pool `p_net`
        and is used via `forward_with_weights`.
    Calling the method belonging to the other mode would raise
    AttributeError (the needed submodules are not created).
    """

    def __init__(self, __C, atten=False):
        super(BC, self).__init__()
        self.__C = __C
        # Project both modalities into the shared BA_HIDDEN_SIZE space.
        self.v_net = MLP([__C.IMG_FEAT_SIZE,
                          __C.BA_HIDDEN_SIZE], dropout_r=__C.DROPOUT_R)
        self.q_net = MLP([__C.HIDDEN_SIZE,
                          __C.BA_HIDDEN_SIZE], dropout_r=__C.DROPOUT_R)
        if not atten:
            # Avg-pool over K_TIMES-sized chunks; rescaled back to a sum
            # in forward_with_weights.
            self.p_net = nn.AvgPool1d(__C.K_TIMES, stride=__C.K_TIMES)
        else:
            self.dropout = nn.Dropout(__C.CLASSIFER_DROPOUT_R)  # attention
            # Per-glimpse low-rank bilinear weight and bias.
            self.h_mat = nn.Parameter(torch.Tensor(
                1, __C.GLIMPSE, 1, __C.BA_HIDDEN_SIZE).normal_())
            self.h_bias = nn.Parameter(
                torch.Tensor(1, __C.GLIMPSE, 1, 1).normal_())

    def forward(self, v, q):
        """Attention logits: returns b x GLIMPSE x v x q (atten=True only)."""
        # low-rank bilinear pooling using einsum
        v_ = self.dropout(self.v_net(v))
        q_ = self.q_net(q)
        logits = torch.einsum('xhyk,bvk,bqk->bhvq',
                              (self.h_mat, v_, q_)) + self.h_bias
        return logits  # b x h_out x v x q

    def forward_with_weights(self, v, q, w):
        """Fuse v and q under a fixed attention map w (b x v x q) -> b x d."""
        v_ = self.v_net(v)  # b x v x d
        q_ = self.q_net(q)  # b x q x d
        logits = torch.einsum('bvk,bvq,bqk->bk', (v_, w, q_))
        logits = logits.unsqueeze(1)  # b x 1 x d
        # AvgPool * K_TIMES == sum over each K_TIMES-chunk.
        logits = self.p_net(logits).squeeze(1) * self.__C.K_TIMES  # sum-pooling
        return logits
# ------------------------------
# -------- BiAttention ---------
# ------------------------------
class BiAttention(nn.Module):
    """Computes GLIMPSE bilinear attention maps between v and q.

    The logit module is a BC(atten=True) wrapped in weight_norm applied
    to its 'h_mat' parameter.
    """

    def __init__(self, __C):
        super(BiAttention, self).__init__()
        self.__C = __C
        self.logits = weight_norm(BC(__C, True), name='h_mat', dim=None)

    def forward(self, v, q, v_mask=True, logit=False, mask_with=-float('inf')):
        """Return (softmaxed maps, raw logits), or logits only when `logit`.

        With v_mask set, image positions whose feature vector is all zeros
        (padding) are filled with `mask_with` before the softmax. Note the
        softmax runs jointly over all (region, word) pairs per glimpse.
        """
        v_num = v.size(1)
        q_num = q.size(1)
        logits = self.logits(v, q)  # b x g x v x q
        if v_mask:
            mask = (0 == v.abs().sum(2)).unsqueeze(
                1).unsqueeze(3).expand(logits.size())
            # In-place fill on .data: intentionally bypasses autograd here.
            logits.data.masked_fill_(mask.data, mask_with)
        if not logit:
            p = nn.functional.softmax(
                logits.view(-1, self.__C.GLIMPSE, v_num * q_num), 2)
            return p.view(-1, self.__C.GLIMPSE, v_num, q_num), logits
        return logits
# ------------------------------
# - Bilinear Attention Network -
# ------------------------------
class BAN(nn.Module):
    """Bilinear Attention Network backbone.

    Runs GLIMPSE rounds of bilinear attention between question features
    `q` (b x l x HIDDEN_SIZE) and image features `v`; each glimpse's
    fused vector is projected and added back onto the question features
    (residual update). Returns the refined question features.
    """

    def __init__(self, __C):
        super(BAN, self).__init__()
        self.__C = __C
        self.BiAtt = BiAttention(__C)
        # One bilinear-fusion block and one question projection per glimpse.
        # (Fix: the original also built a `c_prj` list that was never
        # appended to a ModuleList nor used — dead code removed.)
        b_net = []
        q_prj = []
        for i in range(__C.GLIMPSE):
            b_net.append(BC(__C))
            q_prj.append(MLP([__C.HIDDEN_SIZE, __C.HIDDEN_SIZE], '', __C.DROPOUT_R))
        self.b_net = nn.ModuleList(b_net)
        self.q_prj = nn.ModuleList(q_prj)

    def forward(self, q, v):
        """Refine `q` with GLIMPSE bilinear-attention passes over `v`."""
        att, logits = self.BiAtt(v, q)  # b x g x v x q
        for g in range(self.__C.GLIMPSE):
            # Fuse v and q under the g-th attention map.
            bi_emb = self.b_net[g].forward_with_weights(
                v, q, att[:, g, :, :])  # b x l x h
            # Residual update of the question representation.
            q = self.q_prj[g](bi_emb.unsqueeze(1)) + q
        return q
================================================
FILE: openvqa/models/ban/model_cfgs.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Zhenwei Shao https://github.com/ParadoxZW
# --------------------------------------------------------
from openvqa.core.base_cfgs import BaseCfgs
class Cfgs(BaseCfgs):
    """BAN-specific hyper-parameters (extends the shared BaseCfgs)."""

    def __init__(self):
        super(Cfgs, self).__init__()
        self.IMG_FEAT_SIZE = 2048      # region feature dim fed to BC.v_net
        self.GLIMPSE = 8               # number of bilinear attention glimpses
        self.HIDDEN_SIZE = 1024        # question / fused hidden dim
        self.K_TIMES = 3               # low-rank pooling expansion factor
        self.BA_HIDDEN_SIZE = self.K_TIMES * self.HIDDEN_SIZE  # joint space dim
        self.DROPOUT_R = 0.2           # dropout inside the MLP projections
        self.CLASSIFER_DROPOUT_R = 0.5 # dropout in attention and classifier
        self.FLAT_OUT_SIZE = 2048      # classifier hidden width
================================================
FILE: openvqa/models/ban/net.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Zhenwei Shao https://github.com/ParadoxZW
# --------------------------------------------------------
from openvqa.utils.make_mask import make_mask
from openvqa.ops.fc import FC, MLP
from openvqa.ops.layer_norm import LayerNorm
from openvqa.models.ban.ban import BAN
from openvqa.models.ban.adapter import Adapter
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.weight_norm import weight_norm
import torch
# -------------------------
# ---- Main BAN Model ----
# -------------------------
class Net(nn.Module):
    """Full BAN model: GloVe+GRU question encoder, feature adapter, BAN
    backbone, and a weight-normalized two-layer classifier.

    forward(frcn_feat, grid_feat, bbox_feat, ques_ix) -> logits over the
    `answer_size` answer vocabulary.
    """

    def __init__(self, __C, pretrained_emb, token_size, answer_size):
        super(Net, self).__init__()
        self.__C = __C
        self.embedding = nn.Embedding(
            num_embeddings=token_size,
            embedding_dim=__C.WORD_EMBED_SIZE
        )
        # Loading the GloVe embedding weights
        if __C.USE_GLOVE:
            self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))
        self.rnn = nn.GRU(
            input_size=__C.WORD_EMBED_SIZE,
            hidden_size=__C.HIDDEN_SIZE,
            num_layers=1,
            batch_first=True
        )
        self.adapter = Adapter(__C)
        self.backbone = BAN(__C)
        # Classification layers
        # NOTE(review): 'classifer' is a typo, but renaming the attribute
        # would change state_dict keys and break existing checkpoints.
        layers = [
            weight_norm(nn.Linear(__C.HIDDEN_SIZE, __C.FLAT_OUT_SIZE), dim=None),
            nn.ReLU(),
            nn.Dropout(__C.CLASSIFER_DROPOUT_R, inplace=True),
            weight_norm(nn.Linear(__C.FLAT_OUT_SIZE, answer_size), dim=None)
        ]
        self.classifer = nn.Sequential(*layers)

    def forward(self, frcn_feat, grid_feat, bbox_feat, ques_ix):
        # Pre-process Language Feature
        # lang_feat_mask = make_mask(ques_ix.unsqueeze(2))
        lang_feat = self.embedding(ques_ix)
        # Per-token GRU hidden states (batch_first).
        lang_feat, _ = self.rnn(lang_feat)
        img_feat, _ = self.adapter(frcn_feat, grid_feat, bbox_feat)
        # Backbone Framework
        lang_feat = self.backbone(
            lang_feat,
            img_feat
        )
        # Classification layers: sum over question positions, then project.
        proj_feat = self.classifer(lang_feat.sum(1))
        return proj_feat
================================================
FILE: openvqa/models/butd/adapter.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Zhenwei Shao https://github.com/ParadoxZW
# --------------------------------------------------------
import torch.nn as nn
import torch
from openvqa.core.base_dataset import BaseAdapter
from openvqa.utils.make_mask import make_mask
class Adapter(BaseAdapter):
    """Feature adapter for the BUTD model (mirrors the BAN adapter).

    BaseAdapter presumably dispatches to the `<dataset>_init` /
    `<dataset>_forward` pair matching the active dataset — TODO confirm
    against openvqa/core/base_dataset.py.
    """

    def __init__(self, __C):
        super(Adapter, self).__init__(__C)
        self.__C = __C

    def vqa_init(self, __C):
        # VQA: BUTD consumes raw region features (no projection here).
        pass
        #self.frcn_linear = nn.Linear(__C.FEAT_SIZE['vqa']['FRCN_FEAT_SIZE'][1], __C.HIDDEN_SIZE)

    def gqa_init(self, __C):
        # GQA: optionally embed 5-d bbox features, concatenate, project.
        imgfeat_linear_size = __C.FEAT_SIZE['gqa']['FRCN_FEAT_SIZE'][1]
        if __C.USE_BBOX_FEAT:
            self.bbox_linear = nn.Linear(5, __C.BBOXFEAT_EMB_SIZE)
            imgfeat_linear_size += __C.BBOXFEAT_EMB_SIZE
        self.frcn_linear = nn.Linear(imgfeat_linear_size, __C.HIDDEN_SIZE)
        if __C.USE_AUX_FEAT:
            # NOTE(review): grid_linear is created here but gqa_forward
            # never uses grid features — confirm whether this is intended.
            self.grid_linear = nn.Linear(
                __C.FEAT_SIZE['gqa']['GRID_FEAT_SIZE'][1], __C.HIDDEN_SIZE)

    def clevr_init(self, __C):
        self.grid_linear = nn.Linear(
            __C.FEAT_SIZE['clevr']['GRID_FEAT_SIZE'][1], __C.HIDDEN_SIZE)

    def vqa_forward(self, feat_dict):
        frcn_feat = feat_dict['FRCN_FEAT']
        bbox_feat = feat_dict['BBOX_FEAT']
        # Mask marks all-zero (padded) regions.
        img_feat_mask = make_mask(frcn_feat)
        # img_feat = self.frcn_linear(frcn_feat)
        return frcn_feat, img_feat_mask

    def gqa_forward(self, feat_dict):
        frcn_feat = feat_dict['FRCN_FEAT']
        bbox_feat = feat_dict['BBOX_FEAT']
        grid_feat = feat_dict['GRID_FEAT']
        img_feat_mask = make_mask(frcn_feat)
        if self.__C.USE_BBOX_FEAT:
            bbox_feat = self.bbox_linear(bbox_feat)
            frcn_feat = torch.cat((frcn_feat, bbox_feat), dim=-1)
        img_feat = self.frcn_linear(frcn_feat)
        return img_feat, img_feat_mask

    def clevr_forward(self, feat_dict):
        grid_feat = feat_dict['GRID_FEAT']
        img_feat_mask = make_mask(grid_feat)
        img_feat = self.grid_linear(grid_feat)
        return img_feat, img_feat_mask
================================================
FILE: openvqa/models/butd/model_cfgs.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Zhenwei Shao https://github.com/ParadoxZW
# --------------------------------------------------------
from openvqa.core.base_cfgs import BaseCfgs
class Cfgs(BaseCfgs):
    """BUTD-specific hyper-parameters (extends the shared BaseCfgs)."""

    def __init__(self):
        super(Cfgs, self).__init__()
        self.IMG_FEAT_SIZE = 2048      # region feature dim fed to the TDA
        self.HIDDEN_SIZE = 512         # question / joint hidden dim
        self.DROPOUT_R = 0.2           # dropout inside the MLPs
        self.CLASSIFER_DROPOUT_R = 0.5 # dropout in the classifier head
        self.FLAT_OUT_SIZE = 1024      # classifier hidden width
================================================
FILE: openvqa/models/butd/net.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Zhenwei Shao https://github.com/ParadoxZW
# --------------------------------------------------------
from openvqa.utils.make_mask import make_mask
from openvqa.models.butd.tda import TDA
from openvqa.models.butd.adapter import Adapter
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.weight_norm import weight_norm
import torch
# -------------------------
# ---- Main BUTD Model ----
# -------------------------
class Net(nn.Module):
    """Full BUTD model: GloVe+LSTM question encoder, feature adapter,
    top-down attention (TDA) backbone, and a weight-normalized classifier.

    forward(frcn_feat, grid_feat, bbox_feat, ques_ix) -> logits over the
    `answer_size` answer vocabulary.
    """

    def __init__(self, __C, pretrained_emb, token_size, answer_size):
        super(Net, self).__init__()
        self.__C = __C
        self.embedding = nn.Embedding(
            num_embeddings=token_size,
            embedding_dim=__C.WORD_EMBED_SIZE
        )
        # Loading the GloVe embedding weights
        if __C.USE_GLOVE:
            self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))
        self.rnn = nn.LSTM(
            input_size=__C.WORD_EMBED_SIZE,
            hidden_size=__C.HIDDEN_SIZE,
            num_layers=1,
            batch_first=True
        )
        self.adapter = Adapter(__C)
        self.backbone = TDA(__C)
        # Classification layers
        # NOTE(review): 'classifer' typo kept — renaming would change
        # state_dict keys and break existing checkpoints.
        layers = [
            weight_norm(nn.Linear(__C.HIDDEN_SIZE,
                                  __C.FLAT_OUT_SIZE), dim=None),
            nn.ReLU(),
            nn.Dropout(__C.CLASSIFER_DROPOUT_R, inplace=True),
            weight_norm(nn.Linear(__C.FLAT_OUT_SIZE, answer_size), dim=None)
        ]
        self.classifer = nn.Sequential(*layers)

    def forward(self, frcn_feat, grid_feat, bbox_feat, ques_ix):
        # Pre-process Language Feature
        # lang_feat_mask = make_mask(ques_ix.unsqueeze(2))
        lang_feat = self.embedding(ques_ix)
        lang_feat, _ = self.rnn(lang_feat)
        img_feat, _ = self.adapter(frcn_feat, grid_feat, bbox_feat)
        # Backbone Framework — condition on the last LSTM time step only.
        joint_feat = self.backbone(
            lang_feat[:, -1],
            img_feat
        )
        # Classification layers
        proj_feat = self.classifer(joint_feat)
        return proj_feat
================================================
FILE: openvqa/models/butd/tda.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Zhenwei Shao https://github.com/ParadoxZW
# based on the implementation in https://github.com/hengyuan-hu/bottom-up-attention-vqa
# ELU is chosen as the activation function in non-linear layers due to
# the experiment results that indicate ELU is better than ReLU in BUTD model.
# --------------------------------------------------------
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.weight_norm import weight_norm
import torch
import math
# ------------------------------
# ----- Weight Normal MLP ------
# ------------------------------
class MLP(nn.Module):
    """
    class for non-linear fully connect network

    Builds [Dropout?] -> weight-normed Linear -> activation for each
    consecutive pair in `dims`. ELU is the default activation (chosen by
    the authors over ReLU for BUTD); pass act='' to omit activations, and
    dropout_r <= 0 to omit dropout.
    """

    def __init__(self, dims, act='ELU', dropout_r=0.0):
        super(MLP, self).__init__()
        layers = []
        for in_dim, out_dim in zip(dims[:-1], dims[1:]):
            if dropout_r > 0:
                layers.append(nn.Dropout(dropout_r))
            layers.append(weight_norm(nn.Linear(in_dim, out_dim), dim=None))
            if act != '':
                layers.append(getattr(nn, act)())
        self.mlp = nn.Sequential(*layers)

    def forward(self, x):
        return self.mlp(x)
# ------------------------------
# ---Top Down Attention Map ----
# ------------------------------
class AttnMap(nn.Module):
    '''
    implementation of top down attention

    Scores every image region against the single question vector and
    softmax-normalizes the scores over regions (dim 1).
    '''

    def __init__(self, __C):
        super(AttnMap, self).__init__()
        self.__C = __C
        self.linear_q = weight_norm(
            nn.Linear(__C.HIDDEN_SIZE, __C.HIDDEN_SIZE), dim=None)
        self.linear_v = weight_norm(
            nn.Linear(__C.IMG_FEAT_SIZE, __C.IMG_FEAT_SIZE), dim=None)
        # Scores the concatenated (region, question) pair.
        self.nonlinear = MLP(
            [__C.IMG_FEAT_SIZE + __C.HIDDEN_SIZE, __C.HIDDEN_SIZE], dropout_r=__C.DROPOUT_R)
        self.linear = weight_norm(nn.Linear(__C.HIDDEN_SIZE, 1), dim=None)

    def forward(self, q, v):
        """q: b x HIDDEN_SIZE; v: b x num_objs x IMG_FEAT_SIZE ->
        attention weights b x num_objs x 1."""
        v = self.linear_v(v)
        q = self.linear_q(q)
        logits = self.logits(q, v)
        w = nn.functional.softmax(logits, 1)
        return w

    def logits(self, q, v):
        """Unnormalized per-region attention scores."""
        num_objs = v.size(1)
        # Broadcast the question vector to every region slot.
        q = q.unsqueeze(1).repeat(1, num_objs, 1)
        vq = torch.cat((v, q), 2)
        joint_repr = self.nonlinear(vq)
        logits = self.linear(joint_repr)
        return logits
# ------------------------------
# ---- Attended Joint Map ------
# ------------------------------
class TDA(nn.Module):
    """Top-down attention fusion: attend over regions with the question,
    project both modalities to HIDDEN_SIZE, and fuse by elementwise
    product (the BUTD joint representation)."""

    def __init__(self, __C):
        super(TDA, self).__init__()
        self.__C = __C
        self.v_att = AttnMap(__C)
        self.q_net = MLP([__C.HIDDEN_SIZE, __C.HIDDEN_SIZE])
        self.v_net = MLP([__C.IMG_FEAT_SIZE, __C.HIDDEN_SIZE])

    def forward(self, q, v):
        """q: b x HIDDEN_SIZE; v: b x num_objs x IMG_FEAT_SIZE ->
        joint representation b x HIDDEN_SIZE."""
        att = self.v_att(q, v)
        # Attention-weighted sum over regions.
        atted_v = (att * v).sum(1)
        q_repr = self.q_net(q)
        v_repr = self.v_net(atted_v)
        joint_repr = q_repr * v_repr
        return joint_repr
================================================
FILE: openvqa/models/mcan/adapter.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
import torch.nn as nn
import torch
from openvqa.core.base_dataset import BaseAdapter
from openvqa.utils.make_mask import make_mask
class Adapter(BaseAdapter):
    """Feature adapter for the MCAN model.

    Projects region (and optionally bbox/grid) features into HIDDEN_SIZE
    per dataset. BaseAdapter presumably dispatches to the matching
    `<dataset>_init` / `<dataset>_forward` pair — TODO confirm against
    openvqa/core/base_dataset.py.
    """

    def __init__(self, __C):
        super(Adapter, self).__init__(__C)
        self.__C = __C

    def bbox_proc(self, bbox):
        # Append box area as a 5th channel to (x1, y1, x2, y2).
        area = (bbox[:, :, 2] - bbox[:, :, 0]) * (bbox[:, :, 3] - bbox[:, :, 1])
        return torch.cat((bbox, area.unsqueeze(2)), -1)

    def vqa_init(self, __C):
        imgfeat_linear_size = __C.FEAT_SIZE['vqa']['FRCN_FEAT_SIZE'][1]
        if __C.USE_BBOX_FEAT:
            self.bbox_linear = nn.Linear(5, __C.BBOXFEAT_EMB_SIZE)
            imgfeat_linear_size += __C.BBOXFEAT_EMB_SIZE
        self.frcn_linear = nn.Linear(imgfeat_linear_size, __C.HIDDEN_SIZE)

    def gqa_init(self, __C):
        imgfeat_linear_size = __C.FEAT_SIZE['gqa']['FRCN_FEAT_SIZE'][1]
        if __C.USE_BBOX_FEAT:
            self.bbox_linear = nn.Linear(5, __C.BBOXFEAT_EMB_SIZE)
            imgfeat_linear_size += __C.BBOXFEAT_EMB_SIZE
        self.frcn_linear = nn.Linear(imgfeat_linear_size, __C.HIDDEN_SIZE)
        if __C.USE_AUX_FEAT:
            self.grid_linear = nn.Linear(__C.FEAT_SIZE['gqa']['GRID_FEAT_SIZE'][1], __C.HIDDEN_SIZE)

    def clevr_init(self, __C):
        self.grid_linear = nn.Linear(__C.FEAT_SIZE['clevr']['GRID_FEAT_SIZE'][1], __C.HIDDEN_SIZE)

    def vqa_forward(self, feat_dict):
        frcn_feat = feat_dict['FRCN_FEAT']
        bbox_feat = feat_dict['BBOX_FEAT']
        # Mask marks all-zero (padded) regions.
        img_feat_mask = make_mask(frcn_feat)
        if self.__C.USE_BBOX_FEAT:
            bbox_feat = self.bbox_proc(bbox_feat)
            bbox_feat = self.bbox_linear(bbox_feat)
            frcn_feat = torch.cat((frcn_feat, bbox_feat), dim=-1)
        img_feat = self.frcn_linear(frcn_feat)
        return img_feat, img_feat_mask

    def gqa_forward(self, feat_dict):
        frcn_feat = feat_dict['FRCN_FEAT']
        bbox_feat = feat_dict['BBOX_FEAT']
        grid_feat = feat_dict['GRID_FEAT']
        img_feat_mask = make_mask(frcn_feat)
        if self.__C.USE_BBOX_FEAT:
            bbox_feat = self.bbox_proc(bbox_feat)
            bbox_feat = self.bbox_linear(bbox_feat)
            frcn_feat = torch.cat((frcn_feat, bbox_feat), dim=-1)
        img_feat = self.frcn_linear(frcn_feat)
        if self.__C.USE_AUX_FEAT:
            # Append projected grid features (and their mask) after regions.
            grid_feat_mask = make_mask(grid_feat)
            img_feat_mask = torch.cat((img_feat_mask, grid_feat_mask), dim=-1)
            grid_feat = self.grid_linear(grid_feat)
            img_feat = torch.cat((img_feat, grid_feat), dim=1)
        return img_feat, img_feat_mask

    def clevr_forward(self, feat_dict):
        grid_feat = feat_dict['GRID_FEAT']
        img_feat_mask = make_mask(grid_feat)
        img_feat = self.grid_linear(grid_feat)
        return img_feat, img_feat_mask
================================================
FILE: openvqa/models/mcan/mca.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
from openvqa.ops.fc import FC, MLP
from openvqa.ops.layer_norm import LayerNorm
import torch.nn as nn
import torch.nn.functional as F
import torch
import math
# ------------------------------
# ---- Multi-Head Attention ----
# ------------------------------
class MHAtt(nn.Module):
    """Multi-head scaled dot-product attention.

    Projects v/k/q to HIDDEN_SIZE, splits into MULTI_HEAD heads, applies
    scaled dot-product attention (with optional boolean mask filled to
    -1e9 before the softmax), and merges the heads through a final linear.
    """

    def __init__(self, __C):
        super(MHAtt, self).__init__()
        self.__C = __C
        self.linear_v = nn.Linear(__C.HIDDEN_SIZE, __C.HIDDEN_SIZE)
        self.linear_k = nn.Linear(__C.HIDDEN_SIZE, __C.HIDDEN_SIZE)
        self.linear_q = nn.Linear(__C.HIDDEN_SIZE, __C.HIDDEN_SIZE)
        self.linear_merge = nn.Linear(__C.HIDDEN_SIZE, __C.HIDDEN_SIZE)
        self.dropout = nn.Dropout(__C.DROPOUT_R)

    def forward(self, v, k, q, mask):
        n_batches = q.size(0)
        head_dim = int(self.__C.HIDDEN_SIZE / self.__C.MULTI_HEAD)

        def split_heads(proj, t):
            # (B, L, H) -> (B, heads, L, H/heads)
            return proj(t).view(
                n_batches, -1, self.__C.MULTI_HEAD, head_dim
            ).transpose(1, 2)

        v = split_heads(self.linear_v, v)
        k = split_heads(self.linear_k, k)
        q = split_heads(self.linear_q, q)

        atted = self.att(v, k, q, mask)
        # (B, heads, L, H/heads) -> (B, L, H)
        atted = atted.transpose(1, 2).contiguous().view(
            n_batches, -1, self.__C.HIDDEN_SIZE
        )
        return self.linear_merge(atted)

    def att(self, value, key, query, mask):
        """Scaled dot-product attention over the last two dims."""
        d_k = query.size(-1)
        scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
        if mask is not None:
            scores = scores.masked_fill(mask, -1e9)
        att_map = self.dropout(F.softmax(scores, dim=-1))
        return torch.matmul(att_map, value)
# ---------------------------
# ---- Feed Forward Nets ----
# ---------------------------
class FFN(nn.Module):
    """Position-wise feed-forward block: HIDDEN_SIZE -> FF_SIZE -> 
    HIDDEN_SIZE with ReLU and dropout (via the shared ops.fc.MLP)."""

    def __init__(self, __C):
        super(FFN, self).__init__()
        self.mlp = MLP(
            in_size=__C.HIDDEN_SIZE,
            mid_size=__C.FF_SIZE,
            out_size=__C.HIDDEN_SIZE,
            dropout_r=__C.DROPOUT_R,
            use_relu=True
        )

    def forward(self, x):
        return self.mlp(x)
# ------------------------
# ---- Self Attention ----
# ------------------------
class SA(nn.Module):
    """Transformer-style self-attention block with post-norm residuals:
    y = LN(y + MHAtt(y)) then y = LN(y + FFN(y))."""

    def __init__(self, __C):
        super(SA, self).__init__()
        self.mhatt = MHAtt(__C)
        self.ffn = FFN(__C)
        self.dropout1 = nn.Dropout(__C.DROPOUT_R)
        self.norm1 = LayerNorm(__C.HIDDEN_SIZE)
        self.dropout2 = nn.Dropout(__C.DROPOUT_R)
        self.norm2 = LayerNorm(__C.HIDDEN_SIZE)

    def forward(self, y, y_mask):
        # Self-attention sub-layer (residual + post-LayerNorm).
        y = self.norm1(y + self.dropout1(
            self.mhatt(y, y, y, y_mask)
        ))
        # Feed-forward sub-layer.
        y = self.norm2(y + self.dropout2(
            self.ffn(y)
        ))
        return y
# -------------------------------
# ---- Self Guided Attention ----
# -------------------------------
class SGA(nn.Module):
    """Self-guided attention block: self-attention over `x`, then
    cross-attention from `x` (queries) to `y` (keys/values), then a
    feed-forward sub-layer — each with residual + post-LayerNorm."""

    def __init__(self, __C):
        super(SGA, self).__init__()
        self.mhatt1 = MHAtt(__C)
        self.mhatt2 = MHAtt(__C)
        self.ffn = FFN(__C)
        self.dropout1 = nn.Dropout(__C.DROPOUT_R)
        self.norm1 = LayerNorm(__C.HIDDEN_SIZE)
        self.dropout2 = nn.Dropout(__C.DROPOUT_R)
        self.norm2 = LayerNorm(__C.HIDDEN_SIZE)
        self.dropout3 = nn.Dropout(__C.DROPOUT_R)
        self.norm3 = LayerNorm(__C.HIDDEN_SIZE)

    def forward(self, x, y, x_mask, y_mask):
        # Self-attention over x.
        x = self.norm1(x + self.dropout1(
            self.mhatt1(v=x, k=x, q=x, mask=x_mask)
        ))
        # Cross-attention: x attends to y.
        x = self.norm2(x + self.dropout2(
            self.mhatt2(v=y, k=y, q=x, mask=y_mask)
        ))
        # Feed-forward sub-layer.
        x = self.norm3(x + self.dropout3(
            self.ffn(x)
        ))
        return x
# ------------------------------------------------
# ---- MCA Layers Cascaded by Encoder-Decoder ----
# ------------------------------------------------
class MCA_ED(nn.Module):
    """Encoder-decoder cascade of MCA layers: LAYER SA blocks encode the
    question `y`; LAYER SGA blocks then refine the image features `x`,
    each attending to the fully encoded question."""

    def __init__(self, __C):
        super(MCA_ED, self).__init__()
        self.enc_list = nn.ModuleList([SA(__C) for _ in range(__C.LAYER)])
        self.dec_list = nn.ModuleList([SGA(__C) for _ in range(__C.LAYER)])

    def forward(self, y, x, y_mask, x_mask):
        # Get encoder last hidden vector
        for enc in self.enc_list:
            y = enc(y, y_mask)
        # Input encoder last hidden vector
        # And obtain decoder last hidden vectors
        for dec in self.dec_list:
            x = dec(x, y, x_mask, y_mask)
        return y, x
================================================
FILE: openvqa/models/mcan/model_cfgs.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
from openvqa.core.base_cfgs import BaseCfgs
class Cfgs(BaseCfgs):
    """MCAN-specific hyper-parameters (extends the shared BaseCfgs)."""

    def __init__(self):
        super(Cfgs, self).__init__()
        self.LAYER = 6                 # number of SA / SGA layers
        self.HIDDEN_SIZE = 512         # transformer hidden dim
        self.BBOXFEAT_EMB_SIZE = 2048  # bbox embedding width (adapter)
        self.FF_SIZE = 2048            # feed-forward inner width
        self.MULTI_HEAD = 8            # attention heads
        self.DROPOUT_R = 0.1           # dropout rate throughout
        self.FLAT_MLP_SIZE = 512       # AttFlat scoring MLP width
        self.FLAT_GLIMPSES = 1         # AttFlat glimpse count
        self.FLAT_OUT_SIZE = 1024      # flattened fused vector width
        self.USE_AUX_FEAT = False      # append grid features (GQA)
        self.USE_BBOX_FEAT = False     # embed bbox features in adapter
        self.BBOX_NORMALIZE = True     # normalize bboxes in the loader
================================================
FILE: openvqa/models/mcan/net.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
from openvqa.utils.make_mask import make_mask
from openvqa.ops.fc import FC, MLP
from openvqa.ops.layer_norm import LayerNorm
from openvqa.models.mcan.mca import MCA_ED
from openvqa.models.mcan.adapter import Adapter
import torch.nn as nn
import torch.nn.functional as F
import torch
# ------------------------------
# ---- Flatten the sequence ----
# ------------------------------
class AttFlat(nn.Module):
    """Attentional sequence flattening: scores every position with a small
    MLP (FLAT_GLIMPSES score columns), masks padded positions, softmaxes
    over positions, and merges the glimpse-weighted sums into a
    FLAT_OUT_SIZE vector."""

    def __init__(self, __C):
        super(AttFlat, self).__init__()
        self.__C = __C
        self.mlp = MLP(
            in_size=__C.HIDDEN_SIZE,
            mid_size=__C.FLAT_MLP_SIZE,
            out_size=__C.FLAT_GLIMPSES,
            dropout_r=__C.DROPOUT_R,
            use_relu=True
        )
        self.linear_merge = nn.Linear(
            __C.HIDDEN_SIZE * __C.FLAT_GLIMPSES,
            __C.FLAT_OUT_SIZE
        )

    def forward(self, x, x_mask):
        att = self.mlp(x)
        # Exclude padded positions from the softmax.
        att = att.masked_fill(
            x_mask.squeeze(1).squeeze(1).unsqueeze(2),
            -1e9
        )
        att = F.softmax(att, dim=1)
        # One weighted sum per glimpse, concatenated.
        att_list = []
        for i in range(self.__C.FLAT_GLIMPSES):
            att_list.append(
                torch.sum(att[:, :, i: i + 1] * x, dim=1)
            )
        x_atted = torch.cat(att_list, dim=1)
        x_atted = self.linear_merge(x_atted)
        return x_atted
# -------------------------
# ---- Main MCAN Model ----
# -------------------------
class Net(nn.Module):
    """Full MCAN model: GloVe+LSTM question encoder, feature adapter,
    MCA_ED encoder-decoder backbone, AttFlat pooling for each modality,
    and a LayerNorm + linear classifier over the summed fused vectors.

    forward(frcn_feat, grid_feat, bbox_feat, ques_ix) -> logits over the
    `answer_size` answer vocabulary.
    """

    def __init__(self, __C, pretrained_emb, token_size, answer_size):
        super(Net, self).__init__()
        self.__C = __C
        self.embedding = nn.Embedding(
            num_embeddings=token_size,
            embedding_dim=__C.WORD_EMBED_SIZE
        )
        # Loading the GloVe embedding weights
        if __C.USE_GLOVE:
            self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))
        self.lstm = nn.LSTM(
            input_size=__C.WORD_EMBED_SIZE,
            hidden_size=__C.HIDDEN_SIZE,
            num_layers=1,
            batch_first=True
        )
        self.adapter = Adapter(__C)
        self.backbone = MCA_ED(__C)
        # Flatten to vector
        self.attflat_img = AttFlat(__C)
        self.attflat_lang = AttFlat(__C)
        # Classification layers
        self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)
        self.proj = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)

    def forward(self, frcn_feat, grid_feat, bbox_feat, ques_ix):
        # Pre-process Language Feature — mask padded (id 0) tokens.
        lang_feat_mask = make_mask(ques_ix.unsqueeze(2))
        lang_feat = self.embedding(ques_ix)
        lang_feat, _ = self.lstm(lang_feat)
        img_feat, img_feat_mask = self.adapter(frcn_feat, grid_feat, bbox_feat)
        # Backbone Framework
        lang_feat, img_feat = self.backbone(
            lang_feat,
            img_feat,
            lang_feat_mask,
            img_feat_mask
        )
        # Flatten to vector
        lang_feat = self.attflat_lang(
            lang_feat,
            lang_feat_mask
        )
        img_feat = self.attflat_img(
            img_feat,
            img_feat_mask
        )
        # Classification layers — fuse by addition, normalize, project.
        proj_feat = lang_feat + img_feat
        proj_feat = self.proj_norm(proj_feat)
        proj_feat = self.proj(proj_feat)
        return proj_feat
================================================
FILE: openvqa/models/mfb/adapter.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Pengbing Gao https://github.com/nbgao
# --------------------------------------------------------
import torch.nn as nn
import torch
import torch.nn.functional as F
from openvqa.core.base_dataset import BaseAdapter
from openvqa.utils.make_mask import make_mask
class Adapter(BaseAdapter):
    """Feature adapter for the MFB/MFH models.

    Unlike the BAN/BUTD adapters, the GQA projections here are built
    unconditionally (no USE_BBOX_FEAT / USE_AUX_FEAT switches).
    BaseAdapter presumably dispatches to the `<dataset>_init` /
    `<dataset>_forward` pair — TODO confirm in openvqa/core/base_dataset.py.
    """

    def __init__(self, __C):
        super(Adapter, self).__init__(__C)
        self.__C = __C

    def vqa_init(self, __C):
        # NOTE(review): frcn_linear is created but vqa_forward returns the
        # raw features without applying it — confirm whether intended.
        self.frcn_linear = nn.Linear(__C.FEAT_SIZE['vqa']['FRCN_FEAT_SIZE'][1], __C.HIDDEN_SIZE)

    def gqa_init(self, __C):
        # GQA: embed 5-d bboxes, concatenate with regions, project both
        # region and grid streams to HIDDEN_SIZE.
        self.bbox_linear = nn.Linear(5, __C.BBOXFEAT_EMB_SIZE)
        self.frcn_linear = nn.Linear(
            __C.FEAT_SIZE['gqa']['FRCN_FEAT_SIZE'][1] + __C.BBOXFEAT_EMB_SIZE,
            __C.HIDDEN_SIZE
        )
        self.grid_linear = nn.Linear(__C.FEAT_SIZE['gqa']['GRID_FEAT_SIZE'][1], __C.HIDDEN_SIZE)

    def clevr_init(self, __C):
        self.grid_linear = nn.Linear(__C.FEAT_SIZE['clevr']['GRID_FEAT_SIZE'][1], __C.HIDDEN_SIZE)

    def vqa_forward(self, feat_dict):
        frcn_feat = feat_dict['FRCN_FEAT']
        bbox_feat = feat_dict['BBOX_FEAT']
        # Mask marks all-zero (padded) regions.
        img_feat_mask = make_mask(frcn_feat)
        img_feat = frcn_feat
        #[N, C, W] = img_feat.shape
        #img_feat = F.normalize(img_feat.view(N, -1)).view(N, C, W)
        return img_feat, img_feat_mask

    def gqa_forward(self, feat_dict):
        frcn_feat = feat_dict['FRCN_FEAT']
        bbox_feat = feat_dict['BBOX_FEAT']
        grid_feat = feat_dict['GRID_FEAT']
        # Combined mask: region positions followed by grid positions.
        img_feat_mask = torch.cat((make_mask(frcn_feat), make_mask(grid_feat)), dim=-1)
        bbox_feat = self.bbox_linear(bbox_feat)
        frcn_feat = torch.cat((frcn_feat, bbox_feat), dim=-1)
        frcn_feat = self.frcn_linear(frcn_feat)
        grid_feat = self.grid_linear(grid_feat)
        img_feat = torch.cat((frcn_feat, grid_feat), dim=1)
        return img_feat, img_feat_mask

    def clevr_forward(self, feat_dict):
        grid_feat = feat_dict['GRID_FEAT']
        img_feat_mask = make_mask(grid_feat)
        img_feat = self.grid_linear(grid_feat)
        return img_feat, img_feat_mask
================================================
FILE: openvqa/models/mfb/mfb.py
================================================
# --------------------------------------------------------
# OpenVQA
# Licensed under The MIT License [see LICENSE for details]
# Written by Pengbing Gao https://github.com/nbgao
# --------------------------------------------------------
from openvqa.ops.fc import MLP
import torch
import torch.nn as nn
import torch.nn.functional as F
# -------------------------------------------------------------
# ---- Multi-Modal High-order Bilinear Pooling Co-Attention ----
# -------------------------------------------------------------
class MFB(nn.Module):
    """Multi-modal Factorized Bilinear pooling unit.

    Projects both inputs to a K*O space, fuses them elementwise, sum-pools
    over K-sized chunks, then applies signed square-root ("power
    normalization") followed by L2 normalization. `exp_out` is returned so
    higher-order variants can chain units by feeding it back as `exp_in`
    (with is_first=False).
    """

    def __init__(self, __C, img_feat_size, ques_feat_size, is_first):
        super(MFB, self).__init__()
        self.__C = __C
        self.is_first = is_first
        self.proj_i = nn.Linear(img_feat_size, __C.MFB_K * __C.MFB_O)
        self.proj_q = nn.Linear(ques_feat_size, __C.MFB_K * __C.MFB_O)
        self.dropout = nn.Dropout(__C.DROPOUT_R)
        # AvgPool * MFB_K (in forward) == sum-pool over each K-chunk.
        self.pool = nn.AvgPool1d(__C.MFB_K, stride=__C.MFB_K)

    def forward(self, img_feat, ques_feat, exp_in=1):
        '''
            img_feat.size() -> (N, C, img_feat_size)    C = 1 or 100
            ques_feat.size() -> (N, 1, ques_feat_size)
            z.size() -> (N, C, MFB_O)
            exp_out.size() -> (N, C, K*O)
        '''
        batch_size = img_feat.shape[0]
        img_feat = self.proj_i(img_feat)                # (N, C, K*O)
        ques_feat = self.proj_q(ques_feat)              # (N, 1, K*O)
        # Broadcasted elementwise fusion.
        exp_out = img_feat * ques_feat                  # (N, C, K*O)
        exp_out = self.dropout(exp_out) if self.is_first else self.dropout(exp_out * exp_in)  # (N, C, K*O)
        z = self.pool(exp_out) * self.__C.MFB_K         # (N, C, O)
        # Signed square-root (power normalization).
        z = torch.sqrt(F.relu(z)) - torch.sqrt(F.relu(-z))
        z = F.normalize(z.view(batch_size, -1))         # (N, C*O)
        z = z.view(batch_size, -1, self.__C.MFB_O)      # (N, C, O)
        return z, exp_out
class QAtt(nn.Module):
    """Question self-attention: scores each time step with a small MLP
    (Q_GLIMPSES score columns), softmaxes over time, and concatenates the
    per-glimpse weighted sums."""

    def __init__(self, __C):
        super(QAtt, self).__init__()
        self.__C = __C
        self.mlp = MLP(
            in_size=__C.LSTM_OUT_SIZE,
            mid_size=__C.HIDDEN_SIZE,
            out_size=__C.Q_GLIMPSES,
            dropout_r=__C.DROPOUT_R,
            use_relu=True
        )

    def forward(self, ques_feat):
        '''
            ques_feat.size() -> (N, T, LSTM_OUT_SIZE)
            qatt_feat.size() -> (N, LSTM_OUT_SIZE * Q_GLIMPSES)
        '''
        qatt_maps = self.mlp(ques_feat)                 # (N, T, Q_GLIMPSES)
        # Normalize attention over the time dimension.
        qatt_maps = F.softmax(qatt_maps, dim=1)         # (N, T, Q_GLIMPSES)
        qatt_feat_list = []
        for i in range(self.__C.Q_GLIMPSES):
            mask = qatt_maps[:, :, i:i + 1]             # (N, T, 1)
            mask = mask * ques_feat                     # (N, T, LSTM_OUT_SIZE)
            mask = torch.sum(mask, dim=1)               # (N, LSTM_OUT_SIZE)
            qatt_feat_list.append(mask)
        qatt_feat = torch.cat(qatt_feat_list, dim=1)    # (N, LSTM_OUT_SIZE*Q_GLIMPSES)
        return qatt_feat
class IAtt(nn.Module):
    """Question-guided image attention: fuses image regions with the attended
    question via MFB, scores each region, and returns I_GLIMPSES weighted
    sums of the (dropout-regularized) image features."""

    def __init__(self, __C, img_feat_size, ques_att_feat_size):
        super(IAtt, self).__init__()
        self.__C = __C
        self.dropout = nn.Dropout(__C.DROPOUT_R)
        self.mfb = MFB(__C, img_feat_size, ques_att_feat_size, True)
        self.mlp = MLP(
            in_size=__C.MFB_O,
            mid_size=__C.HIDDEN_SIZE,
            out_size=__C.I_GLIMPSES,
            dropout_r=__C.DROPOUT_R,
            use_relu=True
        )

    def forward(self, img_feat, ques_att_feat):
        """img_feat: (N, C, FRCN_FEAT_SIZE),
        ques_att_feat: (N, LSTM_OUT_SIZE * Q_GLIMPSES) ->
        (N, FRCN_FEAT_SIZE * I_GLIMPSES)."""
        q = ques_att_feat.unsqueeze(1)       # (N, 1, LSTM_OUT_SIZE*Q_GLIMPSES)
        dropped = self.dropout(img_feat)     # regularized regions are reused below
        z, _ = self.mfb(dropped, q)          # (N, C, O)
        att = F.softmax(self.mlp(z), dim=1)  # (N, C, I_GLIMPSES)
        glimpses = [
            torch.sum(att[:, :, g:g + 1] * dropped, dim=1)  # (N, FRCN_FEAT_SIZE)
            for g in range(self.__C.I_GLIMPSES)
        ]
        return torch.cat(glimpses, dim=1)
class CoAtt(nn.Module):
    """Co-attention head: attend the question, attend the image with it, then
    fuse the two with one MFB unit (MFB) or two chained units (MFH)."""

    def __init__(self, __C):
        super(CoAtt, self).__init__()
        self.__C = __C
        img_feat_size = __C.FEAT_SIZE[__C.DATASET]['FRCN_FEAT_SIZE'][1]
        img_att_feat_size = img_feat_size * __C.I_GLIMPSES
        ques_att_feat_size = __C.LSTM_OUT_SIZE * __C.Q_GLIMPSES
        self.q_att = QAtt(__C)
        self.i_att = IAtt(__C, img_feat_size, ques_att_feat_size)
        if self.__C.HIGH_ORDER:  # MFH: two chained fusion units
            self.mfh1 = MFB(__C, img_att_feat_size, ques_att_feat_size, True)
            self.mfh2 = MFB(__C, img_att_feat_size, ques_att_feat_size, False)
        else:  # MFB: a single fusion unit
            self.mfb = MFB(__C, img_att_feat_size, ques_att_feat_size, True)

    def forward(self, img_feat, ques_feat):
        """img_feat: (N, C, FRCN_FEAT_SIZE), ques_feat: (N, T, LSTM_OUT_SIZE).
        Returns (N, 2*O) for MFH, (N, O) for MFB."""
        ques = self.q_att(ques_feat)                 # (N, LSTM_OUT_SIZE*Q_GLIMPSES)
        fused = self.i_att(img_feat, ques)           # (N, FRCN_FEAT_SIZE*I_GLIMPSES)
        fused = fused.unsqueeze(1)
        ques = ques.unsqueeze(1)
        if self.__C.HIGH_ORDER:
            # Second unit consumes the first unit's expanded output (exp1).
            z1, exp1 = self.mfh1(fused, ques)        # z1: (N, 1, O)
            z2, _ = self.mfh2(fused, ques, exp1)     # z2: (N, 1, O)
            return torch.cat((z1.squeeze(1), z2.squeeze(1)), 1)
        z, _ = self.mfb(fused, ques)                 # (N, 1, O)
        return z.squeeze(1)
================================================
FILE: openvqa/models/mfb/model_cfgs.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Gao Pengbing https://github.com/nbgao
# --------------------------------------------------------
from openvqa.core.base_cfgs import BaseCfgs
class Cfgs(BaseCfgs):
    # Hyper-parameter container for the MFB/MFH models; values set here
    # extend/override the shared BaseCfgs defaults.
    def __init__(self):
        super(Cfgs, self).__init__()
        # False -> single MFB fusion unit; True -> MFH (two chained units).
        self.HIGH_ORDER = False
        self.HIDDEN_SIZE = 512
        # Factorized bilinear pooling: factor count K and output dimension O.
        self.MFB_K = 5
        self.MFB_O = 1000
        self.LSTM_OUT_SIZE = 1024
        self.DROPOUT_R = 0.1
        # Attention glimpses over image regions / question words.
        self.I_GLIMPSES = 2
        self.Q_GLIMPSES = 2
================================================
FILE: openvqa/models/mfb/net.py
================================================
# --------------------------------------------------------
# OpenVQA
# Licensed under The MIT License [see LICENSE for details]
# Written by Pengbing Gao https://github.com/nbgao
# --------------------------------------------------------
from openvqa.models.mfb.mfb import CoAtt
from openvqa.models.mfb.adapter import Adapter
import torch
import torch.nn as nn
# -------------------------------------------------------
# ---- Main MFB/MFH model with Co-Attention Learning ----
# -------------------------------------------------------
class Net(nn.Module):
    """MFB/MFH VQA network: adapter for visual features, LSTM question
    encoder, co-attention backbone, and an answer classifier."""

    def __init__(self, __C, pretrained_emb, token_size, answer_size):
        super(Net, self).__init__()
        self.__C = __C
        self.adapter = Adapter(__C)
        self.embedding = nn.Embedding(
            num_embeddings=token_size,
            embedding_dim=__C.WORD_EMBED_SIZE
        )
        # Initialize word vectors from GloVe when configured.
        if __C.USE_GLOVE:
            self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))
        self.lstm = nn.LSTM(
            input_size=__C.WORD_EMBED_SIZE,
            hidden_size=__C.LSTM_OUT_SIZE,
            num_layers=1,
            batch_first=True
        )
        self.dropout = nn.Dropout(__C.DROPOUT_R)
        self.dropout_lstm = nn.Dropout(__C.DROPOUT_R)
        self.backbone = CoAtt(__C)
        # MFH concatenates two MFB outputs, hence the doubled classifier input.
        if __C.HIGH_ORDER:
            self.proj = nn.Linear(2 * __C.MFB_O, answer_size)
        else:
            self.proj = nn.Linear(__C.MFB_O, answer_size)

    def forward(self, frcn_feat, grid_feat, bbox_feat, ques_ix):
        img_feat, _ = self.adapter(frcn_feat, grid_feat, bbox_feat)  # (N, C, FRCN_FEAT_SIZE)
        # Question encoding: embed -> dropout -> LSTM -> dropout.
        q = self.dropout(self.embedding(ques_ix))  # (N, T, WORD_EMBED_SIZE)
        q, _ = self.lstm(q)                        # (N, T, LSTM_OUT_SIZE)
        q = self.dropout_lstm(q)
        fused = self.backbone(img_feat, q)         # MFH: (N, 2*O) / MFB: (N, O)
        return self.proj(fused)                    # (N, answer_size)
================================================
FILE: openvqa/models/mmnasnet/adapter.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Zhenwei Shao https://github.com/ParadoxZW
# --------------------------------------------------------
import torch.nn as nn
import torch
from openvqa.core.base_dataset import BaseAdapter
from openvqa.utils.make_mask import make_mask
class Adapter(BaseAdapter):
    """MMNasNet feature adapter.

    Projects dataset-specific visual features to HIDDEN_SIZE and derives
    pairwise geometric relation features from the raw bounding boxes for the
    relational self-attention (RSA) ops.
    """

    def __init__(self, __C):
        super(Adapter, self).__init__(__C)
        self.__C = __C

    def relation_embedding(self, f_g):
        """Pairwise box-geometry features.

        f_g: [bs, n_obj, 4] raw boxes as (x_min, y_min, x_max, y_max).
        Returns [bs, n_obj, n_obj, 4]: (log |dx|/w, log |dy|/h, log w-ratio,
        log h-ratio) for every ordered pair of boxes.
        """
        x_min, y_min, x_max, y_max = torch.chunk(f_g, 4, dim=2)  # [bs, n_obj, 1]
        cx = (x_min + x_max) * 0.5  # box centers
        cy = (y_min + y_max) * 0.5
        w = (x_max - x_min) + 1.    # +1 avoids log(0) on degenerate boxes
        h = (y_max - y_min) + 1.
        delta_x = cx - cx.transpose(-1, -2)
        # Clamp before log so coincident centers stay finite.
        delta_x = torch.clamp(torch.abs(delta_x / w), min=1e-3)
        delta_x = torch.log(delta_x)  # [bs, n_obj, n_obj]
        delta_y = cy - cy.transpose(-1, -2)
        delta_y = torch.clamp(torch.abs(delta_y / h), min=1e-3)
        delta_y = torch.log(delta_y)  # [bs, n_obj, n_obj]
        delta_w = torch.log(w / w.transpose(-1, -2))  # [bs, n_obj, n_obj]
        delta_h = torch.log(h / h.transpose(-1, -2))  # [bs, n_obj, n_obj]
        # Stack the four relation channels: [bs, n_obj, n_obj, 4].
        return torch.stack((delta_x, delta_y, delta_w, delta_h), dim=-1)

    def vqa_init(self, __C):
        imgfeat_linear_size = __C.FEAT_SIZE['vqa']['FRCN_FEAT_SIZE'][1]
        if __C.USE_BBOX_FEAT:
            self.bbox_linear = nn.Linear(5, __C.BBOXFEAT_EMB_SIZE)
            imgfeat_linear_size += __C.BBOXFEAT_EMB_SIZE
        self.frcn_linear = nn.Linear(imgfeat_linear_size, __C.HIDDEN_SIZE)

    def gqa_init(self, __C):
        imgfeat_linear_size = __C.FEAT_SIZE['gqa']['FRCN_FEAT_SIZE'][1]
        if __C.USE_BBOX_FEAT:
            self.bbox_linear = nn.Linear(5, __C.BBOXFEAT_EMB_SIZE)
            imgfeat_linear_size += __C.BBOXFEAT_EMB_SIZE
        self.frcn_linear = nn.Linear(imgfeat_linear_size, __C.HIDDEN_SIZE)
        if __C.USE_AUX_FEAT:
            self.grid_linear = nn.Linear(__C.FEAT_SIZE['gqa']['GRID_FEAT_SIZE'][1], __C.HIDDEN_SIZE)

    def clevr_init(self, __C):
        self.grid_linear = nn.Linear(__C.FEAT_SIZE['clevr']['GRID_FEAT_SIZE'][1], __C.HIDDEN_SIZE)

    def vqa_forward(self, feat_dict):
        frcn_feat = feat_dict['FRCN_FEAT']
        bbox_feat = feat_dict['BBOX_FEAT']
        img_feat_mask = make_mask(frcn_feat)
        # BUGFIX: derive the relation embedding from the raw box coordinates
        # BEFORE bbox_feat is (optionally) replaced by its learned embedding.
        # Previously, with USE_BBOX_FEAT on, relation_embedding received the
        # BBOXFEAT_EMB_SIZE-dim embedding instead of 4-d coordinates.
        rel_embed = self.relation_embedding(bbox_feat)
        if self.__C.USE_BBOX_FEAT:
            bbox_feat = self.bbox_proc(bbox_feat)
            bbox_feat = self.bbox_linear(bbox_feat)
            frcn_feat = torch.cat((frcn_feat, bbox_feat), dim=-1)
        img_feat = self.frcn_linear(frcn_feat)
        return img_feat, rel_embed, img_feat_mask

    def gqa_forward(self, feat_dict):
        frcn_feat = feat_dict['FRCN_FEAT']
        bbox_feat = feat_dict['BBOX_FEAT']
        grid_feat = feat_dict['GRID_FEAT']
        img_feat_mask = make_mask(frcn_feat)
        # BUGFIX: compute the relation embedding from the raw boxes before the
        # optional learned bbox embedding replaces them (see vqa_forward).
        # NOTE(review): relation_embedding 4-way-chunks its input; confirm the
        # GQA BBOX_FEAT layout really is (x_min, y_min, x_max, y_max).
        rel_embed = self.relation_embedding(bbox_feat)
        if self.__C.USE_BBOX_FEAT:
            bbox_feat = self.bbox_linear(bbox_feat)
            frcn_feat = torch.cat((frcn_feat, bbox_feat), dim=-1)
        img_feat = self.frcn_linear(frcn_feat)
        if self.__C.USE_AUX_FEAT:
            grid_feat_mask = make_mask(grid_feat)
            img_feat_mask = torch.cat((img_feat_mask, grid_feat_mask), dim=-1)
            grid_feat = self.grid_linear(grid_feat)
            # NOTE(review): grid tokens are appended to img_feat but rel_embed
            # only covers the detected objects; verify RSA ops tolerate the
            # longer sequence before enabling USE_AUX_FEAT.
            img_feat = torch.cat((img_feat, grid_feat), dim=1)
        return img_feat, rel_embed, img_feat_mask

    def clevr_forward(self, feat_dict):
        # BUGFIX: the original referenced an undefined `bbox_feat` here and
        # always crashed with a NameError. CLEVR provides no bounding boxes,
        # so the relation embedding required by the RSA ops cannot be built;
        # fail with a clear, actionable error instead.
        raise NotImplementedError(
            'MMNasNet requires bounding-box features for its relational '
            'self-attention; the CLEVR grid features do not provide them.'
        )
================================================
FILE: openvqa/models/mmnasnet/model_cfgs.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Zhenwei Shao https://github.com/ParadoxZW
# --------------------------------------------------------
from openvqa.core.base_cfgs import BaseCfgs
class Cfgs(BaseCfgs):
    # Hyper-parameter container for MMNasNet; values set here extend/override
    # the shared BaseCfgs defaults.
    def __init__(self):
        super(Cfgs, self).__init__()
        # Searched layer sequences for the encoder (question) and decoder
        # (image) stacks; names map to op classes in nasnet.py.
        self.ARCH = {
            'enc': ['SA', 'SA', 'SA', 'SA', 'FFN', 'FFN', 'FFN', 'FFN', 'SA', 'FFN', 'FFN', 'FFN'],
            'dec': ['GA', 'GA', 'FFN', 'FFN', 'GA', 'FFN', 'RSA', 'GA', 'FFN', 'GA', 'RSA', 'FFN', 'RSA', 'SA', 'FFN', 'RSA', 'GA', 'FFN']
        }
        self.HIDDEN_SIZE = 512
        self.BBOXFEAT_EMB_SIZE = 2048
        self.FF_SIZE = 2048
        self.MULTI_HEAD = 8
        self.DROPOUT_R = 0.1
        # Attention-flatten head used to pool sequences into single vectors.
        self.FLAT_MLP_SIZE = 512
        self.FLAT_GLIMPSES = 1
        self.FLAT_OUT_SIZE = 1024
        self.USE_AUX_FEAT = False
        self.USE_BBOX_FEAT = False
        # Relational attention: per-head base dim and relation feature size.
        self.REL_HBASE = 64
        self.REL_SIZE = 64
================================================
FILE: openvqa/models/mmnasnet/nasnet.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Zhenwei Shao https://github.com/ParadoxZW
# --------------------------------------------------------
from openvqa.ops.fc import FC, MLP
from openvqa.ops.layer_norm import LayerNorm
import torch.nn as nn
import torch.nn.functional as F
import torch
import math
# ------------------------------
# --- Operations and Modules ---
# ------------------------------
class RelMHAtt(nn.Module):
def __init__(self, __C):
super(RelMHAtt, self).__init__()
self.__C = __C
self.HBASE = __C.REL_HBASE
self.HHEAD = int(__C.HIDDEN_SIZE / __C.REL_HBASE)
self.linear_v = nn.Linear(__C.HIDDEN_SIZE, __C.HIDDEN_SIZE)
self.linear_k = nn.Linear(__C.HIDDEN_SIZE, __C.HIDDEN_SIZE)
self.linear_q = nn.Linear(__C.HIDDEN_SIZE, __C.HIDDEN_SIZE)
self.linear_merge = nn.Linear(__C.HIDDEN_SIZE, __C.HIDDEN_SIZE)
self.linear_r = nn.Linear(__C.REL_SIZE, self.HHEAD, bias=True)
self.dropout = nn.Dropout(__C.DROPOUT_R)
self.relu = nn.ReLU(inplace=True)
def forward(self, v, k, q, mask=None, rel_embed=None):
assert rel_embed is not None
n_batches = q.size(0)
v = self.linear_v(v).view(n_batches, -1, self.HHEAD,
self.HBASE).transpose(1, 2)
k = self.linear_k(k).view(n_batches, -1, self.HHEAD,
self.HBASE).transpose(1, 2)
q = self.linear_q(q).view(n_batches, -1, self.HHEAD,
self.HBASE).transpose(1, 2)
r = self.relu(self.linear_r(rel_embed)).permute(0, 3, 1, 2)
d_k = q.size(-1)
scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(d_k)
scores = torch.log(torch.clamp(r, min=1e-6)) + scores
if mask is not None:
scores = scores.masked_fill(mask, -1e9)
att_map = F.softmax(scores, dim=-1)
att_map = self.dropout(att_map)
atted = torch.matmul(att_map, v)
atted = atted.transpose(1, 2).contiguous().view(
n_batches, -1, self.__C.HIDDEN_SIZE)
atted = self.linear_merge(atted)
return atted
class MHAtt(nn.Module):
def __init__(self, __C):
super(MHAtt, self).__init__()
self.__C = __C
self.linear_v = nn.Linear(__C.HIDDEN_SIZE, __C.HIDDEN_SIZE)
self.linear_k = nn.Linear(__C.HIDDEN_SIZE, __C.HIDDEN_SIZE)
self.linear_q = nn.Linear(__C.HIDDEN_SIZE, __C.HIDDEN_SIZE)
self.linear_merge = nn.Linear(__C.HIDDEN_SIZE, __C.HIDDEN_SIZE)
self.dropout = nn.Dropout(__C.DROPOUT_R)
def forward(self, v, k, q, mask):
n_batches = q.size(0)
v = self.linear_v(v).view(
n_batches,
-1,
self.__C.MULTI_HEAD,
int(self.__C.HIDDEN_SIZE / self.__C.MULTI_HEAD)
).transpose(1, 2)
k = self.linear_k(k).view(
n_batches,
-1,
self.__C.MULTI_HEAD,
int(self.__C.HIDDEN_SIZE / self.__C.MULTI_HEAD)
).transpose(1, 2)
q = self.linear_q(q).view(
n_batches,
-1,
self.__C.MULTI_HEAD,
int(self.__C.HIDDEN_SIZE / self.__C.MULTI_HEAD)
).transpose(1, 2)
atted = self.att(v, k, q, mask)
atted = atted.transpose(1, 2).contiguous().view(
n_batches,
-1,
self.__C.HIDDEN_SIZE
)
atted = self.linear_merge(atted)
return atted
def att(self, value, key, query, mask):
d_k = query.size(-1)
scores = torch.matmul(
query, key.transpose(-2, -1)
) / math.sqrt(d_k)
if mask is not None:
scores = scores.masked_fill(mask, -1e9)
att_map = F.softmax(scores, dim=-1)
att_map = self.dropout(att_map)
return torch.matmul(att_map, value)
class FFN(nn.Module):
    """Position-wise feed-forward op with residual connection + LayerNorm."""

    def __init__(self, __C):
        super(FFN, self).__init__()
        self.mlp = MLP(
            in_size=__C.HIDDEN_SIZE,
            mid_size=__C.HIDDEN_SIZE * 4,
            out_size=__C.HIDDEN_SIZE,
            dropout_r=__C.DROPOUT_R,
            use_relu=True
        )
        self.dropout = nn.Dropout(__C.DROPOUT_R)
        self.norm = LayerNorm(__C.HIDDEN_SIZE)

    def forward(self, x, arg1, arg2, arg3, arg4):
        # Extra args keep the uniform op signature used by NAS_ED; unused here.
        residual = self.dropout(self.mlp(x))
        return self.norm(x + residual)
class SA(nn.Module):
    """Self-attention op (residual + LayerNorm around MHAtt on one stream)."""

    def __init__(self, __C, size=1024):
        super(SA, self).__init__()
        self.mhatt = MHAtt(__C)
        self.dropout = nn.Dropout(__C.DROPOUT_R)
        self.norm = LayerNorm(__C.HIDDEN_SIZE)

    def forward(self, y, arg1, y_mask, arg2, arg3):
        # Extra args keep the uniform op signature used by NAS_ED; unused here.
        attended = self.dropout(self.mhatt(y, y, y, y_mask))
        return self.norm(y + attended)
class RSA(nn.Module):
    """Relational self-attention op: MHAtt variant biased by geometric
    relation features, wrapped in residual + LayerNorm."""

    def __init__(self, __C, size=1024):
        super(RSA, self).__init__()
        self.mhatt = RelMHAtt(__C)
        self.dropout = nn.Dropout(__C.DROPOUT_R)
        self.norm = LayerNorm(__C.HIDDEN_SIZE)

    def forward(self, x, arg1, x_mask, arg2, rela):
        # Extra args keep the uniform op signature used by NAS_ED; unused here.
        attended = self.dropout(self.mhatt(x, x, x, x_mask, rela))
        return self.norm(x + attended)
class GA(nn.Module):
    """Guided-attention op: x attends over y (residual + LayerNorm)."""

    def __init__(self, __C):
        super(GA, self).__init__()
        self.mhatt = MHAtt(__C)
        self.dropout = nn.Dropout(__C.DROPOUT_R)
        self.norm = LayerNorm(__C.HIDDEN_SIZE)

    def forward(self, x, y, x_mask, y_mask, rela):
        # `rela` is accepted only for signature uniformity with RSA.
        attended = self.dropout(self.mhatt(v=y, k=y, q=x, mask=y_mask))
        return self.norm(x + attended)
# ------------------------------------------------
# --- Encoder-Decoder Architecture of MMNasNet ---
# ------------------------------------------------
class NAS_ED(nn.Module):
    """Searched encoder-decoder: stacks the ops named in __C.ARCH.

    The encoder processes the question stream (y); the decoder processes the
    image stream (x), attending over the encoded question.
    """

    # Explicit op-name -> class dispatch table. Replaces eval() on config
    # strings, which would execute arbitrary expressions from the config and
    # gave opaque NameErrors for typos; an unknown name now raises KeyError
    # naming the bad entry.
    OPS = {'SA': SA, 'RSA': RSA, 'GA': GA, 'FFN': FFN}

    def __init__(self, __C):
        super(NAS_ED, self).__init__()
        self.enc_list = nn.ModuleList(
            [self.OPS[op](__C) for op in __C.ARCH['enc']])
        self.dec_list = nn.ModuleList(
            [self.OPS[op](__C) for op in __C.ARCH['dec']])

    def forward(self, y, x, y_mask, x_mask, rela):
        # Encoder: question self-attention stack.
        for enc in self.enc_list:
            y = enc(y, None, y_mask, None, None)
        # Decoder: image stack, guided by the encoded question.
        for dec in self.dec_list:
            x = dec(x, y, x_mask, y_mask, rela)
        return y, x
================================================
FILE: openvqa/models/mmnasnet/net.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Zhenwei Shao https://github.com/ParadoxZW
# --------------------------------------------------------
from openvqa.utils.make_mask import make_mask
from openvqa.ops.fc import FC, MLP
from openvqa.ops.layer_norm import LayerNorm
from openvqa.models.mmnasnet.nasnet import NAS_ED
from openvqa.models.mmnasnet.adapter import Adapter
import torch.nn as nn
import torch.nn.functional as F
import torch
# ------------------------------
# ---- Flatten the sequence ----
# ------------------------------
class AttFlat(nn.Module):
def __init__(self, __C):
super(AttFlat, self).__init__()
self.__C = __C
self.mlp = MLP(
in_size=__C.HIDDEN_SIZE,
mid_size=__C.FLAT_MLP_SIZE,
out_size=__C.FLAT_GLIMPSES,
dropout_r=__C.DROPOUT_R,
use_relu=True
)
self.linear_merge = nn.Linear(
__C.HIDDEN_SIZE * __C.FLAT_GLIMPSES,
__C.FLAT_OUT_SIZE
)
def forward(self, x, x_mask):
att = self.mlp(x)
att = att.masked_fill(
x_mask.squeeze(1).squeeze(1).unsqueeze(2),
-1e9
)
att = F.softmax(att, dim=1)
att_list = []
for i in range(self.__C.FLAT_GLIMPSES):
att_list.append(
torch.sum(att[:, :, i: i + 1] * x, dim=1)
)
x_atted = torch.cat(att_list, dim=1)
x_atted = self.linear_merge(x_atted)
return x_atted
# -------------------------
# ---- Main MCAN Model ----
# -------------------------
class Net(nn.Module):
def __init__(self, __C, pretrained_emb, token_size, answer_size):
super(Net, self).__init__()
self.__C = __C
self.embedding = nn.Embedding(
num_embeddings=token_size,
embedding_dim=__C.WORD_EMBED_SIZE
)
# Loading the GloVe embedding weights
if __C.USE_GLOVE:
self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))
self.lstm = nn.LSTM(
input_size=__C.WORD_EMBED_SIZE,
hidden_size=__C.HIDDEN_SIZE,
num_layers=1,
batch_first=True
)
self.adapter = Adapter(__C)
self.backbone = NAS_ED(__C)
# Projection of relation embedding
self.linear_rel = nn.Linear(4, __C.REL_SIZE)
self.relu = nn.ReLU()
# Flatten to vector
self.attflat_img = AttFlat(__C)
self.attflat_lang = AttFlat(__C)
# Classification layers
self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)
self.proj = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)
def forward(self, frcn_feat, grid_feat, bbox_feat, ques_ix):
# Pre-process Language Feature
lang_feat_mask = make_mask(ques_ix.unsqueeze(2))
lang_feat = self.embedding(ques_ix)
lang_feat, _ = self.lstm(lang_feat)
img_feat, rel_embed, img_feat_mask = self.adapter(frcn_feat, grid_feat, bbox_feat)
rela = self.relu(self.linear_rel(rel_embed))
# Backbone Framework
lang_feat, img_feat = self.backbone(
lang_feat,
img_feat,
lang_feat_mask,
img_feat_mask,
rela
)
# Flatten to vector
lang_feat = self.attflat_lang(
lang_feat,
lang_feat_mask
)
img_feat = self.attflat_img(
img_feat,
img_feat_mask
)
# Classification layers
proj_feat = lang_feat + img_feat
proj_feat = self.proj_norm(proj_feat)
proj_feat = self.proj(proj_feat)
return proj_feat
================================================
FILE: openvqa/models/model_loader.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
from importlib import import_module
class ModelLoader:
    """Dynamically imports the `net` module of the model named by
    __C.MODEL_USE and exposes its Net class."""

    def __init__(self, __C):
        self.model_use = __C.MODEL_USE
        # NOTE: the 'moudle' spelling is kept — it is a public attribute name.
        self.model_moudle = import_module(
            'openvqa.models.{}.net'.format(self.model_use))

    def Net(self, __arg1, __arg2, __arg3, __arg4):
        # Forward the four constructor args (__C, pretrained_emb, token_size,
        # answer_size) to the selected model's Net.
        return self.model_moudle.Net(__arg1, __arg2, __arg3, __arg4)
class CfgLoader:
    """Dynamically imports the `model_cfgs` module of the named model."""

    def __init__(self, model_use):
        # NOTE: the 'moudle' spelling is kept — it is a public attribute name.
        self.cfg_moudle = import_module(
            'openvqa.models.{}.model_cfgs'.format(model_use))

    def load(self):
        """Instantiate and return the model's Cfgs object."""
        return self.cfg_moudle.Cfgs()
================================================
FILE: openvqa/ops/fc.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
import torch.nn as nn
import torch
class FC(nn.Module):
    """Fully-connected layer with optional ReLU activation and dropout."""

    def __init__(self, in_size, out_size, dropout_r=0., use_relu=True):
        super(FC, self).__init__()
        self.dropout_r = dropout_r
        self.use_relu = use_relu
        self.linear = nn.Linear(in_size, out_size)
        # Sub-modules are only created when actually enabled.
        if use_relu:
            self.relu = nn.ReLU(inplace=True)
        if dropout_r > 0:
            self.dropout = nn.Dropout(dropout_r)

    def forward(self, x):
        out = self.linear(x)
        if self.use_relu:
            out = self.relu(out)
        if self.dropout_r > 0:
            out = self.dropout(out)
        return out
class MLP(nn.Module):
    """Two-layer MLP: an FC block (optional ReLU/dropout) followed by a
    plain linear projection."""

    def __init__(self, in_size, mid_size, out_size, dropout_r=0., use_relu=True):
        super(MLP, self).__init__()
        self.fc = FC(in_size, mid_size, dropout_r=dropout_r, use_relu=use_relu)
        self.linear = nn.Linear(mid_size, out_size)

    def forward(self, x):
        hidden = self.fc(x)
        return self.linear(hidden)
================================================
FILE: openvqa/ops/layer_norm.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
import torch.nn as nn
import torch
class LayerNorm(nn.Module):
    """Layer normalization over the last dimension with learnable gain (a_2)
    and bias (b_2). Uses std (not variance) plus eps in the denominator."""

    def __init__(self, size, eps=1e-6):
        super(LayerNorm, self).__init__()
        self.eps = eps
        self.a_2 = nn.Parameter(torch.ones(size))
        self.b_2 = nn.Parameter(torch.zeros(size))

    def forward(self, x):
        mu = x.mean(-1, keepdim=True)
        sigma = x.std(-1, keepdim=True)
        normalized = (x - mu) / (sigma + self.eps)
        return self.a_2 * normalized + self.b_2
================================================
FILE: openvqa/utils/ans_punct.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# based on VQA Evaluation Code
# --------------------------------------------------------
import re
# Mapping from de-apostrophized (and otherwise mangled) answer tokens to their
# canonical contraction spelling, as used by the official VQA evaluation code.
contractions = {
    "aint": "ain't", "arent": "aren't", "cant": "can't", "couldve":
    "could've", "couldnt": "couldn't", "couldn'tve": "couldn't've",
    "couldnt've": "couldn't've", "didnt": "didn't", "doesnt":
    "doesn't", "dont": "don't", "hadnt": "hadn't", "hadnt've":
    "hadn't've", "hadn'tve": "hadn't've", "hasnt": "hasn't", "havent":
    "haven't", "hed": "he'd", "hed've": "he'd've", "he'dve":
    "he'd've", "hes": "he's", "howd": "how'd", "howll": "how'll",
    "hows": "how's", "Id've": "I'd've", "I'dve": "I'd've", "Im":
    "I'm", "Ive": "I've", "isnt": "isn't", "itd": "it'd", "itd've":
    "it'd've", "it'dve": "it'd've", "itll": "it'll", "let's": "let's",
    "maam": "ma'am", "mightnt": "mightn't", "mightnt've":
    "mightn't've", "mightn'tve": "mightn't've", "mightve": "might've",
    "mustnt": "mustn't", "mustve": "must've", "neednt": "needn't",
    "notve": "not've", "oclock": "o'clock", "oughtnt": "oughtn't",
    "ow's'at": "'ow's'at", "'ows'at": "'ow's'at", "'ow'sat":
    "'ow's'at", "shant": "shan't", "shed've": "she'd've", "she'dve":
    "she'd've", "she's": "she's", "shouldve": "should've", "shouldnt":
    "shouldn't", "shouldnt've": "shouldn't've", "shouldn'tve":
    "shouldn't've", "somebody'd": "somebodyd", "somebodyd've":
    "somebody'd've", "somebody'dve": "somebody'd've", "somebodyll":
    "somebody'll", "somebodys": "somebody's", "someoned": "someone'd",
    "someoned've": "someone'd've", "someone'dve": "someone'd've",
    "someonell": "someone'll", "someones": "someone's", "somethingd":
    "something'd", "somethingd've": "something'd've", "something'dve":
    "something'd've", "somethingll": "something'll", "thats":
    "that's", "thered": "there'd", "thered've": "there'd've",
    "there'dve": "there'd've", "therere": "there're", "theres":
    "there's", "theyd": "they'd", "theyd've": "they'd've", "they'dve":
    "they'd've", "theyll": "they'll", "theyre": "they're", "theyve":
    "they've", "twas": "'twas", "wasnt": "wasn't", "wed've":
    "we'd've", "we'dve": "we'd've", "weve": "we've", "werent":
    "weren't", "whatll": "what'll", "whatre": "what're", "whats":
    "what's", "whatve": "what've", "whens": "when's", "whered":
    "where'd", "wheres": "where's", "whereve": "where've", "whod":
    "who'd", "whod've": "who'd've", "who'dve": "who'd've", "wholl":
    "who'll", "whos": "who's", "whove": "who've", "whyll": "why'll",
    "whyre": "why're", "whys": "why's", "wont": "won't", "wouldve":
    "would've", "wouldnt": "wouldn't", "wouldnt've": "wouldn't've",
    "wouldn'tve": "wouldn't've", "yall": "y'all", "yall'll":
    "y'all'll", "y'allll": "y'all'll", "yall'd've": "y'all'd've",
    "y'alld've": "y'all'd've", "y'all'dve": "y'all'd've", "youd":
    "you'd", "youd've": "you'd've", "you'dve": "you'd've", "youll":
    "you'll", "youre": "you're", "youve": "you've"
}
# Spelled-out numbers are canonicalized to digits (VQA eval convention);
# 'none' maps to '0' as well.
manual_map = { 'none': '0',
              'zero': '0',
              'one': '1',
              'two': '2',
              'three': '3',
              'four': '4',
              'five': '5',
              'six': '6',
              'seven': '7',
              'eight': '8',
              'nine': '9',
              'ten': '10'}
# Articles are dropped entirely during answer normalization.
articles = ['a', 'an', 'the']
# Strips periods that are not part of a number.
# NOTE(review): '(?!<=\d)' is a negative LOOKAHEAD for the literal text '<='
# followed by a digit (always true here); it was presumably meant to be the
# lookbehind '(?<!\d)'. Kept as-is to stay consistent with the official VQA
# evaluation code, which has the same pattern.
period_strip = re.compile("(?!<=\d)(\.)(?!\d)")
# Detects commas used as thousands separators inside numbers.
comma_strip = re.compile("(\d)(\,)(\d)")
# Punctuation handled by process_punctuation below.
punct = [';', r"/", '[', ']', '"', '{', '}',
         '(', ')', '=', '+', '\\', '_', '-',
         '>', '<', '@', '`', ',', '?', '!']
def process_punctuation(inText):
    """Strip or space-replace punctuation in an answer string.

    Mirrors the official VQA evaluation behavior: a punctuation mark is
    removed outright when it touches whitespace (or when the text contains a
    digit-comma-digit pattern); otherwise it is replaced with a space so the
    neighbouring tokens stay separated. Periods not inside numbers are
    removed last.
    """
    outText = inText
    for p in punct:
        if (p + ' ' in inText or ' ' + p in inText) \
           or (re.search(comma_strip, inText) is not None):
            outText = outText.replace(p, '')
        else:
            outText = outText.replace(p, ' ')
    # BUGFIX: Pattern.sub()'s third positional argument is `count`, not
    # `flags`; passing re.UNICODE (== 32) here silently capped the number of
    # period replacements at 32.
    outText = period_strip.sub("", outText)
    return outText
def process_digit_article(inText):
    """Lower-case an answer, map number words to digits, drop articles, and
    restore contraction spellings — following the official VQA evaluation.
    """
    outText = []
    for word in inText.lower().split():
        # BUGFIX: use .get() instead of .setdefault(), which mutated the
        # shared module-level manual_map dict by inserting every unseen word
        # into it on each call.
        word = manual_map.get(word, word)
        if word not in articles:
            outText.append(word)
    # Second pass: canonicalize contraction spellings.
    for wordId, word in enumerate(outText):
        if word in contractions:
            outText[wordId] = contractions[word]
    return ' '.join(outText)
def prep_ans(answer):
    """Normalize an answer string for VQA accuracy evaluation."""
    # Punctuation handling first, then digit/article/contraction mapping.
    cleaned = process_digit_article(process_punctuation(answer))
    # Drop any commas that survived (e.g. thousands separators).
    return cleaned.replace(',', '')
================================================
FILE: openvqa/utils/feat_filter.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
def feat_filter(dataset, frcn_feat, grid_feat, bbox_feat):
    """Select the feature tensors a dataset actually uses.

    Args:
        dataset: one of 'vqa', 'gqa', 'clevr'.
        frcn_feat, grid_feat, bbox_feat: candidate feature tensors.

    Returns:
        dict keyed by 'FRCN_FEAT' / 'GRID_FEAT' / 'BBOX_FEAT' containing only
        the features the dataset consumes.

    Raises:
        ValueError: for an unknown dataset name. (Previously this called
        exit(-1), which terminated the process without any diagnostic.)
    """
    if dataset == 'vqa':
        return {'FRCN_FEAT': frcn_feat, 'BBOX_FEAT': bbox_feat}
    if dataset == 'gqa':
        return {'FRCN_FEAT': frcn_feat, 'GRID_FEAT': grid_feat, 'BBOX_FEAT': bbox_feat}
    if dataset == 'clevr':
        return {'GRID_FEAT': grid_feat}
    raise ValueError("feat_filter: unknown dataset '{}'".format(dataset))
================================================
FILE: openvqa/utils/make_mask.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
import torch
# Masking the sequence mask
def make_mask(feature):
    """Boolean padding mask: True where a feature vector is entirely zero.

    feature: (N, L, D) -> (N, 1, 1, L), broadcastable over attention scores.
    """
    vector_norm = torch.abs(feature).sum(dim=-1)  # (N, L)
    return (vector_norm == 0).unsqueeze(1).unsqueeze(2)
================================================
FILE: openvqa/utils/optim.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
import torch.optim as Optim
class WarmupOptimizer(object):
    """Wraps an optimizer with a stepwise warmup learning-rate schedule.

    Over the first (warmup_epoch + 1) epochs, the base LR is scaled by
    1/(w+1), 2/(w+1) and 3/(w+1) across the first three quarters of that
    span, then settles at lr_base.
    """

    def __init__(self, lr_base, optimizer, data_size, batch_size, warmup_epoch):
        self.optimizer = optimizer
        self._step = 0
        self.lr_base = lr_base
        self._rate = 0
        self.data_size = data_size
        self.batch_size = batch_size
        self.warmup_epoch = warmup_epoch

    def step(self):
        # Advance the schedule, push the new LR into every param group, then
        # delegate to the wrapped optimizer.
        self._step += 1
        self._rate = self.rate()
        for group in self.optimizer.param_groups:
            group['lr'] = self._rate
        self.optimizer.step()

    def zero_grad(self):
        self.optimizer.zero_grad()

    def rate(self, step=None):
        """Learning rate for `step` (defaults to the current step)."""
        if step is None:
            step = self._step
        # Total steps covered by the warmup window.
        warmup_steps = self.data_size / self.batch_size * (self.warmup_epoch + 1)
        scale = self.warmup_epoch + 1
        if step <= int(warmup_steps * 0.25):
            return self.lr_base * 1 / scale
        if step <= int(warmup_steps * 0.5):
            return self.lr_base * 2 / scale
        if step <= int(warmup_steps * 0.75):
            return self.lr_base * 3 / scale
        return self.lr_base
def get_optim(__C, model, data_size, lr_base=None):
    """Build a WarmupOptimizer around the torch optimizer named by __C.OPT.

    Args:
        __C: config with OPT (optimizer class name in torch.optim),
             OPT_PARAMS (kwargs dict), LR_BASE, BATCH_SIZE, WARMUP_EPOCH.
        model: model whose trainable parameters are optimized.
        data_size: number of training samples (drives the warmup schedule).
        lr_base: optional override for __C.LR_BASE.
    """
    if lr_base is None:
        lr_base = __C.LR_BASE
    params = filter(lambda p: p.requires_grad, model.parameters())
    # Resolve the optimizer class by name and pass OPT_PARAMS as keyword
    # arguments directly, instead of building and eval()-ing a source string
    # (which executed arbitrary config-provided expressions).
    optim_cls = getattr(Optim, __C.OPT)
    inner_optim = optim_cls(params, lr=0, **__C.OPT_PARAMS)
    return WarmupOptimizer(
        lr_base,
        inner_optim,
        data_size,
        __C.BATCH_SIZE,
        __C.WARMUP_EPOCH
    )
def adjust_lr(optim, decay_r):
    """Decay the schedule's base learning rate by the factor `decay_r`."""
    optim.lr_base = optim.lr_base * decay_r
================================================
FILE: requirements.txt
================================================
spacy >= 2.0.18
numpy >= 1.16.2
================================================
FILE: results/cache/.gitkeep
================================================
================================================
FILE: results/log/.gitkeep
================================================
================================================
FILE: results/pred/.gitkeep
================================================
================================================
FILE: results/result_test/.gitkeep
================================================
================================================
FILE: run.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
from openvqa.models.model_loader import CfgLoader
from utils.exec import Execution
import argparse, yaml
def parse_args():
    '''
    Parse input arguments
    '''
    parser = argparse.ArgumentParser(description='OpenVQA Args')
    # --- Required: run mode, model, dataset -------------------------------
    parser.add_argument('--RUN', dest='RUN_MODE',
                      choices=['train', 'val', 'test'],
                      help='{train, val, test}',
                      type=str, required=True)
    # NOTE(review): 'mem' appears in choices but not in the help text, and no
    # configs/<dataset>/mem.yml ships in the repo — confirm it is supported.
    parser.add_argument('--MODEL', dest='MODEL',
                      choices=[
                           'mcan_small',
                           'mcan_large',
                           'ban_4',
                           'ban_8',
                           'mfb',
                           'mfh',
                           'mem',
                           'butd',
                           'mmnasnet'
                           ]
                        ,
                      help='{'
                           'mcan_small,'
                           'mcan_large,'
                           'ban_4,'
                           'ban_8,'
                           'mfb,'
                           'mfh,'
                           'butd,'
                           'mmnasnet,'
                           '}'
                        ,
                      type=str, required=True)
    parser.add_argument('--DATASET', dest='DATASET',
                      choices=['vqa', 'gqa', 'clevr'],
                      help='{'
                           'vqa,'
                           'gqa,'
                           'clevr,'
                           '}'
                        ,
                      type=str, required=True)
    # --- Optional training configuration ----------------------------------
    parser.add_argument('--SPLIT', dest='TRAIN_SPLIT',
                      choices=['train', 'train+val', 'train+val+vg'],
                      help="set training split, "
                           "vqa: {'train', 'train+val', 'train+val+vg'}"
                           "gqa: {'train', 'train+val'}"
                           "clevr: {'train', 'train+val'}"
                        ,
                      type=str)
    parser.add_argument('--EVAL_EE', dest='EVAL_EVERY_EPOCH',
                      choices=['True', 'False'],
                      help='True: evaluate the val split when an epoch finished,'
                           'False: do not evaluate on local',
                      type=str)
    parser.add_argument('--SAVE_PRED', dest='TEST_SAVE_PRED',
                      choices=['True', 'False'],
                      help='True: save the prediction vectors,'
                           'False: do not save the prediction vectors',
                      type=str)
    parser.add_argument('--BS', dest='BATCH_SIZE',
                      help='batch size in training',
                      type=int)
    parser.add_argument('--GPU', dest='GPU',
                      help="gpu choose, eg.'0, 1, 2, ...'",
                      type=str)
    parser.add_argument('--SEED', dest='SEED',
                      help='fix random seed',
                      type=int)
    parser.add_argument('--VERSION', dest='VERSION',
                      help='version control',
                      type=str)
    # --- Checkpoint / resume options --------------------------------------
    parser.add_argument('--RESUME', dest='RESUME',
                      choices=['True', 'False'],
                      help='True: use checkpoint to resume training,'
                           'False: start training with random init',
                      type=str)
    parser.add_argument('--CKPT_V', dest='CKPT_VERSION',
                      help='checkpoint version',
                      type=str)
    parser.add_argument('--CKPT_E', dest='CKPT_EPOCH',
                      help='checkpoint epoch',
                      type=int)
    parser.add_argument('--CKPT_PATH', dest='CKPT_PATH',
                      help='load checkpoint path, we '
                           'recommend that you use '
                           'CKPT_VERSION and CKPT_EPOCH '
                           'instead, it will override'
                           'CKPT_VERSION and CKPT_EPOCH',
                      type=str)
    # --- Performance / logging options -------------------------------------
    parser.add_argument('--ACCU', dest='GRAD_ACCU_STEPS',
                      help='split batch to reduce gpu memory usage',
                      type=int)
    parser.add_argument('--NW', dest='NUM_WORKERS',
                      help='multithreaded loading to accelerate IO',
                      type=int)
    parser.add_argument('--PINM', dest='PIN_MEM',
                      choices=['True', 'False'],
                      help='True: use pin memory, False: not use pin memory',
                      type=str)
    parser.add_argument('--VERB', dest='VERBOSE',
                      choices=['True', 'False'],
                      help='True: verbose print, False: simple print',
                      type=str)
    args = parser.parse_args()
    return args
if __name__ == '__main__':
    args = parse_args()
    # Pick the YAML config matching the chosen dataset and model.
    cfg_file = "configs/{}/{}.yml".format(args.DATASET, args.MODEL)
    with open(cfg_file, 'r') as f:
        # BUGFIX: yaml.load() without an explicit Loader is deprecated and can
        # construct arbitrary Python objects; the config only needs plain YAML
        # types, so safe_load is both safer and warning-free.
        yaml_dict = yaml.safe_load(f)
    __C = CfgLoader(yaml_dict['MODEL_USE']).load()
    args = __C.str_to_bool(args)
    args_dict = __C.parse_to_dict(args)
    # CLI arguments take precedence over YAML values.
    args_dict = {**yaml_dict, **args_dict}
    __C.add_args(args_dict)
    __C.proc()
    print('Hyper Parameters:')
    print(__C)
    execution = Execution(__C)
    execution.run(__C.RUN_MODE)
================================================
FILE: utils/exec.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
import os, copy
from openvqa.datasets.dataset_loader import DatasetLoader
from utils.train_engine import train_engine
from utils.test_engine import test_engine
class Execution:
    """Top-level driver: loads the dataset(s) and dispatches a run.

    Builds the training/testing dataset from the cfgs object and, when
    per-epoch evaluation is enabled, a second dataset whose cfgs are a
    deep copy with RUN_MODE forced to 'val'.
    """

    def __init__(self, __C):
        self.__C = __C

        print('Loading dataset........')
        self.dataset = DatasetLoader(__C).DataSet()

        # If trigger the evaluation after every epoch:
        # clone the cfgs, switch to 'val', and load the validation split.
        self.dataset_eval = None
        if __C.EVAL_EVERY_EPOCH:
            __C_eval = copy.deepcopy(__C)
            setattr(__C_eval, 'RUN_MODE', 'val')

            print('Loading validation set for per-epoch evaluation........')
            self.dataset_eval = DatasetLoader(__C_eval).DataSet()

    def run(self, run_mode):
        """Dispatch one run: 'train', 'val' or 'test' (anything else aborts)."""
        cfg = self.__C
        if run_mode == 'train':
            # A fresh (non-resumed) training run starts with a clean log file.
            if cfg.RESUME is False:
                self.empty_log(cfg.VERSION)
            train_engine(cfg, self.dataset, self.dataset_eval)
        elif run_mode in ('val', 'test'):
            test_engine(cfg, self.dataset, validation=(run_mode == 'val'))
        else:
            exit(-1)

    def empty_log(self, version):
        """Delete this version's run log, if present, so logging starts fresh."""
        print('Initializing log file........')
        log_path = self.__C.LOG_PATH + '/log_run_' + version + '.txt'
        if os.path.exists(log_path):
            os.remove(log_path)
        print('Finished!')
        print('')
================================================
FILE: utils/proc_dict_gqa.py
================================================
# --------------------------------------------------------
# mcan-vqa (Deep Modular Co-Attention Networks)
# Licensed under The MIT License [see LICENSE for details]
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
import sys
sys.path.append('../')
from openvqa.utils.ans_punct import prep_ans
from openvqa.core.path_cfgs import PATH
import json, re
path = PATH()

# Pre-read the raw GQA question files, one JSON dict per split.
ques_dict_preread = {
    split: json.load(open(path.RAW_PATH['gqa'][split], 'r'))
    for split in ('train', 'val', 'testdev', 'test')
}

# Loading question word list: questions from every split contribute
# to the token vocabulary.
stat_ques_dict = {}
for split in ('train', 'val', 'testdev', 'test'):
    stat_ques_dict.update(ques_dict_preread[split])

# Only splits with public answers contribute to the answer vocabulary.
stat_ans_dict = {}
for split in ('train', 'val', 'testdev'):
    stat_ans_dict.update(ques_dict_preread[split])
def tokenize(stat_ques_dict):
    """Build the question-word vocabulary over every question.

    Returns:
        (token_to_ix, max_token): a word -> index map seeded with the
        special tokens PAD/UNK/CLS, and the length (in words) of the
        longest tokenized question.
    """
    token_to_ix = {'PAD': 0, 'UNK': 1, 'CLS': 2}
    max_token = 0
    for qid in stat_ques_dict:
        # Strip punctuation, then treat '-' and '/' as word separators.
        cleaned = re.sub(
            r"([.,'!?\"()*#:;])",
            '',
            stat_ques_dict[qid]['question'].lower()
        )
        words = cleaned.replace('-', ' ').replace('/', ' ').split()
        max_token = max(max_token, len(words))
        for word in words:
            # First occurrence gets the next free index.
            token_to_ix.setdefault(word, len(token_to_ix))
    return token_to_ix, max_token
def ans_stat(stat_ans_dict):
    """Build the GQA answer vocabulary (ans -> ix and its inverse).

    Answers are normalized with prep_ans and indexed in first-seen order.
    """
    ans_to_ix, ix_to_ans = {}, {}
    for qid in stat_ans_dict:
        ans = prep_ans(stat_ans_dict[qid]['answer'])
        if ans not in ans_to_ix:
            ix = len(ans_to_ix)
            ix_to_ans[ix] = ans
            ans_to_ix[ans] = ix
    return ans_to_ix, ix_to_ans
# Build the question and answer dictionaries for the GQA loader.
token_to_ix, max_token = tokenize(stat_ques_dict)
ans_to_ix, ix_to_ans = ans_stat(stat_ans_dict)

# Debugging aids:
# print(ans_to_ix)
# print(ix_to_ans)
# print(token_to_ix)
# print(token_to_ix.__len__())
# print(max_token)

# Use a context manager so the dump is flushed and the handle closed
# (the original json.dump(..., open(...)) left the file handle open).
with open('../openvqa/datasets/gqa/dicts.json', 'w') as fout:
    json.dump([ans_to_ix, ix_to_ans, token_to_ix, max_token], fout)
================================================
FILE: utils/proc_dict_vqa.py
================================================
# --------------------------------------------------------
# mcan-vqa (Deep Modular Co-Attention Networks)
# Licensed under The MIT License [see LICENSE for details]
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
import sys
sys.path.append('../')
from openvqa.utils.ans_punct import prep_ans
from openvqa.core.path_cfgs import PATH
import json
path = PATH()

# Loading answer word list: annotations from both the train and val
# files feed the answer frequency count.
stat_ans_list = []
for split in ('train-anno', 'val-anno'):
    stat_ans_list += json.load(open(path.RAW_PATH['vqa'][split], 'r'))['annotations']
def ans_stat(stat_ans_list, min_freq=8):
    """Build the VQA answer vocabulary from annotation entries.

    Counts the frequency of every preprocessed multiple-choice answer and
    keeps only answers that occur strictly more than `min_freq` times
    (default 8, matching the original hard-coded threshold). Surviving
    answers are indexed in first-seen order.

    Args:
        stat_ans_list: list of annotation dicts, each carrying a
            'multiple_choice_answer' field.
        min_freq: answers with frequency <= min_freq are dropped.

    Returns:
        (ans_to_ix, ix_to_ans)
    """
    # Frequency count over normalized answers (insertion order preserved).
    ans_freq_dict = {}
    for ans in stat_ans_list:
        ans_proc = prep_ans(ans['multiple_choice_answer'])
        ans_freq_dict[ans_proc] = ans_freq_dict.get(ans_proc, 0) + 1

    # Filter in place of the original copy-then-pop dance; iteration order
    # (and therefore index assignment) is identical: first-seen order.
    ans_to_ix = {}
    ix_to_ans = {}
    for ans, freq in ans_freq_dict.items():
        if freq > min_freq:
            ix = len(ans_to_ix)
            ix_to_ans[ix] = ans
            ans_to_ix[ans] = ix
    return ans_to_ix, ix_to_ans
# Build and persist the answer dictionaries for the VQA loader.
ans_to_ix, ix_to_ans = ans_stat(stat_ans_list)
print(ans_to_ix)
# print(ans_to_ix.__len__())

# Use a context manager so the dump is flushed and the handle closed
# (the original json.dump(..., open(...)) left the file handle open).
with open('../openvqa/datasets/vqa/answer_dict.json', 'w') as fout:
    json.dump([ans_to_ix, ix_to_ans], fout)
================================================
FILE: utils/test_engine.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
import os, json, torch, pickle
import numpy as np
import torch.nn as nn
import torch.utils.data as Data
from openvqa.models.model_loader import ModelLoader
from openvqa.datasets.dataset_loader import EvalLoader
# Evaluation
@torch.no_grad()
def test_engine(__C, dataset, state_dict=None, validation=False):
    """Run inference over `dataset` and forward predictions to the evaluator.

    Args:
        __C: global cfgs object (paths, eval batch size, GPU settings, ...).
        dataset: loaded dataset exposing data_size / token_size / ans_size /
            pretrained_emb and yielding (frcn, grid, bbox, ques, ans) tuples.
        state_dict: optional in-memory model weights (used for per-epoch
            evaluation during training); when None the weights are loaded
            from CKPT_PATH or from CKPTS_PATH/CKPT_VERSION/CKPT_EPOCH.
        validation: True -> write results under CACHE_PATH for validation
            scoring; False -> write under RESULT_PATH / PRED_PATH.
    """
    # Load parameters
    if __C.CKPT_PATH is not None:
        # An explicit checkpoint path overrides (CKPT_VERSION, CKPT_EPOCH).
        print('Warning: you are now using CKPT_PATH args, '
              'CKPT_VERSION and CKPT_EPOCH will not work')
        path = __C.CKPT_PATH
    else:
        path = __C.CKPTS_PATH + \
               '/ckpt_' + __C.CKPT_VERSION + \
               '/epoch' + str(__C.CKPT_EPOCH) + '.pkl'

    # val_ckpt_flag = False
    if state_dict is None:
        # val_ckpt_flag = True
        print('Loading ckpt from: {}'.format(path))
        state_dict = torch.load(path)['state_dict']
        print('Finish!')

        if __C.N_GPU > 1:
            # Multi-GPU: add the 'module.' prefix DataParallel expects.
            state_dict = ckpt_proc(state_dict)

    # Store the prediction list
    # qid_list = [ques['question_id'] for ques in dataset.ques_list]
    ans_ix_list = []  # argmax answer index per sample (batch-padded, see below)
    pred_list = []    # full prediction vectors, collected only if TEST_SAVE_PRED

    data_size = dataset.data_size
    token_size = dataset.token_size
    ans_size = dataset.ans_size
    pretrained_emb = dataset.pretrained_emb

    # Build the model selected by __C and load the checkpoint weights.
    net = ModelLoader(__C).Net(
        __C,
        pretrained_emb,
        token_size,
        ans_size
    )
    net.cuda()
    net.eval()

    if __C.N_GPU > 1:
        net = nn.DataParallel(net, device_ids=__C.DEVICES)

    net.load_state_dict(state_dict)

    # shuffle=False keeps sample order aligned with the dataset for scoring.
    dataloader = Data.DataLoader(
        dataset,
        batch_size=__C.EVAL_BATCH_SIZE,
        shuffle=False,
        num_workers=__C.NUM_WORKERS,
        pin_memory=__C.PIN_MEM
    )

    for step, (
            frcn_feat_iter,
            grid_feat_iter,
            bbox_feat_iter,
            ques_ix_iter,
            ans_iter
    ) in enumerate(dataloader):
        print("\rEvaluation: [step %4d/%4d]" % (
            step,
            int(data_size / __C.EVAL_BATCH_SIZE),
        ), end=' ')

        frcn_feat_iter = frcn_feat_iter.cuda()
        grid_feat_iter = grid_feat_iter.cuda()
        bbox_feat_iter = bbox_feat_iter.cuda()
        ques_ix_iter = ques_ix_iter.cuda()

        pred = net(
            frcn_feat_iter,
            grid_feat_iter,
            bbox_feat_iter,
            ques_ix_iter
        )
        pred_np = pred.cpu().data.numpy()
        pred_argmax = np.argmax(pred_np, axis=1)

        # Save the answer index
        # Pad a short final batch to EVAL_BATCH_SIZE with -1 so every
        # appended array has equal length and can be stacked/reshaped.
        if pred_argmax.shape[0] != __C.EVAL_BATCH_SIZE:
            pred_argmax = np.pad(
                pred_argmax,
                (0, __C.EVAL_BATCH_SIZE - pred_argmax.shape[0]),
                mode='constant',
                constant_values=-1
            )

        ans_ix_list.append(pred_argmax)

        # Save the whole prediction vector
        if __C.TEST_SAVE_PRED:
            if pred_np.shape[0] != __C.EVAL_BATCH_SIZE:
                pred_np = np.pad(
                    pred_np,
                    ((0, __C.EVAL_BATCH_SIZE - pred_np.shape[0]), (0, 0)),
                    mode='constant',
                    constant_values=-1
                )

            pred_list.append(pred_np)

    print('')
    # One answer index per sample; trailing -1 entries are padding from the
    # final batch (presumably skipped downstream — confirm in EvalLoader).
    ans_ix_list = np.array(ans_ix_list).reshape(-1)

    # Select output locations: validation results are cached, test results
    # go to the result/prediction directories.
    if validation:
        if __C.RUN_MODE not in ['train']:
            # standalone 'val' run: name the cache file after the checkpoint
            result_eval_file = __C.CACHE_PATH + '/result_run_' + __C.CKPT_VERSION
        else:
            # per-epoch eval during training: name it after this run version
            result_eval_file = __C.CACHE_PATH + '/result_run_' + __C.VERSION

    else:
        if __C.CKPT_PATH is not None:
            result_eval_file = __C.RESULT_PATH + '/result_run_' + __C.CKPT_VERSION
        else:
            result_eval_file = __C.RESULT_PATH + '/result_run_' + __C.CKPT_VERSION + '_epoch' + str(__C.CKPT_EPOCH)

    if __C.CKPT_PATH is not None:
        ensemble_file = __C.PRED_PATH + '/result_run_' + __C.CKPT_VERSION + '.pkl'
    else:
        ensemble_file = __C.PRED_PATH + '/result_run_' + __C.CKPT_VERSION + '_epoch' + str(__C.CKPT_EPOCH) + '.pkl'

    if __C.RUN_MODE not in ['train']:
        log_file = __C.LOG_PATH + '/log_run_' + __C.CKPT_VERSION + '.txt'
    else:
        log_file = __C.LOG_PATH + '/log_run_' + __C.VERSION + '.txt'

    EvalLoader(__C).eval(dataset, ans_ix_list, pred_list, result_eval_file, ensemble_file, log_file, validation)
def ckpt_proc(state_dict):
    """Prefix every key with 'module.' so a single-GPU checkpoint can be
    loaded into an nn.DataParallel-wrapped model."""
    return {'module.' + key: value for key, value in state_dict.items()}
================================================
FILE: utils/train_engine.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
import os, torch, datetime, shutil, time
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as Data
from openvqa.models.model_loader import ModelLoader
from openvqa.utils.optim import get_optim, adjust_lr
from utils.test_engine import test_engine, ckpt_proc
def train_engine(__C, dataset, dataset_eval=None):
    """Full training loop: builds the model and optimizer, iterates epochs,
    saves a checkpoint after every epoch, and optionally validates per epoch.

    Args:
        __C: global cfgs object (hyper-parameters, paths, GPU settings).
        dataset: training dataset exposing data_size / token_size /
            ans_size / pretrained_emb and yielding feature tuples.
        dataset_eval: optional validation dataset; when given, test_engine
            runs after every epoch on it.
    """
    data_size = dataset.data_size
    token_size = dataset.token_size
    ans_size = dataset.ans_size
    pretrained_emb = dataset.pretrained_emb

    # Build the model selected by __C and move it to GPU.
    net = ModelLoader(__C).Net(
        __C,
        pretrained_emb,
        token_size,
        ans_size
    )
    net.cuda()
    net.train()

    if __C.N_GPU > 1:
        net = nn.DataParallel(net, device_ids=__C.DEVICES)

    # Define Loss Function
    # NOTE(review): constructed via eval() from cfg strings; safe only as
    # long as LOSS_FUNC / LOSS_REDUCTION come from trusted config files.
    loss_fn = eval('torch.nn.' + __C.LOSS_FUNC_NAME_DICT[__C.LOSS_FUNC] + "(reduction='" + __C.LOSS_REDUCTION + "').cuda()")

    # Load checkpoint if resume training
    if __C.RESUME:
        print(' ========== Resume training')

        # CKPT_PATH overrides the (CKPT_VERSION, CKPT_EPOCH) pair.
        if __C.CKPT_PATH is not None:
            print('Warning: Now using CKPT_PATH args, '
                  'CKPT_VERSION and CKPT_EPOCH will not work')
            path = __C.CKPT_PATH
        else:
            path = __C.CKPTS_PATH + \
                   '/ckpt_' + __C.CKPT_VERSION + \
                   '/epoch' + str(__C.CKPT_EPOCH) + '.pkl'

        # Load the network parameters
        print('Loading ckpt from {}'.format(path))
        ckpt = torch.load(path)
        print('Finish!')
        if __C.N_GPU > 1:
            # Multi-GPU: checkpoint keys need the 'module.' prefix added.
            net.load_state_dict(ckpt_proc(ckpt['state_dict']))
        else:
            net.load_state_dict(ckpt['state_dict'])
        start_epoch = ckpt['epoch']

        # Load the optimizer paramters
        optim = get_optim(__C, net, data_size, ckpt['lr_base'])
        # Re-sync the schedule step counter with the resumed epoch count.
        optim._step = int(data_size / __C.BATCH_SIZE * start_epoch)
        optim.optimizer.load_state_dict(ckpt['optimizer'])

        # Make sure this version's checkpoint directory exists.
        if ('ckpt_' + __C.VERSION) not in os.listdir(__C.CKPTS_PATH):
            os.mkdir(__C.CKPTS_PATH + '/ckpt_' + __C.VERSION)

    else:
        if ('ckpt_' + __C.VERSION) not in os.listdir(__C.CKPTS_PATH):
            #shutil.rmtree(__C.CKPTS_PATH + '/ckpt_' + __C.VERSION)
            os.mkdir(__C.CKPTS_PATH + '/ckpt_' + __C.VERSION)

        optim = get_optim(__C, net, data_size)
        start_epoch = 0

    loss_sum = 0
    named_params = list(net.named_parameters())
    # Per-parameter accumulated gradient norms (reset at every epoch end).
    grad_norm = np.zeros(len(named_params))

    # Define multi-thread dataloader
    # if __C.SHUFFLE_MODE in ['external']:
    #     dataloader = Data.DataLoader(
    #         dataset,
    #         batch_size=__C.BATCH_SIZE,
    #         shuffle=False,
    #         num_workers=__C.NUM_WORKERS,
    #         pin_memory=__C.PIN_MEM,
    #         drop_last=True
    #     )
    # else:
    dataloader = Data.DataLoader(
        dataset,
        batch_size=__C.BATCH_SIZE,
        shuffle=True,
        num_workers=__C.NUM_WORKERS,
        pin_memory=__C.PIN_MEM,
        drop_last=True
    )

    # Dump the full hyper-parameter set at the top of the run log.
    logfile = open(
        __C.LOG_PATH +
        '/log_run_' + __C.VERSION + '.txt',
        'a+'
    )
    logfile.write(str(__C))
    logfile.close()

    # Training script
    for epoch in range(start_epoch, __C.MAX_EPOCH):

        # Save log to file
        logfile = open(
            __C.LOG_PATH +
            '/log_run_' + __C.VERSION + '.txt',
            'a+'
        )
        logfile.write(
            '=====================================\nnowTime: ' +
            datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') +
            '\n'
        )
        logfile.close()

        # Learning Rate Decay
        if epoch in __C.LR_DECAY_LIST:
            adjust_lr(optim, __C.LR_DECAY_R)

        # Externally shuffle data list
        # if __C.SHUFFLE_MODE == 'external':
        #     dataset.shuffle_list(dataset.ans_list)

        time_start = time.time()
        # Iteration
        for step, (
                frcn_feat_iter,
                grid_feat_iter,
                bbox_feat_iter,
                ques_ix_iter,
                ans_iter
        ) in enumerate(dataloader):

            optim.zero_grad()

            frcn_feat_iter = frcn_feat_iter.cuda()
            grid_feat_iter = grid_feat_iter.cuda()
            bbox_feat_iter = bbox_feat_iter.cuda()
            ques_ix_iter = ques_ix_iter.cuda()
            ans_iter = ans_iter.cuda()

            loss_tmp = 0
            # Gradient accumulation: split the batch into GRAD_ACCU_STEPS
            # sub-batches of SUB_BATCH_SIZE and backprop each separately.
            for accu_step in range(__C.GRAD_ACCU_STEPS):
                # NOTE(review): this re-init inside the loop means the
                # printed loss_tmp reflects only the LAST sub-batch, not
                # the whole batch — confirm whether that is intended.
                loss_tmp = 0

                sub_frcn_feat_iter = \
                    frcn_feat_iter[accu_step * __C.SUB_BATCH_SIZE:
                                   (accu_step + 1) * __C.SUB_BATCH_SIZE]
                sub_grid_feat_iter = \
                    grid_feat_iter[accu_step * __C.SUB_BATCH_SIZE:
                                   (accu_step + 1) * __C.SUB_BATCH_SIZE]
                sub_bbox_feat_iter = \
                    bbox_feat_iter[accu_step * __C.SUB_BATCH_SIZE:
                                   (accu_step + 1) * __C.SUB_BATCH_SIZE]
                sub_ques_ix_iter = \
                    ques_ix_iter[accu_step * __C.SUB_BATCH_SIZE:
                                 (accu_step + 1) * __C.SUB_BATCH_SIZE]
                sub_ans_iter = \
                    ans_iter[accu_step * __C.SUB_BATCH_SIZE:
                             (accu_step + 1) * __C.SUB_BATCH_SIZE]

                pred = net(
                    sub_frcn_feat_iter,
                    sub_grid_feat_iter,
                    sub_bbox_feat_iter,
                    sub_ques_ix_iter
                )

                # Apply the per-loss nonlinearity ('flat' flattens the
                # tensor; any other truthy entry names a torch.nn.functional
                # op applied along dim=1) to pred and/or the answer target.
                loss_item = [pred, sub_ans_iter]
                loss_nonlinear_list = __C.LOSS_FUNC_NONLINEAR[__C.LOSS_FUNC]
                for item_ix, loss_nonlinear in enumerate(loss_nonlinear_list):
                    if loss_nonlinear in ['flat']:
                        loss_item[item_ix] = loss_item[item_ix].view(-1)
                    elif loss_nonlinear:
                        loss_item[item_ix] = eval('F.' + loss_nonlinear + '(loss_item[item_ix], dim=1)')

                loss = loss_fn(loss_item[0], loss_item[1])
                if __C.LOSS_REDUCTION == 'mean':
                    # only mean-reduction needs be divided by grad_accu_steps
                    loss /= __C.GRAD_ACCU_STEPS
                loss.backward()

                # Multiply back so the logged loss is on the full-batch scale.
                loss_tmp += loss.cpu().data.numpy() * __C.GRAD_ACCU_STEPS
                loss_sum += loss.cpu().data.numpy() * __C.GRAD_ACCU_STEPS

            if __C.VERBOSE:
                if dataset_eval is not None:
                    mode_str = __C.SPLIT['train'] + '->' + __C.SPLIT['val']
                else:
                    mode_str = __C.SPLIT['train'] + '->' + __C.SPLIT['test']

                print("\r[Version %s][Model %s][Dataset %s][Epoch %2d][Step %4d/%4d][%s] Loss: %.4f, Lr: %.2e" % (
                    __C.VERSION,
                    __C.MODEL_USE,
                    __C.DATASET,
                    epoch + 1,
                    step,
                    int(data_size / __C.BATCH_SIZE),
                    mode_str,
                    loss_tmp / __C.SUB_BATCH_SIZE,
                    optim._rate
                ), end=' ')

            # Gradient norm clipping
            if __C.GRAD_NORM_CLIP > 0:
                nn.utils.clip_grad_norm_(
                    net.parameters(),
                    __C.GRAD_NORM_CLIP
                )

            # Save the gradient information
            for name in range(len(named_params)):
                norm_v = torch.norm(named_params[name][1].grad).cpu().data.numpy() \
                    if named_params[name][1].grad is not None else 0
                grad_norm[name] += norm_v * __C.GRAD_ACCU_STEPS
                # print('Param %-3s Name %-80s Grad_Norm %-20s'%
                #       (str(grad_wt),
                #        params[grad_wt][0],
                #        str(norm_v)))

            optim.step()

        time_end = time.time()
        elapse_time = time_end-time_start
        print('Finished in {}s'.format(int(elapse_time)))
        epoch_finish = epoch + 1

        # Save checkpoint
        # DataParallel wraps the model, so unwrap via .module before saving
        # to keep checkpoint keys GPU-count independent.
        if __C.N_GPU > 1:
            state = {
                'state_dict': net.module.state_dict(),
                'optimizer': optim.optimizer.state_dict(),
                'lr_base': optim.lr_base,
                'epoch': epoch_finish
            }
        else:
            state = {
                'state_dict': net.state_dict(),
                'optimizer': optim.optimizer.state_dict(),
                'lr_base': optim.lr_base,
                'epoch': epoch_finish
            }
        torch.save(
            state,
            __C.CKPTS_PATH +
            '/ckpt_' + __C.VERSION +
            '/epoch' + str(epoch_finish) +
            '.pkl'
        )

        # Logging
        # NOTE(review): 'elapse_time / step' divides by the last step index;
        # it raises ZeroDivisionError if an epoch has a single batch — and
        # it is off by one as a per-batch average. Confirm before relying on it.
        logfile = open(
            __C.LOG_PATH +
            '/log_run_' + __C.VERSION + '.txt',
            'a+'
        )
        logfile.write(
            'Epoch: ' + str(epoch_finish) +
            ', Loss: ' + str(loss_sum / data_size) +
            ', Lr: ' + str(optim._rate) + '\n' +
            'Elapsed time: ' + str(int(elapse_time)) +
            ', Speed(s/batch): ' + str(elapse_time / step) +
            '\n\n'
        )
        logfile.close()

        # Eval after every epoch
        if dataset_eval is not None:
            test_engine(
                __C,
                dataset_eval,
                state_dict=net.state_dict(),
                validation=True
            )

        # if self.__C.VERBOSE:
        #     logfile = open(
        #         self.__C.LOG_PATH +
        #         '/log_run_' + self.__C.VERSION + '.txt',
        #         'a+'
        #     )
        #     for name in range(len(named_params)):
        #         logfile.write(
        #             'Param %-3s Name %-80s Grad_Norm %-25s\n' % (
        #                 str(name),
        #                 named_params[name][0],
        #                 str(grad_norm[name] / data_size * self.__C.BATCH_SIZE)
        #             )
        #         )
        #     logfile.write('\n')
        #     logfile.close()

        # Reset the per-epoch accumulators.
        loss_sum = 0
        grad_norm = np.zeros(len(named_params))