Showing preview only (494K chars total). Download the full file or copy to clipboard to get everything.
Repository: MILVLG/openvqa
Branch: master
Commit: f8f9966f202d
Files: 95
Total size: 465.4 KB
Directory structure:
gitextract_8y0cfniw/
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── configs/
│ ├── clevr/
│ │ └── mcan_small.yml
│ ├── gqa/
│ │ ├── ban_4.yml
│ │ ├── ban_8.yml
│ │ ├── butd.yml
│ │ ├── mcan_large.yml
│ │ └── mcan_small.yml
│ └── vqa/
│ ├── ban_4.yml
│ ├── ban_8.yml
│ ├── butd.yml
│ ├── mcan_large.yml
│ ├── mcan_small.yml
│ ├── mfb.yml
│ ├── mfh.yml
│ ├── mmnasnet_large.yml
│ └── mmnasnet_small.yml
├── data/
│ ├── clevr/
│ │ ├── clevr_extract_feat.py
│ │ ├── feats/
│ │ │ └── .gitkeep
│ │ └── raw/
│ │ └── .gitkeep
│ ├── gqa/
│ │ ├── feats/
│ │ │ └── .gitkeep
│ │ ├── gqa_feat_preproc.py
│ │ └── raw/
│ │ └── .gitkeep
│ └── vqa/
│ ├── feats/
│ │ └── .gitkeep
│ └── raw/
│ └── .gitkeep
├── docs/
│ ├── Makefile
│ ├── _source/
│ │ ├── _static/
│ │ │ ├── custom.css
│ │ │ ├── mathjax_mathml.user.js
│ │ │ └── mathjax_wikipedia.user.js
│ │ ├── advanced/
│ │ │ ├── adding_model.md
│ │ │ └── contributing.md
│ │ ├── basic/
│ │ │ ├── getting_started.md
│ │ │ ├── install.md
│ │ │ └── model_zoo.md
│ │ ├── conf.py
│ │ └── index.rst
│ ├── _templates/
│ │ └── layout.html
│ ├── make.bat
│ ├── readme.md
│ └── requirements.txt
├── openvqa/
│ ├── core/
│ │ ├── base_cfgs.py
│ │ ├── base_dataset.py
│ │ └── path_cfgs.py
│ ├── datasets/
│ │ ├── clevr/
│ │ │ ├── clevr_loader.py
│ │ │ └── eval/
│ │ │ └── result_eval.py
│ │ ├── dataset_loader.py
│ │ ├── gqa/
│ │ │ ├── dicts.json
│ │ │ ├── eval/
│ │ │ │ ├── gqa_eval.py
│ │ │ │ └── result_eval.py
│ │ │ └── gqa_loader.py
│ │ └── vqa/
│ │ ├── answer_dict.json
│ │ ├── eval/
│ │ │ ├── result_eval.py
│ │ │ ├── vqa.py
│ │ │ └── vqaEval.py
│ │ └── vqa_loader.py
│ ├── models/
│ │ ├── ban/
│ │ │ ├── adapter.py
│ │ │ ├── ban.py
│ │ │ ├── model_cfgs.py
│ │ │ └── net.py
│ │ ├── butd/
│ │ │ ├── adapter.py
│ │ │ ├── model_cfgs.py
│ │ │ ├── net.py
│ │ │ └── tda.py
│ │ ├── mcan/
│ │ │ ├── adapter.py
│ │ │ ├── mca.py
│ │ │ ├── model_cfgs.py
│ │ │ └── net.py
│ │ ├── mfb/
│ │ │ ├── adapter.py
│ │ │ ├── mfb.py
│ │ │ ├── model_cfgs.py
│ │ │ └── net.py
│ │ ├── mmnasnet/
│ │ │ ├── adapter.py
│ │ │ ├── model_cfgs.py
│ │ │ ├── nasnet.py
│ │ │ └── net.py
│ │ └── model_loader.py
│ ├── ops/
│ │ ├── fc.py
│ │ └── layer_norm.py
│ └── utils/
│ ├── ans_punct.py
│ ├── feat_filter.py
│ ├── make_mask.py
│ └── optim.py
├── requirements.txt
├── results/
│ ├── cache/
│ │ └── .gitkeep
│ ├── log/
│ │ └── .gitkeep
│ ├── pred/
│ │ └── .gitkeep
│ └── result_test/
│ └── .gitkeep
├── run.py
└── utils/
├── exec.py
├── proc_dict_gqa.py
├── proc_dict_vqa.py
├── test_engine.py
└── train_engine.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
ckpts/
results/cache/*.json
results/cache/*.txt
results/result_test/*.json
results/result_test/*.txt
results/pred/*.pkl
results/log/*.txt
data/clevr/raw/images/
data/clevr/raw/questions/
data/clevr/raw/scenes/
data/clevr/feats/train/
data/clevr/feats/val/
data/clevr/feats/test/
data/gqa/raw/eval/
data/gqa/raw/questions1.2/
data/gqa/raw/sceneGraphs/
data/gqa/feats/gqa-frcn/
data/gqa/feats/gqa-grid/
data/vqa/raw/*.json
data/vqa/feats/train2014/
data/vqa/feats/val2014/
data/vqa/feats/test2015/
.idea/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# Microsoft
.vscode
.vscode/
.vs
.vs/
================================================
FILE: Dockerfile
================================================
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04
# install anaconda 5.2.0
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
ENV PATH /opt/conda/bin:$PATH
RUN apt-get update --fix-missing && apt-get install -y wget bzip2 ca-certificates \
libglib2.0-0 libxext6 libsm6 libxrender1 \
git mercurial subversion
RUN wget --quiet https://repo.anaconda.com/archive/Anaconda3-5.2.0-Linux-x86_64.sh -O ~/anaconda.sh && \
/bin/bash ~/anaconda.sh -b -p /opt/conda && \
rm ~/anaconda.sh && \
ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
echo "conda activate base" >> ~/.bashrc
RUN apt-get install -y curl grep sed dpkg && \
TINI_VERSION=`curl https://github.com/krallin/tini/releases/latest | grep -o "/v.*\"" | sed 's:^..\(.*\).$:\1:'` && \
curl -L "https://github.com/krallin/tini/releases/download/v${TINI_VERSION}/tini_${TINI_VERSION}.deb" > tini.deb && \
dpkg -i tini.deb && \
rm tini.deb && \
apt-get clean
ENTRYPOINT [ "/usr/bin/tini", "--" ]
CMD [ "/bin/bash" ]
# install pytorch 1.1 and cudatoolkit
RUN conda install pytorch==1.1.0 torchvision==0.3.0 cudatoolkit=10.0 -c pytorch
# clone and install openvqa dependencies
RUN mkdir /workspace && \
cd /workspace && \
git clone https://github.com/MILVLG/openvqa.git && \
cd openvqa &&\
pip install -r requirements.txt && \
wget https://github.com/explosion/spacy-models/releases/download/en_vectors_web_lg-2.1.0/en_vectors_web_lg-2.1.0.tar.gz -O en_vectors_web_lg-2.1.0.tar.gz && \
pip install en_vectors_web_lg-2.1.0.tar.gz && \
rm en_vectors_web_lg-2.1.0.tar.gz && \
cd /
# delete openvqa repo
RUN rm -r /workspace/openvqa
WORKDIR /workspace
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [2019] [Vision and Language Group@ MIL]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
# OpenVQA
<div>
<a href="https://openvqa.readthedocs.io/en/latest/?badge=latest"><img alt="Documentation Status" src="https://readthedocs.org/projects/openvqa/badge/?version=latest"/></a>
<a href="https://github.com/MILVLG"><img alt="powered-by MILVLG" src="https://img.shields.io/badge/powered%20by-MILVLG-orange.svg?style=flat&colorA=E1523D&colorB=007D8A"/></a>
</div>
OpenVQA is a general platform for visual question answering (VQA) research that implements state-of-the-art approaches (e.g., [BUTD](https://arxiv.org/abs/1707.07998), [MFH](https://arxiv.org/abs/1708.03619), [BAN](https://arxiv.org/abs/1805.07932), [MCAN](https://arxiv.org/abs/1906.10770) and [MMNasNet](https://arxiv.org/pdf/2004.12070.pdf)) on different benchmark datasets like [VQA-v2](https://visualqa.org/), [GQA](https://cs.stanford.edu/people/dorarad/gqa/index.html) and [CLEVR](https://cs.stanford.edu/people/jcjohns/clevr/). Support for more methods and datasets will be added continuously.
<p align="center">
<img src="misc/openvqa_overall.png" width="550">
</p>
## Documentation
Getting started and learn more about OpenVQA [here](https://openvqa.readthedocs.io/en/latest/).
## Benchmark and Model Zoo
Supported methods and benchmark datasets are shown in the below table.
Results and models are available in [MODEL ZOO](https://openvqa.readthedocs.io/en/latest/basic/model_zoo.html).
| | [VQA-v2](https://visualqa.org/) | [GQA](https://cs.stanford.edu/people/dorarad/gqa/index.html) | [CLEVR](https://cs.stanford.edu/people/jcjohns/clevr/) |
|:-----------------------------------------:|:-------------------------------:|:------------------------------------------------------------:|:------------------------------------------------------:|
| [BUTD](https://arxiv.org/abs/1707.07998) | ✓ | ✓ | |
| [MFB](https://arxiv.org/abs/1708.01471v1) | ✓ | | |
| [MFH](https://arxiv.org/abs/1708.03619) | ✓ | | |
| [BAN](https://arxiv.org/abs/1805.07932) | ✓ | ✓ | |
| [MCAN](https://arxiv.org/abs/1906.10770) | ✓ | ✓ | ✓ |
| [MMNasNet](https://arxiv.org/pdf/2004.12070.pdf) | ✓ | | |
## News & Updates
#### v0.7.5 (30/12/2019)
- Add supports and pre-trained models for the approaches on CLEVR.
#### v0.7 (29/11/2019)
- Add supports and pre-trained models for the approaches on GQA.
- Add a document explaining how developers can add a new model to OpenVQA.
#### v0.6 (18/09/2019)
- Refactoring the documents and using Sphinx to build the whole documents.
#### v0.5 (31/07/2019)
- Implement the basic framework for OpenVQA.
- Add supports and pre-trained models for BUTD, MFB, MFH, BAN, MCAN on VQA-v2.
## License
This project is released under the [Apache 2.0 license](LICENSE).
## Contact
This repo is currently maintained by Zhou Yu ([@yuzcccc](https://github.com/yuzcccc)) and Yuhao Cui ([@cuiyuhao1996](https://github.com/cuiyuhao1996)).
## Citation
If this repository is helpful for your research, or if you want to refer to the provided results in the model zoo, you can cite this work using the following BibTeX entry:
```
@misc{yu2019openvqa,
author = {Yu, Zhou and Cui, Yuhao and Shao, Zhenwei and Gao, Pengbing and Yu, Jun},
title = {OpenVQA},
howpublished = {\url{https://github.com/MILVLG/openvqa}},
year = {2019}
}
================================================
FILE: configs/clevr/mcan_small.yml
================================================
# Network
MODEL_USE: mcan
LAYER: 6
HIDDEN_SIZE: 512
FF_SIZE: 2048
MULTI_HEAD: 8
DROPOUT_R: 0.1
FLAT_MLP_SIZE: 512
FLAT_GLIMPSES: 1
FLAT_OUT_SIZE: 1024
# Execution
BATCH_SIZE: 64
LR_BASE: 0.00004
LR_DECAY_R: 0.2
LR_DECAY_LIST: [13, 15]
WARMUP_EPOCH: 3
MAX_EPOCH: 16
GRAD_NORM_CLIP: -1
GRAD_ACCU_STEPS: 2
LOSS_FUNC: ce
LOSS_REDUCTION: sum
OPT: Adam
OPT_PARAMS: {betas: '(0.9, 0.98)', eps: '1e-9'}
================================================
FILE: configs/gqa/ban_4.yml
================================================
# Network
MODEL_USE: ban
GLIMPSE: 4
HIDDEN_SIZE: 1024
K_TIMES: 3
DROPOUT_R: 0.2
CLASSIFER_DROPOUT_R: 0.5
FLAT_OUT_SIZE: 2048
USE_BBOX_FEAT: True
BBOXFEAT_EMB_SIZE: 1024
IMG_FEAT_SIZE: 1024
# Execution
BATCH_SIZE: 512
LR_BASE: 0.002
LR_DECAY_R: 0.25
LR_DECAY_LIST: [10, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: 0.25
GRAD_ACCU_STEPS: 8
LOSS_FUNC: ce
LOSS_REDUCTION: sum
OPT: Adamax
OPT_PARAMS: {betas: '(0.9, 0.999)', eps: '1e-9'}
================================================
FILE: configs/gqa/ban_8.yml
================================================
# Network
MODEL_USE: ban
GLIMPSE: 8
HIDDEN_SIZE: 1024
K_TIMES: 3
DROPOUT_R: 0.2
CLASSIFER_DROPOUT_R: 0.5
FLAT_OUT_SIZE: 2048
USE_BBOX_FEAT: True
BBOXFEAT_EMB_SIZE: 1024
IMG_FEAT_SIZE: 1024
# Execution
BATCH_SIZE: 512
LR_BASE: 0.001
LR_DECAY_R: 0.25
LR_DECAY_LIST: [10]
WARMUP_EPOCH: 3
MAX_EPOCH: 11
GRAD_NORM_CLIP: 0.25
GRAD_ACCU_STEPS: 8
LOSS_FUNC: ce
LOSS_REDUCTION: sum
OPT: Adamax
OPT_PARAMS: {betas: '(0.9, 0.999)', eps: '1e-9'}
================================================
FILE: configs/gqa/butd.yml
================================================
# Network
MODEL_USE: butd
HIDDEN_SIZE: 1024
DROPOUT_R: 0.2
CLASSIFER_DROPOUT_R: 0.5
FLAT_OUT_SIZE: 2048
USE_BBOX_FEAT: True
BBOXFEAT_EMB_SIZE: 1024
IMG_FEAT_SIZE: 1024
# Execution
BATCH_SIZE: 512
LR_BASE: 0.002
LR_DECAY_R: 0.2
LR_DECAY_LIST: [10, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: 0.25
GRAD_ACCU_STEPS: 1
LOSS_FUNC: ce
LOSS_REDUCTION: sum
OPT: Adamax
OPT_PARAMS: {betas: '(0.9, 0.999)', eps: '1e-9'}
================================================
FILE: configs/gqa/mcan_large.yml
================================================
# Network
MODEL_USE: mcan
LAYER: 6
HIDDEN_SIZE: 1024
FF_SIZE: 4096
MULTI_HEAD: 8
DROPOUT_R: 0.1
FLAT_MLP_SIZE: 512
FLAT_GLIMPSES: 1
FLAT_OUT_SIZE: 2048
USE_BBOX_FEAT: True
USE_AUX_FEAT: True
# Execution
BATCH_SIZE: 64
LR_BASE: 0.00005
LR_DECAY_R: 0.2
LR_DECAY_LIST: [8, 10]
WARMUP_EPOCH: 2
MAX_EPOCH: 11
GRAD_NORM_CLIP: -1
GRAD_ACCU_STEPS: 4
LOSS_FUNC: ce
LOSS_REDUCTION: sum
OPT: Adam
OPT_PARAMS: {betas: '(0.9, 0.98)', eps: '1e-9'}
================================================
FILE: configs/gqa/mcan_small.yml
================================================
# Network
MODEL_USE: mcan
LAYER: 6
HIDDEN_SIZE: 512
FF_SIZE: 2048
MULTI_HEAD: 8
DROPOUT_R: 0.1
FLAT_MLP_SIZE: 512
FLAT_GLIMPSES: 1
FLAT_OUT_SIZE: 1024
USE_BBOX_FEAT: True
USE_AUX_FEAT: True
# Execution
BATCH_SIZE: 64
LR_BASE: 0.0001
LR_DECAY_R: 0.2
LR_DECAY_LIST: [8, 10]
WARMUP_EPOCH: 2
MAX_EPOCH: 11
GRAD_NORM_CLIP: -1
GRAD_ACCU_STEPS: 1
LOSS_FUNC: ce
LOSS_REDUCTION: sum
OPT: Adam
OPT_PARAMS: {betas: '(0.9, 0.98)', eps: '1e-9'}
================================================
FILE: configs/vqa/ban_4.yml
================================================
# Network
MODEL_USE: ban
GLIMPSE: 4
HIDDEN_SIZE: 1024
K_TIMES: 3
DROPOUT_R: 0.2
CLASSIFER_DROPOUT_R: 0.5
FLAT_OUT_SIZE: 2048
# Execution
BATCH_SIZE: 512
LR_BASE: 0.002
LR_DECAY_R: 0.25
LR_DECAY_LIST: [10, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: 0.25
GRAD_ACCU_STEPS: 8
LOSS_FUNC: bce
LOSS_REDUCTION: sum
OPT: Adamax
OPT_PARAMS: {betas: '(0.9, 0.999)', eps: '1e-9'}
================================================
FILE: configs/vqa/ban_8.yml
================================================
# Network
MODEL_USE: ban
GLIMPSE: 8
HIDDEN_SIZE: 1024
K_TIMES: 3
DROPOUT_R: 0.2
CLASSIFER_DROPOUT_R: 0.5
FLAT_OUT_SIZE: 2048
# Execution
BATCH_SIZE: 512
LR_BASE: 0.002
LR_DECAY_R: 0.25
LR_DECAY_LIST: [10, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: 0.25
GRAD_ACCU_STEPS: 8
LOSS_FUNC: bce
LOSS_REDUCTION: sum
OPT: Adamax
OPT_PARAMS: {betas: '(0.9, 0.999)', eps: '1e-9'}
================================================
FILE: configs/vqa/butd.yml
================================================
# Network
MODEL_USE: butd
HIDDEN_SIZE: 1024
DROPOUT_R: 0.2
CLASSIFER_DROPOUT_R: 0.5
FLAT_OUT_SIZE: 2048
# Execution
BATCH_SIZE: 512
LR_BASE: 0.002
LR_DECAY_R: 0.2
LR_DECAY_LIST: [10, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: 0.25
GRAD_ACCU_STEPS: 1
LOSS_FUNC: bce
LOSS_REDUCTION: sum
OPT: Adamax
OPT_PARAMS: {betas: '(0.9, 0.999)', eps: '1e-9'}
================================================
FILE: configs/vqa/mcan_large.yml
================================================
# Network
MODEL_USE: mcan
LAYER: 6
HIDDEN_SIZE: 1024
FF_SIZE: 4096
MULTI_HEAD: 8
DROPOUT_R: 0.1
FLAT_MLP_SIZE: 512
FLAT_GLIMPSES: 1
FLAT_OUT_SIZE: 2048
USE_BBOX_FEAT: False
# Execution
BATCH_SIZE: 64
LR_BASE: 0.00007 # 0.00005 for train+val+vg->test
LR_DECAY_R: 0.2
LR_DECAY_LIST: [10, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: -1
GRAD_ACCU_STEPS: 2 # to reduce GPU memory cost
LOSS_FUNC: bce
LOSS_REDUCTION: sum
OPT: Adam
OPT_PARAMS: {betas: '(0.9, 0.98)', eps: '1e-9'}
================================================
FILE: configs/vqa/mcan_small.yml
================================================
# Network
MODEL_USE: mcan
LAYER: 6
HIDDEN_SIZE: 512
FF_SIZE: 2048
MULTI_HEAD: 8
DROPOUT_R: 0.1
FLAT_MLP_SIZE: 512
FLAT_GLIMPSES: 1
FLAT_OUT_SIZE: 1024
USE_BBOX_FEAT: False
# Execution
BATCH_SIZE: 64
LR_BASE: 0.0001
LR_DECAY_R: 0.2
LR_DECAY_LIST: [10, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: -1
GRAD_ACCU_STEPS: 1
LOSS_FUNC: bce
LOSS_REDUCTION: sum
OPT: Adam
OPT_PARAMS: {betas: '(0.9, 0.98)', eps: '1e-9'}
================================================
FILE: configs/vqa/mfb.yml
================================================
# Network
MODEL_USE: mfb
HIGH_ORDER: False # True for MFH, False for MFB
HIDDEN_SIZE: 512
MFB_K: 5
MFB_O: 1000
LSTM_OUT_SIZE: 1024
DROPOUT_R: 0.1
I_GLIMPSES: 2
Q_GLIMPSES: 2
# Execution
BATCH_SIZE: 64
LR_BASE: 0.0007
LR_DECAY_R: 0.5
LR_DECAY_LIST: [6, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: -1
GRAD_ACCU_STEPS: 1
LOSS_FUNC: kld
LOSS_REDUCTION: sum
OPT: Adam
OPT_PARAMS: {betas: '(0.9, 0.99)', eps: '1e-9'}
================================================
FILE: configs/vqa/mfh.yml
================================================
# Network
MODEL_USE: mfb
HIGH_ORDER: True # True for MFH, False for MFB
HIDDEN_SIZE: 512
MFB_K: 5
MFB_O: 1000
LSTM_OUT_SIZE: 1024
DROPOUT_R: 0.1
I_GLIMPSES: 2
Q_GLIMPSES: 2
# Execution
BATCH_SIZE: 64
LR_BASE: 0.0007
LR_DECAY_R: 0.25
LR_DECAY_LIST: [10, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: -1
GRAD_ACCU_STEPS: 1
LOSS_FUNC: kld
LOSS_REDUCTION: sum
OPT: Adam
OPT_PARAMS: {betas: '(0.9, 0.99)', eps: '1e-9'}
================================================
FILE: configs/vqa/mmnasnet_large.yml
================================================
# Network
MODEL_USE: mmnasnet
ARCH: {
enc: [SA, SA, SA, SA, FFN, FFN, FFN, FFN, SA, FFN, FFN, FFN],
dec: [GA, GA, FFN, FFN, GA, FFN, RSA, GA, FFN, GA, RSA, FFN, RSA, SA, FFN, RSA, GA, FFN]
}
HIDDEN_SIZE: 1024
REL_HBASE: 128
REL_SIZE: 64
MULTI_HEAD: 8
DROPOUT_R: 0.1
FLAT_MLP_SIZE: 1024
FLAT_GLIMPSES: 1
FLAT_OUT_SIZE: 2048
# Execution
BATCH_SIZE: 64
LR_BASE: 0.00007 # 5e-5 for train+val+vg->test
LR_DECAY_R: 0.2
LR_DECAY_LIST: [10, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: 1.0
GRAD_ACCU_STEPS: 1
LOSS_FUNC: bce
LOSS_REDUCTION: sum
OPT: Adam
OPT_PARAMS: {betas: '(0.9, 0.98)', eps: '1e-9'}
================================================
FILE: configs/vqa/mmnasnet_small.yml
================================================
# Network
MODEL_USE: mmnasnet
ARCH: {
enc: [SA, SA, SA, SA, FFN, FFN, FFN, FFN, SA, FFN, FFN, FFN],
dec: [GA, GA, FFN, FFN, GA, FFN, RSA, GA, FFN, GA, RSA, FFN, RSA, SA, FFN, RSA, GA, FFN]
}
HIDDEN_SIZE: 512
REL_HBASE: 64
REL_SIZE: 64
MULTI_HEAD: 8
DROPOUT_R: 0.1
FLAT_MLP_SIZE: 512
FLAT_GLIMPSES: 1
FLAT_OUT_SIZE: 1024
# Execution
BATCH_SIZE: 64
LR_BASE: 0.00012 # 1e-4 for train+val+vg->test
LR_DECAY_R: 0.2
LR_DECAY_LIST: [10, 12]
WARMUP_EPOCH: 3
MAX_EPOCH: 13
GRAD_NORM_CLIP: 1.0
GRAD_ACCU_STEPS: 1
LOSS_FUNC: bce
LOSS_REDUCTION: sum
OPT: Adam
OPT_PARAMS: {betas: '(0.9, 0.98)', eps: '1e-9'}
================================================
FILE: data/clevr/clevr_extract_feat.py
================================================
# --------------------------------------------------------
# OpenVQA
# CLEVR images feature extraction script
# Written by Pengbing Gao https://github.com/nbgao
# --------------------------------------------------------
'''
Command line example:
python clevr_extract_feat.py --mode=all --gpu=0
python clevr_extract_feat.py --mode=train --gpu=0 --model=resnet101 --model_stage=3 --batch_size=128 --image_height=224 --image_width=224
'''
import argparse, os, json
import numpy as np
from scipy.misc import imread, imresize
import torch
import torchvision
torch.set_num_threads(5)
def build_model(args):
    """Build a truncated ResNet feature extractor from torchvision.

    Keeps the stem (conv1/bn1/relu/maxpool) plus the first
    ``args.model_stage`` residual stages, moves the model to the GPU and
    switches it to eval mode.

    Args:
        args: namespace with ``model`` (a torchvision ResNet name, e.g.
            'resnet101') and ``model_stage`` (number of residual stages
            to keep, 1-4).

    Returns:
        A ``torch.nn.Sequential`` feature extractor on CUDA, in eval mode.

    Raises:
        ValueError: if ``args.model`` is not a torchvision model, or is
            not a ResNet variant.
    """
    if not hasattr(torchvision.models, args.model):
        raise ValueError('Invalid model "%s"' % args.model)
    if 'resnet' not in args.model:
        raise ValueError('Feature extraction only supports ResNets')
    cnn = getattr(torchvision.models, args.model)(pretrained=True)
    # Stem layers shared by all torchvision ResNet variants.
    layers = [cnn.conv1,
              cnn.bn1,
              cnn.relu,
              cnn.maxpool]
    # Append the requested number of residual stages (layer1..layerN).
    for i in range(args.model_stage):
        layers.append(getattr(cnn, 'layer%d' % (i + 1)))
    model = torch.nn.Sequential(*layers)
    model.cuda()
    model.eval()
    return model
def batch_feat(cur_batch, model):
    """Normalize a batch of images and run them through the CNN.

    Args:
        cur_batch: list of image arrays, each shaped (1, 3, H, W), with
            pixel values in [0, 255].
        model: CNN feature extractor (on CUDA) applied to the whole batch.

    Returns:
        numpy array of features produced by ``model``.
    """
    # ImageNet channel statistics, broadcast over (N, 3, H, W).
    mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1)
    # BUG FIX: the third std value was 0.224; the ImageNet std is
    # [0.229, 0.224, 0.225].
    std = np.array([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1)
    image_batch = np.concatenate(cur_batch, 0).astype(np.float32)
    image_batch = (image_batch / 255.0 - mean) / std
    image_batch = torch.FloatTensor(image_batch).cuda()
    # `Variable(volatile=True)` has been a no-op since PyTorch 0.4;
    # torch.no_grad() is the supported way to disable autograd at inference.
    with torch.no_grad():
        feats = model(image_batch)
    return feats.cpu().numpy()
def extract_feature(args, images_path, feats_npz_path):
    """Extract CNN features for every .png image under ``images_path``.

    Images are read, resized to (args.image_height, args.image_width),
    batched by ``args.batch_size``, and pushed through ``build_model``;
    each image's feature map is reshaped to (196, 1024) and written to
    ``feats_npz_path + '<index>.npz'`` under the key 'x'.

    NOTE(review): scipy.misc.imread/imresize were removed in SciPy >= 1.2,
    so this script only runs on an old SciPy with Pillow installed --
    confirm the pinned environment before use.
    """
    # Collect (path, index) pairs; the index is the trailing number in a
    # CLEVR filename, e.g. CLEVR_train_000123.png -> 123.
    input_paths = []
    idx_set = set()
    for file in os.listdir(images_path):
        if not file.endswith('.png'):
            continue
        idx = int(os.path.splitext(file)[0].split('_')[-1])
        input_paths.append((os.path.join(images_path, file), idx))
        idx_set.add(idx)
    input_paths.sort(key=lambda x: x[1])
    # Indices must be unique and form a contiguous 0..N-1 range, so the
    # output file name (ix) lines up with the image index.
    assert len(idx_set) == len(input_paths)
    assert min(idx_set) == 0 and max(idx_set) == len(idx_set) - 1
    print('Image number:', len(input_paths))

    model = build_model(args)

    if not os.path.exists(feats_npz_path):
        os.mkdir(feats_npz_path)
        print('Create dir:', feats_npz_path)

    img_size = (args.image_height, args.image_width)
    ix = 0  # index of the next output .npz file
    cur_batch = []
    for i, (path, idx) in enumerate(input_paths):
        img = imread(path, mode='RGB')
        img = imresize(img, img_size, interp='bicubic')
        img = img.transpose(2, 0, 1)[None]  # HWC -> 1xCxHxW
        cur_batch.append(img)
        if len(cur_batch) == args.batch_size:
            feats = batch_feat(cur_batch, model)
            # (1024, 14, 14) map flattened to (196, 1024); assumes
            # resnet101 stage-3 output on 224x224 input -- TODO confirm
            # for other --model/--model_stage/--image_* settings.
            for j in range(feats.shape[0]):
                np.savez(feats_npz_path + str(ix) + '.npz', x=feats[j].reshape(1024, 196).transpose(1, 0))
                ix += 1
            print('Processed %d/%d images' % (ix, len(input_paths)), end='\r')
            cur_batch = []

    # Flush the last, possibly partial, batch.
    if len(cur_batch) > 0:
        feats = batch_feat(cur_batch, model)
        for j in range(feats.shape[0]):
            np.savez(feats_npz_path + str(ix) + '.npz', x=feats[j].reshape(1024, 196).transpose(1, 0))
            ix += 1
        print('Processed %d/%d images' % (ix, len(input_paths)), end='\r')

    print('Extract image features to generate npz files sucessfully!')
# Command-line interface. The single-dash aliases (-mode, -gpu, ...) are
# kept for backward compatibility with the documented invocations.
parser = argparse.ArgumentParser(description='clevr_extract_feat')
parser.add_argument('--mode', '-mode', choices=['all', 'train', 'val', 'test'], default='all', help='mode', type=str)
parser.add_argument('--gpu', '-gpu', default='0', type=str)
parser.add_argument('--model', '-model', default='resnet101')
parser.add_argument('--model_stage', '-model_stage', default=3, type=int)
parser.add_argument('--batch_size', '-batch_size', default=128, type=int)
parser.add_argument('--image_height', '-image_height', default=224, type=int)
parser.add_argument('--image_width', '-image_width', default=224, type=int)

if __name__ == '__main__':
    # BUGFIX: raw CLEVR images live under data/clevr/raw/ (see the repo
    # tree and the dataset setup docs); the original './raws/...' paths
    # pointed at a directory that never exists in the documented layout.
    train_images_path = './raw/images/train/'
    val_images_path = './raw/images/val/'
    test_images_path = './raw/images/test/'
    train_feats_npz_path = './feats/train/'
    val_feats_npz_path = './feats/val/'
    test_feats_npz_path = './feats/test/'

    args = parser.parse_args()
    print('mode:', args.mode)
    print('gpu:', args.gpu)
    print('model:', args.model)
    print('model_stage:', args.model_stage)
    print('batch_size:', args.batch_size)
    print('image_height:', args.image_height)
    print('image_width:', args.image_width)

    # Restrict CUDA to the requested device(s) before any GPU work starts.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # process train images features
    if args.mode in ['train', 'all']:
        print('\nProcess [train] images features:')
        extract_feature(args, train_images_path, train_feats_npz_path)

    # process val images features
    if args.mode in ['val', 'all']:
        print('\nProcess [val] images features:')
        extract_feature(args, val_images_path, val_feats_npz_path)

    # process test images features
    if args.mode in ['test', 'all']:
        print('\nProcess [test] images features:')
        extract_feature(args, test_images_path, test_feats_npz_path)
================================================
FILE: data/clevr/feats/.gitkeep
================================================
================================================
FILE: data/clevr/raw/.gitkeep
================================================
================================================
FILE: data/gqa/feats/.gitkeep
================================================
================================================
FILE: data/gqa/gqa_feat_preproc.py
================================================
# --------------------------------------------------------
# OpenVQA
# GQA spatial features & object features .h5 files to .npz files transform script
# Written by Pengbing Gao https://github.com/nbgao
# --------------------------------------------------------
'''
Command line example:
(1) Process spatial features
python gqa_feat_preproc.py --mode=spatial --spatial_dir=./spatialFeatures --out_dir=./feats/gqa-grid
(2) Process object features
python gqa_feat_preproc.py --mode=object --object_dir=./objectFeatures --out_dir=./feats/gqa-frcn
'''
import h5py, glob, json, cv2, argparse
import numpy as np
# spatial features
def process_spatial_features(feat_path, out_path):
    """Convert the official GQA spatial-feature .h5 shards to .npz files.

    Reads ``gqa_spatial_info.json`` to map each (shard, row) pair back to
    its image id, then writes one ``<img_id>.npz`` per image under
    ``out_path`` with key 'x' holding the grid features transposed to
    (49, 2048). Prints a message and returns early on any unreadable file
    (best-effort behavior preserved from the original).
    """
    info_file = feat_path + '/gqa_spatial_info.json'
    try:
        # `with` closes the handle (the original leaked it); the bare
        # `except:` is narrowed so Ctrl-C is no longer swallowed.
        with open(info_file, 'r') as f:
            info = json.load(f)
    except (OSError, ValueError):
        print('Failed to open info file:', info_file)
        return
    print('Total grid features', len(info))

    print('Making the <h5 index> to <image id> dict...')
    h5idx_to_imgid = {}
    for img_id in info:
        h5idx_to_imgid[str(info[img_id]['file']) + '_' + str(info[img_id]['idx'])] = img_id

    # The official release ships exactly 16 shards: gqa_spatial_0..15.h5
    for ix in range(16):
        feat_file = feat_path + '/gqa_spatial_' + str(ix) + '.h5'
        print('Processing', feat_file)
        try:
            feat_dict = h5py.File(feat_file, 'r')
        except OSError:
            print('Failed to open feat file:', feat_file)
            return
        try:
            features = feat_dict['features']
            for iy in range(features.shape[0]):
                img_id = h5idx_to_imgid[str(ix) + '_' + str(iy)]
                feature = features[iy]
                # save to .npz file ['x']: (2048, 49) -> (49, 2048)
                np.savez(
                    out_path + '/' + img_id + '.npz',
                    x=feature.reshape(2048, 49).transpose(1, 0),
                )
        finally:
            feat_dict.close()  # the original leaked every h5 handle

    print('Process spatial features successfully!')
# object features
def process_object_features(feat_path, out_path):
    """Convert the official GQA object-feature .h5 shards to .npz files.

    Reads ``gqa_objects_info.json`` to map each (shard, row) pair back to
    its image id, then writes one ``<img_id>.npz`` per image under
    ``out_path`` with keys 'x' (features), 'bbox' (box coordinates),
    'width' and 'height'; only the first ``objectsNum`` rows of each h5
    entry are kept. Prints a message and returns early on any unreadable
    file (best-effort behavior preserved from the original).
    """
    info_file = feat_path + '/gqa_objects_info.json'
    try:
        # `with` closes the handle (the original leaked it); the bare
        # `except:` is narrowed so Ctrl-C is no longer swallowed.
        with open(info_file, 'r') as f:
            info = json.load(f)
    except (OSError, ValueError):
        print('Failed to open info file:', info_file)
        return
    print('Total frcn features', len(info))

    print('Making the <h5 index> to <image id> dict...')
    h5idx_to_imgid = {}
    for img_id in info:
        h5idx_to_imgid[str(info[img_id]['file']) + '_' + str(info[img_id]['idx'])] = img_id

    # The official release ships exactly 16 shards: gqa_objects_0..15.h5
    for ix in range(16):
        feat_file = feat_path + '/gqa_objects_' + str(ix) + '.h5'
        print('Processing', feat_file)
        try:
            feat_dict = h5py.File(feat_file, 'r')
        except OSError:
            print('Failed to open feat file:', feat_file)
            return
        try:
            bboxes = feat_dict['bboxes']
            features = feat_dict['features']
            for iy in range(features.shape[0]):
                img_id = h5idx_to_imgid[str(ix) + '_' + str(iy)]
                img_info = info[img_id]
                objects_num = img_info['objectsNum']
                # save to .npz file ['x', 'bbox', 'width', 'height'];
                # rows beyond objects_num are padding and are dropped.
                np.savez(
                    out_path + '/' + img_id + '.npz',
                    x=features[iy, :objects_num],
                    bbox=bboxes[iy, :objects_num],
                    width=img_info['width'],
                    height=img_info['height'],
                )
        finally:
            feat_dict.close()  # the original leaked every h5 handle

    print('Process object features successfully!')
# Command-line entry point: pick which feature family to convert.
# Single-dash aliases mirror the documented invocations.
parser = argparse.ArgumentParser(description='gqa_h52npz')
parser.add_argument('--mode', '-mode', choices=['object', 'spatial', 'frcn', 'grid'], help='mode', type=str)
parser.add_argument('--object_dir', '-object_dir', help='object features dir', type=str)
parser.add_argument('--spatial_dir', '-spatial_dir', help='spatial features dir', type=str)
parser.add_argument('--out_dir', '-out_dir', help='output dir', type=str)
args = parser.parse_args()

mode, out_path = args.mode, args.out_dir
object_path, spatial_path = args.object_dir, args.spatial_dir

# Echo the effective configuration.
print('mode:', mode)
print('object_path:', object_path)
print('spatial_path:', spatial_path)
print('out_path:', out_path)

# 'spatial'/'grid' are synonyms, as are 'object'/'frcn'.
if mode in ('spatial', 'grid'):
    process_spatial_features(spatial_path, out_path)
if mode in ('object', 'frcn'):
    process_object_features(object_path, out_path)
================================================
FILE: data/gqa/raw/.gitkeep
================================================
================================================
FILE: data/vqa/feats/.gitkeep
================================================
================================================
FILE: data/vqa/raw/.gitkeep
================================================
================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = _source
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
# (Recipe lines below must be indented with a real tab.)
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
================================================
FILE: docs/_source/_static/custom.css
================================================
/* Inline code literals in rendered docs: inherit the surrounding text
   colour and sit on a light grey background instead of the theme default. */
.rst-content code.literal {
    color: inherit;
    font-size: 85%;
    border: none;
    background: #F0F0F0;
    padding: 2px 3px 1px;
}
================================================
FILE: docs/_source/_static/mathjax_mathml.user.js
================================================
// ==UserScript==
// @name MathJax MathML
// @namespace http://www.mathjax.org/
// @description Insert MathJax into pages containing MathML
// @include *
// ==/UserScript==
// Inject MathJax from the CDN only when the page actually contains MathML
// <math> elements and MathJax is not already loaded. `unsafeWindow` is the
// page window under Greasemonkey; plain `window` otherwise.
if ((window.unsafeWindow == null ? window : unsafeWindow).MathJax == null) {
  if ((document.getElementsByTagName("math").length > 0) ||
      (document.getElementsByTagNameNS == null ? false :
      (document.getElementsByTagNameNS("http://www.w3.org/1998/Math/MathML","math").length > 0))) {
    var script = document.createElement("script");
    script.type = "text/javascript";
    script.src = "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_CHTML-full";
    // (Removed the unused `config` variable: it was assigned but never read.)
    document.getElementsByTagName("head")[0].appendChild(script);
  }
}
================================================
FILE: docs/_source/_static/mathjax_wikipedia.user.js
================================================
// ==UserScript==
// @name MathJax in Wikipedia
// @namespace http://www.mathjax.org/
// @description Insert MathJax into Wikipedia pages
// @include http://en.wikipedia.org/wiki/*
// ==/UserScript==
// Run only if MathJax is not already present on the page. `unsafeWindow`
// is the page window under Greasemonkey; plain `window` otherwise.
if ((window.unsafeWindow == null ? window : unsafeWindow).MathJax == null) {
  //
  // Replace the images with MathJax scripts of type math/tex
  //
  var images = document.getElementsByTagName('img'), count = 0;
  // Iterate backwards: replaceChild mutates the live HTMLCollection.
  for (var i = images.length - 1; i >= 0; i--) {
    var img = images[i];
    if (img.className === "tex") {
      // The img's alt text holds the TeX source, which becomes the body
      // of a "math/tex" script element for MathJax to typeset.
      var script = document.createElement("script"); script.type = "math/tex";
      // Old Opera needs innerHTML; other engines use .text.
      if (window.opera) {script.innerHTML = img.alt} else {script.text = img.alt}
      img.parentNode.replaceChild(script,img); count++;
    }
  }
  if (count) {
    //
    // Load MathJax and have it process the page
    //
    var script = document.createElement("script");
    script.type = "text/javascript";
    script.src = "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_CHTML-full";
    document.getElementsByTagName("head")[0].appendChild(script);
  }
}
================================================
FILE: docs/_source/advanced/adding_model.md
================================================
# Adding a custom VQA model
This is a tutorial on how to add a custom VQA model into OpenVQA. Follow the steps below, you will obtain a model that can run across VQA/GQA/CLEVR datasets.
## 1. Preliminary
All implemented models are placed at ```<openvqa>/openvqa/models/```, so the first thing to do is to create a folder there for your VQA model named `<YOUR_MODEL_NAME>`. After that, all your model-related files will be placed in the folder ```<openvqa>/openvqa/models/<YOUR_MODEL_NAME>/```.
## 2. Dataset Adapter
Create a python file `<openvqa>/openvqa/models/<YOUR_MODEL_NAME>/adapter.py` to bridge your model and different datasets. Different datasets have different input features, thus resulting in different operators to handle the features.
#### Input
Input features (packed as `feat_dict`) for different datasets.
#### Output
Customized pre-processed features to be fed into the model.
#### Adapter Template
```
from openvqa.core.base_dataset import BaseAdapter
class Adapter(BaseAdapter):
def __init__(self, __C):
super(Adapter, self).__init__(__C)
self.__C = __C
def vqa_init(self, __C):
# Your Implementation
def gqa_init(self, __C):
# Your Implementation
def clevr_init(self, __C):
# Your Implementation
def vqa_forward(self, feat_dict):
# Your Implementation
def gqa_forward(self, feat_dict):
# Your Implementation
def clevr_forward(self, feat_dict):
# Your Implementation
```
Each dataset-specific initiation function `def <dataset>_init(self, __C)` corresponds to one feed-forward function `def <dataset>_forward(self, feat_dict)`, your implementations should follow the principles ```torch.nn.Module.__init__()``` and ```torch.nn.Module.forward()```, respectively.
The variable `feat_dict` consists of the input feature names for the datasets, which correspond to the definitions in `<openvqa>/openvqa/core/base_cfgs.py`
```
vqa:{
'FRCN_FEAT': bottom-up features -> [batchsize, num_bbox, 2048],
'BBOX_FEAT': bbox coordinates -> [batchsize, num_bbox, 5],
}
gqa:{
'FRCN_FEAT': official bottom-up features -> [batchsize, num_bbox, 2048],
'BBOX_FEAT': official bbox coordinates -> [batchsize, num_bbox, 5],
'GRID_FEAT': official resnet grid features -> [batchsize, num_grid, 2048],
}
clevr:{
'GRID_FEAT': resnet grid features -> [batchsize, num_grid, 1024],
}
```
More detailed examples can be referred to the adapter for the [MCAN](https://github.com/MILVLG/openvqa/tree/master/openvqa/models/mcan/adapter.py) model.
## 3. Definition of model hyper-parameters
Create a python file named ```<openvqa>/openvqa/models/<YOUR MODEL NAME>/model_cfgs.py```
#### Configuration Template
```
from openvqa.core.base_cfgs import BaseCfgs
class Cfgs(BaseCfgs):
def __init__(self):
super(Cfgs, self).__init__()
# Your Implementation
```
Only the variables you define here can be used in the network. A variable's value can be overridden in the running configuration file described later.
#### Example
```
# model_cfgs.py
from openvqa.core.base_cfgs import BaseCfgs
class Cfgs(BaseCfgs):
def __init__(self):
super(Cfgs, self).__init__()
self.LAYER = 6
```
```
# net.py
class Net(nn.Module):
def __init__(self, __C, pretrained_emb, token_size, answer_size):
super(Net, self).__init__()
self.__C = __C
print(__C.LAYER)
```
```
Output: 6
```
## 4. Main body
Create a python file for the main body of the model as ```<openvqa>/openvqa/models/<YOUR MODEL NAME>/net.py```. Note that the filename must be `net.py` since this filename will be invoked by the running script. Except the file, other auxiliary model files invoked by `net.py` can be named arbitrarily.
When implementation, you should pay attention to the following restrictions:
- The main module should be named `Net`, i.e., `class Net(nn.Module):`
- The `init` function has three input variables: *pretrained_emb* corresponds to the GloVe embedding features for the question; *token\_size* corresponds to the number of all dataset words; *answer_size* corresponds to the number of classes for prediction.
- The `forward` function has four input variables: *frcn_feat*, *grid_feat*, *bbox_feat*, *ques_ix*.
- In the `init` function, you should initialize the `Adapter` which you've already defined above. In the `forward` function, you should feed *frcn_feat*, *grid_feat*, *bbox_feat* into the `Adapter` to obtain the processed image features.
- Return a prediction tensor of size [batch\_size, answer_size]. Note that no activation function like ```sigmoid``` or ```softmax``` is appended on the prediction. The activation has been designed for the prediction in the loss function outside.
#### Model Template
```
import torch.nn as nn
from openvqa.models.mcan.adapter import Adapter
class Net(nn.Module):
def __init__(self, __C, pretrained_emb, token_size, answer_size):
super(Net, self).__init__()
self.__C = __C
self.adapter = Adapter(__C)
def forward(self, frcn_feat, grid_feat, bbox_feat, ques_ix):
img_feat = self.adapter(frcn_feat, grid_feat, bbox_feat)
# model implementation
...
return pred
```
## 5. Declaration of running configurations
Create a `yml` file at```<openvqa>/configs/<dataset>/<YOUR_CONFIG_NAME>.yml``` and define your hyper-parameters here. We suggest that `<YOUR_CONFIG_NAME>`= `<YOUR_MODEL_NAME>`. If you have the requirement to have one base model support the running scripts for different variants. (e.g., MFB and MFH), you can have different yml files (e.g., `mfb.yml` and `mfh.yml`) and use the `MODEL_USE` param in the yml file to specify the actual used model (i.e., mfb).
### Example:
```
MODEL_USE: <YOUR MODEL NAME> # Must be defined
LAYER: 6
LOSS_FUNC: bce
LOSS_REDUCTION: sum
```
Finally, to register the added model to the running script, you can modify `<openvqa/run.py>` by adding your `<YOUR_CONFIG_NAME>` into the arguments for models [here](https://github.com/MILVLG/openvqa/tree/master/run.py#L22).
By doing all the steps above, you are able to use ```--MODEL=<YOUR_CONFIG_NAME>``` to train/val/test your model like other provided models. For more information about the usage of the running script, please refer to the [Getting Started](https://openvqa.readthedocs.io/en/latest/basic/getting_started.html) page.
================================================
FILE: docs/_source/advanced/contributing.md
================================================
# Contributing to OpenVQA
All kinds of contributions are welcome, including but not limited to the following.
- Fixes (typo, bugs)
- New features and components
## Workflow
1. fork and pull the latest version of OpenVQA
2. checkout a new branch (do not use master branch for PRs)
3. commit your changes
4. create a PR
## Code style
### Python
We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style.
We use [flake8](http://flake8.pycqa.org/en/latest/) as the linter and [yapf](https://github.com/google/yapf) as the formatter.
Please upgrade to the latest yapf (>=0.27.0) and refer to the configuration.
>Before you create a PR, make sure that your code lints and is formatted by yapf.
### C++ and CUDA
We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).
================================================
FILE: docs/_source/basic/getting_started.md
================================================
# Getting Started
This page provides basic tutorials about the usage of OpenVQA.
For installation instructions, please see [Installation](install).
## Training
The following script will start training a `mcan_small` model on the `VQA-v2` dataset:
```bash
$ python3 run.py --RUN='train' --MODEL='mcan_small' --DATASET='vqa'
```
- ```--RUN={'train','val','test'}``` to set the mode to be executed.
- ```--MODEL=str```, e.g., to assign the model to be executed.
- ```--DATASET={'vqa','gqa','clevr'}``` to choose the dataset to be executed.
All checkpoint files will be saved to:
```
ckpts/ckpt_<VERSION>/epoch<EPOCH_NUMBER>.pkl
```
and the training log file will be placed at:
```
results/log/log_run_<VERSION>.txt
```
To add:
- ```--VERSION=str```, e.g., ```--VERSION='v1'``` to assign a name for this run of the model.
- ```--GPU=str```, e.g., ```--GPU='2'``` to train the model on specified GPU device.
- ```--SEED=int```, e.g., ```--SEED=123``` to use a fixed seed to initialize the model, which obtains exactly the same model. Unset it results in random seeds.
- ```--NW=int```, e.g., ```--NW=8``` to accelerate I/O speed.
- ```--SPLIT=str``` to set the training sets as you want. Setting ```--SPLIT='train'``` will trigger the evaluation script to run the validation score after every epoch automatically.
- ```--RESUME=True``` to start training with saved checkpoint parameters. In this stage, you should assign the checkpoint version```--CKPT_V=str``` and the resumed epoch number ```CKPT_E=int```.
- ```--MAX_EPOCH=int``` to stop training at a specified epoch number.
If you want to resume training from an existing checkpoint, you can use the following script:
```bash
$ python3 run.py --RUN='train' --MODEL='mcan_small' --DATASET='vqa' --CKPT_V=str --CKPT_E=int
```
where the args `CKPT_V` and `CKPT_E` must be specified, corresponding to the version and epoch number of the loaded model.
#### Multi-GPU Training and Gradient Accumulation
We recommend to use the GPU with at least 8 GB memory, but if you don't have such device, we provide two solutions to deal with it:
- _Multi-GPU Training_:
If you want to accelerate training or train the model on a device with limited GPU memory, you can use more than one GPUs:
Add ```--GPU='0, 1, 2, 3...'```
The batch size on each GPU will be adjusted to `BATCH_SIZE`/#GPUs automatically.
- _Gradient Accumulation_:
If you only have one GPU less than 8GB, an alternative strategy is provided to use the gradient accumulation during training:
Add ```--ACCU=n```
This makes the optimizer accumulate gradients for `n` small batches and update the model weights at once. It is worth noting that `BATCH_SIZE` must be divisible by ```n``` to run this mode correctly.
## Validation and Testing
**Warning**: The args ```--MODEL``` and `--DATASET` should be set to the same values as those in the training stage.
### Validation on Local Machine
Offline evaluation on a local machine only supports evaluation on the *val* split. If you want to evaluate the *test* split, please see [Testing on Online Server](#testing-on-online-server).
There are two ways to start:
(Recommend)
```bash
$ python3 run.py --RUN='val' --MODEL=str --DATASET='{vqa,gqa,clevr}' --CKPT_V=str --CKPT_E=int
```
or use the absolute path instead:
```bash
$ python3 run.py --RUN='val' --MODEL=str --DATASET='{vqa,gqa,clevr}' --CKPT_PATH=str
```
- For VQA-v2, the results on *val* split
### Testing on Online Server
All the evaluations on the test split of VQA-v2, GQA and CLEVR benchmarks can be achieved by using
```bash
$ python3 run.py --RUN='test' --MODEL=str --DATASET='{vqa,gqa,clevr}' --CKPT_V=str --CKPT_E=int
```
The result file is saved at: ```results/result_test/result_run_<CKPT_V>_<CKPT_E>.json```
- For VQA-v2, the result file is uploaded to the [VQA challenge website](https://evalai.cloudcv.org/web/challenges/challenge-page/163/overview) to evaluate the scores on the *test-dev* or *test-std* split.
- For GQA, the result file is uploaded to the [GQA Challenge website](<https://evalai.cloudcv.org/web/challenges/challenge-page/225/overview>) to evaluate the scores on *test* or *test-dev* split.
- For CLEVR, the result file can be evaluated via sending an email to the author [Justin Johnson](<https://cs.stanford.edu/people/jcjohns/>) with attaching this file, and he will reply the scores via email too.
================================================
FILE: docs/_source/basic/install.md
================================================
# Installation
This page provides basic prerequisites to run OpenVQA, including the setups of hardware, software, and datasets.
## Hardware & Software Setup
A machine with at least **1 GPU (>= 8GB)**, **20GB memory** and **50GB free disk space** is required. We strongly recommend to use a SSD drive to guarantee high-speed I/O.
The following packages are required to build the project correctly.
- [Python](https://www.python.org/downloads/) >= 3.5
- [Cuda](https://developer.nvidia.com/cuda-toolkit) >= 9.0 and [cuDNN](https://developer.nvidia.com/cudnn)
- [PyTorch](http://pytorch.org/) >= 0.4.1 with CUDA (**PyTorch 1.x is also supported**).
- [SpaCy](https://spacy.io/) and initialize the [GloVe](https://github.com/explosion/spacy-models/releases/download/en_vectors_web_lg-2.1.0/en_vectors_web_lg-2.1.0.tar.gz) as follows:
```bash
$ pip install -r requirements.txt
$ wget https://github.com/explosion/spacy-models/releases/download/en_vectors_web_lg-2.1.0/en_vectors_web_lg-2.1.0.tar.gz -O en_vectors_web_lg-2.1.0.tar.gz
$ pip install en_vectors_web_lg-2.1.0.tar.gz
```
## Dataset Setup
The following datasets should be prepared before running the experiments.
**Note that if you only want to run experiments on one specific dataset, you can focus on the setup for that and skip the rest.**
### VQA-v2
- Image Features
The image features are extracted using the [bottom-up-attention](https://github.com/peteanderson80/bottom-up-attention) strategy, with each image being represented as a dynamic number (from 10 to 100) of 2048-D features. We store the features for each image in a `.npz` file. You can prepare the visual features by yourself or download the extracted features from [OneDrive](https://awma1-my.sharepoint.com/:f:/g/personal/yuz_l0_tn/EsfBlbmK1QZFhCOFpr4c5HUBzUV0aH2h1McnPG1jWAxytQ?e=2BZl8O) or [BaiduYun](https://pan.baidu.com/s/1C7jIWgM3hFPv-YXJexItgw#list/path=%2F). The download contains three files: **train2014.tar.gz, val2014.tar.gz, and test2015.tar.gz**, corresponding to the features of the train/val/test images for *VQA-v2*, respectively.
All the image feature files are unzipped and placed in the `data/vqa/feats` folder to form the following tree structure:
```
|-- data
|-- vqa
| |-- feats
| | |-- train2014
| | | |-- COCO_train2014_...jpg.npz
| | | |-- ...
| | |-- val2014
| | | |-- COCO_val2014_...jpg.npz
| | | |-- ...
| | |-- test2015
| | | |-- COCO_test2015_...jpg.npz
| | | |-- ...
```
- QA Annotations
Download all the annotation `json` files for VQA-v2, including the [train questions](https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Questions_Train_mscoco.zip), [val questions](https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Questions_Val_mscoco.zip), [test questions](https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Questions_Test_mscoco.zip), [train answers](https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Annotations_Train_mscoco.zip), and [val answers](https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Annotations_Val_mscoco.zip).
In addition, we use the VQA samples from the Visual Genome to augment the training samples. We pre-processed these samples by two rules:
1. Select the QA pairs with the corresponding images appear in the MS-COCO *train* and *val* splits;
2. Select the QA pairs with the answer appear in the processed answer list (occurs more than 8 times in whole *VQA-v2* answers).
We provide our processed vg questions and annotations files, you can download them from [OneDrive](https://awma1-my.sharepoint.com/:f:/g/personal/yuz_l0_tn/EmVHVeGdck1IifPczGmXoaMBFiSvsegA6tf_PqxL3HXclw) or [BaiduYun](https://pan.baidu.com/s/1QCOtSxJGQA01DnhUg7FFtQ#list/path=%2F).
All the QA annotation files are unzipped and placed in the `data/vqa/raw` folder to form the following tree structure:
```
|-- data
|-- vqa
| |-- raw
| | |-- v2_OpenEnded_mscoco_train2014_questions.json
| | |-- v2_OpenEnded_mscoco_val2014_questions.json
| | |-- v2_OpenEnded_mscoco_test2015_questions.json
| | |-- v2_OpenEnded_mscoco_test-dev2015_questions.json
| | |-- v2_mscoco_train2014_annotations.json
| | |-- v2_mscoco_val2014_annotations.json
| | |-- VG_questions.json
| | |-- VG_annotations.json
```
### GQA
- Image Features
Download the [spatial features](https://nlp.stanford.edu/data/gqa/spatialFeatures.zip) and [object features](https://nlp.stanford.edu/data/gqa/objectFeatures.zip) for GQA from its official website. **Spatial Features Files** include `gqa_spatial_*.h5` and `gqa_spatial_info.json`. **Object Features Files** include `gqa_objects_*.h5` and `gqa_objects_info.json`.
To make the input features consistent with those for VQA-v2, we provide a [script](https://github.com/MILVLG/openvqa/tree/master/data/gqa/gqa_feat_preproc.py) to transform `.h5` feature files into multiple `.npz` files, with each file corresponding to one image.
```bash
$ cd data/gqa
$ unzip spatialFeatures.zip
$ python gqa_feat_preproc.py --mode=spatial --spatial_dir=./spatialFeatures --out_dir=./feats/gqa-grid
$ rm -r spatialFeatures.zip ./spatialFeatures
$ unzip objectFeatures.zip
$ python gqa_feat_preproc.py --mode=object --object_dir=./objectFeatures --out_dir=./feats/gqa-frcn
$ rm -r objectFeatures.zip ./objectFeatures
```
All the processed feature files are placed in the `data/gqa/feats` folder to form the following tree structure:
```
|-- data
|-- gqa
| |-- feats
| | |-- gqa-frcn
| | | |-- 1.npz
| | | |-- ...
| | |-- gqa-grid
| | | |-- 1.npz
| | | |-- ...
```
- Questions and Scene Graphs
Download all the GQA [QA files](https://nlp.stanford.edu/data/gqa/questions1.2.zip) from the official site, including all the splits needed for training, validation and testing. Download the [scene graphs files](https://nlp.stanford.edu/data/gqa/sceneGraphs.zip) for `train` and `val` splits from the official site. Download the [supporting files](https://nlp.stanford.edu/data/gqa/eval.zip) from the official site, including the `train` and `val` choices supporting files for the evaluation.
All the question files and scene graph files are unzipped and placed in the `data/gqa/raw` folder to form the following tree structure:
```
|-- data
|-- gqa
| |-- raw
| | |-- questions1.2
| | | |-- train_all_questions
| | | | |-- train_all_questions_0.json
| | | | |-- ...
| | | | |-- train_all_questions_9.json
| | | |-- train_balanced_questions.json
| | | |-- val_all_questions.json
| | | |-- val_balanced_questions.json
| | | |-- testdev_all_questions.json
| | | |-- testdev_balanced_questions.json
| | | |-- test_all_questions.json
| | | |-- test_balanced_questions.json
| | | |-- challenge_all_questions.json
| | | |-- challenge_balanced_questions.json
| | | |-- submission_all_questions.json
| | |-- eval
| | | |-- train_choices
| | | | |-- train_all_questions_0.json
| | | | |-- ...
| | | | |-- train_all_questions_9.json
| | | |-- val_choices.json
| | |-- sceneGraphs
| | | |-- train_sceneGraphs.json
| | | |-- val_sceneGraphs.json
```
### CLEVR
- Images, Questions and Scene Graphs
Download all the [CLEVR v1.0](https://dl.fbaipublicfiles.com/clevr/CLEVR_v1.0.zip) from the official site, including all the splits needed for training, validation and testing.
All the image files, question files and scene graph files are unzipped and placed in the `data/clevr/raw` folder to form the following tree structure:
```
|-- data
|-- clevr
| |-- raw
| | |-- images
| | | |-- train
| | | | |-- CLEVR_train_000000.png
| | | | |-- ...
| | | | |-- CLEVR_train_069999.png
| | | |-- val
| | | | |-- CLEVR_val_000000.png
| | | | |-- ...
| | | | |-- CLEVR_val_014999.png
| | | |-- test
| | | | |-- CLEVR_test_000000.png
| | | | |-- ...
| | | | |-- CLEVR_test_014999.png
| | |-- questions
| | | |-- CLEVR_train_questions.json
| | | |-- CLEVR_val_questions.json
| | | |-- CLEVR_test_questions.json
| | |-- scenes
| | | |-- CLEVR_train_scenes.json
| | | |-- CLEVR_val_scenes.json
```
- Image Features
To make the input features consistent with those for VQA-v2, we provide a [script](https://github.com/MILVLG/openvqa/tree/master/data/clevr/clevr_extract_feat.py) to extract image features using a pre-trained ResNet-101 model like most previous works did and generate `.npz` files, with each file corresponding to one image.
```bash
$ cd data/clevr
$ python clevr_extract_feat.py --mode=all --gpu=0
```
All the processed feature files are placed in the `data/clevr/feats` folder to form the following tree structure:
```
|-- data
|-- clevr
| |-- feats
| | |-- train
| | | |-- 1.npz
| | | |-- ...
| | |-- val
| | | |-- 1.npz
| | | |-- ...
| | |-- test
| | | |-- 1.npz
| | | |-- ...
```
================================================
FILE: docs/_source/basic/model_zoo.md
================================================
# Benchmark and Model Zoo
## Environment
We use the following environment to run all the experiments in this page.
- Python 3.6
- PyTorch 0.4.1
- CUDA 9.0.176
- CUDNN 7.0.4
## VQA-v2
We provide three groups of results (including the accuracies of *Overall*, *Yes/No*, *Number* and *Other*) for each model on VQA-v2 using different training schemes as follows. We provide pre-trained models for the latter two schemes.
- **Train -> Val**: trained on the `train` split and evaluated on the `val` split.
- **Train+val -> Test-dev**: trained on the `train+val` splits and evaluated on the `test-dev` split.
- **Train+val+vg -> Test-dev**: trained on the `train+val+vg` splits and evaluated on the `test-dev` split.
**Note that for one model, the used base learning rate in the two schemes may be different, you should modify this setting in the config file to reproduce the results.**
#### Train -> Val
| Model | Base lr | Overall (%) | Yes/No (%) | Number (%) | Other (%) |
|:--------------------------------------------------------------------------------------:|:-------:|:-----------:|:----------:|:----------:|:---------:|
| [BUTD](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/butd.yml) | 2e-3 | 63.84 | 81.40 | 43.81 | 55.78 |
| [MFB](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mfb.yml) | 7e-4 | 65.35 | 83.23 | 45.31 | 57.05 |
| [MFH](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mfh.yml) | 7e-4 | 66.18 | 84.07 | 46.55 | 57.78 |
| [BAN-4](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/ban_4.yml) | 2e-3 | 65.86 | 83.53 | 46.36 | 57.56 |
| [BAN-8](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/ban_8.yml) | 2e-3 | 66.00 | 83.61 | 47.04 | 57.62 |
| [MCAN-small](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mcan_small.yml) | 1e-4 | 67.17 | 84.82 | 49.31 | 58.48 |
| [MCAN-large](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mcan_large.yml) | 7e-5 | 67.50 | 85.14 | 49.66 | 58.80 |
| [MMNasNet-small](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mmnasnet_small.yml) | 1.2e-4 | 67.79 | 85.02 | 52.25 | 58.80 |
| [MMNasNet-large](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mmnasnet_large.yml) | 7e-5 | 67.98 | 85.22 | 52.04 | 59.09 |
#### Train+val -> Test-dev
| Model | Base lr | Overall (%) | Yes/No (%) | Number (%) | Other (%) | Download |
|:--------------------------------------------------------------------------------------:|:-------:|:-----------:|:----------:|:----------:|:---------:|:-------------------------------------------------------------------------------------------------------------------------:|
| [BUTD](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/butd.yml) | 2e-3 | 66.98 | 83.28 | 46.19 | 57.85 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EWSOkcCVGMpAot9ol0IJP3ABv3cWFRvGFB67980PHiCk3Q?download=1) |
| [MFB](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mfb.yml) | 7e-4 | 68.29 | 84.64 | 48.29 | 58.89 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/ET-B23hG7UNPrQ0hha77V5kBMxAokIr486lB3YwMt-zhow?download=1) |
| [MFH](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mfh.yml) | 7e-4 | 69.11 | 85.56 | 48.81 | 59.69 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EUpvJD3c7NZJvBAbFOXTS0IBk1jCSz46bi7Pfq1kzJ35PA?download=1) |
| [BAN-4](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/ban_4.yml) | 1.4e-3 | 68.9 | 85.0 | 49.5 | 59.56 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EVUabhYppDBImgV6b0DdGr0BrxTdSLm7ux9rN65T_8DZ0Q?download=1) |
| [BAN-8](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/ban_8.yml) | 1.4e-3 | 69.07 | 85.2 | 49.63 | 59.71 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EbJgyL7FPTFAqzMm3HB1xDIBjXpWygOoXrdnDZKEIu34rg?download=1) |
| [MCAN-small](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mcan_small.yml) | 1e-4 | 70.33 | 86.77 | 52.14 | 60.40 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EcFeQCi_9MVBn6MeESly8OYBZCeBEuaPQqZjT-oXidgKKg?download=1) |
| [MCAN-large](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mcan_large.yml) | 5e-5 | 70.48 | 86.90 | 52.11 | 60.63 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/Ee6HdFN_FcZAsQEm85WesHgBZBkY8dZ-278dDYG_ty_IwA?download=1) |
#### Train+val+vg -> Test-dev
| Model | Base lr | Overall (%) | Yes/No (%) | Number (%) | Other (%) | Download |
|:--------------------------------------------------------------------------------------:|:-------:|:-----------:|:----------:|:----------:|:---------:|:-------------------------------------------------------------------------------------------------------------------------:|
| [BUTD](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/butd.yml) | 2e-3 | 67.54 | 83.48 | 46.97 | 58.62 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EbLMhJsx9AVJi-ipqtkzHckBS5TWo_au3T8wHPEdDKMgPQ?download=1) |
| [MFB](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mfb.yml) | 7e-4 | 68.25 | 84.79 | 48.24 | 58.68 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EfLYkv1XBgNJgOMU5PAo04YBHxAVmpeJtnZecqJztJdNig?download=1) |
| [MFH](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mfh.yml) | 7e-4 | 68.86 | 85.38 | 49.27 | 59.21 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EXGNuWmba8JOnQkkpfqokqcBzJ6Yw1ID6hl7hj2nyJaNJA?download=1) |
| [BAN-4](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/ban_4.yml) | 1.4e-3 | 69.31 | 85.42 | 50.15 | 59.91 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/ERAUbsBJzcNHjXcINxDoWOQByR0jSbdNp8nonuFdbyc8yA?download=1) |
| [BAN-8](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/ban_8.yml) | 1.4e-3 | 69.48 | 85.40 | 50.82 | 60.14 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EW6v-dZOdJhFoKwT3bIx8M8B_U998hE8YD9zUJsUpo0rjQ?download=1) |
| [MCAN-small](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mcan_small.yml) | 1e-4 | 70.69 | 87.08 | 53.16 | 60.66 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EWSniKgB8Y9PropErzcAedkBKwJCeBP6b5x5oT_I4LiWtg?download=1) |
| [MCAN-large](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mcan_large.yml) | 5e-5 | 70.82 | 87.19 | 52.56 | 60.98 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EQvT2mjBm4ZGnE-jBgAJCbIBC9RBiHwl-XEDr8T63DS10w?download=1) |
| [MMNasNet-small](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mmnasnet_small.yml) | 1e-4 | 71.24 | 87.11 | 56.15 | 61.08 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EaUf4tRcw0FPghbwRoVcMo8BQT9SWzgiZBpD2CrFRfS54w?download=1) |
| [MMNasNet-large](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mmnasnet_large.yml) | 5e-5 | 71.45 | 87.29 | 55.71 | 61.45 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EQwNsq0AVehGqhWS4iwuWsYBPtP78xEqRgFKuRGKodkQWA?download=1) |
## GQA
We provide a group of results (including *Accuracy*, *Binary*, *Open*, *Validity*, *Plausibility*, *Consistency*, *Distribution*) for each model on GQA as follows.
- **Train+val -> Test-dev**: trained on the `train(balance) + val(balance)` splits and evaluated on the `test-dev(balance)` split.
**The results shown in the following are obtained from the [online server](https://evalai.cloudcv.org/web/challenges/challenge-page/225/overview). Note that the offline Test-dev result is evaluated by the provided official script, which yields slight differences compared to the online result for unknown reasons.**
#### Train+val -> Test-dev
| Model | Base lr | Accuracy (%) | Binary (%) | Open (%) | Validity (%) | Plausibility (%) | Consistency (%) | Distribution | Download |
|:------:|:-------:|:------------:|:----------:|:--------:|:------------:|:----------------:|:----------------:|:------------:|:--------:|
| [BUTD (frcn+bbox)](https://github.com/MILVLG/openvqa/tree/master/configs/gqa/butd.yml) | 2e-3 | 53.38 | 67.78 | 40.72 | 96.62 | 84.81 | 77.62 | 1.26 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EaalaQ6VmBJCgeoZiPp45_gBn20g7tpkp-Uq8IVFcun64w?download=1) |
| [BAN-4 (frcn+bbox)](https://github.com/MILVLG/openvqa/tree/master/configs/gqa/ban_4.yml) | 2e-3 | 55.01 | 72.02 | 40.06 | 96.94 | 85.67 | 81.85 | 1.04 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EdRIuVXaJqBJoXg3T7N0xfYBsPl-GlgW2hq2toqm2gOxXg?download=1) |
| [BAN-8 (frcn+bbox)](https://github.com/MILVLG/openvqa/tree/master/configs/gqa/ban_8.yml) | 1e-3 | 56.19 | 73.31 | 41.13 | 96.77 | 85.58 | 84.64 | 1.09 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/ES8FCQxFsqJBnvdoOcF_724BJgJml6iStYYK9UeUbI8Uyw?download=1) |
| [MCAN-small (frcn)](https://github.com/MILVLG/openvqa/tree/master/configs/gqa/mcan_small.yml) | 1e-4 | 53.41 | 70.29 | 38.56 | 96.77 | 85.32 | 82.29 | 1.40 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/ER_i5xbPuXNCiC15iVtxBvgBTe7IBRpqpWTmeAY5svv3Ew?download=1) |
| [MCAN-small (frcn+grid)](https://github.com/MILVLG/openvqa/tree/master/configs/gqa/mcan_small.yml) | 1e-4 | 54.28 | 71.68 | 38.97 | 96.79 | 85.11 | 84.49 | 1.20 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EbsPhIGkvpNKtqBbFmIFIucBQO_dM6lDgQL-gdd3RnzziQ?download=1) |
| [MCAN-small (frcn+bbox)](https://github.com/MILVLG/openvqa/tree/master/configs/gqa/mcan_small.yml) | 1e-4 | 58.20 | 75.87 | 42.66 | 97.01 | 85.41 | 87.99 | 1.25 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EQCUNFPnpC1HliLDFCSDUc4BUdbdq40iPZVi5tLOCrVaQA?download=1) |
| [MCAN-small (frcn+bbox+grid)](https://github.com/MILVLG/openvqa/tree/master/configs/gqa/mcan_small.yml) | 1e-4 | 58.38 | 76.49 | 42.45 | 96.98 | 84.47 | 87.36 | 1.29 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EcrY2vDlzERLksouT5_cbcIBM1BCPkPdg4MyPmci8xrQig?download=1) |
| [MCAN-large (frcn+bbox+grid)](https://github.com/MILVLG/openvqa/tree/master/configs/gqa/mcan_large.yml) | 5e-5 | 58.10 | 76.98 | 41.50 | 97.01 | 85.43 | 87.34 | 1.20 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/Ed6PBjIDEHpDot3vY__T-OIBJGdW51RFo2u_pm-7S5TMPA?download=1) |
## CLEVR
We provide a group of results (including *Overall*, *Count*, *Exist*, *Compare Numbers*, *Query Attribute*, *Compare Attribute*) for each model on CLEVR as follows.
- **Train -> Val**: trained on the `train` split and evaluated on the `val` split.
#### Train -> Val
| Model | Base lr | Overall (%) | Count (%) | Exist (%) | Compare Numbers (%) | Query Attribute (%) | Compare Attribute (%) | Download |
|:-----:|:-------:|:-------------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
| [MCAN-small](https://github.com/MILVLG/openvqa/tree/master/configs/clevr/mcan_small.yml) | 4e-5 | 98.74 | 96.81 | 99.27 | 98.89 | 99.53 | 99.19 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/ERtwnuAoeHNKjs0qTkWC3cYBWVuUk7BLk88cnCKNFxYYlQ?download=1) |
================================================
FILE: docs/_source/conf.py
================================================
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# http://www.sphinx-doc.org/en/master/config
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath('../..'))
RELEASE = os.environ.get('RELEASE', False)
# -- Project information -----------------------------------------------------
project = u'OpenVQA'
copyright = u'2019, MILVLG'
author = u'MILVLG'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
# version = '1.0'
# The full version, including alpha/beta/rc tags.
# release = '0.0'
# -- General configuration ---------------------------------------------------
master_doc = 'index'
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
source_suffix = {
'.rst': 'restructuredtext',
'.txt': 'markdown',
'.md': 'markdown',
}
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.autosummary',
'sphinx.ext.doctest',
'sphinx.ext.intersphinx',
'sphinx.ext.todo',
'sphinx.ext.coverage',
'sphinx.ext.napoleon',
'sphinx.ext.viewcode',
'sphinx_markdown_tables',
'recommonmark',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Add cusotm css overrides
def setup(app):
app.add_stylesheet( "custom.css" )
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
if RELEASE:
templates_path = ['_templates-stable']
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# Disable docstring inheritance
autodoc_inherit_docstrings = False
# -- Other Options ------------------------------------------------------------
# intersphinx_mapping = {
# 'python': ('https://docs.python.org/3', None)
# }
================================================
FILE: docs/_source/index.rst
================================================
.. OpenVQA documentation master file, created by
sphinx-quickstart on Sun Aug 4 13:54:29 2019.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
:github_url: https://github.com/MILVLG/openvqa
OpenVQA Documentation
=====================
.. raw:: html
<a href="https://openvqa.readthedocs.io/en/latest/?badge=latest">
<img alt="Documentation Status" src="https://readthedocs.org/projects/openvqa/badge/?version=latest"/>
</a>
<a href="https://github.com/MILVLG">
<img alt="powered-by MILVLG" src="https://img.shields.io/badge/powered%20by-MILVLG-orange.svg?style=flat&colorA=E1523D&colorB=007D8A"/>
</a>
OpenVQA is a general platform for visual question answering (VQA) research,
implementing state-of-the-art approaches on different benchmark datasets.
Support for more methods and datasets will be added continuously.
.. toctree::
:caption: The Basics
:name: basics
:maxdepth: 1
Installation <basic/install>
Getting Started <basic/getting_started>
Model Zoo <basic/model_zoo>
.. toctree::
:caption: Advanced topics
:name: advanced-topics
:maxdepth: 1
Adding a Model <advanced/adding_model>
Contributing <advanced/contributing>
------
This repo is currently maintained by Zhou Yu (`@yuzcccc`_) and Yuhao Cui (`@cuiyuhao1996`_).
This version of the documentation was built on |today|.
.. _@yuzcccc: https://github.com/yuzcccc
.. _@cuiyuhao1996: https://github.com/cuiyuhao1996
================================================
FILE: docs/_templates/layout.html
================================================
{% extends "!layout.html" %}
{# NOTE(review): this <link> sits outside any {% block %}; in Jinja template
   inheritance, top-level content of a child template is ignored, so this line
   likely has no effect — confirm intent or move it into a block. #}
<link rel="canonical" href="{{ theme_canonical_url }}{{ pagename }}.html" />

{# Banner injected above the theme's navigation menu, pointing readers of the
   development docs at the latest stable release. #}
{% block menu %}
<div>
<a style="color:#F05732" href="{{ theme_canonical_url }}{{ pagename }}.html">
You are viewing unstable developer preview docs.
Click here to view docs for latest stable release.
</a>
</div>
{{ super() }}
{% endblock %}

{# Analytics snippets appended after the theme's own footer:
   classic Google Analytics (analytics.js), gtag.js, and an AdWords
   conversion-tracking pixel. #}
{% block footer %}
{{ super() }}
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

ga('create', 'UA-90545585-1', 'auto');
ga('send', 'pageview');
</script>

<script async src="https://www.googletagmanager.com/gtag/js?id=UA-117752657-2"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'UA-117752657-2');
</script>

<img height="1" width="1" style="border-style:none;" alt="" src="https://www.googleadservices.com/pagead/conversion/795629140/?label=txkmCPmdtosBENSssfsC&guid=ON&script=0"/>
{% endblock %}
================================================
FILE: docs/make.bat
================================================
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

REM Default to the sphinx-build executable on PATH unless the caller has
REM provided SPHINXBUILD explicitly.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=_source
set BUILDDIR=_build

REM No build target given: show the help screen instead.
if "%1" == "" goto help

REM Probe that sphinx-build can be launched; errorlevel 9009 is the
REM "command not found" code on Windows.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

REM Forward the requested target (html, clean, ...) to Sphinx's make-mode.
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
================================================
FILE: docs/readme.md
================================================
# How to Edit OpenVQA Document
The OpenVQA documentation is built with [Sphinx](https://www.sphinx-doc.org/en/master/) and hosted on [Read the Docs](https://readthedocs.org/).
You need to know both the [Markdown](https://markdown-zh.readthedocs.io/) and
[reStructuredText](http://docutils.sourceforge.net/rst.html) plaintext markup syntaxes.
We use the `.md` and `.rst` suffixes to distinguish them.
Usually, OpenVQA code contributors also participate in maintaining the documentation.
In most cases, programmers have already learned Markdown syntax, so Markdown is used for simple content.
In order to use the [autodoc](https://www.sphinx-doc.org/ext/autodoc.html) feature of Sphinx,
you must also be familiar with the reStructuredText syntax mentioned above.
## Edit and Debug
Different developers have different document maintenance habits,
it is recommended to maintain the document with a separate `docs: xxxx` branch
instead of directly making Pull Requests to the master branch.
When debugging locally, we usually use two instructions:
```shell
.\make.bat clean
.\make.bat html
```
Note:
- Make sure the current path is under the `docs` folder and have installed all things in `requirements.txt`.
- `clean` operation must be performed before `build`, otherwise undetectable errors may occur.
## Push to GitHub
In order to simplify the code review process and reduce `.git` size, changes to the `_build` folder are usually not logged.
(Check the `.gitignore` file in the root path of the project and find `docs/_build/` line for Sphinx documentation)
Only the contents in the `_source` folder will be submitted to GitHub (unless `_template` or `_theme` is used).
## Build and Host on Readthedocs
Read the Docs detects changes to the documentation source through webhooks;
after the source is updated, you need to check whether the documentation hosted on Read the Docs was built successfully.
================================================
FILE: docs/requirements.txt
================================================
sphinx
sphinx_rtd_theme
recommonmark
sphinx-markdown-tables
================================================
FILE: openvqa/core/base_cfgs.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
from openvqa.core.path_cfgs import PATH
import os, torch, random
import numpy as np
from types import MethodType
class BaseCfgs(PATH):
    """Base configuration shared by every model and dataset.

    Extends PATH (dataset/feature path bookkeeping) with device, training,
    loss and optimizer hyper-parameters.  Typical lifecycle: construct,
    merge CLI/YAML overrides in via str_to_bool() / parse_to_dict() /
    add_args(), then call proc() once to validate the settings and derive
    everything that depends on them.
    """

    def __init__(self):
        super(BaseCfgs, self).__init__()

        # Set Devices
        # If use multi-gpu training, you can set e.g.'0, 1, 2' instead
        self.GPU = '0'

        # Set Seed For CPU And GPUs
        self.SEED = random.randint(0, 9999999)

        # -------------------------
        # ---- Version Control ----
        # -------------------------

        # You can set a name to start new training
        self.VERSION = str(self.SEED)

        # Use checkpoint to resume training
        self.RESUME = False

        # Resume training version or testing version
        self.CKPT_VERSION = self.VERSION

        # Resume training epoch or testing epoch
        self.CKPT_EPOCH = 0

        # if set 'CKPT_PATH', -> 'CKPT_VERSION' and 'CKPT_EPOCH' will not work any more
        self.CKPT_PATH = None

        # Print loss every iteration
        self.VERBOSE = True

        # ------------------------------
        # ---- Data Provider Params ----
        # ------------------------------

        self.MODEL = ''
        self.MODEL_USE = ''
        self.DATASET = ''

        # Run as 'train' 'val' or 'test'
        self.RUN_MODE = ''

        # Set True to evaluate offline when an epoch finished
        # (only work when train with 'train' split)
        self.EVAL_EVERY_EPOCH = True

        # Set True to save the prediction vector
        # (use in ensemble)
        self.TEST_SAVE_PRED = False

        # A external method to set train split
        # will override the SPLIT['train']
        self.TRAIN_SPLIT = 'train'

        # Set True to use pretrained GloVe word embedding
        # (GloVe: spaCy https://spacy.io/)
        self.USE_GLOVE = True

        # Word embedding matrix size
        # (token size x WORD_EMBED_SIZE)
        self.WORD_EMBED_SIZE = 300

        # All features size, per dataset: (num regions/grids, feature dim)
        # for frcn/grid features, (num regions, 5) for bbox features.
        self.FEAT_SIZE = {
            'vqa': {
                'FRCN_FEAT_SIZE': (100, 2048),
                'BBOX_FEAT_SIZE': (100, 5),
            },
            'gqa': {
                'FRCN_FEAT_SIZE': (100, 2048),
                'GRID_FEAT_SIZE': (49, 2048),
                'BBOX_FEAT_SIZE': (100, 5),
            },
            'clevr': {
                'GRID_FEAT_SIZE': (196, 1024),
            },
        }

        # Set if bbox_feat need be normalize by image size, default: False
        self.BBOX_NORMALIZE = False

        # Default training batch size: 64
        self.BATCH_SIZE = 64

        # Multi-thread I/O
        self.NUM_WORKERS = 8

        # Use pin memory
        # (Warning: pin memory can accelerate GPU loading but may
        # increase the CPU memory usage when NUM_WORKS is big)
        self.PIN_MEM = True

        # Large model can not training with batch size 64
        # Gradient accumulate can split batch to reduce gpu memory usage
        # (Warning: BATCH_SIZE should be divided by GRAD_ACCU_STEPS)
        self.GRAD_ACCU_STEPS = 1

        # --------------------------
        # ---- Optimizer Params ----
        # --------------------------

        # Define the loss function
        '''
        Loss(case-sensitive):
        'ce'    : Cross Entropy -> NLLLoss(LogSoftmax(output), label) = CrossEntropyLoss(output, label)
        'bce'   : Binary Cross Entropy -> BCELoss(Sigmoid(output), label) = BCEWithLogitsLoss(output, label)
        'kld'   : Kullback-Leibler Divergence -> KLDivLoss(LogSoftmax(output), Softmax(label))
        'mse'   : Mean Squared Error -> MSELoss(output, label)

        Reduction(case-sensitive):
        'none': no reduction will be applied
        'elementwise_mean': the sum of the output will be divided by the number of elements in the output
        'sum': the output will be summed
        '''
        self.LOSS_FUNC = ''
        self.LOSS_REDUCTION = ''

        # The base learning rate
        self.LR_BASE = 0.0001

        # Learning rate decay ratio
        self.LR_DECAY_R = 0.2

        # Learning rate decay at {x, y, z...} epoch
        self.LR_DECAY_LIST = [10, 12]

        # Warmup epoch lr*{1/(n+1), 2/(n+1), ... , n/(n+1)}
        self.WARMUP_EPOCH = 3

        # Max training epoch
        self.MAX_EPOCH = 13

        # Gradient clip
        # (default: -1 means not using)
        self.GRAD_NORM_CLIP = -1

        # Optimizer
        '''
        Optimizer(case-sensitive):
        'Adam'      : default -> {betas:(0.9, 0.999), eps:1e-8, weight_decay:0, amsgrad:False}
        'Adamax'    : default -> {betas:(0.9, 0.999), eps:1e-8, weight_decay:0}
        'RMSprop'   : default -> {alpha:0.99, eps:1e-8, weight_decay:0, momentum:0, centered:False}
        'SGD'       : default -> {momentum:0, dampening:0, weight_decay:0, nesterov:False}
        'Adadelta'  : default -> {rho:0.9, eps:1e-6, weight_decay:0}
        'Adagrad'   : default -> {lr_decay:0, weight_decay:0, initial_accumulator_value:0}

        In YML files:
        If you want to self-define the optimizer parameters, set a dict named OPT_PARAMS contains the keys you want to modify.
        !!! Warning: keys: ['params', 'lr'] should not be set.
        !!! Warning: To avoid ambiguity, the value of keys should be defined as string type.
        If you not define the OPT_PARAMS, all parameters of optimizer will be set as default.
        Example:
        mcan_small.yml ->
            OPT: Adam
            OPT_PARAMS: {betas: '(0.9, 0.98)', eps: '1e-9'}
        '''
        # case-sensitive
        self.OPT = ''
        self.OPT_PARAMS = {}

    def str_to_bool(self, args):
        """Convert string-valued boolean CLI flags on `args` back to bool.

        argparse delivers these flags as the strings 'True'/'False';
        eval() turns them into real booleans in place.  Returns `args`.
        """
        bool_list = [
            'EVAL_EVERY_EPOCH',
            'TEST_SAVE_PRED',
            'RESUME',
            'PIN_MEM',
            'VERBOSE',
        ]

        for arg in dir(args):
            if arg in bool_list and getattr(args, arg) is not None:
                setattr(args, arg, eval(getattr(args, arg)))

        return args

    def parse_to_dict(self, args):
        """Collect every public, non-method, non-None attribute of `args`
        into a plain dict (used to merge CLI overrides into the config)."""
        args_dict = {}
        for arg in dir(args):
            if not arg.startswith('_') and not isinstance(getattr(args, arg), MethodType):
                if getattr(args, arg) is not None:
                    args_dict[arg] = getattr(args, arg)

        return args_dict

    def add_args(self, args_dict):
        """Set each key/value pair of `args_dict` as an attribute on self,
        overriding the defaults declared in __init__."""
        for arg in args_dict:
            setattr(self, arg, args_dict[arg])

    def proc(self):
        """Validate the merged configuration and derive dependent settings.

        Wires up GPU visibility, RNG seeds, dataset splits, loss-function
        bookkeeping and the optimizer's default keyword arguments.  Must be
        called exactly once, after all overrides have been applied.
        """
        assert self.RUN_MODE in ['train', 'val', 'test']

        # ------------ Devices setup
        os.environ['CUDA_VISIBLE_DEVICES'] = self.GPU
        self.N_GPU = len(self.GPU.split(','))
        self.DEVICES = [_ for _ in range(self.N_GPU)]
        torch.set_num_threads(2)

        # ------------ Path check
        self.check_path(self.DATASET)

        # ------------ Model setup (Deprecated)
        # self.MODEL_USE = self.MODEL.split('_')[0]

        # ------------ Seed setup
        # fix pytorch seed
        torch.manual_seed(self.SEED)
        if self.N_GPU < 2:
            torch.cuda.manual_seed(self.SEED)
        else:
            torch.cuda.manual_seed_all(self.SEED)
        torch.backends.cudnn.deterministic = True

        # fix numpy seed
        np.random.seed(self.SEED)

        # fix random seed
        random.seed(self.SEED)

        if self.CKPT_PATH is not None:
            print("Warning: you are now using 'CKPT_PATH' args, "
                  "'CKPT_VERSION' and 'CKPT_EPOCH' will not work")
            # Random suffix keeps result files of different runs over the
            # same checkpoint path from colliding.
            self.CKPT_VERSION = self.CKPT_PATH.split('/')[-1] + '_' + str(random.randint(0, 9999999))

        # ------------ Split setup
        self.SPLIT = self.SPLITS[self.DATASET]
        self.SPLIT['train'] = self.TRAIN_SPLIT
        # Disable per-epoch offline evaluation when the val split is part of
        # the training data, or when we are not training at all.
        if self.SPLIT['val'] in self.SPLIT['train'].split('+') or self.RUN_MODE not in ['train']:
            self.EVAL_EVERY_EPOCH = False

        if self.RUN_MODE not in ['test']:
            self.TEST_SAVE_PRED = False

        # ------------ Gradient accumulate setup
        assert self.BATCH_SIZE % self.GRAD_ACCU_STEPS == 0
        self.SUB_BATCH_SIZE = int(self.BATCH_SIZE / self.GRAD_ACCU_STEPS)

        # Set small eval batch size will reduce gpu memory usage
        self.EVAL_BATCH_SIZE = int(self.SUB_BATCH_SIZE / 2)

        # ------------ Loss process
        assert self.LOSS_FUNC in ['ce', 'bce', 'kld', 'mse']
        assert self.LOSS_REDUCTION in ['none', 'elementwise_mean', 'sum']

        # Short loss name -> torch.nn loss class name.
        self.LOSS_FUNC_NAME_DICT = {
            'ce': 'CrossEntropyLoss',
            'bce': 'BCEWithLogitsLoss',
            'kld': 'KLDivLoss',
            'mse': 'MSELoss',
        }

        # Non-linearities applied to (prediction, label) before the loss.
        self.LOSS_FUNC_NONLINEAR = {
            'ce': [None, 'flat'],
            'bce': [None, None],
            'kld': ['log_softmax', None],
            'mse': [None, None],
        }

        # Loss functions each dataset/task is allowed to use.
        self.TASK_LOSS_CHECK = {
            'vqa': ['bce', 'kld'],
            'gqa': ['ce'],
            'clevr': ['ce'],
        }

        assert self.LOSS_FUNC in self.TASK_LOSS_CHECK[self.DATASET], \
            self.DATASET + 'task only support' + str(self.TASK_LOSS_CHECK[self.DATASET]) + 'loss.' + \
            'Modify the LOSS_FUNC in configs to get a better score.'

        # ------------ Optimizer parameters process
        assert self.OPT in ['Adam', 'Adamax', 'RMSprop', 'SGD', 'Adadelta', 'Adagrad']
        optim = getattr(torch.optim, self.OPT)
        # Introspect the optimizer's __init__ to recover its default keyword
        # arguments: co_varnames[3:argcount] skips (self, params, lr) and
        # __defaults__[1:] skips lr's default.
        # NOTE(review): this relies on every supported optimizer declaring
        # (self, params, lr, ...) in exactly that order — confirm against the
        # installed torch version.
        default_params_dict = dict(zip(optim.__init__.__code__.co_varnames[3: optim.__init__.__code__.co_argcount],
                                       optim.__init__.__defaults__[1:]))

        # Local re-implementation equivalent to the builtin all(); it shadows
        # the builtin within this method only.
        def all(iterable):
            for element in iterable:
                if not element:
                    return False
            return True
        # Every user-supplied OPT_PARAMS key must be a real optimizer parameter.
        assert all(list(map(lambda x: x in default_params_dict, self.OPT_PARAMS)))

        # YAML values must be strings (e.g. '(0.9, 0.98)'); eval() converts
        # them to Python objects.  Any other type aborts the run.
        for key in self.OPT_PARAMS:
            if isinstance(self.OPT_PARAMS[key], str):
                self.OPT_PARAMS[key] = eval(self.OPT_PARAMS[key])
            else:
                print("To avoid ambiguity, set the value of 'OPT_PARAMS' to string type")
                exit(-1)
        # User-supplied values override the introspected defaults.
        self.OPT_PARAMS = {**default_params_dict, **self.OPT_PARAMS}

    def __str__(self):
        """Render every public, non-method config attribute, one per line."""
        __C_str = ''
        for attr in dir(self):
            if not attr.startswith('__') and not isinstance(getattr(self, attr), MethodType):
                __C_str += '{ %-17s }->' % attr + str(getattr(self, attr)) + '\n'

        return __C_str
#
#
# if __name__ == '__main__':
# __C = Cfgs()
# __C.proc()
================================================
FILE: openvqa/core/base_dataset.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
import numpy as np
import glob, json, torch, random
import torch.utils.data as Data
import torch.nn as nn
from openvqa.utils.feat_filter import feat_filter
class BaseDataSet(Data.Dataset):
    """Abstract dataset shared by all benchmarks.

    Concrete loaders (vqa/gqa/clevr) fill in the vocabulary tables and
    sizes in their own __init__ and implement the two load_* hooks; this
    base class only assembles one sample as a 5-tuple of torch tensors.
    """

    def __init__(self):
        # Vocabulary / answer lookup tables, populated by subclasses.
        self.token_to_ix = None
        self.pretrained_emb = None
        self.ans_to_ix = None
        self.ix_to_ans = None

        # Dataset statistics, populated by subclasses.
        self.data_size = None
        self.token_size = None
        self.ans_size = None

    def load_ques_ans(self, idx):
        # Subclass hook: returns (question indices, answer, image id).
        raise NotImplementedError()

    def load_img_feats(self, idx, iid):
        # Subclass hook: returns (frcn_feat, grid_feat, bbox_feat) arrays.
        raise NotImplementedError()

    def __getitem__(self, idx):
        """Build one sample: five torch tensors converted from the numpy
        arrays supplied by the subclass hooks."""
        ques_ix_iter, ans_iter, iid = self.load_ques_ans(idx)
        frcn_feat_iter, grid_feat_iter, bbox_feat_iter = self.load_img_feats(idx, iid)

        arrays = (
            frcn_feat_iter,
            grid_feat_iter,
            bbox_feat_iter,
            ques_ix_iter,
            ans_iter,
        )
        return tuple(torch.from_numpy(arr) for arr in arrays)

    def __len__(self):
        return self.data_size

    def shuffle_list(self, list):
        # In-place shuffle of the given sample list.
        random.shuffle(list)
class BaseAdapter(nn.Module):
    """Abstract feature-adapter module.

    Routes both construction and the forward pass to dataset-specific
    hooks ('vqa' / 'gqa' / 'clevr') that concrete model adapters
    implement; any other __C.DATASET value aborts the process.
    """

    def __init__(self, __C):
        super(BaseAdapter, self).__init__()
        self.__C = __C

        init_hooks = {
            'vqa': self.vqa_init,
            'gqa': self.gqa_init,
            'clevr': self.clevr_init,
        }
        hook = init_hooks.get(__C.DATASET)
        if hook is None:
            exit(-1)
        hook(__C)

    def vqa_init(self, __C):
        # Implemented by the concrete adapter of each model.
        raise NotImplementedError()

    def gqa_init(self, __C):
        raise NotImplementedError()

    def clevr_init(self, __C):
        raise NotImplementedError()

    def forward(self, frcn_feat, grid_feat, bbox_feat):
        """Filter the raw features for the active dataset, then dispatch
        to the matching *_forward hook."""
        feat_dict = feat_filter(self.__C.DATASET, frcn_feat, grid_feat, bbox_feat)

        forward_hooks = {
            'vqa': self.vqa_forward,
            'gqa': self.gqa_forward,
            'clevr': self.clevr_forward,
        }
        hook = forward_hooks.get(self.__C.DATASET)
        if hook is None:
            exit(-1)
        return hook(feat_dict)

    def vqa_forward(self, feat_dict):
        raise NotImplementedError()

    def gqa_forward(self, feat_dict):
        raise NotImplementedError()

    def clevr_forward(self, feat_dict):
        raise NotImplementedError()
================================================
FILE: openvqa/core/path_cfgs.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
import os
class PATH:
    """Central registry of dataset, feature, and output paths.

    All paths are relative to the working directory the program is launched
    from.  Instantiating PATH builds the path tables and creates the output
    directories (./results/*, ./ckpts) if they do not yet exist.
    """

    def __init__(self):
        self.init_path()
        # self.check_path()

    def init_path(self):
        """Build the path tables and create the output directories."""
        self.DATA_ROOT = './data'
        # self.DATA_ROOT = '/data/datasets'
        # self.DATA_ROOT = '/data1/datasets'
        # self.DATA_ROOT = '/home/features'

        # Per-dataset root directories.
        self.DATA_PATH = {
            'vqa': self.DATA_ROOT + '/vqa',
            'gqa': self.DATA_ROOT + '/gqa',
            'clevr': self.DATA_ROOT + '/clevr',
        }

        # Pre-extracted image feature directories, keyed by split (or by
        # feature kind for GQA).
        self.FEATS_PATH = {
            'vqa': {
                'train': self.DATA_PATH['vqa'] + '/feats' + '/train2014',
                'val': self.DATA_PATH['vqa'] + '/feats' + '/val2014',
                'test': self.DATA_PATH['vqa'] + '/feats' + '/test2015',
            },
            'gqa': {
                'default-frcn': self.DATA_PATH['gqa'] + '/feats' + '/gqa-frcn',
                'default-grid': self.DATA_PATH['gqa'] + '/feats' + '/gqa-grid',
            },
            'clevr': {
                'train': self.DATA_PATH['clevr'] + '/feats' + '/train',
                'val': self.DATA_PATH['clevr'] + '/feats' + '/val',
                'test': self.DATA_PATH['clevr'] + '/feats' + '/test',
            },
        }

        # Raw question / annotation json files, keyed by split.
        self.RAW_PATH = {
            'vqa': {
                'train': self.DATA_PATH['vqa'] + '/raw' + '/v2_OpenEnded_mscoco_train2014_questions.json',
                'train-anno': self.DATA_PATH['vqa'] + '/raw' + '/v2_mscoco_train2014_annotations.json',
                'val': self.DATA_PATH['vqa'] + '/raw' + '/v2_OpenEnded_mscoco_val2014_questions.json',
                'val-anno': self.DATA_PATH['vqa'] + '/raw' + '/v2_mscoco_val2014_annotations.json',
                'vg': self.DATA_PATH['vqa'] + '/raw' + '/VG_questions.json',
                'vg-anno': self.DATA_PATH['vqa'] + '/raw' + '/VG_annotations.json',
                'test': self.DATA_PATH['vqa'] + '/raw' + '/v2_OpenEnded_mscoco_test2015_questions.json',
            },
            'gqa': {
                'train': self.DATA_PATH['gqa'] + '/raw' + '/questions1.2/train_balanced_questions.json',
                'val': self.DATA_PATH['gqa'] + '/raw' + '/questions1.2/val_balanced_questions.json',
                'testdev': self.DATA_PATH['gqa'] + '/raw' + '/questions1.2/testdev_balanced_questions.json',
                'test': self.DATA_PATH['gqa'] + '/raw' + '/questions1.2/submission_all_questions.json',
                'val_all': self.DATA_PATH['gqa'] + '/raw' + '/questions1.2/val_all_questions.json',
                'testdev_all': self.DATA_PATH['gqa'] + '/raw' + '/questions1.2/testdev_all_questions.json',
                'train_choices': self.DATA_PATH['gqa'] + '/raw' + '/eval/train_choices',
                'val_choices': self.DATA_PATH['gqa'] + '/raw' + '/eval/val_choices.json',
            },
            'clevr': {
                'train': self.DATA_PATH['clevr'] + '/raw' + '/questions/CLEVR_train_questions.json',
                'val': self.DATA_PATH['clevr'] + '/raw' + '/questions/CLEVR_val_questions.json',
                'test': self.DATA_PATH['clevr'] + '/raw' + '/questions/CLEVR_test_questions.json',
            },
        }

        # Maps run mode -> evaluation split name per dataset.
        # NOTE(review): '' for 'train' appears to mean "no extra split";
        # confirm against the consumer in base_cfgs.
        self.SPLITS = {
            'vqa': {
                'train': '',
                'val': 'val',
                'test': 'test',
            },
            'gqa': {
                'train': '',
                'val': 'testdev',
                'test': 'test',
            },
            'clevr': {
                'train': '',
                'val': 'val',
                'test': 'test',
            },
        }

        # Output locations.
        self.RESULT_PATH = './results/result_test'
        self.PRED_PATH = './results/pred'
        self.CACHE_PATH = './results/cache'
        self.LOG_PATH = './results/log'
        self.CKPTS_PATH = './ckpts'

        # Create output directories up front.  makedirs(..., exist_ok=True)
        # also creates a missing './results' parent; the previous
        # os.listdir('./results') checks raised FileNotFoundError when the
        # results directory itself did not exist yet.
        for out_dir in (self.RESULT_PATH, self.PRED_PATH, self.CACHE_PATH,
                        self.LOG_PATH, self.CKPTS_PATH):
            os.makedirs(out_dir, exist_ok=True)

    def check_path(self, dataset=None):
        """Verify that feature and raw-data paths exist on disk.

        :param dataset: restrict the check to a single dataset key
            ('vqa' / 'gqa' / 'clevr'); when None, all datasets are checked.

        Prints the first missing path and exits the process with -1,
        matching the original fail-fast behavior.
        """
        print('Checking dataset ........')
        if dataset:
            feat_tables = {dataset: self.FEATS_PATH[dataset]}
            raw_tables = {dataset: self.RAW_PATH[dataset]}
        else:
            feat_tables = self.FEATS_PATH
            raw_tables = self.RAW_PATH

        # Feature directories first, then raw files (same order as before).
        for ds in feat_tables:
            for item in feat_tables[ds]:
                if not os.path.exists(feat_tables[ds][item]):
                    print(feat_tables[ds][item], 'NOT EXIST')
                    exit(-1)
        for ds in raw_tables:
            for item in raw_tables[ds]:
                if not os.path.exists(raw_tables[ds][item]):
                    print(raw_tables[ds][item], 'NOT EXIST')
                    exit(-1)

        print('Finished!')
        print('')
================================================
FILE: openvqa/datasets/clevr/clevr_loader.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
import numpy as np
import glob, json, re, en_vectors_web_lg
from openvqa.core.base_dataset import BaseDataSet
from openvqa.utils.ans_punct import prep_ans
class DataSet(BaseDataSet):
    """CLEVR dataset loader.

    Builds the question-token and answer vocabularies from the raw CLEVR
    question json files, indexes the pre-extracted grid feature .npz files,
    and serves (question indices, answer index, grid features) per sample.
    The frcn/bbox feature slots are zero placeholders (CLEVR is grid-only).
    """
    def __init__(self, __C):
        super(DataSet, self).__init__()
        self.__C = __C

        # --------------------------
        # ---- Raw data loading ----
        # --------------------------

        # Loading all image paths
        # grid_feat_path_list = \
        #     glob.glob(__C.FEATS_PATH[__C.DATASET]['train'] + '/*.npz') + \
        #     glob.glob(__C.FEATS_PATH[__C.DATASET]['val'] + '/*.npz') + \
        #     glob.glob(__C.FEATS_PATH[__C.DATASET]['test'] + '/*.npz')

        # Loading question word list
        # (all three splits, so the vocabulary covers test-time words too)
        stat_ques_list = \
            json.load(open(__C.RAW_PATH[__C.DATASET]['train'], 'r'))['questions'] + \
            json.load(open(__C.RAW_PATH[__C.DATASET]['val'], 'r'))['questions'] + \
            json.load(open(__C.RAW_PATH[__C.DATASET]['test'], 'r'))['questions']

        # Loading answer word list
        # NOTE: CLEVR stores the answer inside each question record, so the
        # answer vocabulary is built from the train+val question lists
        # (the test split carries no answers).
        stat_ans_list = \
            json.load(open(__C.RAW_PATH[__C.DATASET]['train'], 'r'))['questions'] + \
            json.load(open(__C.RAW_PATH[__C.DATASET]['val'], 'r'))['questions']

        # Loading question and answer list for the active run splits
        # (e.g. 'train' or 'train+val', joined with '+').
        self.ques_list = []
        grid_feat_path_list = []
        split_list = __C.SPLIT[__C.RUN_MODE].split('+')
        for split in split_list:
            self.ques_list += json.load(open(__C.RAW_PATH[__C.DATASET][split], 'r'))['questions']
            grid_feat_path_list += glob.glob(__C.FEATS_PATH[__C.DATASET][split] + '/*.npz')

        # Define run data size
        self.data_size = self.ques_list.__len__()
        print(' ========== Dataset size:', self.data_size)

        # ------------------------
        # ---- Data statistic ----
        # ------------------------

        # {image id} -> {image feature absolutely path}
        self.iid_to_grid_feat_path = self.img_feat_path_load(grid_feat_path_list)

        # Tokenize (also gathers GloVe vectors when USE_GLOVE is set)
        self.token_to_ix, self.pretrained_emb, max_token = self.tokenize(stat_ques_list, __C.USE_GLOVE)
        self.token_size = self.token_to_ix.__len__()
        print(' ========== Question token vocab size:', self.token_size)

        # -1 means "use the observed maximum question length"; kept for
        # parity with loaders where a fixed cap may be configured.
        # NOTE(review): as written this always falls through to max_token.
        self.max_token = -1
        if self.max_token == -1:
            self.max_token = max_token
        print('Max token length:', max_token, 'Trimmed to:', self.max_token)

        # Answers statistic
        self.ans_to_ix, self.ix_to_ans = self.ans_stat(stat_ans_list)
        self.ans_size = self.ans_to_ix.__len__()
        print(' ========== Answer token vocab size:', self.ans_size)
        print('Finished!')
        print('')

    def img_feat_path_load(self, path_list):
        """Map image id (the .npz file stem) -> absolute feature path."""
        iid_to_path = {}
        for ix, path in enumerate(path_list):
            # e.g. '/feats/train/42.npz' -> '42'
            iid = path.split('/')[-1].split('.')[0]
            iid_to_path[iid] = path
        return iid_to_path

    def tokenize(self, stat_ques_list, use_glove):
        """Build the word->index vocabulary (and optional GloVe matrix).

        Returns (token_to_ix, pretrained_emb, max_token) where max_token is
        the longest question length observed over stat_ques_list.
        """
        token_to_ix = {
            'PAD': 0,
            'UNK': 1,
            'CLS': 2,
        }

        spacy_tool = None
        pretrained_emb = []
        if use_glove:
            # 300-d GloVe vectors via the en_vectors_web_lg spaCy model.
            spacy_tool = en_vectors_web_lg.load()
            pretrained_emb.append(spacy_tool('PAD').vector)
            pretrained_emb.append(spacy_tool('UNK').vector)
            pretrained_emb.append(spacy_tool('CLS').vector)

        max_token = 0
        for ques in stat_ques_list:
            # Strip punctuation, split hyphen/slash compounds, lowercase.
            words = re.sub(
                r"([.,'!?\"()*#:;])",
                '',
                ques['question'].lower()
            ).replace('-', ' ').replace('/', ' ').split()

            if len(words) > max_token:
                max_token = len(words)

            for word in words:
                if word not in token_to_ix:
                    token_to_ix[word] = len(token_to_ix)
                    if use_glove:
                        pretrained_emb.append(spacy_tool(word).vector)

        pretrained_emb = np.array(pretrained_emb)

        return token_to_ix, pretrained_emb, max_token

    def ans_stat(self, stat_ans_list):
        """Build answer<->index maps from the answer of each question record."""
        ans_to_ix = {}
        ix_to_ans = {}

        for ans_stat in stat_ans_list:
            ans = ans_stat['answer']
            if ans not in ans_to_ix:
                ix_to_ans[ans_to_ix.__len__()] = ans
                ans_to_ix[ans] = ans_to_ix.__len__()

        return ans_to_ix, ix_to_ans

    # ----------------------------------------------
    # ---- Real-Time Processing Implementations ----
    # ----------------------------------------------

    def load_ques_ans(self, idx):
        """Return (question indices, answer index array, image id) for idx."""
        # if self.__C.RUN_MODE in ['train']:
        ques = self.ques_list[idx]
        iid = str(ques['image_index'])

        # Process question
        ques_ix_iter = self.proc_ques(ques, self.token_to_ix, max_token=self.max_token)
        ans_iter = np.zeros(1)

        if self.__C.RUN_MODE in ['train']:
            # process answers (only the train split has ground truth here)
            ans = ques['answer']
            ans_iter = self.proc_ans(ans, self.ans_to_ix)

        return ques_ix_iter, ans_iter, iid

    def load_img_feats(self, idx, iid):
        """Return (frcn, grid, bbox) features; frcn/bbox are placeholders."""
        grid_feat = np.load(self.iid_to_grid_feat_path[iid])
        grid_feat_iter = grid_feat['x']

        return np.zeros(1), grid_feat_iter, np.zeros(1)

    # ------------------------------------
    # ---- Real-Time Processing Utils ----
    # ------------------------------------

    def proc_ques(self, ques, token_to_ix, max_token):
        """Tokenize one question into a fixed-length (max_token) index vector.

        Unknown words map to UNK; the tail is zero-padded (PAD == 0).
        """
        ques_ix = np.zeros(max_token, np.int64)

        words = re.sub(
            r"([.,'!?\"()*#:;])",
            '',
            ques['question'].lower()
        ).replace('-', ' ').replace('/', ' ').split()

        for ix, word in enumerate(words):
            if word in token_to_ix:
                ques_ix[ix] = token_to_ix[word]
            else:
                ques_ix[ix] = token_to_ix['UNK']

            if ix + 1 == max_token:
                break

        return ques_ix

    def proc_ans(self, ans, ans_to_ix):
        """Encode one answer string as a length-1 int64 index array."""
        ans_ix = np.zeros(1, np.int64)
        ans_ix[0] = ans_to_ix[ans]

        return ans_ix
================================================
FILE: openvqa/datasets/clevr/eval/result_eval.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
import json, pickle
import numpy as np
from collections import defaultdict
def eval(__C, dataset, ans_ix_list, pred_list, result_eval_file, ensemble_file, log_file, valid=False):
    """Write CLEVR predictions to disk and optionally score them.

    :param __C: global config; reads TEST_SAVE_PRED, RAW_PATH, DATASET, SPLIT.
    :param dataset: DataSet providing data_size, ans_size, ix_to_ans.
    :param ans_ix_list: predicted answer index per question, in order.
    :param pred_list: raw prediction vectors (saved when TEST_SAVE_PRED).
    :param result_eval_file: output path stem; '.txt' is appended.
    :param ensemble_file: pickle path for the raw prediction vectors.
    :param log_file: accuracy log file, appended to when valid=True.
    :param valid: when True, compare against ground-truth answers of the
        validation split and log per-question-type accuracies.

    Fix vs. original: all three files are now opened with context managers
    so the handles are closed even if an exception is raised mid-write.
    """
    result_eval_file = result_eval_file + '.txt'
    ans_size = dataset.ans_size

    # One predicted answer string per line, in question order.
    with open(result_eval_file, 'w') as result_fs:
        for qix in range(dataset.data_size):
            result_fs.write(dataset.ix_to_ans[ans_ix_list[qix]])
            result_fs.write("\n")

    if __C.TEST_SAVE_PRED:
        print('Save the prediction vector to file: {}'.format(ensemble_file))
        pred_list = np.array(pred_list).reshape(-1, ans_size)
        result_pred = [{
            'pred': pred_list[qix],
            'qid': qix
        } for qix in range(dataset.data_size)]
        with open(ensemble_file, 'wb+') as ens_fs:
            pickle.dump(result_pred, ens_fs, protocol=-1)

    if valid:
        # Ground-truth answers come from the raw validation question file.
        ques_file_path = __C.RAW_PATH[__C.DATASET][__C.SPLIT['val']]
        with open(ques_file_path, 'r') as f:
            questions = json.load(f)['questions']
        true_answers = [ques['answer'] for ques in questions]

        # Re-read the answers we just wrote above.
        with open(result_eval_file, 'r') as f:
            predicted_answers = [line.strip() for line in f]

        num_true, num_pred = len(true_answers), len(predicted_answers)
        assert num_true == num_pred, 'Expected %d answers but got %d' % (
            num_true, num_pred)

        # Bucket correctness per question type (the last program function
        # of each CLEVR question) plus an 'Overall' bucket.
        correct_by_q_type = defaultdict(list)
        for i, (true_answer, predicted_answer) in enumerate(zip(true_answers, predicted_answers)):
            correct = 1 if true_answer == predicted_answer else 0
            correct_by_q_type['Overall'].append(correct)
            q_type = questions[i]['program'][-1]['function']
            correct_by_q_type[q_type].append(correct)

        print('Write to log file: {}'.format(log_file))

        # q_dict: q_type -> [num_correct, num_total]
        q_dict = {}
        for q_type, vals in sorted(correct_by_q_type.items()):
            vals = np.asarray(vals)
            q_dict[q_type] = [vals.sum(), vals.shape[0]]

        # Score summary: fold fine-grained program types into report groups.
        # NOTE(review): assumes every listed q_type occurs in the split; a
        # missing type raises KeyError (same as the original behavior).
        score_type = ['Overall', 'Count', 'Exist', 'Compare_Numbers', 'Query_Attribute', 'Compare_Attribute']
        group_members = {
            'Compare_Numbers': ['greater_than', 'less_than'],
            'Query_Attribute': ['query_color', 'query_material', 'query_shape', 'query_size'],
            'Compare_Attribute': ['equal_color', 'equal_integer', 'equal_material', 'equal_shape', 'equal_size'],
        }

        score_dict = {
            'Overall': q_dict['Overall'],
            'Count': q_dict['count'],
            'Exist': q_dict['exist'],
        }
        for group, members in group_members.items():
            correct_num = sum(q_dict[m][0] for m in members)
            total_num = sum(q_dict[m][1] for m in members)
            score_dict[group] = [correct_num, total_num]

        with open(log_file, 'a+') as logfile:
            for q_type in score_type:
                val, tol = score_dict[q_type]
                print(q_type, '%d / %d = %.2f' % (val, tol, 100.0 * val / tol))
                logfile.write(q_type + ' : ' + '%d / %d = %.2f\n' % (val, tol, 100.0 * val / tol))
            logfile.write("\n")
================================================
FILE: openvqa/datasets/dataset_loader.py
================================================
# --------------------------------------------------------
# OpenVQA
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------
from importlib import import_module
class DatasetLoader:
    """Resolves and instantiates the DataSet class for the configured
    dataset, by importing ``openvqa.datasets.<name>.<name>_loader``.
    """

    def __init__(self, __C):
        self.__C = __C
        self.dataset = __C.DATASET
        # e.g. 'openvqa.datasets.vqa.vqa_loader'
        module_path = 'openvqa.datasets.{0}.{0}_loader'.format(self.dataset)
        self.dataset_moudle = import_module(module_path)

    def DataSet(self):
        # Build a DataSet bound to the shared config object.
        return self.dataset_moudle.DataSet(self.__C)
class EvalLoader:
    """Resolves the dataset-specific result-evaluation module
    (``openvqa.datasets.<name>.eval.result_eval``) and proxies eval calls.
    """

    def __init__(self, __C):
        self.__C = __C
        self.dataset = __C.DATASET
        # e.g. 'openvqa.datasets.vqa.eval.result_eval'
        module_path = 'openvqa.datasets.{}.eval.result_eval'.format(self.dataset)
        self.eval_moudle = import_module(module_path)

    def eval(self, __arg1, __arg2, __arg3, __arg4, __arg5, __arg6, __arg7):
        # Forward all positional arguments, prefixed with the config object,
        # to the module-level eval() of the resolved module.
        return self.eval_moudle.eval(self.__C, __arg1, __arg2, __arg3, __arg4, __arg5, __arg6, __arg7)
================================================
FILE: openvqa/datasets/gqa/dicts.json
================================================
[{"yes": 0, "pipe": 1, "no": 2, "large": 3, "girl": 4, "bed": 5, "sofa": 6, "right": 7, "dark": 8, "cabinet": 9, "left": 10, "bird": 11, "brick": 12, "rock": 13, "children": 14, "brown": 15, "blond": 16, "pants": 17, "top": 18, "horse": 19, "blue": 20, "hot dog": 21, "banana": 22, "laptop": 23, "desk": 24, "bottom": 25, "eating": 26, "man": 27, "grass": 28, "dog": 29, "silver": 30, "bag": 31, "pedestrian": 32, "cabinets": 33, "green": 34, "window": 35, "giraffe": 36, "tiny": 37, "child": 38, "yellow": 39, "wooden": 40, "parking meter": 41, "fries": 42, "plants": 43, "kiosk": 44, "orange": 45, "van": 46, "shirt": 47, "coat": 48, "controller": 49, "bench": 50, "television": 51, "black": 52, "carrot": 53, "sandwich": 54, "city": 55, "street": 56, "couch": 57, "closed": 58, "field": 59, "pink": 60, "boy": 61, "lady": 62, "tomato": 63, "horses": 64, "white": 65, "beef": 66, "cat": 67, "cutting board": 68, "overcast": 69, "vegetables": 70, "gray": 71, "onions": 72, "wood": 73, "toaster": 74, "bread": 75, "fence": 76, "player": 77, "roof": 78, "meadow": 79, "baby": 80, "calf": 81, "branch": 82, "street sign": 83, "backpack": 84, "jacket": 85, "teddy bear": 86, "game controller": 87, "herd": 88, "zoo": 89, "truck": 90, "red": 91, "printer": 92, "yard": 93, "end table": 94, "wetsuit": 95, "building": 96, "carrots": 97, "train": 98, "pans": 99, "giraffes": 100, "coffee maker": 101, "bathroom": 102, "woman": 103, "monitor": 104, "sheep": 105, "trees": 106, "dining table": 107, "park": 108, "nightstand": 109, "car": 110, "table": 111, "bicycle": 112, "donkey": 113, "cell phone": 114, "teal": 115, "chair": 116, "bathtub": 117, "waiting": 118, "purple": 119, "small": 120, "airport": 121, "colorful": 122, "stuffed bear": 123, "light brown": 124, "piano": 125, "lying": 126, "clock": 127, "pavement": 128, "snow": 129, "lemon": 130, "sandy": 131, "shelf": 132, "cheese": 133, "light blue": 134, "plant": 135, "bowl": 136, "bus": 137, "dishwasher": 138, "pepperoni": 139, "pole": 140, 
"bear": 141, "monkey": 142, "shore": 143, "hedges": 144, "wall": 145, "elephant": 146, "sidewalk": 147, "swimming pool": 148, "blender": 149, "bookshelves": 150, "mountain": 151, "pizza": 152, "birds": 153, "people": 154, "radiator": 155, "metal": 156, "striped": 157, "playing": 158, "kitten": 159, "dirty": 160, "runway": 161, "salad": 162, "sailboat": 163, "zebra": 164, "counter": 165, "lettuce": 166, "seat": 167, "asparagus": 168, "color": 169, "plastic": 170, "racket": 171, "dress": 172, "frisbee": 173, "standing": 174, "sea": 175, "keyboard": 176, "motorcycle": 177, "phone": 178, "tree": 179, "computer": 180, "pointing": 181, "iron": 182, "skis": 183, "blouse": 184, "onion": 185, "bat": 186, "light switch": 187, "hook": 188, "mirror": 189, "surfboard": 190, "candle": 191, "catcher": 192, "bricks": 193, "newspaper": 194, "handbag": 195, "knife": 196, "branches": 197, "cap": 198, "stove": 199, "pots": 200, "lawn": 201, "computer mouse": 202, "chef": 203, "steps": 204, "tan": 205, "eggplant": 206, "mountains": 207, "open": 208, "refrigerator": 209, "oranges": 210, "snowboarding": 211, "oven": 212, "utensil": 213, "bedroom": 214, "olives": 215, "little": 216, "cow": 217, "boat": 218, "microwave": 219, "pizza oven": 220, "taxi": 221, "young": 222, "drawers": 223, "tablet": 224, "choppy": 225, "foggy": 226, "apron": 227, "syrup": 228, "plate": 229, "coffee cup": 230, "taking picture": 231, "shoe": 232, "basket": 233, "pigeon": 234, "water": 235, "stop sign": 236, "mailbox": 237, "leather": 238, "remote control": 239, "home plate": 240, "spinach": 241, "tea kettle": 242, "cereal": 243, "tall": 244, "helmet": 245, "celery": 246, "vase": 247, "alien": 248, "collar": 249, "shorts": 250, "suit": 251, "supermarket": 252, "carpet": 253, "donuts": 254, "batter": 255, "outdoors": 256, "girls": 257, "skier": 258, "entertainment center": 259, "floor": 260, "chain": 261, "lamp": 262, "rope": 263, "pepper": 264, "tomatoes": 265, "drawer": 266, "forest": 267, "cars": 268, 
"balcony": 269, "guy": 270, "boats": 271, "scooter": 272, "flower": 273, "wii controller": 274, "down": 275, "shopping bag": 276, "grape": 277, "ski": 278, "ocean": 279, "comforter": 280, "mattress": 281, "lamb": 282, "customer": 283, "pan": 284, "highway": 285, "long": 286, "display": 287, "shower": 288, "nuts": 289, "sign": 290, "clear": 291, "letters": 292, "surfer": 293, "mannequin": 294, "checkered": 295, "ground": 296, "fisherman": 297, "egg": 298, "zebras": 299, "shoes": 300, "dish": 301, "coffee": 302, "paper": 303, "store": 304, "thin": 305, "glass": 306, "hat": 307, "station": 308, "spatula": 309, "train car": 310, "skateboard": 311, "lake": 312, "airplane": 313, "concrete": 314, "stainless steel": 315, "bushes": 316, "hill": 317, "road": 318, "spoon": 319, "lobby": 320, "indoors": 321, "armchair": 322, "flowers": 323, "broccoli": 324, "suv": 325, "umbrella": 326, "glasses": 327, "ham": 328, "rubber duck": 329, "croissants": 330, "carriage": 331, "burger": 332, "beach": 333, "pen": 334, "laptops": 335, "athlete": 336, "pickles": 337, "dark brown": 338, "trains": 339, "living room": 340, "screen": 341, "bikes": 342, "beige": 343, "napkin": 344, "gravel": 345, "papers": 346, "door": 347, "gold": 348, "cloudy": 349, "tofu": 350, "cows": 351, "hillside": 352, "sun": 353, "behind": 354, "jet": 355, "mushroom": 356, "material": 357, "snowboard": 358, "produce": 359, "dvd player": 360, "camera": 361, "sky": 362, "bun": 363, "walkway": 364, "vest": 365, "watch": 366, "sunny": 367, "locomotive": 368, "sausage": 369, "shop": 370, "ball": 371, "sneakers": 372, "sea foam": 373, "clouds": 374, "leaves": 375, "dresser": 376, "chili": 377, "gate": 378, "flag": 379, "stick": 380, "leggings": 381, "rubber": 382, "mugs": 383, "parsley": 384, "merchandise": 385, "grill": 386, "shallow": 387, "medicine cabinet": 388, "chairs": 389, "ceiling": 390, "curtains": 391, "peppers": 392, "huge": 393, "kettle": 394, "crouching": 395, "deer": 396, "picture": 397, "passenger": 398, 
"bears": 399, "ship": 400, "belt": 401, "umpire": 402, "short": 403, "driver": 404, "thick": 405, "reading": 406, "tape": 407, "doll": 408, "bookshelf": 409, "basil": 410, "tongs": 411, "cream colored": 412, "oil": 413, "flames": 414, "gift": 415, "rocks": 416, "apple": 417, "blanket": 418, "menu": 419, "lego": 420, "wine": 421, "kite": 422, "aquarium": 423, "swan": 424, "mask": 425, "boot": 426, "dessert": 427, "wide": 428, "headphones": 429, "baseball bat": 430, "tables": 431, "drain": 432, "logo": 433, "tie": 434, "crates": 435, "blueberries": 436, "worker": 437, "chocolate": 438, "kiwi": 439, "cookie": 440, "wine glass": 441, "boys": 442, "jumping": 443, "cloudless": 444, "rain": 445, "cord": 446, "books": 447, "speaker": 448, "heater": 449, "mickey mouse": 450, "cake": 451, "microphone": 452, "computer desk": 453, "coleslaw": 454, "wet": 455, "video games": 456, "uncomfortable": 457, "canoe": 458, "hotel room": 459, "giant": 460, "dolls": 461, "desserts": 462, "pear": 463, "roses": 464, "apples": 465, "cooker": 466, "tablecloth": 467, "unpeeled": 468, "coffee pot": 469, "bucket": 470, "buildings": 471, "gas station": 472, "staring": 473, "snow pants": 474, "gloves": 475, "couple": 476, "fireplace": 477, "stone": 478, "skate park": 479, "statue": 480, "cyclist": 481, "money": 482, "jeans": 483, "cup": 484, "cinnamon": 485, "pie": 486, "tissues": 487, "fork": 488, "at camera": 489, "rabbit": 490, "off": 491, "stuffed animal": 492, "sweet potato": 493, "skinny": 494, "duck": 495, "deep": 496, "pedestrians": 497, "sleeveless": 498, "maroon": 499, "wheelchair": 500, "skillet": 501, "snowsuit": 502, "potatoes": 503, "glove": 504, "dogs": 505, "elephants": 506, "chimney": 507, "pillow": 508, "stickers": 509, "suitcase": 510, "toilet paper": 511, "lion": 512, "drink": 513, "potato": 514, "stormy": 515, "bananas": 516, "empty": 517, "grassy": 518, "scarf": 519, "trailer": 520, "sandals": 521, "aircraft": 522, "pot": 523, "parking lot": 524, "bunny": 525, "puppy": 526, 
"sauce": 527, "ladder": 528, "smoke": 529, "t shirt": 530, "biker": 531, "fish": 532, "tangerine": 533, "coffee table": 534, "mixer": 535, "beer": 536, "tinted": 537, "sweater": 538, "doorway": 539, "berries": 540, "cooking": 541, "meats": 542, "towel": 543, "fire truck": 544, "round": 545, "donut": 546, "short sleeved": 547, "toast": 548, "hotel": 549, "squash": 550, "raincoat": 551, "talking": 552, "bagel": 553, "sour cream": 554, "lid": 555, "bandana": 556, "tissue": 557, "shelves": 558, "cupcake": 559, "men": 560, "sand": 561, "above": 562, "router": 563, "bridge": 564, "trunks": 565, "steak": 566, "long sleeved": 567, "wires": 568, "pancakes": 569, "moon": 570, "up": 571, "noodles": 572, "arrow": 573, "goose": 574, "american flag": 575, "wagon": 576, "beach umbrella": 577, "airplanes": 578, "river": 579, "heart": 580, "cone": 581, "countertop": 582, "poster": 583, "meat": 584, "drinking": 585, "rice": 586, "toilet": 587, "running": 588, "clean": 589, "roadway": 590, "dishes": 591, "fake": 592, "path": 593, "players": 594, "toothbrush": 595, "corn": 596, "skateboarder": 597, "controllers": 598, "tennis": 599, "dark blue": 600, "number": 601, "on": 602, "pastries": 603, "graffiti": 604, "place": 605, "shuttle": 606, "barrier": 607, "mustard": 608, "tray": 609, "calculator": 610, "sleeping": 611, "dispenser": 612, "cupboard": 613, "skateboarding": 614, "shape": 615, "ketchup": 616, "strawberries": 617, "kitchen": 618, "buoy": 619, "cups": 620, "skater": 621, "flower pot": 622, "match": 623, "buses": 624, "ribs": 625, "socks": 626, "grater": 627, "pipes": 628, "railroad": 629, "tree branches": 630, "jersey": 631, "air conditioner": 632, "train station": 633, "pastry": 634, "bookcase": 635, "chinese food": 636, "outfit": 637, "bike": 638, "nut": 639, "onion rings": 640, "museum": 641, "tractor": 642, "toddler": 643, "cooking pot": 644, "mushrooms": 645, "mat": 646, "bleachers": 647, "stained": 648, "walking": 649, "full": 650, "uniform": 651, "power line": 652, 
"pumpkin": 653, "watermelon": 654, "crust": 655, "bush": 656, "picture frame": 657, "looking down": 658, "grapes": 659, "character": 660, "honey": 661, "olive": 662, "power lines": 663, "serving tray": 664, "dragon": 665, "toys": 666, "helicopter": 667, "bacon": 668, "eye glasses": 669, "sword": 670, "garage": 671, "women": 672, "sitting": 673, "light bulb": 674, "chicken breast": 675, "video camera": 676, "decorations": 677, "platform": 678, "hamburger": 679, "boots": 680, "porcelain": 681, "bare": 682, "soda": 683, "light": 684, "radio": 685, "stadium": 686, "pine tree": 687, "action figure": 688, "coach": 689, "pasture": 690, "seal": 691, "cooler": 692, "guitar": 693, "sandwiches": 694, "vegetable": 695, "curly": 696, "front": 697, "toothpaste": 698, "polo shirt": 699, "dotted": 700, "restaurant": 701, "life preserver": 702, "rackets": 703, "tomato sauce": 704, "rose": 705, "eggs": 706, "fan": 707, "bedding": 708, "zucchini": 709, "dried": 710, "closet": 711, "star": 712, "khaki": 713, "artichoke": 714, "peas": 715, "umbrellas": 716, "bull": 717, "sandal": 718, "lemons": 719, "painting": 720, "blinds": 721, "magazine": 722, "cookies": 723, "luggage": 724, "pig": 725, "snake": 726, "cappuccino": 727, "intersection": 728, "beans": 729, "school bus": 730, "square": 731, "trash can": 732, "briefcase": 733, "sunglasses": 734, "dining room": 735, "juice": 736, "dirt": 737, "baseball": 738, "toy": 739, "log": 740, "cherry": 741, "toothbrushes": 742, "tennis ball": 743, "book": 744, "narrow": 745, "skirt": 746, "fire hydrant": 747, "game": 748, "house": 749, "clock tower": 750, "chicken": 751, "stroller": 752, "mud": 753, "bronze": 754, "jeep": 755, "seagull": 756, "street light": 757, "traffic lights": 758, "strawberry": 759, "computer monitor": 760, "hallway": 761, "ambulance": 762, "costume": 763, "panda bear": 764, "market": 765, "wii": 766, "raw": 767, "still": 768, "pineapple": 769, "goat": 770, "trunk": 771, "vacuum": 772, "tent": 773, "curtain": 774, "dock": 
775, "liquid": 776, "pitcher": 777, "cage": 778, "folding chair": 779, "console": 780, "ring": 781, "dry": 782, "workers": 783, "bell": 784, "shampoo bottle": 785, "new": 786, "bushy": 787, "heavy": 788, "cats": 789, "mangoes": 790, "dome": 791, "label": 792, "pasta salad": 793, "light fixture": 794, "can": 795, "traffic light": 796, "male": 797, "jockey": 798, "spectators": 799, "letter": 800, "town": 801, "sheet": 802, "powder": 803, "ice cream": 804, "toy car": 805, "video game": 806, "turkey": 807, "utensils": 808, "pond": 809, "cupcakes": 810, "riding": 811, "lime": 812, "sock": 813, "cucumber": 814, "saucer": 815, "plantains": 816, "spectator": 817, "rectangular": 818, "dugout": 819, "tea": 820, "dress shirt": 821, "owl": 822, "cables": 823, "mouse pad": 824, "rug": 825, "purse": 826, "ostrich": 827, "bottles": 828, "toppings": 829, "net": 830, "gun": 831, "cooked": 832, "adult": 833, "trash bag": 834, "undershirt": 835, "snowy": 836, "driving": 837, "fruit": 838, "symbol": 839, "receipt": 840, "ipod": 841, "figure": 842, "resting": 843, "soup": 844, "terminal": 845, "machine": 846, "cherries": 847, "pretzel": 848, "surfing": 849, "hose": 850, "binder": 851, "old": 852, "mother": 853, "sink": 854, "vine": 855, "appetizers": 856, "earring": 857, "porch": 858, "steam": 859, "calm": 860, "posing": 861, "brunette": 862, "bicycles": 863, "polar bear": 864, "paddle": 865, "microwave oven": 866, "garden": 867, "jackets": 868, "cauliflower": 869, "spices": 870, "farm": 871, "antelope": 872, "container": 873, "fence post": 874, "octopus": 875, "garnish": 876, "planter": 877, "stones": 878, "sheets": 879, "coke": 880, "bottle": 881, "swimsuit": 882, "spice": 883, "walnut": 884, "word": 885, "policeman": 886, "waffle": 887, "ottoman": 888, "desktop computer": 889, "cart": 890, "dryer": 891, "candy": 892, "skiing": 893, "broth": 894, "dip": 895, "milkshake": 896, "plates": 897, "wire": 898, "silverware": 899, "asian": 900, "office chair": 901, "outlet": 902, "projector": 
903, "flags": 904, "partly cloudy": 905, "buns": 906, "sad": 907, "feta cheese": 908, "pasta": 909, "hard drive": 910, "balloon": 911, "tv stand": 912, "tank top": 913, "lunch box": 914, "stuffed dog": 915, "picnic table": 916, "real": 917, "pillows": 918, "rainy": 919, "air": 920, "berry": 921, "soft drink": 922, "nike": 923, "trucks": 924, "harbor": 925, "restroom": 926, "tower": 927, "post": 928, "squirrel": 929, "crosswalk": 930, "mixing bowl": 931, "ducks": 932, "crackers": 933, "wine bottle": 934, "soccer player": 935, "pouch": 936, "necklace": 937, "wicker": 938, "pecan": 939, "hills": 940, "mashed potatoes": 941, "straw": 942, "boulders": 943, "peach": 944, "snowboarder": 945, "tag": 946, "pizza slice": 947, "fountain": 948, "magazines": 949, "chrome": 950, "side table": 951, "alligator": 952, "salt": 953, "rocky": 954, "frosting": 955, "orchard": 956, "hospital": 957, "map": 958, "gas stove": 959, "raisin": 960, "metallic": 961, "words": 962, "monitors": 963, "sticky notes": 964, "popcorn": 965, "parmesan cheese": 966, "cafe": 967, "placemat": 968, "hard": 969, "platter": 970, "bar stool": 971, "passengers": 972, "parachute": 973, "avocado": 974, "vases": 975, "bracelet": 976, "games": 977, "making face": 978, "fried": 979, "black and white": 980, "chocolate chips": 981, "seagulls": 982, "scissors": 983, "dinosaur": 984, "outfits": 985, "soccer ball": 986, "shirts": 987, "office": 988, "employee": 989, "kites": 990, "ropes": 991, "bracelets": 992, "gym": 993, "bison": 994, "sushi": 995, "soldier": 996, "rifle": 997, "cracker": 998, "notebook": 999, "mozzarella": 1000, "box": 1001, "cabbage": 1002, "onion ring": 1003, "trash": 1004, "palm tree": 1005, "coffee cups": 1006, "marina": 1007, "paintings": 1008, "rainbow colored": 1009, "chopsticks": 1010, "cross": 1011, "bikini": 1012, "bee": 1013, "paved": 1014, "frog": 1015, "burrito": 1016, "cheeseburger": 1017, "milk": 1018, "mug": 1019, "wristband": 1020, "dvds": 1021, "sweatshirt": 1022, "shark": 1023, 
"computers": 1024, "grilled": 1025, "pita": 1026, "burner": 1027, "train tracks": 1028, "sticker": 1029, "rhino": 1030, "parrot": 1031, "rough": 1032, "performer": 1033, "drapes": 1034, "courtyard": 1035, "decoration": 1036, "chandelier": 1037, "beverages": 1038, "sweet potatoes": 1039, "crumbs": 1040, "flour": 1041, "frame": 1042, "paint": 1043, "candles": 1044, "beverage": 1045, "cheesecake": 1046, "steel": 1047, "cream": 1048, "shopping center": 1049, "leafy": 1050, "brownie": 1051, "benches": 1052, "happy": 1053, "goats": 1054, "cactus": 1055, "charger": 1056, "antenna": 1057, "breakfast": 1058, "crate": 1059, "washing machine": 1060, "omelette": 1061, "packet": 1062, "beer mug": 1063, "wii controllers": 1064, "crab": 1065, "ripe": 1066, "feeder": 1067, "herb": 1068, "tunnel": 1069, "step": 1070, "flamingo": 1071, "muffin": 1072, "speakers": 1073, "baking pan": 1074, "carts": 1075, "cream cheese": 1076, "pigeons": 1077, "camel": 1078, "tool": 1079, "cafeteria": 1080, "gown": 1081, "blueberry": 1082, "jumpsuit": 1083, "crowd": 1084, "potato chips": 1085, "raisins": 1086, "minivan": 1087, "cobblestone": 1088, "dough": 1089, "panda": 1090, "brush": 1091, "wristwatch": 1092, "pizzas": 1093, "unpaved": 1094, "skateboards": 1095, "jars": 1096, "moss": 1097, "magnets": 1098, "jar": 1099, "hair": 1100, "hot dogs": 1101, "numbers": 1102, "penguin": 1103, "blazer": 1104, "roll": 1105, "figurines": 1106, "dolphin": 1107, "stairs": 1108, "safety jacket": 1109, "shrimp": 1110, "styrofoam": 1111, "officers": 1112, "soda bottle": 1113, "gummy bear": 1114, "cans": 1115, "watermelons": 1116, "wine glasses": 1117, "soldiers": 1118, "desert": 1119, "pine trees": 1120, "garlic": 1121, "lush": 1122, "gentleman": 1123, "dressing": 1124, "soap bottle": 1125, "unripe": 1126, "towels": 1127, "containers": 1128, "liquor": 1129, "murky": 1130, "whale": 1131, "potato salad": 1132, "waffles": 1133, "poodle": 1134, "hay": 1135, "yogurt": 1136, "sculpture": 1137, "alcohol": 1138, "tiles": 
1139, "palm trees": 1140, "pajamas": 1141, "copper": 1142, "croissant": 1143, "swimming": 1144, "church": 1145, "bags": 1146, "snail": 1147, "diaper": 1148, "wavy": 1149, "library": 1150, "wool": 1151, "sprinkles": 1152, "fire extinguisher": 1153, "bowls": 1154, "light bulbs": 1155, "hats": 1156, "spoons": 1157, "peacock": 1158, "boxes": 1159, "upward": 1160, "eagle": 1161, "cinnamon roll": 1162, "granite": 1163, "roasted": 1164, "daughter": 1165, "foil": 1166, "icing": 1167, "peaches": 1168, "bath towel": 1169, "officer": 1170, "pesto": 1171, "telephone pole": 1172, "artwork": 1173, "bedspread": 1174, "caucasian": 1175, "bending": 1176, "female": 1177, "plain": 1178, "toaster oven": 1179, "walnuts": 1180, "triangular": 1181, "beet": 1182, "headband": 1183, "drawings": 1184, "beach chair": 1185, "donkeys": 1186, "below": 1187, "bread loaf": 1188, "paper towel": 1189, "gourd": 1190, "rotten": 1191, "mound": 1192, "whipped cream": 1193, "low": 1194, "parent": 1195, "bus stop": 1196, "bar stools": 1197, "gadget": 1198, "cakes": 1199, "phones": 1200, "cupboards": 1201, "wine bottles": 1202, "gravy": 1203, "covered": 1204, "cockpit": 1205, "mayonnaise": 1206, "marble": 1207, "cereal box": 1208, "butterfly": 1209, "kimono": 1210, "clocks": 1211, "tea pot": 1212, "food truck": 1213, "cords": 1214, "urinal": 1215, "bamboo": 1216, "peanut": 1217, "tissue box": 1218, "fire": 1219, "nutella": 1220, "ramekin": 1221, "leaf": 1222, "village": 1223, "name tag": 1224, "rolling pin": 1225, "olive oil": 1226, "hummus": 1227, "balls": 1228, "wines": 1229, "pizza shop": 1230, "pea": 1231, "goggles": 1232, "dragons": 1233, "drinks": 1234, "marshmallow": 1235, "audience": 1236, "dumplings": 1237, "traffic sign": 1238, "oreo": 1239, "raspberry": 1240, "skating": 1241, "patio": 1242, "bone": 1243, "classroom": 1244, "beer bottle": 1245, "chalkboard": 1246, "life jacket": 1247, "lemonade": 1248, "deck": 1249, "pancake": 1250, "cathedral": 1251, "toiletries": 1252, "backyard": 1253, "mall": 
1254, "whisk": 1255, "brass": 1256, "vending machine": 1257, "island": 1258, "fog": 1259, "water bottle": 1260, "canopy": 1261, "drape": 1262, "topping": 1263, "parking sign": 1264, "antique": 1265, "mesh": 1266, "pens": 1267, "cowboy hat": 1268, "fruits": 1269, "cucumbers": 1270, "grapefruit": 1271, "fans": 1272, "meatballs": 1273, "houses": 1274, "under": 1275, "farmer": 1276, "crane": 1277, "hand dryer": 1278, "cowboy": 1279, "beds": 1280, "macaroni": 1281, "cheetah": 1282, "puddle": 1283, "stuffed animals": 1284, "coffee mug": 1285, "bakery": 1286, "lamps": 1287, "herbs": 1288, "bouquet": 1289, "hair clip": 1290, "cable": 1291, "biscuit": 1292, "cell phones": 1293, "tree leaves": 1294, "pizza pan": 1295, "drum": 1296, "raspberries": 1297, "ice maker": 1298, "shut": 1299, "cards": 1300, "pocket": 1301, "faucet": 1302, "guacamole": 1303, "coconut": 1304, "baseball players": 1305, "bug": 1306, "high": 1307, "brushing teeth": 1308, "ice": 1309, "toothpicks": 1310, "waiter": 1311, "tortilla": 1312, "spider": 1313, "snoopy": 1314, "weeds": 1315, "stew": 1316, "asphalt": 1317, "buoys": 1318, "family": 1319, "logs": 1320, "adidas": 1321, "underwear": 1322, "cliff": 1323, "sailboats": 1324, "robe": 1325, "casserole": 1326, "ketchup bottle": 1327, "teddy bears": 1328, "lock": 1329, "couches": 1330, "figurine": 1331, "pencil": 1332, "leafless": 1333, "drawing": 1334, "flip flops": 1335, "hippo": 1336, "paper dispenser": 1337, "cigarette": 1338, "barn": 1339, "hardwood": 1340, "staircase": 1341, "entrance": 1342, "windows": 1343, "picnic tables": 1344, "fudge": 1345, "performing trick": 1346, "blind": 1347, "vinegar": 1348, "beets": 1349, "curved": 1350, "away": 1351, "roast beef": 1352, "spray bottle": 1353, "chopstick": 1354, "soap dispenser": 1355, "dog food": 1356, "bus driver": 1357, "banana bunch": 1358, "dumpster": 1359, "twigs": 1360, "napkins": 1361, "bagels": 1362, "stage": 1363, "baskets": 1364, "ceramic": 1365, "pineapples": 1366, "street lights": 1367, "soap": 
1368, "brownies": 1369, "christmas lights": 1370, "cameras": 1371, "fruit stand": 1372, "soda can": 1373, "hotdog bun": 1374, "fat": 1375, "pizza boxes": 1376, "melon": 1377, "customers": 1378, "athletic shoe": 1379, "peeled": 1380, "food container": 1381, "powdered sugar": 1382, "rice cooker": 1383, "spots": 1384, "sugar": 1385, "hair dryer": 1386, "tractors": 1387, "broom": 1388, "skin": 1389, "pillowcase": 1390, "smoothie": 1391, "ear buds": 1392, "garment": 1393, "soft": 1394, "walls": 1395, "ravioli": 1396, "seafood": 1397, "hammer": 1398, "sack": 1399, "blenders": 1400, "sponge": 1401, "sunflower": 1402, "cabin": 1403, "tuna": 1404, "beautiful": 1405, "heels": 1406, "butter": 1407, "scooters": 1408, "wardrobe": 1409, "taking pictures": 1410, "forks": 1411, "lambs": 1412, "tin": 1413, "cat food": 1414, "engineer": 1415, "oatmeal": 1416, "clay": 1417, "butterflies": 1418, "team": 1419, "lipstick": 1420, "ladle": 1421, "food processor": 1422, "wok": 1423, "shelter": 1424, "lobster": 1425, "snacks": 1426, "vests": 1427, "face mask": 1428, "peanut butter": 1429, "balloons": 1430, "peanuts": 1431, "wallpaper": 1432, "cranberries": 1433, "crown": 1434, "caramel": 1435, "floor lamp": 1436, "shower curtain": 1437, "blankets": 1438, "hangar": 1439, "surfboards": 1440, "meal": 1441, "wolf": 1442, "gifts": 1443, "father": 1444, "cd": 1445, "chains": 1446, "tourist": 1447, "canister": 1448, "spear": 1449, "pilot": 1450, "mountain side": 1451, "pencils": 1452, "trumpet": 1453, "knives": 1454, "mango": 1455, "magnet": 1456, "guys": 1457, "satellite dish": 1458, "table lamp": 1459, "keyboards": 1460, "swimmer": 1461, "stump": 1462, "amusement park": 1463, "goal": 1464, "roadside": 1465, "wig": 1466, "chickens": 1467, "card": 1468, "looking up": 1469, "aluminum": 1470, "pandas": 1471, "soap dish": 1472, "pomegranate": 1473, "tourists": 1474, "parrots": 1475, "toilet brush": 1476, "remote controls": 1477, "suits": 1478, "cotton": 1479, "tents": 1480, "water glass": 1481, 
"healthy": 1482, "envelope": 1483, "baking sheet": 1484, "marker": 1485, "muffins": 1486, "salon": 1487, "snow flakes": 1488, "dry erase board": 1489, "fishing pole": 1490, "lighthouse": 1491, "earphones": 1492, "photographer": 1493, "gorilla": 1494, "seeds": 1495, "sticks": 1496, "shopping cart": 1497, "pears": 1498, "alarm clock": 1499, "tree branch": 1500, "almonds": 1501, "theater": 1502, "tiger": 1503, "forward": 1504, "temple": 1505, "hedge": 1506, "kitchen towel": 1507, "motorcycles": 1508, "garland": 1509, "pudding": 1510, "vintage": 1511, "coarse": 1512, "swans": 1513, "pretzels": 1514, "swamp": 1515, "dense": 1516, "auditorium": 1517, "daisy": 1518, "dish soap": 1519, "opaque": 1520, "french toast": 1521, "straight": 1522, "tennis balls": 1523, "orchid": 1524, "champagne": 1525, "pizza pie": 1526, "egg roll": 1527, "flatbread": 1528, "coconuts": 1529, "flip flop": 1530, "skyscraper": 1531, "fur": 1532, "denim": 1533, "scaffolding": 1534, "coin": 1535, "policemen": 1536, "stuffed bears": 1537, "cane": 1538, "cloth": 1539, "cake stand": 1540, "pump": 1541, "soccer": 1542, "loaf": 1543, "knife block": 1544, "ski lift": 1545, "modern": 1546, "old fashioned": 1547, "cotton dessert": 1548, "trays": 1549, "smooth": 1550, "lunch": 1551, "dull": 1552, "shield": 1553, "dinner": 1554, "cloths": 1555, "waterfall": 1556, "waste basket": 1557, "scarce": 1558, "shaking hands": 1559, "salt shaker": 1560, "pocket watch": 1561, "unhealthy": 1562, "lounge": 1563, "moose": 1564, "seaweed": 1565, "panda bears": 1566, "candies": 1567, "batteries": 1568, "comb": 1569, "wallet": 1570, "students": 1571, "school": 1572, "geese": 1573, "apartment building": 1574, "stars": 1575, "granola": 1576, "leopard": 1577, "cardboard": 1578, "shoe laces": 1579, "hairbrush": 1580, "chef hat": 1581, "crystal": 1582, "pizza tray": 1583, "bread box": 1584, "luggage cart": 1585, "apartment": 1586, "angry": 1587, "characters": 1588, "oak tree": 1589, "angry bird": 1590, "backpacks": 1591, "shaving 
cream": 1592, "cemetery": 1593, "lace": 1594, "anchovies": 1595, "dresses": 1596, "paper towels": 1597, "garage door": 1598, "vanilla": 1599, "uncooked": 1600, "battery": 1601, "butter knife": 1602, "mint": 1603, "package": 1604, "biscuits": 1605, "son": 1606, "cake pan": 1607, "snack": 1608, "riding boots": 1609, "rooftop": 1610, "irregular": 1611, "baked": 1612, "kittens": 1613, "sconce": 1614, "serving dish": 1615, "mirrors": 1616, "taking photo": 1617, "bubble": 1618, "printers": 1619, "ice cube": 1620, "knee pads": 1621, "doors": 1622, "ceiling light": 1623, "cotton candy": 1624, "helmets": 1625, "cheese cube": 1626, "bartender": 1627, "pistachio": 1628, "ugly": 1629, "sausages": 1630, "beer can": 1631, "baker": 1632, "coffee beans": 1633, "almond": 1634, "ovens": 1635, "curled": 1636, "underneath": 1637, "suitcases": 1638, "food": 1639, "taking bath": 1640, "vendor": 1641, "lizard": 1642, "homes": 1643, "shops": 1644, "mannequins": 1645, "turtle": 1646, "blossom": 1647, "chickpeas": 1648, "outside": 1649, "ornament": 1650, "milk carton": 1651, "mexican food": 1652, "seed": 1653, "avocados": 1654, "masks": 1655, "pumpkins": 1656, "papaya": 1657, "stapler": 1658, "hamburgers": 1659, "earrings": 1660, "back": 1661, "wildflowers": 1662, "bats": 1663, "hand soap": 1664, "fresh": 1665, "manhole cover": 1666, "dolphins": 1667, "thermometer": 1668, "castle": 1669, "cones": 1670, "pizza cutter": 1671, "pizza box": 1672, "heel": 1673, "salmon": 1674, "door frame": 1675, "taco": 1676, "pork": 1677, "wedding": 1678, "bubbles": 1679, "eiffel tower": 1680, "cranberry": 1681, "napkin dispenser": 1682, "bandage": 1683, "elmo": 1684, "notepad": 1685, "pepper shaker": 1686, "artichokes": 1687, "tools": 1688, "window frame": 1689, "steamed": 1690, "groceries": 1691, "lily": 1692, "cookbook": 1693, "paper container": 1694, "hippos": 1695, "hilltop": 1696, "twig": 1697, "animal": 1698, "wii game": 1699, "beads": 1700, "lilies": 1701, "towel dispenser": 1702, "blood": 1703, 
"ladles": 1704, "jewelry": 1705, "hearts": 1706, "snow boots": 1707, "ahead": 1708, "utensil holder": 1709, "football": 1710, "bird cage": 1711, "dish drainer": 1712, "cds": 1713, "banana peel": 1714, "vines": 1715, "pizza crust": 1716, "shopper": 1717, "tags": 1718, "keypad": 1719, "dinosaurs": 1720, "stir fry": 1721, "bomb": 1722, "necklaces": 1723, "packages": 1724, "uniforms": 1725, "sparse": 1726, "unhappy": 1727, "control panel": 1728, "antennas": 1729, "spray can": 1730, "feathers": 1731, "electric toothbrush": 1732, "potted": 1733, "juice box": 1734, "toolbox": 1735, "visitor": 1736, "ornaments": 1737, "sign post": 1738, "baseball mitt": 1739, "robot": 1740, "blackberries": 1741, "desk lamp": 1742, "glaze": 1743, "melons": 1744, "cookie dough": 1745, "paint brush": 1746, "mustard bottle": 1747, "apple logo": 1748, "salad dressing": 1749, "mattresses": 1750, "cash register": 1751, "nest": 1752, "knee pad": 1753, "out": 1754, "toasted": 1755, "price tag": 1756, "canisters": 1757, "christmas light": 1758, "antelopes": 1759, "dream catcher": 1760, "student": 1761, "fine": 1762, "kangaroo": 1763, "smoke stack": 1764, "music": 1765, "cages": 1766, "soccer balls": 1767, "ostriches": 1768, "coffee shop": 1769, "ice cubes": 1770, "downward": 1771, "televisions": 1772, "candle holder": 1773, "grinder": 1774, "xbox controller": 1775, "cricket": 1776, "hurdle": 1777, "obstacle": 1778, "lab coat": 1779, "gas pump": 1780, "banana bunches": 1781, "bell tower": 1782, "waitress": 1783, "in mirror": 1784, "coats": 1785, "attic": 1786, "sugar packet": 1787, "taking photograph": 1788, "mountain peak": 1789, "pub": 1790, "silk": 1791, "blossoms": 1792, "pillars": 1793, "scrub brush": 1794, "kiwis": 1795, "octagonal": 1796, "parachutes": 1797, "lions": 1798, "sideways": 1799, "egg carton": 1800, "visitors": 1801, "sunflowers": 1802, "shoe lace": 1803, "rhinos": 1804, "elbow pad": 1805, "egg yolk": 1806, "outlets": 1807, "baseball bats": 1808, "life jackets": 1809, "snakes": 
1810, "vitamins": 1811, "cigar": 1812, "upwards": 1813, "beneath": 1814, "taking photos": 1815, "storage box": 1816, "armor": 1817, "cookie jar": 1818, "rounded": 1819, "seat belt": 1820, "owls": 1821, "appetizer": 1822, "beer cans": 1823, "stores": 1824, "shoppers": 1825, "bird house": 1826, "sugar packets": 1827, "wild": 1828, "dvd players": 1829, "towers": 1830, "water bottles": 1831, "waves": 1832, "pikachu": 1833, "wolves": 1834, "immature": 1835, "shampoo": 1836, "orchids": 1837, "elevator": 1838, "taking notes": 1839, "wave": 1840, "horse hoof": 1841, "bottle cap": 1842}, {"0": "yes", "1": "pipe", "2": "no", "3": "large", "4": "girl", "5": "bed", "6": "sofa", "7": "right", "8": "dark", "9": "cabinet", "10": "left", "11": "bird", "12": "brick", "13": "rock", "14": "children", "15": "brown", "16": "blond", "17": "pants", "18": "top", "19": "horse", "20": "blue", "21": "hot dog", "22": "banana", "23": "laptop", "24": "desk", "25": "bottom", "26": "eating", "27": "man", "28": "grass", "29": "dog", "30": "silver", "31": "bag", "32": "pedestrian", "33": "cabinets", "34": "green", "35": "window", "36": "giraffe", "37": "tiny", "38": "child", "39": "yellow", "40": "wooden", "41": "parking meter", "42": "fries", "43": "plants", "44": "kiosk", "45": "orange", "46": "van", "47": "shirt", "48": "coat", "49": "controller", "50": "bench", "51": "television", "52": "black", "53": "carrot", "54": "sandwich", "55": "city", "56": "street", "57": "couch", "58": "closed", "59": "field", "60": "pink", "61": "boy", "62": "lady", "63": "tomato", "64": "horses", "65": "white", "66": "beef", "67": "cat", "68": "cutting board", "69": "overcast", "70": "vegetables", "71": "gray", "72": "onions", "73": "wood", "74": "toaster", "75": "bread", "76": "fence", "77": "player", "78": "roof", "79": "meadow", "80": "baby", "81": "calf", "82": "branch", "83": "street sign", "84": "backpack", "85": "jacket", "86": "teddy bear", "87": "game controller", "88": "herd", "89": "zoo", "90": "truck", 
"91": "red", "92": "printer", "93": "yard", "94": "end table", "95": "wetsuit", "96": "building", "97": "carrots", "98": "train", "99": "pans", "100": "giraffes", "101": "coffee maker", "102": "bathroom", "103": "woman", "104": "monitor", "105": "sheep", "106": "trees", "107": "dining table", "108": "park", "109": "nightstand", "110": "car", "111": "table", "112": "bicycle", "113": "donkey", "114": "cell phone", "115": "teal", "116": "chair", "117": "bathtub", "118": "waiting", "119": "purple", "120": "small", "121": "airport", "122": "colorful", "123": "stuffed bear", "124": "light brown", "125": "piano", "126": "lying", "127": "clock", "128": "pavement", "129": "snow", "130": "lemon", "131": "sandy", "132": "shelf", "133": "cheese", "134": "light blue", "135": "plant", "136": "bowl", "137": "bus", "138": "dishwasher", "139": "pepperoni", "140": "pole", "141": "bear", "142": "monkey", "143": "shore", "144": "hedges", "145": "wall", "146": "elephant", "147": "sidewalk", "148": "swimming pool", "149": "blender", "150": "bookshelves", "151": "mountain", "152": "pizza", "153": "birds", "154": "people", "155": "radiator", "156": "metal", "157": "striped", "158": "playing", "159": "kitten", "160": "dirty", "161": "runway", "162": "salad", "163": "sailboat", "164": "zebra", "165": "counter", "166": "lettuce", "167": "seat", "168": "asparagus", "169": "color", "170": "plastic", "171": "racket", "172": "dress", "173": "frisbee", "174": "standing", "175": "sea", "176": "keyboard", "177": "motorcycle", "178": "phone", "179": "tree", "180": "computer", "181": "pointing", "182": "iron", "183": "skis", "184": "blouse", "185": "onion", "186": "bat", "187": "light switch", "188": "hook", "189": "mirror", "190": "surfboard", "191": "candle", "192": "catcher", "193": "bricks", "194": "newspaper", "195": "handbag", "196": "knife", "197": "branches", "198": "cap", "199": "stove", "200": "pots", "201": "lawn", "202": "computer mouse", "203": "chef", "204": "steps", "205": "tan", 
"206": "eggplant", "207": "mountains", "208": "open", "209": "refrigerator", "210": "oranges", "211": "snowboarding", "212": "oven", "213": "utensil", "214": "bedroom", "215": "olives", "216": "little", "217": "cow", "218": "boat", "219": "microwave", "220": "pizza oven", "221": "taxi", "222": "young", "223": "drawers", "224": "tablet", "225": "choppy", "226": "foggy", "227": "apron", "228": "syrup", "229": "plate", "230": "coffee cup", "231": "taking picture", "232": "shoe", "233": "basket", "234": "pigeon", "235": "water", "236": "stop sign", "237": "mailbox", "238": "leather", "239": "remote control", "240": "home plate", "241": "spinach", "242": "tea kettle", "243": "cereal", "244": "tall", "245": "helmet", "246": "celery", "247": "vase", "248": "alien", "249": "collar", "250": "shorts", "251": "suit", "252": "supermarket", "253": "carpet", "254": "donuts", "255": "batter", "256": "outdoors", "257": "girls", "258": "skier", "259": "entertainment center", "260": "floor", "261": "chain", "262": "lamp", "263": "rope", "264": "pepper", "265": "tomatoes", "266": "drawer", "267": "forest", "268": "cars", "269": "balcony", "270": "guy", "271": "boats", "272": "scooter", "273": "flower", "274": "wii controller", "275": "down", "276": "shopping bag", "277": "grape", "278": "ski", "279": "ocean", "280": "comforter", "281": "mattress", "282": "lamb", "283": "customer", "284": "pan", "285": "highway", "286": "long", "287": "display", "288": "shower", "289": "nuts", "290": "sign", "291": "clear", "292": "letters", "293": "surfer", "294": "mannequin", "295": "checkered", "296": "ground", "297": "fisherman", "298": "egg", "299": "zebras", "300": "shoes", "301": "dish", "302": "coffee", "303": "paper", "304": "store", "305": "thin", "306": "glass", "307": "hat", "308": "station", "309": "spatula", "310": "train car", "311": "skateboard", "312": "lake", "313": "airplane", "314": "concrete", "315": "stainless steel", "316": "bushes", "317": "hill", "318": "road", "319": "spoon", 
"320": "lobby", "321": "indoors", "322": "armchair", "323": "flowers", "324": "broccoli", "325": "suv", "326": "umbrella", "327": "glasses", "328": "ham", "329": "rubber duck", "330": "croissants", "331": "carriage", "332": "burger", "333": "beach", "334": "pen", "335": "laptops", "336": "athlete", "337": "pickles", "338": "dark brown", "339": "trains", "340": "living room", "341": "screen", "342": "bikes", "343": "beige", "344": "napkin", "345": "gravel", "346": "papers", "347": "door", "348": "gold", "349": "cloudy", "350": "tofu", "351": "cows", "352": "hillside", "353": "sun", "354": "behind", "355": "jet", "356": "mushroom", "357": "material", "358": "snowboard", "359": "produce", "360": "dvd player", "361": "camera", "362": "sky", "363": "bun", "364": "walkway", "365": "vest", "366": "watch", "367": "sunny", "368": "locomotive", "369": "sausage", "370": "shop", "371": "ball", "372": "sneakers", "373": "sea foam", "374": "clouds", "375": "leaves", "376": "dresser", "377": "chili", "378": "gate", "379": "flag", "380": "stick", "381": "leggings", "382": "rubber", "383": "mugs", "384": "parsley", "385": "merchandise", "386": "grill", "387": "shallow", "388": "medicine cabinet", "389": "chairs", "390": "ceiling", "391": "curtains", "392": "peppers", "393": "huge", "394": "kettle", "395": "crouching", "396": "deer", "397": "picture", "398": "passenger", "399": "bears", "400": "ship", "401": "belt", "402": "umpire", "403": "short", "404": "driver", "405": "thick", "406": "reading", "407": "tape", "408": "doll", "409": "bookshelf", "410": "basil", "411": "tongs", "412": "cream colored", "413": "oil", "414": "flames", "415": "gift", "416": "rocks", "417": "apple", "418": "blanket", "419": "menu", "420": "lego", "421": "wine", "422": "kite", "423": "aquarium", "424": "swan", "425": "mask", "426": "boot", "427": "dessert", "428": "wide", "429": "headphones", "430": "baseball bat", "431": "tables", "432": "drain", "433": "logo", "434": "tie", "435": "crates", "436": 
"blueberries", "437": "worker", "438": "chocolate", "439": "kiwi", "440": "cookie", "441": "wine glass", "442": "boys", "443": "jumping", "444": "cloudless", "445": "rain", "446": "cord", "447": "books", "448": "speaker", "449": "heater", "450": "mickey mouse", "451": "cake", "452": "microphone", "453": "computer desk", "454": "coleslaw", "455": "wet", "456": "video games", "457": "uncomfortable", "458": "canoe", "459": "hotel room", "460": "giant", "461": "dolls", "462": "desserts", "463": "pear", "464": "roses", "465": "apples", "466": "cooker", "467": "tablecloth", "468": "unpeeled", "469": "coffee pot", "470": "bucket", "471": "buildings", "472": "gas station", "473": "staring", "474": "snow pants", "475": "gloves", "476": "couple", "477": "fireplace", "478": "stone", "479": "skate park", "480": "statue", "481": "cyclist", "482": "money", "483": "jeans", "484": "cup", "485": "cinnamon", "486": "pie", "487": "tissues", "488": "fork", "489": "at camera", "490": "rabbit", "491": "off", "492": "stuffed animal", "493": "sweet potato", "494": "skinny", "495": "duck", "496": "deep", "497": "pedestrians", "498": "sleeveless", "499": "maroon", "500": "wheelchair", "501": "skillet", "502": "snowsuit", "503": "potatoes", "504": "glove", "505": "dogs", "506": "elephants", "507": "chimney", "508": "pillow", "509": "stickers", "510": "suitcase", "511": "toilet paper", "512": "lion", "513": "drink", "514": "potato", "515": "stormy", "516": "bananas", "517": "empty", "518": "grassy", "519": "scarf", "520": "trailer", "521": "sandals", "522": "aircraft", "523": "pot", "524": "parking lot", "525": "bunny", "526": "puppy", "527": "sauce", "528": "ladder", "529": "smoke", "530": "t shirt", "531": "biker", "532": "fish", "533": "tangerine", "534": "coffee table", "535": "mixer", "536": "beer", "537": "tinted", "538": "sweater", "539": "doorway", "540": "berries", "541": "cooking", "542": "meats", "543": "towel", "544": "fire truck", "545": "round", "546": "donut", "547": "short 
sleeved", "548": "toast", "549": "hotel", "550": "squash", "551": "raincoat", "552": "talking", "553": "bagel", "554": "sour cream", "555": "lid", "556": "bandana", "557": "tissue", "558": "shelves", "559": "cupcake", "560": "men", "561": "sand", "562": "above", "563": "router", "564": "bridge", "565": "trunks", "566": "steak", "567": "long sleeved", "568": "wires", "569": "pancakes", "570": "moon", "571": "up", "572": "noodles", "573": "arrow", "574": "goose", "575": "american flag", "576": "wagon", "577": "beach umbrella", "578": "airplanes", "579": "river", "580": "heart", "581": "cone", "582": "countertop", "583": "poster", "584": "meat", "585": "drinking", "586": "rice", "587": "toilet", "588": "running", "589": "clean", "590": "roadway", "591": "dishes", "592": "fake", "593": "path", "594": "players", "595": "toothbrush", "596": "corn", "597": "skateboarder", "598": "controllers", "599": "tennis", "600": "dark blue", "601": "number", "602": "on", "603": "pastries", "604": "graffiti", "605": "place", "606": "shuttle", "607": "barrier", "608": "mustard", "609": "tray", "610": "calculator", "611": "sleeping", "612": "dispenser", "613": "cupboard", "614": "skateboarding", "615": "shape", "616": "ketchup", "617": "strawberries", "618": "kitchen", "619": "buoy", "620": "cups", "621": "skater", "622": "flower pot", "623": "match", "624": "buses", "625": "ribs", "626": "socks", "627": "grater", "628": "pipes", "629": "railroad", "630": "tree branches", "631": "jersey", "632": "air conditioner", "633": "train station", "634": "pastry", "635": "bookcase", "636": "chinese food", "637": "outfit", "638": "bike", "639": "nut", "640": "onion rings", "641": "museum", "642": "tractor", "643": "toddler", "644": "cooking pot", "645": "mushrooms", "646": "mat", "647": "bleachers", "648": "stained", "649": "walking", "650": "full", "651": "uniform", "652": "power line", "653": "pumpkin", "654": "watermelon", "655": "crust", "656": "bush", "657": "picture frame", "658": "looking 
down", "659": "grapes", "660": "character", "661": "honey", "662": "olive", "663": "power lines", "664": "serving tray", "665": "dragon", "666": "toys", "667": "helicopter", "668": "bacon", "669": "eye glasses", "670": "sword", "671": "garage", "672": "women", "673": "sitting", "674": "light bulb", "675": "chicken breast", "676": "video camera", "677": "decorations", "678": "platform", "679": "hamburger", "680": "boots", "681": "porcelain", "682": "bare", "683": "soda", "684": "light", "685": "radio", "686": "stadium", "687": "pine tree", "688": "action figure", "689": "coach", "690": "pasture", "691": "seal", "692": "cooler", "693": "guitar", "694": "sandwiches", "695": "vegetable", "696": "curly", "697": "front", "698": "toothpaste", "699": "polo shirt", "700": "dotted", "701": "restaurant", "702": "life preserver", "703": "rackets", "704": "tomato sauce", "705": "rose", "706": "eggs", "707": "fan", "708": "bedding", "709": "zucchini", "710": "dried", "711": "closet", "712": "star", "713": "khaki", "714": "artichoke", "715": "peas", "716": "umbrellas", "717": "bull", "718": "sandal", "719": "lemons", "720": "painting", "721": "blinds", "722": "magazine", "723": "cookies", "724": "luggage", "725": "pig", "726": "snake", "727": "cappuccino", "728": "intersection", "729": "beans", "730": "school bus", "731": "square", "732": "trash can", "733": "briefcase", "734": "sunglasses", "735": "dining room", "736": "juice", "737": "dirt", "738": "baseball", "739": "toy", "740": "log", "741": "cherry", "742": "toothbrushes", "743": "tennis ball", "744": "book", "745": "narrow", "746": "skirt", "747": "fire hydrant", "748": "game", "749": "house", "750": "clock tower", "751": "chicken", "752": "stroller", "753": "mud", "754": "bronze", "755": "jeep", "756": "seagull", "757": "street light", "758": "traffic lights", "759": "strawberry", "760": "computer monitor", "761": "hallway", "762": "ambulance", "763": "costume", "764": "panda bear", "765": "market", "766": "wii", "767": 
"raw", "768": "still", "769": "pineapple", "770": "goat", "771": "trunk", "772": "vacuum", "773": "tent", "774": "curtain", "775": "dock", "776": "liquid", "777": "pitcher", "778": "cage", "779": "folding chair", "780": "console", "781": "ring", "782": "dry", "783": "workers", "784": "bell", "785": "shampoo bottle", "786": "new", "787": "bushy", "788": "heavy", "789": "cats", "790": "mangoes", "791": "dome", "792": "label", "793": "pasta salad", "794": "light fixture", "795": "can", "796": "traffic light", "797": "male", "798": "jockey", "799": "spectators", "800": "letter", "801": "town", "802": "sheet", "803": "powder", "804": "ice cream", "805": "toy car", "806": "video game", "807": "turkey", "808": "utensils", "809": "pond", "810": "cupcakes", "811": "riding", "812": "lime", "813": "sock", "814": "cucumber", "815": "saucer", "816": "plantains", "817": "spectator", "818": "rectangular", "819": "dugout", "820": "tea", "821": "dress shirt", "822": "owl", "823": "cables", "824": "mouse pad", "825": "rug", "826": "purse", "827": "ostrich", "828": "bottles", "829": "toppings", "830": "net", "831": "gun", "832": "cooked", "833": "adult", "834": "trash bag", "835": "undershirt", "836": "snowy", "837": "driving", "838": "fruit", "839": "symbol", "840": "receipt", "841": "ipod", "842": "figure", "843": "resting", "844": "soup", "845": "terminal", "846": "machine", "847": "cherries", "848": "pretzel", "849": "surfing", "850": "hose", "851": "binder", "852": "old", "853": "mother", "854": "sink", "855": "vine", "856": "appetizers", "857": "earring", "858": "porch", "859": "steam", "860": "calm", "861": "posing", "862": "brunette", "863": "bicycles", "864": "polar bear", "865": "paddle", "866": "microwave oven", "867": "garden", "868": "jackets", "869": "cauliflower", "870": "spices", "871": "farm", "872": "antelope", "873": "container", "874": "fence post", "875": "octopus", "876": "garnish", "877": "planter", "878": "stones", "879": "sheets", "880": "coke", "881": 
"bottle", "882": "swimsuit", "883": "spice", "884": "walnut", "885": "word", "886": "policeman", "887": "waffle", "888": "ottoman", "889": "desktop computer", "890": "cart", "891": "dryer", "892": "candy", "893": "skiing", "894": "broth", "895": "dip", "896": "milkshake", "897": "plates", "898": "wire", "899": "silverware", "900": "asian", "901": "office chair", "902": "outlet", "903": "projector", "904": "flags", "905": "partly cloudy", "906": "buns", "907": "sad", "908": "feta cheese", "909": "pasta", "910": "hard drive", "911": "balloon", "912": "tv stand", "913": "tank top", "914": "lunch box", "915": "stuffed dog", "916": "picnic table", "917": "real", "918": "pillows", "919": "rainy", "920": "air", "921": "berry", "922": "soft drink", "923": "nike", "924": "trucks", "925": "harbor", "926": "restroom", "927": "tower", "928": "post", "929": "squirrel", "930": "crosswalk", "931": "mixing bowl", "932": "ducks", "933": "crackers", "934": "wine bottle", "935": "soccer player", "936": "pouch", "937": "necklace", "938": "wicker", "939": "pecan", "940": "hills", "941": "mashed potatoes", "942": "straw", "943": "boulders", "944": "peach", "945": "snowboarder", "946": "tag", "947": "pizza slice", "948": "fountain", "949": "magazines", "950": "chrome", "951": "side table", "952": "alligator", "953": "salt", "954": "rocky", "955": "frosting", "956": "orchard", "957": "hospital", "958": "map", "959": "gas stove", "960": "raisin", "961": "metallic", "962": "words", "963": "monitors", "964": "sticky notes", "965": "popcorn", "966": "parmesan cheese", "967": "cafe", "968": "placemat", "969": "hard", "970": "platter", "971": "bar stool", "972": "passengers", "973": "parachute", "974": "avocado", "975": "vases", "976": "bracelet", "977": "games", "978": "making face", "979": "fried", "980": "black and white", "981": "chocolate chips", "982": "seagulls", "983": "scissors", "984": "dinosaur", "985": "outfits", "986": "soccer ball", "987": "shirts", "988": "office", "989": 
"employee", "990": "kites", "991": "ropes", "992": "bracelets", "993": "gym", "994": "bison", "995": "sushi", "996": "soldier", "997": "rifle", "998": "cracker", "999": "notebook", "1000": "mozzarella", "1001": "box", "1002": "cabbage", "1003": "onion ring", "1004": "trash", "1005": "palm tree", "1006": "coffee cups", "1007": "marina", "1008": "paintings", "1009": "rainbow colored", "1010": "chopsticks", "1011": "cross", "1012": "bikini", "1013": "bee", "1014": "paved", "1015": "frog", "1016": "burrito", "1017": "cheeseburger", "1018": "milk", "1019": "mug", "1020": "wristband", "1021": "dvds", "1022": "sweatshirt", "1023": "shark", "1024": "computers", "1025": "grilled", "1026": "pita", "1027": "burner", "1028": "train tracks", "1029": "sticker", "1030": "rhino", "1031": "parrot", "1032": "rough", "1033": "performer", "1034": "drapes", "1035": "courtyard", "1036": "decoration", "1037": "chandelier", "1038": "beverages", "1039": "sweet potatoes", "1040": "crumbs", "1041": "flour", "1042": "frame", "1043": "paint", "1044": "candles", "1045": "beverage", "1046": "cheesecake", "1047": "steel", "1048": "cream", "1049": "shopping center", "1050": "leafy", "1051": "brownie", "1052": "benches", "1053": "happy", "1054": "goats", "1055": "cactus", "1056": "charger", "1057": "antenna", "1058": "breakfast", "1059": "crate", "1060": "washing machine", "1061": "omelette", "1062": "packet", "1063": "beer mug", "1064": "wii controllers", "1065": "crab", "1066": "ripe", "1067": "feeder", "1068": "herb", "1069": "tunnel", "1070": "step", "1071": "flamingo", "1072": "muffin", "1073": "speakers", "1074": "baking pan", "1075": "carts", "1076": "cream cheese", "1077": "pigeons", "1078": "camel", "1079": "tool", "1080": "cafeteria", "1081": "gown", "1082": "blueberry", "1083": "jumpsuit", "1084": "crowd", "1085": "potato chips", "1086": "raisins", "1087": "minivan", "1088": "cobblestone", "1089": "dough", "1090": "panda", "1091": "brush", "1092": "wristwatch", "1093": "pizzas", "1094": 
"unpaved", "1095": "skateboards", "1096": "jars", "1097": "moss", "1098": "magnets", "1099": "jar", "1100": "hair", "1101": "hot dogs", "1102": "numbers", "1103": "penguin", "1104": "blazer", "1105": "roll", "1106": "figurines", "1107": "dolphin", "1108": "stairs", "1109": "safety jacket", "1110": "shrimp", "1111": "styrofoam", "1112": "officers", "1113": "soda bottle", "1114": "gummy bear", "1115": "cans", "1116": "watermelons", "1117": "wine glasses", "1118": "soldiers", "1119": "desert", "1120": "pine trees", "1121": "garlic", "1122": "lush", "1123": "gentleman", "1124": "dressing", "1125": "soap bottle", "1126": "unripe", "1127": "towels", "1128": "containers", "1129": "liquor", "1130": "murky", "1131": "whale", "1132": "potato salad", "1133": "waffles", "1134": "poodle", "1135": "hay", "1136": "yogurt", "1137": "sculpture", "1138": "alcohol", "1139": "tiles", "1140": "palm trees", "1141": "pajamas", "1142": "copper", "1143": "croissant", "1144": "swimming", "1145": "church", "1146": "bags", "1147": "snail", "1148": "diaper", "1149": "wavy", "1150": "library", "1151": "wool", "1152": "sprinkles", "1153": "fire extinguisher", "1154": "bowls", "1155": "light bulbs", "1156": "hats", "1157": "spoons", "1158": "peacock", "1159": "boxes", "1160": "upward", "1161": "eagle", "1162": "cinnamon roll", "1163": "granite", "1164": "roasted", "1165": "daughter", "1166": "foil", "1167": "icing", "1168": "peaches", "1169": "bath towel", "1170": "officer", "1171": "pesto", "1172": "telephone pole", "1173": "artwork", "1174": "bedspread", "1175": "caucasian", "1176": "bending", "1177": "female", "1178": "plain", "1179": "toaster oven", "1180": "walnuts", "1181": "triangular", "1182": "beet", "1183": "headband", "1184": "drawings", "1185": "beach chair", "1186": "donkeys", "1187": "below", "1188": "bread loaf", "1189": "paper towel", "1190": "gourd", "1191": "rotten", "1192": "mound", "1193": "whipped cream", "1194": "low", "1195": "parent", "1196": "bus stop", "1197": "bar 
stools", "1198": "gadget", "1199": "cakes", "1200": "phones", "1201": "cupboards", "1202": "wine bottles", "1203": "gravy", "1204": "covered", "1205": "cockpit", "1206": "mayonnaise", "1207": "marble", "1208": "cereal box", "1209": "butterfly", "1210": "kimono", "1211": "clocks", "1212": "tea pot", "1213": "food truck", "1214": "cords", "1215": "urinal", "1216": "bamboo", "1217": "peanut", "1218": "tissue box", "1219": "fire", "1220": "nutella", "1221": "ramekin", "1222": "leaf", "1223": "village", "1224": "name tag", "1225": "rolling pin", "1226": "olive oil", "1227": "hummus", "1228": "balls", "1229": "wines", "1230": "pizza shop", "1231": "pea", "1232": "goggles", "1233": "dragons", "1234": "drinks", "1235": "marshmallow", "1236": "audience", "1237": "dumplings", "1238": "traffic sign", "1239": "oreo", "1240": "raspberry", "1241": "skating", "1242": "patio", "1243": "bone", "1244": "classroom", "1245": "beer bottle", "1246": "chalkboard", "1247": "life jacket", "1248": "lemonade", "1249": "deck", "1250": "pancake", "1251": "cathedral", "1252": "toiletries", "1253": "backyard", "1254": "mall", "1255": "whisk", "1256": "brass", "1257": "vending machine", "1258": "island", "1259": "fog", "1260": "water bottle", "1261": "canopy", "1262": "drape", "1263": "topping", "1264": "parking sign", "1265": "antique", "1266": "mesh", "1267": "pens", "1268": "cowboy hat", "1269": "fruits", "1270": "cucumbers", "1271": "grapefruit", "1272": "fans", "1273": "meatballs", "1274": "houses", "1275": "under", "1276": "farmer", "1277": "crane", "1278": "hand dryer", "1279": "cowboy", "1280": "beds", "1281": "macaroni", "1282": "cheetah", "1283": "puddle", "1284": "stuffed animals", "1285": "coffee mug", "1286": "bakery", "1287": "lamps", "1288": "herbs", "1289": "bouquet", "1290": "hair clip", "1291": "cable", "1292": "biscuit", "1293": "cell phones", "1294": "tree leaves", "1295": "pizza pan", "1296": "drum", "1297": "raspberries", "1298": "ice maker", "1299": "shut", "1300": "cards", 
"1301": "pocket", "1302": "faucet", "1303": "guacamole", "1304": "coconut", "1305": "baseball players", "1306": "bug", "1307": "high", "1308": "brushing teeth", "1309": "ice", "1310": "toothpicks", "1311": "waiter", "1312": "tortilla", "1313": "spider", "1314": "snoopy", "1315": "weeds", "1316": "stew", "1317": "asphalt", "1318": "buoys", "1319": "family", "1320": "logs", "1321": "adidas", "1322": "underwear", "1323": "cliff", "1324": "sailboats", "1325": "robe", "1326": "casserole", "1327": "ketchup bottle", "1328": "teddy bears", "1329": "lock", "1330": "couches", "1331": "figurine", "1332": "pencil", "1333": "leafless", "1334": "drawing", "1335": "flip flops", "1336": "hippo", "1337": "paper dispenser", "1338": "cigarette", "1339": "barn", "1340": "hardwood", "1341": "staircase", "1342": "entrance", "1343": "windows", "1344": "picnic tables", "1345": "fudge", "1346": "performing trick", "1347": "blind", "1348": "vinegar", "1349": "beets", "1350": "curved", "1351": "away", "1352": "roast beef", "1353": "spray bottle", "1354": "chopstick", "1355": "soap dispenser", "1356": "dog food", "1357": "bus driver", "1358": "banana bunch", "1359": "dumpster", "1360": "twigs", "1361": "napkins", "1362": "bagels", "1363": "stage", "1364": "baskets", "1365": "ceramic", "1366": "pineapples", "1367": "street lights", "1368": "soap", "1369": "brownies", "1370": "christmas lights", "1371": "cameras", "1372": "fruit stand", "1373": "soda can", "1374": "hotdog bun", "1375": "fat", "1376": "pizza boxes", "1377": "melon", "1378": "customers", "1379": "athletic shoe", "1380": "peeled", "1381": "food container", "1382": "powdered sugar", "1383": "rice cooker", "1384": "spots", "1385": "sugar", "1386": "hair dryer", "1387": "tractors", "1388": "broom", "1389": "skin", "1390": "pillowcase", "1391": "smoothie", "1392": "ear buds", "1393": "garment", "1394": "soft", "1395": "walls", "1396": "ravioli", "1397": "seafood", "1398": "hammer", "1399": "sack", "1400": "blenders", "1401": "sponge", 
"1402": "sunflower", "1403": "cabin", "1404": "tuna", "1405": "beautiful", "1406": "heels", "1407": "butter", "1408": "scooters", "1409": "wardrobe", "1410": "taking pictures", "1411": "forks", "1412": "lambs", "1413": "tin", "1414": "cat food", "1415": "engineer", "1416": "oatmeal", "1417": "clay", "1418": "butterflies", "1419": "team", "1420": "lipstick", "1421": "ladle", "1422": "food processor", "1423": "wok", "1424": "shelter", "1425": "lobster", "1426": "snacks", "1427": "vests", "1428": "face mask", "1429": "peanut butter", "1430": "balloons", "1431": "peanuts", "1432": "wallpaper", "1433": "cranberries", "1434": "crown", "1435": "caramel", "1436": "floor lamp", "1437": "shower curtain", "1438": "blankets", "1439": "hangar", "1440": "surfboards", "1441": "meal", "1442": "wolf", "1443": "gifts", "1444": "father", "1445": "cd", "1446": "chains", "1447": "tourist", "1448": "canister", "1449": "spear", "1450": "pilot", "1451": "mountain side", "1452": "pencils", "1453": "trumpet", "1454": "knives", "1455": "mango", "1456": "magnet", "1457": "guys", "1458": "satellite dish", "1459": "table lamp", "1460": "keyboards", "1461": "swimmer", "1462": "stump", "1463": "amusement park", "1464": "goal", "1465": "roadside", "1466": "wig", "1467": "chickens", "1468": "card", "1469": "looking up", "1470": "aluminum", "1471": "pandas", "1472": "soap dish", "1473": "pomegranate", "1474": "tourists", "1475": "parrots", "1476": "toilet brush", "1477": "remote controls", "1478": "suits", "1479": "cotton", "1480": "tents", "1481": "water glass", "1482": "healthy", "1483": "envelope", "1484": "baking sheet", "1485": "marker", "1486": "muffins", "1487": "salon", "1488": "snow flakes", "1489": "dry erase board", "1490": "fishing pole", "1491": "lighthouse", "1492": "earphones", "1493": "photographer", "1494": "gorilla", "1495": "seeds", "1496": "sticks", "1497": "shopping cart", "1498": "pears", "1499": "alarm clock", "1500": "tree branch", "1501": "almonds", "1502": "theater", 
"1503": "tiger", "1504": "forward", "1505": "temple", "1506": "hedge", "1507": "kitchen towel", "1508": "motorcycles", "1509": "garland", "1510": "pudding", "1511": "vintage", "1512": "coarse", "1513": "swans", "1514": "pretzels", "1515": "swamp", "1516": "dense", "1517": "auditorium", "1518": "daisy", "1519": "dish soap", "1520": "opaque", "1521": "french toast", "1522": "straight", "1523": "tennis balls", "1524": "orchid", "1525": "champagne", "1526": "pizza pie", "1527": "egg roll", "1528": "flatbread", "1529": "coconuts", "1530": "flip flop", "1531": "skyscraper", "1532": "fur", "1533": "denim", "1534": "scaffolding", "1535": "coin", "1536": "policemen", "1537": "stuffed bears", "1538": "cane", "1539": "cloth", "1540": "cake stand", "1541": "pump", "1542": "soccer", "1543": "loaf", "1544": "knife block", "1545": "ski lift", "1546": "modern", "1547": "old fashioned", "1548": "cotton dessert", "1549": "trays", "1550": "smooth", "1551": "lunch", "1552": "dull", "1553": "shield", "1554": "dinner", "1555": "cloths", "1556": "waterfall", "1557": "waste basket", "1558": "scarce", "1559": "shaking hands", "1560": "salt shaker", "1561": "pocket watch", "1562": "unhealthy", "1563": "lounge", "1564": "moose", "1565": "seaweed", "1566": "panda bears", "1567": "candies", "1568": "batteries", "1569": "comb", "1570": "wallet", "1571": "students", "1572": "school", "1573": "geese", "1574": "apartment building", "1575": "stars", "1576": "granola", "1577": "leopard", "1578": "cardboard", "1579": "shoe laces", "1580": "hairbrush", "1581": "chef hat", "1582": "crystal", "1583": "pizza tray", "1584": "bread box", "1585": "luggage cart", "1586": "apartment", "1587": "angry", "1588": "characters", "1589": "oak tree", "1590": "angry bird", "1591": "backpacks", "1592": "shaving cream", "1593": "cemetery", "1594": "lace", "1595": "anchovies", "1596": "dresses", "1597": "paper towels", "1598": "garage door", "1599": "vanilla", "1600": "uncooked", "1601": "battery", "1602": "butter 
knife", "1603": "mint", "1604": "package", "1605": "biscuits", "1606": "son", "1607": "cake pan", "1608": "snack", "1609": "riding boots", "1610": "rooftop", "1611": "irregular", "1612": "baked", "1613": "kittens", "1614": "sconce", "1615": "serving dish", "1616": "mirrors", "1617": "taking photo", "1618": "bubble", "1619": "printers", "1620": "ice cube", "1621": "knee pads", "1622": "doors", "1623": "ceiling light", "1624": "cotton candy", "1625": "helmets", "1626": "cheese cube", "1627": "bartender", "1628": "pistachio", "1629": "ugly", "1630": "sausages", "1631": "beer can", "1632": "baker", "1633": "coffee beans", "1634": "almond", "1635": "ovens", "1636": "curled", "1637": "underneath", "1638": "suitcases", "1639": "food", "1640": "taking bath", "1641": "vendor", "1642": "lizard", "1643": "homes", "1644": "shops", "1645": "mannequins", "1646": "turtle", "1647": "blossom", "1648": "chickpeas", "1649": "outside", "1650": "ornament", "1651": "milk carton", "1652": "mexican food", "1653": "seed", "1654": "avocados", "1655": "masks", "1656": "pumpkins", "1657": "papaya", "1658": "stapler", "1659": "hamburgers", "1660": "earrings", "1661": "back", "1662": "wildflowers", "1663": "bats", "1664": "hand soap", "1665": "fresh", "1666": "manhole cover", "1667": "dolphins", "1668": "thermometer", "1669": "castle", "1670": "cones", "1671": "pizza cutter", "1672": "pizza box", "1673": "heel", "1674": "salmon", "1675": "door frame", "1676": "taco", "1677": "pork", "1678": "wedding", "1679": "bubbles", "1680": "eiffel tower", "1681": "cranberry", "1682": "napkin dispenser", "1683": "bandage", "1684": "elmo", "1685": "notepad", "1686": "pepper shaker", "1687": "artichokes", "1688": "tools", "1689": "window frame", "1690": "steamed", "1691": "groceries", "1692": "lily", "1693": "cookbook", "1694": "paper container", "1695": "hippos", "1696": "hilltop", "1697": "twig", "1698": "animal", "1699": "wii game", "1700": "beads", "1701": "lilies", "1702": "towel dispenser", "1703": 
"blood", "1704": "ladles", "1705": "jewelry", "1706": "hearts", "1707": "snow boots", "1708": "ahead", "1709": "utensil holder", "1710": "football", "1711": "bird cage", "1712": "dish drainer", "1713": "cds", "1714": "banana peel", "1715": "vines", "1716": "pizza crust", "1717": "shopper", "1718": "tags", "1719": "keypad", "1720": "dinosaurs", "1721": "stir fry", "1722": "bomb", "1723": "necklaces", "1724": "packages", "1725": "uniforms", "1726": "sparse", "1727": "unhappy", "1728": "control panel", "1729": "antennas", "1730": "spray can", "1731": "feathers", "1732": "electric toothbrush", "1733": "potted", "1734": "juice box", "1735": "toolbox", "1736": "visitor", "1737": "ornaments", "1738": "sign post", "1739": "baseball mitt", "1740": "robot", "1741": "blackberries", "1742": "desk lamp", "1743": "glaze", "1744": "melons", "1745": "cookie dough", "1746": "paint brush", "1747": "mustard bottle", "1748": "apple logo", "1749": "salad dressing", "1750": "mattresses", "1751": "cash register", "1752": "nest", "1753": "knee pad", "1754": "out", "1755": "toasted", "1756": "price tag", "1757": "canisters", "1758": "christmas light", "1759": "antelopes", "1760": "dream catcher", "1761": "student", "1762": "fine", "1763": "kangaroo", "1764": "smoke stack", "1765": "music", "1766": "cages", "1767": "soccer balls", "1768": "ostriches", "1769": "coffee shop", "1770": "ice cubes", "1771": "downward", "1772": "televisions", "1773": "candle holder", "1774": "grinder", "1775": "xbox controller", "1776": "cricket", "1777": "hurdle", "1778": "obstacle", "1779": "lab coat", "1780": "gas pump", "1781": "banana bunches", "1782": "bell tower", "1783": "waitress", "1784": "in mirror", "1785": "coats", "1786": "attic", "1787": "sugar packet", "1788": "taking photograph", "1789": "mountain peak", "1790": "pub", "1791": "silk", "1792": "blossoms", "1793": "pillars", "1794": "scrub brush", "1795": "kiwis", "1796": "octagonal", "1797": "parachutes", "1798": "lions", "1799": "sideways", 
"1800": "egg carton", "1801": "visitors", "1802": "sunflowers", "1803": "shoe lace", "1804": "rhinos", "1805": "elbow pad", "1806": "egg yolk", "1807": "outlets", "1808": "baseball bats", "1809": "life jackets", "1810": "snakes", "1811": "vitamins", "1812": "cigar", "1813": "upwards", "1814": "beneath", "1815": "taking photos", "1816": "storage box", "1817": "armor", "1818": "cookie jar", "1819": "rounded", "1820": "seat belt", "1821": "owls", "1822": "appetizer", "1823": "beer cans", "1824": "stores", "1825": "shoppers", "1826": "bird house", "1827": "sugar packets", "1828": "wild", "1829": "dvd players", "1830": "towers", "1831": "water bottles", "1832": "waves", "1833": "pikachu", "1834": "wolves", "1835": "immature", "1836": "shampoo", "1837": "orchids", "1838": "elevator", "1839": "taking notes", "1840": "wave", "1841": "horse hoof", "1842": "bottle cap"}, {"PAD": 0, "UNK": 1, "CLS": 2, "is": 3, "the": 4, "sky": 5, "dark": 6, "what": 7, "on": 8, "white": 9, "wall": 10, "that": 11, "pipe": 12, "red": 13, "tall": 14, "clock": 15, "small": 16, "or": 17, "large": 18, "who": 19, "wearing": 20, "a": 21, "shirt": 22, "do": 23, "you": 24, "think": 25, "he": 26, "sleeping": 27, "in": 28, "cheese": 29, "to": 30, "left": 31, "of": 32, "food": 33, "plate": 34, "piece": 35, "furniture": 36, "kind": 37, "right": 38, "chair": 39, "steel": 40, "spatula": 41, "top": 42, "image": 43, "woman": 44, "man": 45, "color": 46, "are": 47, "there": 48, "any": 49, "umbrellas": 50, "motorcycles": 51, "photograph": 52, "bird": 53, "coat": 54, "either": 55, "traffic": 56, "light": 57, "stop": 58, "sign": 59, "which": 60, "cups": 61, "hanging": 62, "cup": 63, "people": 64, "mirror": 65, "part": 66, "both": 67, "bikes": 68, "and": 69, "cars": 70, "this": 71, "scene": 72, "animal": 73, "sits": 74, "bench": 75, "side": 76, "ground": 77, "made": 78, "jeans": 79, "look": 80, "bicycles": 81, "helmets": 82, "skier": 83, "helmet": 84, "children": 85, "picture": 86, "rock": 87, "tools": 88, "kids": 
89, "grapefruit": 90, "hair": 91, "street": 92, "where": 93, "photo": 94, "green": 95, "chairs": 96, "bottom": 97, "field": 98, "see": 99, "horses": 100, "eating": 101, "from": 102, "grass": 103, "striped": 104, "ripe": 105, "bananas": 106, "above": 107, "newspaper": 108, "device": 109, "table": 110, "laptop": 111, "couch": 112, "fast": 113, "looks": 114, "does": 115, "seem": 116, "be": 117, "wooden": 118, "bread": 119, "box": 120, "fruit": 121, "called": 122, "fork": 123, "ski": 124, "napkin": 125, "stuffed": 126, "dog": 127, "front": 128, "cell": 129, "phone": 130, "whats": 131, "doing": 132, "tree": 133, "little": 134, "curtain": 135, "soccer": 136, "ball": 137, "have": 138, "different": 139, "colors": 140, "pillow": 141, "bed": 142, "post": 143, "standing": 144, "person": 145, "surf": 146, "board": 147, "wear": 148, "glasses": 149, "how": 150, "rug": 151, "silver": 152, "pink": 153, "sofa": 154, "bicycle": 155, "surfboard": 156, "middle": 157, "pedestrian": 158, "scooters": 159, "bag": 160, "car": 161, "cone": 162, "elbow": 163, "pad": 164, "other": 165, "black": 166, "giraffes": 167, "elephants": 168, "oven": 169, "customer": 170, "under": 171, "yellow": 172, "window": 173, "girl": 174, "pants": 175, "size": 176, "near": 177, "watching": 178, "running": 179, "trousers": 180, "feathers": 181, "blue": 182, "keyboard": 183, "metallic": 184, "boxes": 185, "apples": 186, "bike": 187, "holding": 188, "bat": 189, "wardrobe": 190, "gray": 191, "umpire": 192, "cooking": 193, "utensil": 194, "faucet": 195, "utensils": 196, "tray": 197, "type": 198, "step": 199, "place": 200, "orange": 201, "fries": 202, "vehicle": 203, "isnt": 204, "bottle": 205, "pedestrians": 206, "surfboards": 207, "freezer": 208, "remote": 209, "item": 210, "television": 211, "cabinet": 212, "happy": 213, "baby": 214, "statue": 215, "curtains": 216, "truck": 217, "highway": 218, "socks": 219, "inside": 220, "boots": 221, "bags": 222, "sidewalk": 223, "cap": 224, "church": 225, "women": 226, "men": 
227, "same": 228, "shoe": 229, "lace": 230, "lamps": 231, "books": 232, "backpack": 233, "vegetable": 234, "knee": 235, "pads": 236, "backpacks": 237, "umbrella": 238, "behind": 239, "spectator": 240, "fences": 241, "mirrors": 242, "not": 243, "brown": 244, "bleachers": 245, "by": 246, "lamb": 247, "it": 248, "elephant": 249, "walking": 250, "closed": 251, "open": 252, "coffee": 253, "tables": 254, "glass": 255, "container": 256, "cow": 257, "containers": 258, "peppers": 259, "tomatoes": 260, "frosting": 261, "fence": 262, "pillows": 263, "towels": 264, "balancing": 265, "skateboard": 266, "tent": 267, "sneakers": 268, "metal": 269, "pizza": 270, "cart": 271, "sweater": 272, "mushroom": 273, "blanket": 274, "animals": 275, "officers": 276, "sitting": 277, "hats": 278, "scarves": 279, "parking": 280, "lot": 281, "tea": 282, "kettle": 283, "meat": 284, "spinach": 285, "sandwich": 286, "name": 287, "shopping": 288, "counter": 289, "cat": 290, "clear": 291, "overcast": 292, "breads": 293, "paper": 294, "vegetables": 295, "carrots": 296, "folding": 297, "vases": 298, "spoon": 299, "plastic": 300, "kite": 301, "triangular": 302, "shape": 303, "wears": 304, "appliance": 305, "baked": 306, "good": 307, "platform": 308, "trains": 309, "player": 310, "flag": 311, "sandal": 312, "looking": 313, "at": 314, "weather": 315, "building": 316, "toilet": 317, "than": 318, "dryer": 319, "guys": 320, "frisbee": 321, "bicyclist": 322, "center": 323, "next": 324, "lady": 325, "tents": 326, "lights": 327, "pole": 328, "desks": 329, "lying": 330, "kayak": 331, "wood": 332, "these": 333, "species": 334, "hot": 335, "clothing": 336, "saucer": 337, "mug": 338, "jacket": 339, "crossing": 340, "road": 341, "toy": 342, "teddy": 343, "bear": 344, "doll": 345, "doors": 346, "windows": 347, "shelf": 348, "swinging": 349, "racket": 350, "skateboarders": 351, "air": 352, "toothbrush": 353, "full": 354, "sheep": 355, "purple": 356, "palm": 357, "van": 358, "chain": 359, "train": 360, "ovens": 361, 
"cabinets": 362, "printers": 363, "desk": 364, "frisbees": 365, "fruits": 366, "walnuts": 367, "kid": 368, "indoors": 369, "pot": 370, "pan": 371, "an": 372, "tiles": 373, "pans": 374, "horse": 375, "bowl": 376, "lid": 377, "around": 378, "parachutes": 379, "kites": 380, "biscuit": 381, "down": 382, "hold": 383, "dining": 384, "motorbike": 385, "driving": 386, "tvs": 387, "pen": 388, "plants": 389, "child": 390, "skirt": 391, "beach": 392, "machine": 393, "cats": 394, "sand": 395, "nightstand": 396, "beds": 397, "hat": 398, "cones": 399, "shorts": 400, "big": 401, "plant": 402, "two": 403, "material": 404, "as": 405, "outdoors": 406, "lamp": 407, "cooked": 408, "platter": 409, "presented": 410, "toilets": 411, "shower": 412, "carpet": 413, "beige": 414, "flowers": 415, "benches": 416, "leaves": 417, "logo": 418, "teal": 419, "soap": 420, "bottles": 421, "plates": 422, "vase": 423, "boy": 424, "snow": 425, "refrigerators": 426, "towel": 427, "fridge": 428, "wig": 429, "coats": 430, "placemats": 431, "clocks": 432, "cafe": 433, "dress": 434, "pepper": 435, "shakers": 436, "locks": 437, "plane": 438, "airplane": 439, "cupboards": 440, "end": 441, "cakes": 442, "giraffe": 443, "bucket": 444, "candles": 445, "using": 446, "door": 447, "colorful": 448, "blond": 449, "has": 450, "adidas": 451, "computer": 452, "mouse": 453, "skis": 454, "crate": 455, "instrument": 456, "ladders": 457, "piano": 458, "pictures": 459, "house": 460, "could": 461, "pulls": 462, "stove": 463, "lemon": 464, "cucumber": 465, "square": 466, "smooth": 467, "him": 468, "snowy": 469, "sandy": 470, "skiing": 471, "goggles": 472, "bacon": 473, "can": 474, "donuts": 475, "bus": 476, "laptops": 477, "video": 478, "cameras": 479, "liquid": 480, "bun": 481, "banana": 482, "mannequins": 483, "napkins": 484, "sit": 485, "she": 486, "tiny": 487, "old": 488, "hedges": 489, "feeding": 490, "shown": 491, "boot": 492, "shoes": 493, "necktie": 494, "ties": 495, "ladder": 496, "covering": 497, "carrot": 498, 
"brunette": 499, "magazines": 500, "pieces": 501, "book": 502, "groomed": 503, "onion": 504, "for": 505, "umpires": 506, "potatoes": 507, "belt": 508, "glove": 509, "motorcycle": 510, "bridge": 511, "trash": 512, "bin": 513, "pattern": 514, "batter": 515, "liquor": 516, "kittens": 517, "clean": 518, "bright": 519, "bookcases": 520, "tissue": 521, "drinking": 522, "buildings": 523, "tan": 524, "pictured": 525, "dressing": 526, "soup": 527, "salad": 528, "watercraft": 529, "water": 530, "bush": 531, "riding": 532, "suitcases": 533, "iron": 534, "branches": 535, "trays": 536, "donkey": 537, "computers": 538, "with": 539, "watch": 540, "drawer": 541, "gas": 542, "boys": 543, "bears": 544, "dishwasher": 545, "below": 546, "curly": 547, "asparaguss": 548, "mans": 549, "uses": 550, "tool": 551, "wolves": 552, "scarf": 553, "taxi": 554, "common": 555, "spoons": 556, "comforter": 557, "appear": 558, "athletic": 559, "sock": 560, "murky": 561, "wavy": 562, "duck": 563, "swimming": 564, "game": 565, "tshirt": 566, "pitcher": 567, "t": 568, "microwaves": 569, "sheets": 570, "carrying": 571, "wheels": 572, "ice": 573, "makers": 574, "wires": 575, "olives": 576, "foggy": 577, "stands": 578, "types": 579, "gate": 580, "wire": 581, "hospital": 582, "catcher": 583, "long": 584, "sleeved": 585, "blouse": 586, "drives": 587, "hook": 588, "flags": 589, "located": 590, "fireplace": 591, "butter": 592, "knives": 593, "crates": 594, "waiting": 595, "cake": 596, "knife": 597, "runs": 598, "sugar": 599, "packets": 600, "brush": 601, "covered": 602, "map": 603, "skateboarder": 604, "cookie": 605, "pots": 606, "blender": 607, "apple": 608, "carriage": 609, "speaker": 610, "guy": 611, "balls": 612, "trees": 613, "moss": 614, "stick": 615, "motorbikes": 616, "gold": 617, "trucks": 618, "buses": 619, "snowboarding": 620, "boat": 621, "shrub": 622, "ropes": 623, "basket": 624, "restaurant": 625, "crab": 626, "seat": 627, "bookshelf": 628, "signal": 629, "candies": 630, "cranberries": 631, 
"dried": 632, "bell": 633, "younger": 634, "tomato": 635, "thing": 636, "cloudy": 637, "round": 638, "snowpants": 639, "marina": 640, "bookcase": 641, "wine": 642, "sheet": 643, "pillowcase": 644, "stapler": 645, "ketchup": 646, "zebra": 647, "gloves": 648, "papers": 649, "out": 650, "denim": 651, "kitten": 652, "comfortable": 653, "necklace": 654, "screen": 655, "tablecloth": 656, "calm": 657, "choppy": 658, "deer": 659, "hose": 660, "nightstands": 661, "forks": 662, "ring": 663, "soft": 664, "drinks": 665, "peacocks": 666, "dogs": 667, "beneath": 668, "dragon": 669, "syrup": 670, "mobile": 671, "dirty": 672, "contains": 673, "silverware": 674, "shop": 675, "mixer": 676, "microwave": 677, "control": 678, "american": 679, "rope": 680, "perched": 681, "words": 682, "mailbox": 683, "beside": 684, "handbag": 685, "pavement": 686, "underneath": 687, "squirrel": 688, "outfit": 689, "bridges": 690, "bulbs": 691, "refrigerator": 692, "rectangular": 693, "short": 694, "store": 695, "womans": 696, "up": 697, "wide": 698, "dish": 699, "beverage": 700, "cigarettes": 701, "mats": 702, "alien": 703, "male": 704, "moving": 705, "cowboy": 706, "engine": 707, "screens": 708, "keyboards": 709, "clouds": 710, "ocean": 711, "mugs": 712, "wineglass": 713, "lipstick": 714, "lift": 715, "tennis": 716, "squash": 717, "flower": 718, "pastry": 719, "walks": 720, "berry": 721, "shelves": 722, "hydrant": 723, "fire": 724, "playing": 725, "baseball": 726, "river": 727, "vacuum": 728, "cleaner": 729, "floor": 730, "camera": 731, "bikers": 732, "bartender": 733, "parked": 734, "surfing": 735, "bells": 736, "kitchen": 737, "alarm": 738, "telephone": 739, "was": 740, "taken": 741, "wicker": 742, "brooms": 743, "vehicles": 744, "onions": 745, "apartment": 746, "hill": 747, "cutting": 748, "eyes": 749, "hills": 750, "wii": 751, "controller": 752, "extinguisher": 753, "items": 754, "countertop": 755, "leaning": 756, "performing": 757, "trick": 758, "swim": 759, "suit": 760, "wet": 761, "airplanes": 
762, "skinny": 763, "sandals": 764, "rhinos": 765, "steps": 766, "seagull": 767, "headboard": 768, "mattress": 769, "dresser": 770, "quilt": 771, "number": 772, "walk": 773, "grazing": 774, "drawers": 775, "reflected": 776, "dessert": 777, "baking": 778, "houses": 779, "length": 780, "beard": 781, "cord": 782, "scooter": 783, "like": 784, "letters": 785, "donut": 786, "sprinkles": 787, "birds": 788, "wagon": 789, "croissant": 790, "puppys": 791, "checkered": 792, "coins": 793, "used": 794, "make": 795, "broccoli": 796, "cows": 797, "zebras": 798, "toasts": 799, "eggs": 800, "comforters": 801, "filled": 802, "crosswalk": 803, "gender": 804, "armchair": 805, "pastries": 806, "tee": 807, "hay": 808, "pouch": 809, "rackets": 810, "dispenser": 811, "thick": 812, "crust": 813, "lettuce": 814, "chicken": 815, "pepperoni": 816, "tongs": 817, "clothes": 818, "cds": 819, "turned": 820, "off": 821, "throwing": 822, "snowboarder": 823, "soda": 824, "electrical": 825, "outlet": 826, "pines": 827, "panda": 828, "pizzas": 829, "microphone": 830, "swan": 831, "aircraft": 832, "all": 833, "washer": 834, "stainless": 835, "monkey": 836, "cans": 837, "safety": 838, "vests": 839, "sticker": 840, "garbage": 841, "surrounding": 842, "yard": 843, "bushes": 844, "bar": 845, "stools": 846, "collar": 847, "beans": 848, "mushrooms": 849, "cabbage": 850, "branch": 851, "pulled": 852, "cubes": 853, "stand": 854, "suitcase": 855, "sponge": 856, "golden": 857, "cream": 858, "passengers": 859, "ham": 860, "cupboard": 861, "squashes": 862, "mice": 863, "headphones": 864, "cab": 865, "city": 866, "uniform": 867, "bakery": 868, "juice": 869, "sliced": 870, "ridged": 871, "burger": 872, "purse": 873, "mask": 874, "paddle": 875, "paddles": 876, "routers": 877, "calculators": 878, "dressed": 879, "armors": 880, "pliers": 881, "blonde": 882, "phones": 883, "girls": 884, "cellphone": 885, "tourist": 886, "drink": 887, "salt": 888, "shaker": 889, "pickles": 890, "uncooked": 891, "buns": 892, "bathroom": 
893, "living": 894, "room": 895, "tank": 896, "steering": 897, "wheel": 898, "tablecloths": 899, "rough": 900, "bowls": 901, "potato": 902, "wallet": 903, "gravel": 904, "shampoos": 905, "ladys": 906, "bracelet": 907, "shields": 908, "airport": 909, "furry": 910, "sailboat": 911, "briefcases": 912, "couches": 913, "blazer": 914, "holds": 915, "sink": 916, "opens": 917, "jet": 918, "star": 919, "heart": 920, "snowboard": 921, "produce": 922, "hippos": 923, "dvd": 924, "tv": 925, "her": 926, "planter": 927, "wristband": 928, "controls": 929, "flying": 930, "staring": 931, "envelopes": 932, "pine": 933, "sheeps": 934, "makes": 935, "kick": 936, "kicks": 937, "chocolate": 938, "coconut": 939, "sculpture": 940, "rounded": 941, "garden": 942, "sea": 943, "foam": 944, "lies": 945, "touching": 946, "headband": 947, "toothbrushes": 948, "floating": 949, "policeman": 950, "apron": 951, "slices": 952, "mashed": 953, "corn": 954, "leggings": 955, "balloon": 956, "dumpster": 957, "spectators": 958, "colored": 959, "cereal": 960, "jersey": 961, "pulling": 962, "walls": 963, "trunk": 964, "station": 965, "asparagus": 966, "leather": 967, "rubber": 968, "net": 969, "eats": 970, "notebook": 971, "boars": 972, "rabbits": 973, "sponges": 974, "shade": 975, "soaps": 976, "drainers": 977, "merchandise": 978, "cash": 979, "registers": 980, "shallow": 981, "deep": 982, "life": 983, "hand": 984, "public": 985, "mustard": 986, "rugs": 987, "cages": 988, "menu": 989, "object": 990, "vending": 991, "older": 992, "tractor": 993, "chrome": 994, "tie": 995, "catching": 996, "cover": 997, "pens": 998, "undershirt": 999, "chimney": 1000, "zoo": 1001, "peanuts": 1002, "olive": 1003, "monitor": 1004, "scissors": 1005, "word": 1006, "printed": 1007, "blinds": 1008, "mat": 1009, "huge": 1010, "barn": 1011, "cloth": 1012, "console": 1013, "crouching": 1014, "chewing": 1015, "sauce": 1016, "ship": 1017, "grapes": 1018, "through": 1019, "forest": 1020, "radiator": 1021, "hangs": 1022, "young": 1023, 
"passenger": 1024, "concrete": 1025, "polar": 1026, "topped": 1027, "nose": 1028, "flamingoes": 1029, "sausage": 1030, "cupcake": 1031, "canopy": 1032, "goat": 1033, "cloths": 1034, "poster": 1035, "conditions": 1036, "printer": 1037, "leans": 1038, "against": 1039, "pilot": 1040, "trailer": 1041, "oval": 1042, "wristwatch": 1043, "placemat": 1044, "basil": 1045, "dirt": 1046, "brief": 1047, "case": 1048, "jar": 1049, "pecan": 1050, "eye": 1051, "clips": 1052, "flames": 1053, "home": 1054, "bending": 1055, "lock": 1056, "tape": 1057, "sack": 1058, "balloons": 1059, "frame": 1060, "raw": 1061, "onto": 1062, "trashcan": 1063, "aquarium": 1064, "spider": 1065, "tap": 1066, "bees": 1067, "mother": 1068, "goose": 1069, "parachute": 1070, "tower": 1071, "wetsuit": 1072, "talking": 1073, "entering": 1074, "skateboards": 1075, "narrow": 1076, "ipod": 1077, "bare": 1078, "canister": 1079, "garland": 1080, "picnic": 1081, "driver": 1082, "brick": 1083, "envelope": 1084, "seems": 1085, "healthier": 1086, "chimneys": 1087, "drain": 1088, "bathtub": 1089, "objects": 1090, "trunks": 1091, "rocks": 1092, "direction": 1093, "empty": 1094, "fisherman": 1095, "canoe": 1096, "decorated": 1097, "blueberries": 1098, "raspberries": 1099, "caps": 1100, "reading": 1101, "snowboards": 1102, "park": 1103, "entertainment": 1104, "devices": 1105, "painting": 1106, "ducks": 1107, "blankets": 1108, "skating": 1109, "ear": 1110, "buds": 1111, "pipes": 1112, "jumping": 1113, "stars": 1114, "beer": 1115, "batters": 1116, "players": 1117, "hows": 1118, "bats": 1119, "maker": 1120, "egg": 1121, "sandwiches": 1122, "lions": 1123, "radio": 1124, "mickey": 1125, "cutter": 1126, "tail": 1127, "coleslaw": 1128, "dry": 1129, "televisions": 1130, "mud": 1131, "venue": 1132, "pigeons": 1133, "gulls": 1134, "uncomfortable": 1135, "luggage": 1136, "oranges": 1137, "oar": 1138, "wreath": 1139, "giant": 1140, "toys": 1141, "desserts": 1142, "crackers": 1143, "posing": 1144, "sculptures": 1145, "toddler": 1146, 
"pig": 1147, "marble": 1148, "avocado": 1149, "peeled": 1150, "unpeeled": 1151, "vest": 1152, "leading": 1153, "still": 1154, "meal": 1155, "fur": 1156, "toppings": 1157, "over": 1158, "rice": 1159, "barefoot": 1160, "roof": 1161, "tin": 1162, "mountain": 1163, "wrist": 1164, "band": 1165, "planters": 1166, "rings": 1167, "snacks": 1168, "faucets": 1169, "tracks": 1170, "heavy": 1171, "wrinkled": 1172, "couple": 1173, "facing": 1174, "stone": 1175, "represent": 1176, "resting": 1177, "figurine": 1178, "urinal": 1179, "thin": 1180, "showing": 1181, "blueberry": 1182, "money": 1183, "modern": 1184, "tire": 1185, "arrow": 1186, "ingredient": 1187, "lake": 1188, "waiters": 1189, "articles": 1190, "heater": 1191, "skateboarding": 1192, "sprinkled": 1193, "pie": 1194, "engineers": 1195, "toddlers":
gitextract_8y0cfniw/
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── configs/
│ ├── clevr/
│ │ └── mcan_small.yml
│ ├── gqa/
│ │ ├── ban_4.yml
│ │ ├── ban_8.yml
│ │ ├── butd.yml
│ │ ├── mcan_large.yml
│ │ └── mcan_small.yml
│ └── vqa/
│ ├── ban_4.yml
│ ├── ban_8.yml
│ ├── butd.yml
│ ├── mcan_large.yml
│ ├── mcan_small.yml
│ ├── mfb.yml
│ ├── mfh.yml
│ ├── mmnasnet_large.yml
│ └── mmnasnet_small.yml
├── data/
│ ├── clevr/
│ │ ├── clevr_extract_feat.py
│ │ ├── feats/
│ │ │ └── .gitkeep
│ │ └── raw/
│ │ └── .gitkeep
│ ├── gqa/
│ │ ├── feats/
│ │ │ └── .gitkeep
│ │ ├── gqa_feat_preproc.py
│ │ └── raw/
│ │ └── .gitkeep
│ └── vqa/
│ ├── feats/
│ │ └── .gitkeep
│ └── raw/
│ └── .gitkeep
├── docs/
│ ├── Makefile
│ ├── _source/
│ │ ├── _static/
│ │ │ ├── custom.css
│ │ │ ├── mathjax_mathml.user.js
│ │ │ └── mathjax_wikipedia.user.js
│ │ ├── advanced/
│ │ │ ├── adding_model.md
│ │ │ └── contributing.md
│ │ ├── basic/
│ │ │ ├── getting_started.md
│ │ │ ├── install.md
│ │ │ └── model_zoo.md
│ │ ├── conf.py
│ │ └── index.rst
│ ├── _templates/
│ │ └── layout.html
│ ├── make.bat
│ ├── readme.md
│ └── requirements.txt
├── openvqa/
│ ├── core/
│ │ ├── base_cfgs.py
│ │ ├── base_dataset.py
│ │ └── path_cfgs.py
│ ├── datasets/
│ │ ├── clevr/
│ │ │ ├── clevr_loader.py
│ │ │ └── eval/
│ │ │ └── result_eval.py
│ │ ├── dataset_loader.py
│ │ ├── gqa/
│ │ │ ├── dicts.json
│ │ │ ├── eval/
│ │ │ │ ├── gqa_eval.py
│ │ │ │ └── result_eval.py
│ │ │ └── gqa_loader.py
│ │ └── vqa/
│ │ ├── answer_dict.json
│ │ ├── eval/
│ │ │ ├── result_eval.py
│ │ │ ├── vqa.py
│ │ │ └── vqaEval.py
│ │ └── vqa_loader.py
│ ├── models/
│ │ ├── ban/
│ │ │ ├── adapter.py
│ │ │ ├── ban.py
│ │ │ ├── model_cfgs.py
│ │ │ └── net.py
│ │ ├── butd/
│ │ │ ├── adapter.py
│ │ │ ├── model_cfgs.py
│ │ │ ├── net.py
│ │ │ └── tda.py
│ │ ├── mcan/
│ │ │ ├── adapter.py
│ │ │ ├── mca.py
│ │ │ ├── model_cfgs.py
│ │ │ └── net.py
│ │ ├── mfb/
│ │ │ ├── adapter.py
│ │ │ ├── mfb.py
│ │ │ ├── model_cfgs.py
│ │ │ └── net.py
│ │ ├── mmnasnet/
│ │ │ ├── adapter.py
│ │ │ ├── model_cfgs.py
│ │ │ ├── nasnet.py
│ │ │ └── net.py
│ │ └── model_loader.py
│ ├── ops/
│ │ ├── fc.py
│ │ └── layer_norm.py
│ └── utils/
│ ├── ans_punct.py
│ ├── feat_filter.py
│ ├── make_mask.py
│ └── optim.py
├── requirements.txt
├── results/
│ ├── cache/
│ │ └── .gitkeep
│ ├── log/
│ │ └── .gitkeep
│ ├── pred/
│ │ └── .gitkeep
│ └── result_test/
│ └── .gitkeep
├── run.py
└── utils/
├── exec.py
├── proc_dict_gqa.py
├── proc_dict_vqa.py
├── test_engine.py
└── train_engine.py
SYMBOL INDEX (290 symbols across 49 files)
FILE: data/clevr/clevr_extract_feat.py
function build_model (line 23) | def build_model(args):
function batch_feat (line 44) | def batch_feat(cur_batch, model):
function extract_feature (line 59) | def extract_feature(args, images_path, feats_npz_path):
FILE: data/gqa/gqa_feat_preproc.py
function process_spatial_features (line 20) | def process_spatial_features(feat_path, out_path):
function process_object_features (line 58) | def process_object_features(feat_path, out_path):
FILE: docs/_source/conf.py
function setup (line 89) | def setup(app):
FILE: openvqa/core/base_cfgs.py
class BaseCfgs (line 12) | class BaseCfgs(PATH):
method __init__ (line 13) | def __init__(self):
method str_to_bool (line 181) | def str_to_bool(self, args):
method parse_to_dict (line 197) | def parse_to_dict(self, args):
method add_args (line 207) | def add_args(self, args_dict):
method proc (line 212) | def proc(self):
method __str__ (line 319) | def __str__(self):
FILE: openvqa/core/base_dataset.py
class BaseDataSet (line 12) | class BaseDataSet(Data.Dataset):
method __init__ (line 13) | def __init__(self):
method load_ques_ans (line 24) | def load_ques_ans(self, idx):
method load_img_feats (line 28) | def load_img_feats(self, idx, iid):
method __getitem__ (line 32) | def __getitem__(self, idx):
method __len__ (line 46) | def __len__(self):
method shuffle_list (line 49) | def shuffle_list(self, list):
class BaseAdapter (line 53) | class BaseAdapter(nn.Module):
method __init__ (line 54) | def __init__(self, __C):
method vqa_init (line 71) | def vqa_init(self, __C):
method gqa_init (line 74) | def gqa_init(self, __C):
method clevr_init (line 77) | def clevr_init(self, __C):
method forward (line 80) | def forward(self, frcn_feat, grid_feat, bbox_feat):
method vqa_forward (line 95) | def vqa_forward(self, feat_dict):
method gqa_forward (line 98) | def gqa_forward(self, feat_dict):
method clevr_forward (line 101) | def clevr_forward(self, feat_dict):
FILE: openvqa/core/path_cfgs.py
class PATH (line 8) | class PATH:
method __init__ (line 9) | def __init__(self):
method init_path (line 14) | def init_path(self):
method check_path (line 116) | def check_path(self, dataset=None):
FILE: openvqa/datasets/clevr/clevr_loader.py
class DataSet (line 12) | class DataSet(BaseDataSet):
method __init__ (line 13) | def __init__(self, __C):
method img_feat_path_load (line 79) | def img_feat_path_load(self, path_list):
method tokenize (line 89) | def tokenize(self, stat_ques_list, use_glove):
method ans_stat (line 126) | def ans_stat(self, stat_ans_list):
method load_ques_ans (line 145) | def load_ques_ans(self, idx):
method load_img_feats (line 162) | def load_img_feats(self, idx, iid):
method proc_ques (line 174) | def proc_ques(self, ques, token_to_ix, max_token):
method proc_ans (line 195) | def proc_ans(self, ans, ans_to_ix):
FILE: openvqa/datasets/clevr/eval/result_eval.py
function eval (line 11) | def eval(__C, dataset, ans_ix_list, pred_list, result_eval_file, ensembl...
FILE: openvqa/datasets/dataset_loader.py
class DatasetLoader (line 8) | class DatasetLoader:
method __init__ (line 9) | def __init__(self, __C):
method DataSet (line 16) | def DataSet(self):
class EvalLoader (line 20) | class EvalLoader:
method __init__ (line 21) | def __init__(self, __C):
method eval (line 28) | def eval(self, __arg1, __arg2, __arg3, __arg4, __arg5, __arg6, __arg7):
FILE: openvqa/datasets/gqa/eval/gqa_eval.py
class GQAEval (line 13) | class GQAEval:
method __init__ (line 14) | def __init__(self, __C, result_eval_file, ques_file_path, choices_path...
method get_str_result (line 195) | def get_str_result(self):
method loadFile (line 198) | def loadFile(self, name):
method toScore (line 215) | def toScore(self, b):
method avg (line 219) | def avg(self, l):
method wavg (line 224) | def wavg(self, l, w):
method getWordsNum (line 233) | def getWordsNum(self, question):
method getStepsNum (line 237) | def getStepsNum(self, question):
method belongs (line 260) | def belongs(self, element, group, question):
method updateConsistency (line 270) | def updateConsistency(self, questionId, question, questions):
method chiSquare (line 289) | def chiSquare(self, goldDist, predictedDist):
FILE: openvqa/datasets/gqa/eval/result_eval.py
function eval (line 11) | def eval(__C, dataset, ans_ix_list, pred_list, result_eval_file, ensembl...
FILE: openvqa/datasets/gqa/gqa_loader.py
class DataSet (line 12) | class DataSet(BaseDataSet):
method __init__ (line 13) | def __init__(self, __C):
method img_feat_path_load (line 97) | def img_feat_path_load(self, path_list):
method tokenize (line 160) | def tokenize(self, json_file, use_glove):
method ans_stat (line 175) | def ans_stat(self, json_file):
method load_ques_ans (line 185) | def load_ques_ans(self, idx):
method load_img_feats (line 202) | def load_img_feats(self, idx, iid):
method proc_img_feat (line 225) | def proc_img_feat(self, img_feat, img_feat_pad_size):
method proc_bbox_feat (line 239) | def proc_bbox_feat(self, bbox, img_shape):
method proc_ques (line 250) | def proc_ques(self, ques, token_to_ix, max_token):
method proc_ans (line 271) | def proc_ans(self, ans, ans_to_ix):
FILE: openvqa/datasets/vqa/eval/result_eval.py
function eval (line 7) | def eval(__C, dataset, ans_ix_list, pred_list, result_eval_file, ensembl...
FILE: openvqa/datasets/vqa/eval/vqa.py
class VQA (line 24) | class VQA:
method __init__ (line 25) | def __init__(self, annotation_file=None, question_file=None):
method createIndex (line 47) | def createIndex(self):
method info (line 65) | def info(self):
method getQuesIds (line 73) | def getQuesIds(self, imgIds=[], quesTypes=[], ansTypes=[]):
method getImgIds (line 97) | def getImgIds(self, quesIds=[], quesTypes=[], ansTypes=[]):
method loadQA (line 121) | def loadQA(self, ids=[]):
method showQA (line 132) | def showQA(self, anns):
method loadRes (line 146) | def loadRes(self, resFile, quesFile):
FILE: openvqa/datasets/vqa/eval/vqaEval.py
class VQAEval (line 10) | class VQAEval:
method __init__ (line 11) | def __init__(self, vqa, vqaRes, n=2):
method evaluate (line 68) | def evaluate(self, quesIds=None):
method processPunctuation (line 122) | def processPunctuation(self, inText):
method processDigitArticle (line 134) | def processDigitArticle(self, inText):
method setAccuracy (line 149) | def setAccuracy(self, accQA, accQuesType, accAnsType):
method setEvalQA (line 154) | def setEvalQA(self, quesId, acc):
method setEvalQuesType (line 157) | def setEvalQuesType(self, quesId, quesType, acc):
method setEvalAnsType (line 162) | def setEvalAnsType(self, quesId, ansType, acc):
method updateProgress (line 167) | def updateProgress(self, progress):
FILE: openvqa/datasets/vqa/vqa_loader.py
class DataSet (line 11) | class DataSet(BaseDataSet):
method __init__ (line 12) | def __init__(self, __C):
method img_feat_path_load (line 82) | def img_feat_path_load(self, path_list):
method ques_load (line 93) | def ques_load(self, ques_list):
method tokenize (line 103) | def tokenize(self, stat_ques_list, use_glove):
method ans_stat (line 159) | def ans_stat(self, json_file):
method load_ques_ans (line 170) | def load_ques_ans(self, idx):
method load_img_feats (line 193) | def load_img_feats(self, idx, iid):
method proc_img_feat (line 215) | def proc_img_feat(self, img_feat, img_feat_pad_size):
method proc_bbox_feat (line 229) | def proc_bbox_feat(self, bbox, img_shape):
method proc_ques (line 243) | def proc_ques(self, ques, token_to_ix, max_token):
method get_score (line 264) | def get_score(self, occur):
method proc_ans (line 277) | def proc_ans(self, ans, ans_to_ix):
FILE: openvqa/models/ban/adapter.py
class Adapter (line 12) | class Adapter(BaseAdapter):
method __init__ (line 13) | def __init__(self, __C):
method vqa_init (line 18) | def vqa_init(self, __C):
method gqa_init (line 23) | def gqa_init(self, __C):
method clevr_init (line 35) | def clevr_init(self, __C):
method vqa_forward (line 39) | def vqa_forward(self, feat_dict):
method gqa_forward (line 49) | def gqa_forward(self, feat_dict):
method clevr_forward (line 64) | def clevr_forward(self, feat_dict):
FILE: openvqa/models/ban/ban.py
class MLP (line 16) | class MLP(nn.Module):
method __init__ (line 21) | def __init__(self, dims, act='ReLU', dropout_r=0.0):
method forward (line 36) | def forward(self, x):
class BC (line 43) | class BC(nn.Module):
method __init__ (line 48) | def __init__(self, __C, atten=False):
method forward (line 66) | def forward(self, v, q):
method forward_with_weights (line 74) | def forward_with_weights(self, v, q, w):
class BiAttention (line 87) | class BiAttention(nn.Module):
method __init__ (line 88) | def __init__(self, __C):
method forward (line 94) | def forward(self, v, q, v_mask=True, logit=False, mask_with=-float('in...
class BAN (line 115) | class BAN(nn.Module):
method __init__ (line 116) | def __init__(self, __C):
method forward (line 130) | def forward(self, q, v):
FILE: openvqa/models/ban/model_cfgs.py
class Cfgs (line 9) | class Cfgs(BaseCfgs):
method __init__ (line 10) | def __init__(self):
FILE: openvqa/models/ban/net.py
class Net (line 21) | class Net(nn.Module):
method __init__ (line 22) | def __init__(self, __C, pretrained_emb, token_size, answer_size):
method forward (line 56) | def forward(self, frcn_feat, grid_feat, bbox_feat, ques_ix):
FILE: openvqa/models/butd/adapter.py
class Adapter (line 12) | class Adapter(BaseAdapter):
method __init__ (line 13) | def __init__(self, __C):
method vqa_init (line 18) | def vqa_init(self, __C):
method gqa_init (line 22) | def gqa_init(self, __C):
method clevr_init (line 33) | def clevr_init(self, __C):
method vqa_forward (line 37) | def vqa_forward(self, feat_dict):
method gqa_forward (line 47) | def gqa_forward(self, feat_dict):
method clevr_forward (line 62) | def clevr_forward(self, feat_dict):
FILE: openvqa/models/butd/model_cfgs.py
class Cfgs (line 9) | class Cfgs(BaseCfgs):
method __init__ (line 10) | def __init__(self):
FILE: openvqa/models/butd/net.py
class Net (line 20) | class Net(nn.Module):
method __init__ (line 21) | def __init__(self, __C, pretrained_emb, token_size, answer_size):
method forward (line 55) | def forward(self, frcn_feat, grid_feat, bbox_feat, ques_ix):
FILE: openvqa/models/butd/tda.py
class MLP (line 19) | class MLP(nn.Module):
method __init__ (line 24) | def __init__(self, dims, act='ELU', dropout_r=0.0):
method forward (line 39) | def forward(self, x):
class AttnMap (line 47) | class AttnMap(nn.Module):
method __init__ (line 51) | def __init__(self, __C):
method forward (line 62) | def forward(self, q, v):
method logits (line 69) | def logits(self, q, v):
class TDA (line 82) | class TDA(nn.Module):
method __init__ (line 83) | def __init__(self, __C):
method forward (line 91) | def forward(self, q, v):
FILE: openvqa/models/mcan/adapter.py
class Adapter (line 12) | class Adapter(BaseAdapter):
method __init__ (line 13) | def __init__(self, __C):
method bbox_proc (line 17) | def bbox_proc(self, bbox):
method vqa_init (line 21) | def vqa_init(self, __C):
method gqa_init (line 29) | def gqa_init(self, __C):
method clevr_init (line 40) | def clevr_init(self, __C):
method vqa_forward (line 44) | def vqa_forward(self, feat_dict):
method gqa_forward (line 59) | def gqa_forward(self, feat_dict):
method clevr_forward (line 81) | def clevr_forward(self, feat_dict):
FILE: openvqa/models/mcan/mca.py
class MHAtt (line 19) | class MHAtt(nn.Module):
method __init__ (line 20) | def __init__(self, __C):
method forward (line 31) | def forward(self, v, k, q, mask):
method att (line 66) | def att(self, value, key, query, mask):
class FFN (line 86) | class FFN(nn.Module):
method __init__ (line 87) | def __init__(self, __C):
method forward (line 98) | def forward(self, x):
class SA (line 106) | class SA(nn.Module):
method __init__ (line 107) | def __init__(self, __C):
method forward (line 119) | def forward(self, y, y_mask):
class SGA (line 135) | class SGA(nn.Module):
method __init__ (line 136) | def __init__(self, __C):
method forward (line 152) | def forward(self, x, y, x_mask, y_mask):
class MCA_ED (line 172) | class MCA_ED(nn.Module):
method __init__ (line 173) | def __init__(self, __C):
method forward (line 179) | def forward(self, y, x, y_mask, x_mask):
FILE: openvqa/models/mcan/model_cfgs.py
class Cfgs (line 9) | class Cfgs(BaseCfgs):
method __init__ (line 10) | def __init__(self):
FILE: openvqa/models/mcan/net.py
class AttFlat (line 21) | class AttFlat(nn.Module):
method __init__ (line 22) | def __init__(self, __C):
method forward (line 39) | def forward(self, x, x_mask):
class Net (line 63) | class Net(nn.Module):
method __init__ (line 64) | def __init__(self, __C, pretrained_emb, token_size, answer_size):
method forward (line 97) | def forward(self, frcn_feat, grid_feat, bbox_feat, ques_ix):
FILE: openvqa/models/mfb/adapter.py
class Adapter (line 13) | class Adapter(BaseAdapter):
method __init__ (line 14) | def __init__(self, __C):
method vqa_init (line 19) | def vqa_init(self, __C):
method gqa_init (line 23) | def gqa_init(self, __C):
method clevr_init (line 32) | def clevr_init(self, __C):
method vqa_forward (line 36) | def vqa_forward(self, feat_dict):
method gqa_forward (line 46) | def gqa_forward(self, feat_dict):
method clevr_forward (line 61) | def clevr_forward(self, feat_dict):
FILE: openvqa/models/mfb/mfb.py
class MFB (line 18) | class MFB(nn.Module):
method __init__ (line 19) | def __init__(self, __C, img_feat_size, ques_feat_size, is_first):
method forward (line 28) | def forward(self, img_feat, ques_feat, exp_in=1):
class QAtt (line 48) | class QAtt(nn.Module):
method __init__ (line 49) | def __init__(self, __C):
method forward (line 60) | def forward(self, ques_feat):
class IAtt (line 79) | class IAtt(nn.Module):
method __init__ (line 80) | def __init__(self, __C, img_feat_size, ques_att_feat_size):
method forward (line 93) | def forward(self, img_feat, ques_att_feat):
class CoAtt (line 117) | class CoAtt(nn.Module):
method __init__ (line 118) | def __init__(self, __C):
method forward (line 135) | def forward(self, img_feat, ques_feat):
FILE: openvqa/models/mfb/model_cfgs.py
class Cfgs (line 9) | class Cfgs(BaseCfgs):
method __init__ (line 10) | def __init__(self):
FILE: openvqa/models/mfb/net.py
class Net (line 18) | class Net(nn.Module):
method __init__ (line 19) | def __init__(self, __C, pretrained_emb, token_size, answer_size):
method forward (line 48) | def forward(self, frcn_feat, grid_feat, bbox_feat, ques_ix):
FILE: openvqa/models/mmnasnet/adapter.py
class Adapter (line 12) | class Adapter(BaseAdapter):
method __init__ (line 13) | def __init__(self, __C):
method relation_embedding (line 18) | def relation_embedding(self, f_g):
method vqa_init (line 47) | def vqa_init(self, __C):
method gqa_init (line 55) | def gqa_init(self, __C):
method clevr_init (line 66) | def clevr_init(self, __C):
method vqa_forward (line 70) | def vqa_forward(self, feat_dict):
method gqa_forward (line 86) | def gqa_forward(self, feat_dict):
method clevr_forward (line 109) | def clevr_forward(self, feat_dict):
FILE: openvqa/models/mmnasnet/model_cfgs.py
class Cfgs (line 9) | class Cfgs(BaseCfgs):
method __init__ (line 10) | def __init__(self):
FILE: openvqa/models/mmnasnet/nasnet.py
class RelMHAtt (line 19) | class RelMHAtt(nn.Module):
method __init__ (line 20) | def __init__(self, __C):
method forward (line 35) | def forward(self, v, k, q, mask=None, rel_embed=None):
class MHAtt (line 63) | class MHAtt(nn.Module):
method __init__ (line 64) | def __init__(self, __C):
method forward (line 75) | def forward(self, v, k, q, mask):
method att (line 110) | def att(self, value, key, query, mask):
class FFN (line 126) | class FFN(nn.Module):
method __init__ (line 127) | def __init__(self, __C):
method forward (line 141) | def forward(self, x, arg1, arg2, arg3, arg4):
class SA (line 148) | class SA(nn.Module):
method __init__ (line 149) | def __init__(self, __C, size=1024):
method forward (line 157) | def forward(self, y, arg1, y_mask, arg2, arg3):
class RSA (line 165) | class RSA(nn.Module):
method __init__ (line 166) | def __init__(self, __C, size=1024):
method forward (line 174) | def forward(self, x, arg1, x_mask, arg2, rela):
class GA (line 182) | class GA(nn.Module):
method __init__ (line 183) | def __init__(self, __C):
method forward (line 191) | def forward(self, x, y, x_mask, y_mask, rela):
class NAS_ED (line 203) | class NAS_ED(nn.Module):
method __init__ (line 204) | def __init__(self, __C):
method forward (line 211) | def forward(self, y, x, y_mask, x_mask, rela):
FILE: openvqa/models/mmnasnet/net.py
class AttFlat (line 21) | class AttFlat(nn.Module):
method __init__ (line 22) | def __init__(self, __C):
method forward (line 39) | def forward(self, x, x_mask):
class Net (line 63) | class Net(nn.Module):
method __init__ (line 64) | def __init__(self, __C, pretrained_emb, token_size, answer_size):
method forward (line 101) | def forward(self, frcn_feat, grid_feat, bbox_feat, ques_ix):
FILE: openvqa/models/model_loader.py
class ModelLoader (line 9) | class ModelLoader:
method __init__ (line 10) | def __init__(self, __C):
method Net (line 16) | def Net(self, __arg1, __arg2, __arg3, __arg4):
class CfgLoader (line 20) | class CfgLoader:
method __init__ (line 21) | def __init__(self, model_use):
method load (line 26) | def load(self):
FILE: openvqa/ops/fc.py
class FC (line 10) | class FC(nn.Module):
method __init__ (line 11) | def __init__(self, in_size, out_size, dropout_r=0., use_relu=True):
method forward (line 24) | def forward(self, x):
class MLP (line 36) | class MLP(nn.Module):
method __init__ (line 37) | def __init__(self, in_size, mid_size, out_size, dropout_r=0., use_relu...
method forward (line 43) | def forward(self, x):
FILE: openvqa/ops/layer_norm.py
class LayerNorm (line 9) | class LayerNorm(nn.Module):
method __init__ (line 10) | def __init__(self, size, eps=1e-6):
method forward (line 17) | def forward(self, x):
FILE: openvqa/utils/ans_punct.py
function process_punctuation (line 74) | def process_punctuation(inText):
function process_digit_article (line 86) | def process_digit_article(inText):
function prep_ans (line 102) | def prep_ans(answer):
FILE: openvqa/utils/feat_filter.py
function feat_filter (line 7) | def feat_filter(dataset, frcn_feat, grid_feat, bbox_feat):
FILE: openvqa/utils/make_mask.py
function make_mask (line 10) | def make_mask(feature):
FILE: openvqa/utils/optim.py
class WarmupOptimizer (line 9) | class WarmupOptimizer(object):
method __init__ (line 10) | def __init__(self, lr_base, optimizer, data_size, batch_size, warmup_e...
method step (line 20) | def step(self):
method zero_grad (line 31) | def zero_grad(self):
method rate (line 35) | def rate(self, step=None):
function get_optim (line 51) | def get_optim(__C, model, data_size, lr_base=None):
function adjust_lr (line 72) | def adjust_lr(optim, decay_r):
FILE: run.py
function parse_args (line 11) | def parse_args():
FILE: utils/exec.py
class Execution (line 11) | class Execution:
method __init__ (line 12) | def __init__(self, __C):
method run (line 29) | def run(self, run_mode):
method empty_log (line 45) | def empty_log(self, version):
FILE: utils/proc_dict_gqa.py
function tokenize (line 38) | def tokenize(stat_ques_dict):
function ans_stat (line 64) | def ans_stat(stat_ans_dict):
FILE: utils/proc_dict_vqa.py
function ans_stat (line 21) | def ans_stat(stat_ans_list):
FILE: utils/test_engine.py
function test_engine (line 16) | def test_engine(__C, dataset, state_dict=None, validation=False):
function ckpt_proc (line 151) | def ckpt_proc(state_dict):
FILE: utils/train_engine.py
function train_engine (line 16) | def train_engine(__C, dataset, dataset_eval=None):
Condensed preview — 95 files, each showing its path, character count, and a content snippet. Download the .json file or copy it to your clipboard to get the full structured content (528K chars).
[
{
"path": ".gitignore",
"chars": 2258,
"preview": "ckpts/\nresults/cache/*.json\nresults/cache/*.txt\nresults/result_test/*.json\nresults/result_test/*.txt\nresults/pred/*.pkl\n"
},
{
"path": "Dockerfile",
"chars": 1749,
"preview": "FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04\n# install anaconda 5.2.0\nENV LANG=C.UTF-8 LC_ALL=C.UTF-8\nENV PATH /opt/co"
},
{
"path": "LICENSE",
"chars": 11364,
"preview": " Apache License\n Version 2.0, January 2004\n "
},
{
"path": "README.md",
"chars": 4160,
"preview": "# OpenVQA\n\n<div>\n\t<a href=\"https://openvqa.readthedocs.io/en/latest/?badge=latest\"><img alt=\"Documentation Status\" src=\""
},
{
"path": "configs/clevr/mcan_small.yml",
"chars": 395,
"preview": "# Network\nMODEL_USE: mcan\nLAYER: 6\nHIDDEN_SIZE: 512\nFF_SIZE: 2048\nMULTI_HEAD: 8\nDROPOUT_R: 0.1\nFLAT_MLP_SIZE: 512\nFLAT_G"
},
{
"path": "configs/gqa/ban_4.yml",
"chars": 440,
"preview": "# Network\nMODEL_USE: ban\nGLIMPSE: 4\nHIDDEN_SIZE: 1024\nK_TIMES: 3\nDROPOUT_R: 0.2\nCLASSIFER_DROPOUT_R: 0.5\nFLAT_OUT_SIZE: "
},
{
"path": "configs/gqa/ban_8.yml",
"chars": 435,
"preview": "# Network\nMODEL_USE: ban\nGLIMPSE: 8\nHIDDEN_SIZE: 1024\nK_TIMES: 3\nDROPOUT_R: 0.2\nCLASSIFER_DROPOUT_R: 0.5\nFLAT_OUT_SIZE: "
},
{
"path": "configs/gqa/butd.yml",
"chars": 417,
"preview": "# Network\nMODEL_USE: butd\nHIDDEN_SIZE: 1024\nDROPOUT_R: 0.2\nCLASSIFER_DROPOUT_R: 0.5\nFLAT_OUT_SIZE: 2048\nUSE_BBOX_FEAT: T"
},
{
"path": "configs/gqa/mcan_large.yml",
"chars": 434,
"preview": "# Network\nMODEL_USE: mcan\nLAYER: 6\nHIDDEN_SIZE: 1024\nFF_SIZE: 4096\nMULTI_HEAD: 8\nDROPOUT_R: 0.1\nFLAT_MLP_SIZE: 512\nFLAT_"
},
{
"path": "configs/gqa/mcan_small.yml",
"chars": 432,
"preview": "# Network\nMODEL_USE: mcan\nLAYER: 6\nHIDDEN_SIZE: 512\nFF_SIZE: 2048\nMULTI_HEAD: 8\nDROPOUT_R: 0.1\nFLAT_MLP_SIZE: 512\nFLAT_G"
},
{
"path": "configs/vqa/ban_4.yml",
"chars": 376,
"preview": "# Network\nMODEL_USE: ban\nGLIMPSE: 4\nHIDDEN_SIZE: 1024\nK_TIMES: 3\nDROPOUT_R: 0.2\nCLASSIFER_DROPOUT_R: 0.5\nFLAT_OUT_SIZE: "
},
{
"path": "configs/vqa/ban_8.yml",
"chars": 376,
"preview": "# Network\nMODEL_USE: ban\nGLIMPSE: 8\nHIDDEN_SIZE: 1024\nK_TIMES: 3\nDROPOUT_R: 0.2\nCLASSIFER_DROPOUT_R: 0.5\nFLAT_OUT_SIZE: "
},
{
"path": "configs/vqa/butd.yml",
"chars": 354,
"preview": "# Network\nMODEL_USE: butd\nHIDDEN_SIZE: 1024\nDROPOUT_R: 0.2\nCLASSIFER_DROPOUT_R: 0.5\nFLAT_OUT_SIZE: 2048\n\n# Execution\nBAT"
},
{
"path": "configs/vqa/mcan_large.yml",
"chars": 480,
"preview": "# Network\nMODEL_USE: mcan\nLAYER: 6\nHIDDEN_SIZE: 1024\nFF_SIZE: 4096\nMULTI_HEAD: 8\nDROPOUT_R: 0.1\nFLAT_MLP_SIZE: 512\nFLAT_"
},
{
"path": "configs/vqa/mcan_small.yml",
"chars": 417,
"preview": "# Network\nMODEL_USE: mcan\nLAYER: 6\nHIDDEN_SIZE: 512\nFF_SIZE: 2048\nMULTI_HEAD: 8\nDROPOUT_R: 0.1\nFLAT_MLP_SIZE: 512\nFLAT_G"
},
{
"path": "configs/vqa/mfb.yml",
"chars": 418,
"preview": "# Network\nMODEL_USE: mfb\nHIGH_ORDER: False # True for MFH, False for MFB\nHIDDEN_SIZE: 512\nMFB_K: 5\nMFB_O: 1000\nLSTM_OUT_"
},
{
"path": "configs/vqa/mfh.yml",
"chars": 419,
"preview": "# Network\nMODEL_USE: mfb\nHIGH_ORDER: True # True for MFH, False for MFB\nHIDDEN_SIZE: 512\nMFB_K: 5\nMFB_O: 1000\nLSTM_OUT_S"
},
{
"path": "configs/vqa/mmnasnet_large.yml",
"chars": 609,
"preview": "# Network\nMODEL_USE: mmnasnet\nARCH: {\n enc: [SA, SA, SA, SA, FFN, FFN, FFN, FFN, SA, FFN, FFN, FFN],\n dec: [GA, GA"
},
{
"path": "configs/vqa/mmnasnet_small.yml",
"chars": 606,
"preview": "# Network\nMODEL_USE: mmnasnet\nARCH: {\n enc: [SA, SA, SA, SA, FFN, FFN, FFN, FFN, SA, FFN, FFN, FFN],\n dec: [GA, GA"
},
{
"path": "data/clevr/clevr_extract_feat.py",
"chars": 5312,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# CLEVR images feature extraction script\n# Written "
},
{
"path": "data/clevr/feats/.gitkeep",
"chars": 0,
"preview": ""
},
{
"path": "data/clevr/raw/.gitkeep",
"chars": 0,
"preview": ""
},
{
"path": "data/gqa/feats/.gitkeep",
"chars": 0,
"preview": ""
},
{
"path": "data/gqa/gqa_feat_preproc.py",
"chars": 4182,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# GQA spatial features & object features .h5 files "
},
{
"path": "data/gqa/raw/.gitkeep",
"chars": 0,
"preview": ""
},
{
"path": "data/vqa/feats/.gitkeep",
"chars": 0,
"preview": ""
},
{
"path": "data/vqa/raw/.gitkeep",
"chars": 0,
"preview": ""
},
{
"path": "docs/Makefile",
"chars": 640,
"preview": "# Minimal makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line, and also\n# from the "
},
{
"path": "docs/_source/_static/custom.css",
"chars": 138,
"preview": ".rst-content code.literal {\n color: inherit;\n font-size: 85%;\n border: none;\n background: #F0F0F0;\n paddi"
},
{
"path": "docs/_source/_static/mathjax_mathml.user.js",
"chars": 834,
"preview": "// ==UserScript==\n// @name MathJax MathML\n// @namespace http://www.mathjax.org/\n// @description Insert"
},
{
"path": "docs/_source/_static/mathjax_wikipedia.user.js",
"chars": 1171,
"preview": "// ==UserScript==\n// @name MathJax in Wikipedia\n// @namespace http://www.mathjax.org/\n// @description "
},
{
"path": "docs/_source/advanced/adding_model.md",
"chars": 6411,
"preview": "# Adding a custom VQA model\n\nThis is a tutorial on how to add a custom VQA model into OpenVQA. Follow the steps below, y"
},
{
"path": "docs/_source/advanced/contributing.md",
"chars": 837,
"preview": "# Contributing to OpenVQA\n\nAll kinds of contributions are welcome, including but not limited to the following.\n\n- Fixes "
},
{
"path": "docs/_source/basic/getting_started.md",
"chars": 4416,
"preview": "# Getting Started\n\nThis page provides basic tutorials about the usage of mmdetection.\nFor installation instructions, ple"
},
{
"path": "docs/_source/basic/install.md",
"chars": 8905,
"preview": "# Installation\n\nThis page provides basic prerequisites to run OpenVQA, including the setups of hardware, software, and d"
},
{
"path": "docs/_source/basic/model_zoo.md",
"chars": 12604,
"preview": "# Benchmark and Model Zoo\n\n## Environment\n\nWe use the following environment to run all the experiments in this page.\n\n- "
},
{
"path": "docs/_source/conf.py",
"chars": 3263,
"preview": "#!/usr/bin/env python3\n# -*- coding: utf-8 -*-\n#\n# Configuration file for the Sphinx documentation builder.\n#\n# This fil"
},
{
"path": "docs/_source/index.rst",
"chars": 1639,
"preview": ".. OpenVQA documentation master file, created by\n sphinx-quickstart on Sun Aug 4 13:54:29 2019.\n You can adapt this"
},
{
"path": "docs/_templates/layout.html",
"chars": 1268,
"preview": "{% extends \"!layout.html\" %}\n\n<link rel=\"canonical\" href=\"{{ theme_canonical_url }}{{ pagename }}.html\" />\n{% block menu"
},
{
"path": "docs/make.bat",
"chars": 801,
"preview": "@ECHO OFF\r\n\r\npushd %~dp0\r\n\r\nREM Command file for Sphinx documentation\r\n\r\nif \"%SPHINXBUILD%\" == \"\" (\r\n\tset SPHINXBUILD=sp"
},
{
"path": "docs/readme.md",
"chars": 1903,
"preview": "# How to Edit OpenVQA Document\n\nOpenVQA Document is built by [Sphix](https://www.sphinx-doc.org/en/master/) and hosted o"
},
{
"path": "docs/requirements.txt",
"chars": 59,
"preview": "sphinx\nsphinx_rtd_theme\nrecommonmark\nsphinx-markdown-tables"
},
{
"path": "openvqa/core/base_cfgs.py",
"chars": 10685,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "openvqa/core/base_dataset.py",
"chars": 2627,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "openvqa/core/path_cfgs.py",
"chars": 5571,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "openvqa/datasets/clevr/clevr_loader.py",
"chars": 6272,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "openvqa/datasets/clevr/eval/result_eval.py",
"chars": 4182,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "openvqa/datasets/dataset_loader.py",
"chars": 1015,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "openvqa/datasets/gqa/dicts.json",
"chars": 111349,
"preview": "[{\"yes\": 0, \"pipe\": 1, \"no\": 2, \"large\": 3, \"girl\": 4, \"bed\": 5, \"sofa\": 6, \"right\": 7, \"dark\": 8, \"cabinet\": 9, \"left\":"
},
{
"path": "openvqa/datasets/gqa/eval/gqa_eval.py",
"chars": 13462,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "openvqa/datasets/gqa/eval/result_eval.py",
"chars": 2115,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "openvqa/datasets/gqa/gqa_loader.py",
"chars": 8902,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "openvqa/datasets/vqa/answer_dict.json",
"chars": 109890,
"preview": "[{\"net\": 0, \"pitcher\": 1, \"orange\": 2, \"yes\": 3, \"white\": 4, \"skiing\": 5, \"red\": 6, \"frisbee\": 7, \"brushing teeth\": 8, \""
},
{
"path": "openvqa/datasets/vqa/eval/result_eval.py",
"chars": 2890,
"preview": "from openvqa.datasets.vqa.eval.vqa import VQA\nfrom openvqa.datasets.vqa.eval.vqaEval import VQAEval\nimport json, pickle\n"
},
{
"path": "openvqa/datasets/vqa/eval/vqa.py",
"chars": 7090,
"preview": "__author__ = 'aagrawal'\n__version__ = '0.9'\n\n# Interface for accessing the VQA dataset.\n\n# This code is based on the cod"
},
{
"path": "openvqa/datasets/vqa/eval/vqaEval.py",
"chars": 8157,
"preview": "# coding=utf-8\n\n__author__='aagrawal'\n\n# This code is based on the code written by Tsung-Yi Lin for MSCOCO Python API av"
},
{
"path": "openvqa/datasets/vqa/vqa_loader.py",
"chars": 9748,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "openvqa/models/ban/adapter.py",
"chars": 2133,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Zhenwei Shao https://github.com/Parado"
},
{
"path": "openvqa/models/ban/ban.py",
"chars": 4521,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Zhenwei Shao https://github.com/Parado"
},
{
"path": "openvqa/models/ban/model_cfgs.py",
"chars": 594,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Zhenwei Shao https://github.com/Parado"
},
{
"path": "openvqa/models/ban/net.py",
"chars": 2195,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Zhenwei Shao https://github.com/Parado"
},
{
"path": "openvqa/models/butd/adapter.py",
"chars": 2143,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Zhenwei Shao https://github.com/Parado"
},
{
"path": "openvqa/models/butd/model_cfgs.py",
"chars": 481,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Zhenwei Shao https://github.com/Parado"
},
{
"path": "openvqa/models/butd/net.py",
"chars": 2155,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Zhenwei Shao https://github.com/Parado"
},
{
"path": "openvqa/models/butd/tda.py",
"chars": 3001,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Zhenwei Shao https://github.com/Parado"
},
{
"path": "openvqa/models/mcan/adapter.py",
"chars": 3004,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "openvqa/models/mcan/mca.py",
"chars": 4941,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "openvqa/models/mcan/model_cfgs.py",
"chars": 693,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "openvqa/models/mcan/net.py",
"chars": 3497,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "openvqa/models/mfb/adapter.py",
"chars": 2202,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Pengbing Gao https://github.com/nbgao\n"
},
{
"path": "openvqa/models/mfb/mfb.py",
"chars": 6286,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Licensed under The MIT License [see LICENSE for d"
},
{
"path": "openvqa/models/mfb/model_cfgs.py",
"chars": 541,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Gao Pengbing https://github.com/nbgao\n"
},
{
"path": "openvqa/models/mfb/net.py",
"chars": 2140,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Licensed under The MIT License [see LICENSE for d"
},
{
"path": "openvqa/models/mmnasnet/adapter.py",
"chars": 4250,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Zhenwei Shao https://github.com/Parado"
},
{
"path": "openvqa/models/mmnasnet/model_cfgs.py",
"chars": 961,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Zhenwei Shao https://github.com/Parado"
},
{
"path": "openvqa/models/mmnasnet/nasnet.py",
"chars": 6345,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Zhenwei Shao https://github.com/Parado"
},
{
"path": "openvqa/models/mmnasnet/net.py",
"chars": 3717,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Zhenwei Shao https://github.com/Parado"
},
{
"path": "openvqa/models/model_loader.py",
"chars": 807,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "openvqa/ops/fc.py",
"chars": 1146,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "openvqa/ops/layer_norm.py",
"chars": 628,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "openvqa/utils/ans_punct.py",
"chars": 4621,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "openvqa/utils/feat_filter.py",
"chars": 663,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "openvqa/utils/make_mask.py",
"chars": 354,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "openvqa/utils/optim.py",
"chars": 2006,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "requirements.txt",
"chars": 35,
"preview": "spacy >= 2.0.18\nnumpy >= 1.16.2\n\n\n\n"
},
{
"path": "results/cache/.gitkeep",
"chars": 0,
"preview": ""
},
{
"path": "results/log/.gitkeep",
"chars": 0,
"preview": ""
},
{
"path": "results/pred/.gitkeep",
"chars": 0,
"preview": ""
},
{
"path": "results/result_test/.gitkeep",
"chars": 0,
"preview": ""
},
{
"path": "run.py",
"chars": 5597,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "utils/exec.py",
"chars": 1639,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "utils/proc_dict_gqa.py",
"chars": 2267,
"preview": "# --------------------------------------------------------\n# mcan-vqa (Deep Modular Co-Attention Networks)\n# Licensed un"
},
{
"path": "utils/proc_dict_vqa.py",
"chars": 1432,
"preview": "# --------------------------------------------------------\n# mcan-vqa (Deep Modular Co-Attention Networks)\n# Licensed un"
},
{
"path": "utils/test_engine.py",
"chars": 4670,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
},
{
"path": "utils/train_engine.py",
"chars": 10385,
"preview": "# --------------------------------------------------------\n# OpenVQA\n# Written by Yuhao Cui https://github.com/cuiyuhao1"
}
]
About this extraction
This page contains the full source code of the MILVLG/openvqa GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 95 files (465.4 KB), approximately 167.4k tokens, and a symbol index with 290 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — a free GitHub repo-to-text converter for AI. Built by Nikandr Surkov.