Repository: BerkeleyAutomation/gqcnn
Branch: master
Commit: 499a609fe9df
Files: 170
Total size: 754.8 KB
Directory structure:
gitextract_i8u5tpyn/
├── .flake8
├── .github/
│ └── ISSUE_TEMPLATE/
│ ├── bug-performance-issue--custom-images-.md
│ ├── bug-performance-issue--physical-robot-.md
│ ├── bug-performance-issue--replication-.md
│ ├── installation-issue.md
│ └── other-issue.md
├── .gitignore
├── .style.yapf
├── .travis.yml
├── CMakeLists.txt
├── LICENSE
├── README.md
├── cfg/
│ ├── examples/
│ │ ├── antipodal_grasp_sampling.yaml
│ │ ├── fc_gqcnn_pj.yaml
│ │ ├── fc_gqcnn_suction.yaml
│ │ ├── gqcnn_pj.yaml
│ │ ├── gqcnn_suction.yaml
│ │ ├── replication/
│ │ │ ├── dex-net_2.0.yaml
│ │ │ ├── dex-net_2.1.yaml
│ │ │ ├── dex-net_3.0.yaml
│ │ │ ├── dex-net_4.0_fc_pj.yaml
│ │ │ ├── dex-net_4.0_fc_suction.yaml
│ │ │ ├── dex-net_4.0_pj.yaml
│ │ │ └── dex-net_4.0_suction.yaml
│ │ └── ros/
│ │ ├── fc_gqcnn_pj.yaml
│ │ ├── fc_gqcnn_suction.yaml
│ │ ├── gqcnn_pj.yaml
│ │ └── gqcnn_suction.yaml
│ ├── finetune.yaml
│ ├── finetune_dex-net_4.0_pj.yaml
│ ├── finetune_dex-net_4.0_suction.yaml
│ ├── finetune_example_pj.yaml
│ ├── finetune_example_suction.yaml
│ ├── hyperparam_search/
│ │ ├── train_dex-net_4.0_fc_suction_hyperparam_search.yaml
│ │ └── train_hyperparam_search.yaml
│ ├── tools/
│ │ ├── analyze_gqcnn_performance.yaml
│ │ └── run_policy.yaml
│ ├── train.yaml
│ ├── train_dex-net_2.0.yaml
│ ├── train_dex-net_3.0.yaml
│ ├── train_dex-net_4.0_fc_pj.yaml
│ ├── train_dex-net_4.0_fc_suction.yaml
│ ├── train_dex-net_4.0_pj.yaml
│ ├── train_dex-net_4.0_suction.yaml
│ ├── train_example_pj.yaml
│ ├── train_example_suction.yaml
│ └── train_fc.yaml
├── ci/
│ └── travis/
│ └── format.sh
├── data/
│ ├── calib/
│ │ ├── phoxi/
│ │ │ ├── phoxi.intr
│ │ │ └── phoxi_to_world.tf
│ │ └── primesense/
│ │ ├── primesense.intr
│ │ └── primesense.tf
│ └── examples/
│ ├── clutter/
│ │ ├── phoxi/
│ │ │ ├── dex-net_4.0/
│ │ │ │ ├── depth_0.npy
│ │ │ │ ├── depth_1.npy
│ │ │ │ ├── depth_2.npy
│ │ │ │ ├── depth_3.npy
│ │ │ │ └── depth_4.npy
│ │ │ └── fcgqcnn/
│ │ │ ├── depth_0.npy
│ │ │ ├── depth_1.npy
│ │ │ ├── depth_2.npy
│ │ │ ├── depth_3.npy
│ │ │ └── depth_4.npy
│ │ └── primesense/
│ │ ├── depth_0.npy
│ │ ├── depth_1.npy
│ │ ├── depth_2.npy
│ │ ├── depth_3.npy
│ │ └── depth_4.npy
│ └── single_object/
│ └── primesense/
│ ├── depth_0.npy
│ ├── depth_1.npy
│ ├── depth_2.npy
│ ├── depth_3.npy
│ ├── depth_4.npy
│ ├── depth_5.npy
│ ├── depth_6.npy
│ ├── depth_7.npy
│ ├── depth_8.npy
│ └── depth_9.npy
├── docker/
│ ├── cpu/
│ │ └── Dockerfile
│ └── gpu/
│ └── Dockerfile
├── docs/
│ ├── Makefile
│ ├── gh_deploy.sh
│ ├── make.bat
│ └── source/
│ ├── api/
│ │ ├── analysis.rst
│ │ ├── gqcnn.rst
│ │ ├── policies.rst
│ │ └── training.rst
│ ├── benchmarks/
│ │ └── benchmarks.rst
│ ├── conf.py
│ ├── index.rst
│ ├── info/
│ │ └── info.rst
│ ├── install/
│ │ └── install.rst
│ ├── license/
│ │ └── license.rst
│ ├── replication/
│ │ └── replication.rst
│ └── tutorials/
│ ├── analysis.rst
│ ├── planning.rst
│ ├── training.rst
│ └── tutorial.rst
├── examples/
│ ├── antipodal_grasp_sampling.py
│ ├── policy.py
│ ├── policy_ros.py
│ └── policy_with_image_proc.py
├── gqcnn/
│ ├── __init__.py
│ ├── analysis/
│ │ ├── __init__.py
│ │ └── analyzer.py
│ ├── grasping/
│ │ ├── __init__.py
│ │ ├── actions.py
│ │ ├── constraint_fn.py
│ │ ├── grasp.py
│ │ ├── grasp_quality_function.py
│ │ ├── image_grasp_sampler.py
│ │ └── policy/
│ │ ├── __init__.py
│ │ ├── enums.py
│ │ ├── fc_policy.py
│ │ └── policy.py
│ ├── model/
│ │ ├── __init__.py
│ │ └── tf/
│ │ ├── __init__.py
│ │ ├── fc_network_tf.py
│ │ └── network_tf.py
│ ├── search/
│ │ ├── __init__.py
│ │ ├── enums.py
│ │ ├── resource_manager.py
│ │ ├── search.py
│ │ ├── trial.py
│ │ └── utils.py
│ ├── training/
│ │ ├── __init__.py
│ │ └── tf/
│ │ ├── __init__.py
│ │ └── trainer_tf.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── enums.py
│ │ ├── policy_exceptions.py
│ │ ├── train_stats_logger.py
│ │ └── utils.py
│ └── version.py
├── launch/
│ └── grasp_planning_service.launch
├── msg/
│ ├── Action.msg
│ ├── BoundingBox.msg
│ ├── GQCNNGrasp.msg
│ └── Observation.msg
├── package.xml
├── post-checkout
├── requirements/
│ ├── cpu_requirements.txt
│ ├── docs_requirements.txt
│ └── gpu_requirements.txt
├── ros_nodes/
│ └── grasp_planner_node.py
├── scripts/
│ ├── docker/
│ │ └── build-docker.sh
│ ├── policies/
│ │ ├── run_all_dex-net_2.0_examples.sh
│ │ ├── run_all_dex-net_2.1_examples.sh
│ │ ├── run_all_dex-net_3.0_examples.sh
│ │ ├── run_all_dex-net_4.0_fc_pj_examples.sh
│ │ ├── run_all_dex-net_4.0_fc_suction_examples.sh
│ │ ├── run_all_dex-net_4.0_pj_examples.sh
│ │ └── run_all_dex-net_4.0_suction_examples.sh
│ └── training/
│ ├── train_dex-net_2.0.sh
│ ├── train_dex-net_2.1.sh
│ ├── train_dex-net_3.0.sh
│ ├── train_dex-net_4.0_fc_pj.sh
│ ├── train_dex-net_4.0_fc_suction.sh
│ ├── train_dex-net_4.0_pj.sh
│ └── train_dex-net_4.0_suction.sh
├── setup.py
├── srv/
│ ├── GQCNNGraspPlanner.srv
│ ├── GQCNNGraspPlannerBoundingBox.srv
│ ├── GQCNNGraspPlannerFull.srv
│ └── GQCNNGraspPlannerSegmask.srv
└── tools/
├── analyze_gqcnn_performance.py
├── finetune.py
├── hyperparam_search.py
├── plot_training_losses.py
├── run_policy.py
└── train.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .flake8
================================================
[flake8]
ignore = C408, E121, E123, E126, E226, E24, E704, W503, W504, W605
exclude = docs/source/conf.py
inline-quotes = "
no-avoid-escape = 1
================================================
FILE: .github/ISSUE_TEMPLATE/bug-performance-issue--custom-images-.md
================================================
---
name: Bug/Performance Issue [Custom Images]
about: You are testing your own images in software.
title: 'Issue: Bug/Performance Issue [Custom Images]'
labels: ''
assignees: ''
---
**System information**
- OS Platform and Distribution (e.g., Linux Ubuntu 16.04):
- Python version:
- Installed using pip or ROS:
- Camera:
- GPU model (if applicable):
**Describe what you are trying to do**
**Describe current behavior**
**Describe the expected behavior**
**Describe the input images**
Provide details such as original captured resolution, pre-processing, final resolution, etc.
**Describe the physical camera setup**
Provide details such as the distance from camera to workspace, orientation, etc. Attach a picture if possible.
**Other info / logs**
Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached.
================================================
FILE: .github/ISSUE_TEMPLATE/bug-performance-issue--physical-robot-.md
================================================
---
name: Bug/Performance Issue [Physical Robot]
about: You are running on a physical robot.
title: 'Issue: Bug/Performance Issue [Physical Robot]'
labels: ''
assignees: ''
---
**System information**
- OS Platform and Distribution (e.g., Linux Ubuntu 16.04):
- Python version:
- Installed using pip or ROS:
- Camera:
- Gripper:
- Robot:
- GPU model (if applicable):
**Describe what you are trying to do**
Provide details such as what angle you are attempting grasps at relative to the workspace.
**Describe current behavior**
**Describe the expected behavior**
**Describe the input images**
Provide details such as original captured resolution, pre-processing, final resolution, etc.
**Describe the physical camera setup**
Provide details such as the distance from camera to workspace, orientation, etc. Attach a picture if possible.
**Describe the physical robot setup**
Attach a picture if possible.
**Other info / logs**
Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached.
================================================
FILE: .github/ISSUE_TEMPLATE/bug-performance-issue--replication-.md
================================================
---
name: Bug/Performance Issue [Replication]
about: You are replicating training or the policy with the provided images/datasets/models.
title: 'Issue: Bug/Performance Issue [Replication]'
labels: ''
assignees: ''
---
**System information**
- OS Platform and Distribution (e.g., Linux Ubuntu 16.04):
- Python version:
- Installed using pip or ROS:
- GPU model (if applicable):
**Describe the result you are trying to replicate**
If you can, provide a link to the section in the [documentation](https://berkeleyautomation.github.io/gqcnn/index.html).
**Provide the exact sequence of commands / steps that you executed to replicate this result**
**Describe the unexpected behavior**
**Other info / logs**
Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached.
================================================
FILE: .github/ISSUE_TEMPLATE/installation-issue.md
================================================
---
name: Installation Issue
about: You are experiencing installation issues.
title: 'Issue: Installation Issue'
labels: ''
assignees: ''
---
**System information**
- OS Platform and Distribution (e.g., Linux Ubuntu 16.04):
- Python version:
- Installed using pip or ROS:
**Describe the problem**
**Provide the exact sequence of commands / steps that you executed before running into the problem**
**Any other info / logs**
Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached.
================================================
FILE: .github/ISSUE_TEMPLATE/other-issue.md
================================================
---
name: Other Issue
about: Your issue doesn't fall into the above categories.
title: 'Issue: Other Issue'
labels: ''
assignees: ''
---
**System information**
- OS Platform and Distribution (e.g., Linux Ubuntu 16.04):
- Python version:
- Installed using pip or ROS:
**What is the problem**
Provide as much detail as possible and steps to replicate it if applicable.
**Other info / logs**
Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached.
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
data/training/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
# Temp files
*~
.#*
\#*#
================================================
FILE: .style.yapf
================================================
[style]
based_on_style=pep8
allow_split_before_dict_value=False
join_multiple_lines=False
================================================
FILE: .travis.yml
================================================
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
language: python
matrix:
- name: "Python 3.5 on Xenial Linux"
python: 3.5
dist: xenial
- name: "Python 3.6 on Xenial Linux"
python: 3.6
dist: xenial
- name: "Python 3.7 on Xenial Linux"
python: 3.7
dist: xenial
- name: "Linter"
python: 3.7
dist: xenial
before_install: []
install: []
script:
- pip install yapf==0.27.0 # Must use specific version.
- pip install flake8 flake8-comprehensions flake8-quotes==2.0.0
- ./ci/travis/format.sh --all # Should exit with 0 for no diffs.
- flake8
before_install:
- sudo apt-get update
- sudo apt-get install -y curl g++ make
- sudo apt-get install -y python-opengl # For GLU.
- pushd ~
- curl -L http://download.osgeo.org/libspatialindex/spatialindex-src-1.8.5.tar.gz | tar xz
- cd spatialindex-src-1.8.5
- ./configure
- make
- sudo make install
- sudo ldconfig
- popd
- pip install -U setuptools # Required for easy_install to find right
# skimage version for Python 3.5.
install:
- pip install .
script:
- python -c "import gqcnn"
notifications:
email: false
================================================
FILE: CMakeLists.txt
================================================
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
cmake_minimum_required(VERSION 2.8.3)
project(gqcnn)
## Find catkin macros and libraries
## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
## is used, also find other catkin packages
find_package(catkin REQUIRED COMPONENTS
rospy
message_generation
geometry_msgs
sensor_msgs
)
## System dependencies are found with CMake's conventions
# find_package(Boost REQUIRED COMPONENTS system)
## Uncomment this if the package has a setup.py. This macro ensures
## modules and global scripts declared therein get installed
## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html
catkin_python_setup()
################################################
## Declare ROS messages, services and actions ##
################################################
## To declare and build messages, services or actions from within this
## package, follow these steps:
## * Let MSG_DEP_SET be the set of packages whose message types you use in
## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...).
## * In the file package.xml:
## * add a build_depend tag for "message_generation"
## * add a build_depend and a run_depend tag for each package in MSG_DEP_SET
## * If MSG_DEP_SET isn't empty the following dependency has been pulled in
## but can be declared for certainty nonetheless:
## * add a run_depend tag for "message_runtime"
## * In this file (CMakeLists.txt):
## * add "message_generation" and every package in MSG_DEP_SET to
## find_package(catkin REQUIRED COMPONENTS ...)
## * add "message_runtime" and every package in MSG_DEP_SET to
## catkin_package(CATKIN_DEPENDS ...)
## * uncomment the add_*_files sections below as needed
## and list every .msg/.srv/.action file to be processed
## * uncomment the generate_messages entry below
## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...)
## Generate messages in the 'msg' folder
add_message_files(
FILES
GQCNNGrasp.msg
BoundingBox.msg
Action.msg
Observation.msg
)
## Generate services in the 'srv' folder
add_service_files(
FILES
GQCNNGraspPlanner.srv
GQCNNGraspPlannerBoundingBox.srv
GQCNNGraspPlannerFull.srv
GQCNNGraspPlannerSegmask.srv
)
## Generate actions in the 'action' folder
# add_action_files(
# FILES
# Action1.action
# Action2.action
# )
## Generate added messages and services with any dependencies listed here
generate_messages(
DEPENDENCIES
geometry_msgs
sensor_msgs
)
################################################
## Declare ROS dynamic reconfigure parameters ##
################################################
## To declare and build dynamic reconfigure parameters within this
## package, follow these steps:
## * In the file package.xml:
## * add a build_depend and a run_depend tag for "dynamic_reconfigure"
## * In this file (CMakeLists.txt):
## * add "dynamic_reconfigure" to
## find_package(catkin REQUIRED COMPONENTS ...)
## * uncomment the "generate_dynamic_reconfigure_options" section below
## and list every .cfg file to be processed
## Generate dynamic reconfigure parameters in the 'cfg' folder
# generate_dynamic_reconfigure_options(
# cfg/DynReconf1.cfg
# cfg/DynReconf2.cfg
# )
###################################
## catkin specific configuration ##
###################################
## The catkin_package macro generates cmake config files for your package
## Declare things to be passed to dependent projects
## INCLUDE_DIRS: uncomment this if your package contains header files
## LIBRARIES: libraries you create in this project that dependent projects also need
## CATKIN_DEPENDS: catkin_packages dependent projects also need
## DEPENDS: system dependencies of this project that dependent projects also need
catkin_package(
# INCLUDE_DIRS include
# LIBRARIES yumipy
# CATKIN_DEPENDS rospy
# DEPENDS system_lib
CATKIN_DEPENDS message_runtime
)
###########
## Build ##
###########
## Specify additional locations of header files
## Your package locations should be listed before other locations
# include_directories(include)
include_directories(
${catkin_INCLUDE_DIRS}
)
## Declare a C++ library
# add_library(yumipy
# src/${PROJECT_NAME}/yumipy.cpp
# )
## Add cmake target dependencies of the library
## as an example, code may need to be generated before libraries
## either from message generation or dynamic reconfigure
# add_dependencies(yumipy ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
## Declare a C++ executable
# add_executable(yumipy_node src/yumipy_node.cpp)
## Add cmake target dependencies of the executable
## same as for the library above
#add_dependencies(gqcnn gqcnn_generate_messages_cpp)
## Specify libraries to link a library or executable target against
# target_link_libraries(yumipy_node
# ${catkin_LIBRARIES}
# )
#############
## Install ##
#############
# all install targets should use catkin DESTINATION variables
# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html
## Mark executable scripts (Python etc.) for installation
## in contrast to setup.py, you can choose the destination
# install(PROGRAMS
# scripts/my_python_script
# DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
# )
## Mark executables and/or libraries for installation
# install(TARGETS yumipy yumipy_node
# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
# )
## Mark cpp header files for installation
# install(DIRECTORY include/${PROJECT_NAME}/
# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
# FILES_MATCHING PATTERN "*.h"
# PATTERN ".svn" EXCLUDE
# )
## Mark other files for installation (e.g. launch and bag files, etc.)
# install(FILES
# # myfile1
# # myfile2
# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
# )
#############
## Testing ##
#############
## Add gtest based cpp test target and link libraries
# catkin_add_gtest(${PROJECT_NAME}-test test/test_yumipy.cpp)
# if(TARGET ${PROJECT_NAME}-test)
# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME})
# endif()
## Add folders to be run by python nosetests
# catkin_add_nosetests(test)
================================================
FILE: LICENSE
================================================
Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved.
Permission to use, copy, modify, and distribute this software and its documentation for educational,
research, and not-for-profit purposes, without fee and without a signed licensing agreement, is
hereby granted, provided that the above copyright notice, this paragraph and the following two
paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology
Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-
7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
================================================
FILE: README.md
================================================
## Note: Python 2.x support has officially been dropped.
# Berkeley AUTOLAB's GQCNN Package
## Package Overview
The gqcnn Python package is for training and analysis of Grasp Quality Convolutional Neural Networks (GQ-CNNs). It is part of the ongoing [Dexterity-Network (Dex-Net)](https://berkeleyautomation.github.io/dex-net/) project created and maintained by the [AUTOLAB](https://autolab.berkeley.edu) at UC Berkeley.
## Installation and Usage
Please see the [docs](https://berkeleyautomation.github.io/gqcnn/) for installation and usage instructions.
## Citation
If you use any part of this code in a publication, please cite [the appropriate Dex-Net publication](https://berkeleyautomation.github.io/gqcnn/index.html#academic-use).
================================================
FILE: cfg/examples/antipodal_grasp_sampling.yaml
================================================
# policy params
policy:
# general params
deterministic: 1
gripper_width: 0.05
# sampling params
sampling:
# type
type: antipodal_depth
# antipodality
friction_coef: 0.5
depth_grad_thresh: 0.0025
depth_grad_gaussian_sigma: 1.0
downsample_rate: 4
max_rejection_samples: 4000
# distance
max_dist_from_center: 1000
min_dist_from_boundary: 45
min_grasp_dist: 10.0
angle_dist_weight: 5.0
# depth sampling
depth_sampling_mode: uniform
depth_samples_per_grasp: 1
depth_sample_win_height: 1
depth_sample_win_width: 1
min_depth_offset: 0.015
max_depth_offset: 0.05
# metrics
metric:
type: zero
# visualization
vis:
grasp_sampling: 0
final_grasp: 1
vmin: 0.4
vmax: 1.0
================================================
FILE: cfg/examples/fc_gqcnn_pj.yaml
================================================
policy:
type: fully_conv_pj
sampling_method: top_k
num_depth_bins: 16
gripper_width: 0.05
gqcnn_stride: 4
gqcnn_recep_h: 96
gqcnn_recep_w: 96
# filtering params
max_grasps_to_filter: 50
filter_grasps: 0
# metrics
metric:
type: fcgqcnn
gqcnn_model: /path/to/your/FC-GQ-Image-Wise
gqcnn_backend: tf
fully_conv_gqcnn_config:
im_height: 386
im_width: 516
# visualization
policy_vis:
scale: 0.5
show_axis: 1
num_samples: 0
actions_2d: 0
actions_3d: 0
affordance_map: 0
vis:
final_grasp: 1
vmin: 0.5
vmax: 0.8
# image pre-processing before input to policy
inpaint_rescale_factor: 0.5
================================================
FILE: cfg/examples/fc_gqcnn_suction.yaml
================================================
policy:
type: fully_conv_suction
sampling_method: top_k
gqcnn_stride: 4
gqcnn_recep_h: 96
gqcnn_recep_w: 96
# filtering params
max_grasps_to_filter: 50
filter_grasps: 0
# metrics
metric:
type: fcgqcnn
gqcnn_model: /path/to/your/FC-GQ-Image-Wise-Suction
gqcnn_backend: tf
fully_conv_gqcnn_config:
im_height: 386
im_width: 516
# visualization
policy_vis:
scale: 0.5
show_axis: 1
num_samples: 0
actions_2d: 0
actions_3d: 0
affordance_map: 0
vis:
final_grasp: 1
vmin: 0.5
vmax: 0.8
# image pre-processing before input to policy
inpaint_rescale_factor: 0.5
================================================
FILE: cfg/examples/gqcnn_pj.yaml
================================================
policy:
# optimization params
num_seed_samples: 128
num_gmm_samples: 64
num_iters: 3
gmm_refit_p: 0.25
gmm_component_frac: 0.4
gmm_reg_covar: 0.01
# general params
deterministic: 1
gripper_width: 0.05
# sampling params
sampling:
# type
type: antipodal_depth
# antipodality
friction_coef: 1.0
depth_grad_thresh: 0.0025
depth_grad_gaussian_sigma: 1.0
downsample_rate: 4
max_rejection_samples: 4000
# distance
max_dist_from_center: 160
min_dist_from_boundary: 45
min_grasp_dist: 2.5
angle_dist_weight: 5.0
# depth sampling
depth_sampling_mode: uniform
depth_samples_per_grasp: 3
depth_sample_win_height: 1
depth_sample_win_width: 1
min_depth_offset: 0.015
max_depth_offset: 0.05
# metrics
metric:
type: gqcnn
gqcnn_model: models/GQCNN-4.0-PJ
crop_height: 96
crop_width: 96
# visualization
vis:
grasp_sampling : 0
tf_images: 0
grasp_candidates: 0
elite_grasps: 0
grasp_ranking: 0
grasp_plan: 0
final_grasp: 1
vmin: 0.5
vmax: 0.8
k: 25
# image proc params
inpaint_rescale_factor: 0.5
================================================
FILE: cfg/examples/gqcnn_suction.yaml
================================================
policy:
# optimization params
num_seed_samples: 200
num_gmm_samples: 50
num_iters: 3
gmm_refit_p: 0.25
gmm_component_frac: 0.4
gmm_reg_covar: 0.01
# general params
deterministic: 1
gripper_width: 0.05
crop_height: 96
crop_width: 96
# sampling params
sampling:
# type
type: suction
# params
max_suction_dir_optical_axis_angle: 30
delta_theta: 5
delta_phi: 5
sigma_depth: 0.0025
min_suction_dist: 1.0
angle_dist_weight: 5.0
depth_gaussian_sigma: 1.0
num_grasp_samples: 1000
max_dist_from_center: 260
max_num_samples: 10000
# metric params
metric:
type: gqcnn
gqcnn_model: models/GQCNN-4.0-SUCTION
crop_height: 96
crop_width: 96
# visualization
vis:
grasp_sampling : 0
tf_images: 0
plane: 0
grasp_candidates: 0
elite_grasps: 0
grasp_ranking: 0
grasp_plan: 0
grasp_affordance_map: 0
final_grasp: 1
vmin: 0.5
vmax: 0.8
k: 25
# image proc params
inpaint_rescale_factor: 0.5
# detection params
detection:
type: point_cloud_box
foreground_mask_tolerance: 60
min_pt:
- 0.205
- -0.3
- 0.01
max_pt:
- 0.65
- 0.3
- 0.15
selection_policy: min
focus: 0
min_contour_area: 250.0
max_contour_area: 1000000.0
min_box_area: 250.0
max_box_area: 1000000.0
box_padding_px: 15
rescale_factor: 1.0
interpolation: bilinear
depth_grad_thresh: 10.0
contour_dist_thresh: 2.5
point_cloud_mask_only: 1
image_width: 640
image_height: 480
filter_dim: 1
================================================
FILE: cfg/examples/replication/dex-net_2.0.yaml
================================================
# policy params
policy:
# optimization params
num_seed_samples: 250
num_gmm_samples: 50
num_iters: 3
gmm_refit_p: 0.25
gmm_component_frac: 0.4
gmm_reg_covar: 0.01
# general params
deterministic: 1
gripper_width: 0.05
# sampling params
sampling:
# type
type: antipodal_depth
# antipodality
friction_coef: 0.8
depth_grad_thresh: 0.0025
depth_grad_gaussian_sigma: 1.0
downsample_rate: 2
max_rejection_samples: 4000
# distance
max_dist_from_center: 160
min_dist_from_boundary: 45
min_grasp_dist: 2.5
angle_dist_weight: 5.0
# depth sampling
depth_sampling_mode: uniform
depth_samples_per_grasp: 1
depth_sample_win_height: 1
depth_sample_win_width: 1
min_depth_offset: 0.005
max_depth_offset: 0.04
# metrics
metric:
type: gqcnn
gqcnn_model: models/GQCNN-2.0
crop_height: 96
crop_width: 96
# visualization
vis:
grasp_sampling : 0
tf_images: 0
grasp_candidates: 0
elite_grasps: 0
grasp_ranking: 0
grasp_plan: 0
final_grasp: 1
vmin: 0.0
vmax: 1.0
k: 25
# image proc params
inpaint_rescale_factor: 0.5
================================================
FILE: cfg/examples/replication/dex-net_2.1.yaml
================================================
# policy params
policy:
# optimization params
num_seed_samples: 250
num_gmm_samples: 50
num_iters: 3
gmm_refit_p: 0.25
gmm_component_frac: 0.4
gmm_reg_covar: 0.01
# general params
deterministic: 1
gripper_width: 0.05
# sampling params
sampling:
# type
type: antipodal_depth
# antipodality
friction_coef: 0.8
depth_grad_thresh: 0.0025
depth_grad_gaussian_sigma: 1.0
downsample_rate: 2
max_rejection_samples: 4000
# distance
max_dist_from_center: 160
min_dist_from_boundary: 45
min_grasp_dist: 2.5
angle_dist_weight: 5.0
# depth sampling
depth_sampling_mode: uniform
depth_samples_per_grasp: 1
depth_sample_win_height: 1
depth_sample_win_width: 1
min_depth_offset: 0.005
max_depth_offset: 0.04
# metrics
metric:
type: gqcnn
gqcnn_model: models/GQCNN-2.1
crop_height: 96
crop_width: 96
# visualization
vis:
grasp_sampling : 0
tf_images: 0
grasp_candidates: 0
elite_grasps: 0
grasp_ranking: 0
grasp_plan: 0
final_grasp: 1
vmin: 0.0
vmax: 1.0
k: 25
# image proc params
inpaint_rescale_factor: 0.5
================================================
FILE: cfg/examples/replication/dex-net_3.0.yaml
================================================
# policy params
policy:
# optimization params
num_seed_samples: 250
num_gmm_samples: 50
num_iters: 3
gmm_refit_p: 0.25
gmm_component_frac: 0.4
gmm_reg_covar: 0.01
# general params
deterministic: 1
max_approach_angle: 80
# sampling params
sampling:
# type
type: suction
# params
max_suction_dir_optical_axis_angle: 30
delta_theta: 1
delta_phi: 1
mean_depth: 0.0025
sigma_depth: 0.000001
min_suction_dist: 1.0
angle_dist_weight: 5.0
depth_gaussian_sigma: 1.0
max_dist_from_center: 10000000000
max_num_samples: 10000
num_grasp_samples: 500
# metric params
metric:
type: gqcnn
gqcnn_model: models/GQCNN-3.0
crop_height: 96
crop_width: 96
# visualization
vis:
grasp_sampling : 0
tf_images: 0
plane: 0
grasp_candidates: 0
elite_grasps: 0
grasp_ranking: 0
grasp_plan: 0
final_grasp: 1
vmin: 0.0
vmax: 1.0
k: 25
# image proc params
inpaint_rescale_factor: 0.5
================================================
FILE: cfg/examples/replication/dex-net_4.0_fc_pj.yaml
================================================
policy:
type: fully_conv_pj
sampling_method: top_k
num_depth_bins: 16
gripper_width: 0.05
gqcnn_stride: 4
gqcnn_recep_h: 96
gqcnn_recep_w: 96
# filtering params
max_grasps_to_filter: 50
filter_grasps: 0
# metrics
metric:
type: fcgqcnn
gqcnn_model: models/FCGQCNN-4.0-PJ
gqcnn_backend: tf
fully_conv_gqcnn_config:
im_height: 480
im_width: 640
# visualization
policy_vis:
scale: 0.5
show_axis: 1
num_samples: 0
actions_2d: 0
actions_3d: 0
affordance_map: 0
vis:
final_grasp: 1
vmin: 0.0
vmax: 1.0
# image pre-processing before input to policy
inpaint_rescale_factor: 0.5
================================================
FILE: cfg/examples/replication/dex-net_4.0_fc_suction.yaml
================================================
policy:
type: fully_conv_suction
sampling_method: top_k
gqcnn_stride: 4
gqcnn_recep_h: 96
gqcnn_recep_w: 96
# filtering params
max_grasps_to_filter: 50
filter_grasps: 0
# metrics
metric:
type: fcgqcnn
gqcnn_model: models/FCGQCNN-4.0-SUCTION
gqcnn_backend: tf
fully_conv_gqcnn_config:
im_height: 480
im_width: 640
# visualization
policy_vis:
scale: 0.5
show_axis: 1
num_samples: 0
actions_2d: 0
actions_3d: 0
affordance_map: 0
vis:
final_grasp: 1
vmin: 0.0
vmax: 1.0
# image pre-processing before input to policy
inpaint_rescale_factor: 0.5
================================================
FILE: cfg/examples/replication/dex-net_4.0_pj.yaml
================================================
policy:
# optimization params
num_seed_samples: 128
num_gmm_samples: 64
num_iters: 3
gmm_refit_p: 0.25
gmm_component_frac: 0.4
gmm_reg_covar: 0.01
# general params
deterministic: 1
gripper_width: 0.05
# sampling params
sampling:
# type
type: antipodal_depth
# antipodality
friction_coef: 1.0
depth_grad_thresh: 0.0025
depth_grad_gaussian_sigma: 1.0
downsample_rate: 4
max_rejection_samples: 4000
# distance
max_dist_from_center: 160
min_dist_from_boundary: 45
min_grasp_dist: 2.5
angle_dist_weight: 5.0
# depth sampling
depth_sampling_mode: uniform
depth_samples_per_grasp: 3
depth_sample_win_height: 1
depth_sample_win_width: 1
min_depth_offset: 0.015
max_depth_offset: 0.05
# metrics
metric:
type: gqcnn
gqcnn_model: models/GQCNN-4.0-PJ
crop_height: 96
crop_width: 96
# visualization
vis:
grasp_sampling: 0
tf_images: 0
grasp_candidates: 0
elite_grasps: 0
grasp_ranking: 0
grasp_plan: 0
final_grasp: 1
vmin: 0.0
vmax: 1.0
k: 25
# image proc params
inpaint_rescale_factor: 0.5
================================================
FILE: cfg/examples/replication/dex-net_4.0_suction.yaml
================================================
policy:
# optimization params
num_seed_samples: 200
num_gmm_samples: 50
num_iters: 3
gmm_refit_p: 0.25
gmm_component_frac: 0.4
gmm_reg_covar: 0.01
# general params
deterministic: 1
gripper_width: 0.05
crop_height: 96
crop_width: 96
# sampling params
sampling:
# type
type: suction
# params
max_suction_dir_optical_axis_angle: 30
delta_theta: 5
delta_phi: 5
sigma_depth: 0.0025
min_suction_dist: 1.0
angle_dist_weight: 5.0
depth_gaussian_sigma: 1.0
num_grasp_samples: 1000
max_dist_from_center: 260
max_num_samples: 10000
# metric params
metric:
type: gqcnn
gqcnn_model: models/GQCNN-4.0-SUCTION
crop_height: 96
crop_width: 96
# visualization
vis:
grasp_sampling: 0
tf_images: 0
plane: 0
grasp_candidates: 0
elite_grasps: 0
grasp_ranking: 0
grasp_plan: 0
grasp_affordance_map: 0
final_grasp: 1
vmin: 0.0
vmax: 1.0
k: 25
# image proc params
inpaint_rescale_factor: 0.5
# detection params
detection:
type: point_cloud_box
foreground_mask_tolerance: 60
min_pt:
- 0.205
- -0.3
- 0.01
max_pt:
- 0.65
- 0.3
- 0.15
selection_policy: min
focus: 0
min_contour_area: 250.0
max_contour_area: 1000000.0
min_box_area: 250.0
max_box_area: 1000000.0
box_padding_px: 15
rescale_factor: 1.0
interpolation: bilinear
depth_grad_thresh: 10.0
contour_dist_thresh: 2.5
point_cloud_mask_only: 1
image_width: 640
image_height: 480
filter_dim: 1
================================================
FILE: cfg/examples/ros/fc_gqcnn_pj.yaml
================================================
!include ../fc_gqcnn_pj.yaml
# ROS-specific visualization
vis:
rgbd_state: 0
cropped_rgbd_image: 0
color_image: 0
depth_image: 0
segmask: 0
================================================
FILE: cfg/examples/ros/fc_gqcnn_suction.yaml
================================================
!include ../fc_gqcnn_suction.yaml
# ROS-specific visualization
vis:
rgbd_state: 0
cropped_rgbd_image: 0
color_image: 0
depth_image: 0
segmask: 0
================================================
FILE: cfg/examples/ros/gqcnn_pj.yaml
================================================
!include ../gqcnn_pj.yaml
# ROS-specific visualization
vis:
rgbd_state: 0
cropped_rgbd_image: 0
color_image: 0
depth_image: 0
segmask: 0
================================================
FILE: cfg/examples/ros/gqcnn_suction.yaml
================================================
!include ../gqcnn_suction.yaml
# ROS-specific visualization
vis:
rgbd_state: 0
cropped_rgbd_image: 0
color_image: 0
depth_image: 0
segmask: 0
================================================
FILE: cfg/finetune.yaml
================================================
# general optimization params
train_batch_size: 64
val_batch_size: &val_batch_size 64
# logging params
num_epochs: 10 # number of epochs to train for
eval_frequency: 2.5 # how often to get validation error (in epochs)
save_frequency: 2.5 # how often to save output (in epochs)
vis_frequency: 10000 # how often to visualize filters (in epochs)
log_frequency: 1 # how often to log output (in steps)
# train / val split params
train_pct: 0.9 # percentage of the data to use for training vs validation
total_pct: 1.0 # percentage of all the files to use
eval_total_train_error: 0 # whether or not to evaluate the total training error on each validation
max_files_eval: 1000 # the number of validation files to use in each eval
# optimization params
loss: sparse
optimizer: momentum
train_l2_regularizer: 0.0005
base_lr: 0.01
decay_step_multiplier: 0.5 # number of times to go through training datapoints before stepping down decay rate (in epochs)
decay_rate: 0.95
momentum_rate: 0.9
max_training_examples_per_load: 128
drop_rate: 0.0
max_global_grad_norm: 100000000000
optimize_base_layers: 0
# input params
training_mode: classification
image_field_name: tf_depth_ims
pose_field_name: grasps
# label params
target_metric_name: grasp_metrics # name of the field to use for the labels
metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh)
# preproc params
num_random_files: 1000 # the number of random files to compute dataset statistics in preprocessing (a lower value speeds up initialization)
preproc_log_frequency: 100 # how often to log preprocessing (in steps)
# denoising / synthetic data params
multiplicative_denoising: 0
gamma_shape: 1000.00
symmetrize: 1
gaussian_process_denoising: 0
gaussian_process_rate: 0.5
gaussian_process_scaling_factor: 4.0
gaussian_process_sigma: 0.005
# tensorboard
tensorboard_port: 6006
# debugging params
debug: &debug 0
debug_num_files: 10 # speeds up initialization
seed: &seed 24098
### GQCNN CONFIG ###
gqcnn:
# basic data metrics
im_height: 96
im_width: 96
im_channels: 1
debug: *debug
seed: *seed
# needs to match input data mode that was used for training, determines the pose dimensions for the network
gripper_mode: parallel_jaw
# prediction batch size, in training this will be overridden by the val_batch_size in the optimizer's config file
batch_size: *val_batch_size
# architecture
architecture:
base_model:
output_layer: fc4
im_stream:
conv1_1:
type: conv
filt_dim: 9
num_filt: 8
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
conv1_2:
type: conv
filt_dim: 3
num_filt: 8
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
conv2_1:
type: conv
filt_dim: 3
num_filt: 8
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv2_2:
type: conv
filt_dim: 3
num_filt: 8
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
fc3:
type: fc
out_size: 64
pose_stream:
pc1:
type: pc
out_size: 4
pc2:
type: pc
out_size: 0
merge_stream:
fc4:
type: fc_merge
out_size: 64
fc5:
type: fc
out_size: 2
# architecture normalization constants
radius: 2
alpha: 2.0e-05
beta: 0.75
bias: 1.0
# leaky relu coefficient
relu_coeff: 0.0
================================================
FILE: cfg/finetune_dex-net_4.0_pj.yaml
================================================
# general optimization params
train_batch_size: 64
val_batch_size: &val_batch_size 64
# logging params
num_epochs: 50 # number of epochs to train for
eval_frequency: 10.0 # how often to get validation error (in epochs)
save_frequency: 10.0 # how often to save output (in epochs)
vis_frequency: 10000 # how often to visualize filters (in epochs)
log_frequency: 1 # how often to log output (in steps)
# train / val split params
train_pct: 0.9 # percentage of the data to use for training vs validation
total_pct: 1.0 # percentage of all the files to use
eval_total_train_error: 0 # whether or not to evaluate the total training error on each validation
max_files_eval: 1000 # the number of validation files to use in each eval
# optimization params
loss: sparse
optimizer: momentum
train_l2_regularizer: 0.0005
base_lr: 0.01
decay_step_multiplier: 0.5 # number of times to go through training datapoints before stepping down decay rate (in epochs)
decay_rate: 0.95
momentum_rate: 0.9
max_training_examples_per_load: 128
drop_rate: 0.0
max_global_grad_norm: 100000000000
optimize_base_layers: 0
# input params
training_mode: classification
image_field_name: tf_depth_ims
pose_field_name: grasps
# label params
target_metric_name: grasp_metrics # name of the field to use for the labels
metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh)
# preproc params
num_random_files: 1000 # the number of random files to compute dataset statistics in preprocessing (a lower value speeds up initialization)
preproc_log_frequency: 100 # how often to log preprocessing (in steps)
# denoising / synthetic data params
multiplicative_denoising: 0
gamma_shape: 1000.00
symmetrize: 1
gaussian_process_denoising: 0
gaussian_process_rate: 0.5
gaussian_process_scaling_factor: 4.0
gaussian_process_sigma: 0.005
# tensorboard
tensorboard_port: 6006
# debugging params
debug: &debug 0
debug_num_files: 10 # speeds up initialization
seed: &seed 24098
### GQCNN CONFIG ###
gqcnn:
# basic data metrics
im_height: 96
im_width: 96
im_channels: 1
debug: *debug
seed: *seed
# needs to match input data mode that was used for training, determines the pose dimensions for the network
gripper_mode: parallel_jaw
# method by which to integrate depth into the network
input_depth_mode: pose_stream
# used for training with multiple angular predictions
angular_bins: 0
# prediction batch size, in training this will be overridden by the val_batch_size in the optimizer's config file
batch_size: *val_batch_size
# architecture
architecture:
base_model:
output_layer: fc4
im_stream:
conv1_1:
type: conv
filt_dim: 9
num_filt: 16
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv1_2:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
conv2_1:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv2_2:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
fc3:
type: fc
out_size: 128
pose_stream:
pc1:
type: pc
out_size: 16
pc2:
type: pc
out_size: 0
merge_stream:
fc4:
type: fc_merge
out_size: 128
fc5:
type: fc
out_size: 2
# architecture normalization constants
radius: 2
alpha: 2.0e-05
beta: 0.75
bias: 1.0
# leaky relu coefficient
relu_coeff: 0.0
================================================
FILE: cfg/finetune_dex-net_4.0_suction.yaml
================================================
# general optimization params
train_batch_size: 64
val_batch_size: &val_batch_size 64
# logging params
num_epochs: 50 # number of epochs to train for
eval_frequency: 10.0 # how often to get validation error (in epochs)
save_frequency: 10.0 # how often to save output (in epochs)
vis_frequency: 10000 # how often to visualize filters (in epochs)
log_frequency: 1 # how often to log output (in steps)
# train / val split params
train_pct: 0.9 # percentage of the data to use for training vs validation
total_pct: 1.0 # percentage of all the files to use
eval_total_train_error: 0 # whether or not to evaluate the total training error on each validation
max_files_eval: 1000 # the number of validation files to use in each eval
# optimization params
loss: sparse
optimizer: momentum
train_l2_regularizer: 0.0005
base_lr: 0.01
decay_step_multiplier: 0.5 # number of times to go through training datapoints before stepping down decay rate (in epochs)
decay_rate: 0.95
momentum_rate: 0.9
max_training_examples_per_load: 128
drop_rate: 0.0
max_global_grad_norm: 100000000000
optimize_base_layers: 0
# input params
training_mode: classification
image_field_name: tf_depth_ims
pose_field_name: grasps
# label params
target_metric_name: grasp_metrics # name of the field to use for the labels
metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh)
# preproc params
num_random_files: 1000 # the number of random files to compute dataset statistics in preprocessing (a lower value speeds up initialization)
preproc_log_frequency: 100 # how often to log preprocessing (in steps)
# denoising / synthetic data params
multiplicative_denoising: 0
gamma_shape: 1000.00
symmetrize: 1
gaussian_process_denoising: 0
gaussian_process_rate: 0.5
gaussian_process_scaling_factor: 4.0
gaussian_process_sigma: 0.005
# tensorboard
tensorboard_port: 6006
# debugging params
debug: &debug 0
debug_num_files: 10 # speeds up initialization
seed: &seed 24098
### GQCNN CONFIG ###
gqcnn:
# basic data metrics
im_height: 96
im_width: 96
im_channels: 1
debug: *debug
seed: *seed
# needs to match input data mode that was used for training, determines the pose dimensions for the network
gripper_mode: suction
# method by which to integrate depth into the network
input_depth_mode: pose_stream
# used for training with multiple angular predictions
angular_bins: 0
# prediction batch size, in training this will be overridden by the val_batch_size in the optimizer's config file
batch_size: *val_batch_size
# architecture
architecture:
base_model:
output_layer: fc3
im_stream:
conv1_1:
type: conv
filt_dim: 9
num_filt: 16
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv1_2:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
conv2_1:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv2_2:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
fc3:
type: fc
out_size: 128
pose_stream:
pc1:
type: pc
out_size: 16
pc2:
type: pc
out_size: 0
merge_stream:
fc4:
type: fc_merge
out_size: 128
fc5:
type: fc
out_size: 2
# architecture normalization constants
radius: 2
alpha: 2.0e-05
beta: 0.75
bias: 1.0
# leaky relu coefficient
relu_coeff: 0.0
================================================
FILE: cfg/finetune_example_pj.yaml
================================================
# general optimization params
train_batch_size: 64
val_batch_size: &val_batch_size 64
# logging params
num_epochs: 20 # number of epochs to train for
eval_frequency: 10.0 # how often to get validation error (in epochs)
save_frequency: 10.0 # how often to save output (in epochs)
vis_frequency: 10000 # how often to visualize filters (in epochs)
log_frequency: 1 # how often to log output (in steps)
# train / val split params
train_pct: 0.9 # percentage of the data to use for training vs validation
total_pct: 1.0 # percentage of all the files to use
eval_total_train_error: 0 # whether or not to evaluate the total training error on each validation
max_files_eval: 1000 # the number of validation files to use in each eval
# optimization params
loss: sparse
optimizer: momentum
train_l2_regularizer: 0.0005
base_lr: 0.01
decay_step_multiplier: 0.33 # number of times to go through training datapoints before stepping down decay rate (in epochs)
decay_rate: 0.95
momentum_rate: 0.9
max_training_examples_per_load: 128
drop_rate: 0.0
max_global_grad_norm: 100000000000
optimize_base_layers: 0
# input params
training_mode: classification
image_field_name: tf_depth_ims
pose_field_name: grasps
# label params
target_metric_name: grasp_metrics # name of the field to use for the labels
metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh)
# preproc params
num_random_files: 1000 # the number of random files to compute dataset statistics in preprocessing (a lower value speeds up initialization)
preproc_log_frequency: 100 # how often to log preprocessing (in steps)
# denoising / synthetic data params
multiplicative_denoising: 0
gamma_shape: 1000.00
symmetrize: 1
gaussian_process_denoising: 0
gaussian_process_rate: 0.5
gaussian_process_scaling_factor: 4.0
gaussian_process_sigma: 0.005
# tensorboard
tensorboard_port: 6006
# debugging params
debug: &debug 0
debug_num_files: 10 # speeds up initialization
seed: &seed 24098
### GQCNN CONFIG ###
gqcnn:
# basic data metrics
im_height: 96
im_width: 96
im_channels: 1
debug: *debug
seed: *seed
# needs to match input data mode that was used for training, determines the pose dimensions for the network
gripper_mode: parallel_jaw
# method by which to integrate depth into the network
input_depth_mode: pose_stream
# used for training with multiple angular predictions
angular_bins: 0
# prediction batch size, in training this will be overridden by the val_batch_size in the optimizer's config file
batch_size: *val_batch_size
# architecture
architecture:
base_model:
output_layer: conv2_2
im_stream:
conv1_1:
type: conv
filt_dim: 9
num_filt: 16
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv1_2:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
conv2_1:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv2_2:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
fc3:
type: fc
out_size: 64
pose_stream:
pc1:
type: pc
out_size: 16
pc2:
type: pc
out_size: 0
merge_stream:
fc4:
type: fc_merge
out_size: 64
fc5:
type: fc
out_size: 2
# architecture normalization constants
radius: 2
alpha: 2.0e-05
beta: 0.75
bias: 1.0
# leaky relu coefficient
relu_coeff: 0.0
================================================
FILE: cfg/finetune_example_suction.yaml
================================================
# general optimization params
train_batch_size: 64
val_batch_size: &val_batch_size 64
# logging params
num_epochs: 20 # number of epochs to train for
eval_frequency: 10.0 # how often to get validation error (in epochs)
save_frequency: 10.0 # how often to save output (in epochs)
vis_frequency: 10000 # how often to visualize filters (in epochs)
log_frequency: 1 # how often to log output (in steps)
# train / val split params
train_pct: 0.9 # percentage of the data to use for training vs validation
total_pct: 1.0 # percentage of all the files to use
eval_total_train_error: 0 # whether or not to evaluate the total training error on each validation
max_files_eval: 1000 # the number of validation files to use in each eval
# optimization params
loss: sparse
optimizer: momentum
train_l2_regularizer: 0.0005
base_lr: 0.01
decay_step_multiplier: 0.33 # number of times to go through training datapoints before stepping down decay rate (in epochs)
decay_rate: 0.95
momentum_rate: 0.9
max_training_examples_per_load: 128
drop_rate: 0.0
max_global_grad_norm: 100000000000
optimize_base_layers: 0
# input params
training_mode: classification
image_field_name: tf_depth_ims
pose_field_name: grasps
# label params
target_metric_name: grasp_metrics # name of the field to use for the labels
metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh)
# preproc params
num_random_files: 1000 # the number of random files to compute dataset statistics in preprocessing (a lower value speeds up initialization)
preproc_log_frequency: 100 # how often to log preprocessing (in steps)
# denoising / synthetic data params
multiplicative_denoising: 0
gamma_shape: 1000.00
symmetrize: 1
gaussian_process_denoising: 0
gaussian_process_rate: 0.5
gaussian_process_scaling_factor: 4.0
gaussian_process_sigma: 0.005
# tensorboard
tensorboard_port: 6006
# debugging params
debug: &debug 0
debug_num_files: 10 # speeds up initialization
seed: &seed 24098
### GQCNN CONFIG ###
gqcnn:
# basic data metrics
im_height: 96
im_width: 96
im_channels: 1
debug: *debug
seed: *seed
# needs to match input data mode that was used for training, determines the pose dimensions for the network
gripper_mode: suction
# method by which to integrate depth into the network
input_depth_mode: pose_stream
# used for training with multiple angular predictions
angular_bins: 0
# prediction batch size, in training this will be overridden by the val_batch_size in the optimizer's config file
batch_size: *val_batch_size
# architecture
architecture:
base_model:
output_layer: conv2_2
im_stream:
conv1_1:
type: conv
filt_dim: 9
num_filt: 16
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv1_2:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
conv2_1:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv2_2:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
fc3:
type: fc
out_size: 64
pose_stream:
pc1:
type: pc
out_size: 16
pc2:
type: pc
out_size: 0
merge_stream:
fc4:
type: fc_merge
out_size: 64
fc5:
type: fc
out_size: 2
# architecture normalization constants
radius: 2
alpha: 2.0e-05
beta: 0.75
bias: 1.0
# leaky relu coefficient
relu_coeff: 0.0
================================================
FILE: cfg/hyperparam_search/train_dex-net_4.0_fc_suction_hyperparam_search.yaml
================================================
# general optimization params
train_batch_size: 64
val_batch_size: &val_batch_size 64
# logging params
num_epochs: 50 # number of epochs to train for
eval_frequency: 10 # how often to get validation error (in epochs)
save_frequency: 10 # how often to save output (in epochs)
vis_frequency: 10000 # how often to visualize filters (in epochs)
log_frequency: 1 # how often to log output (in steps)
# train / val split params
train_pct: 0.8 # percentage of the data to use for training vs validation
total_pct: 1.0 # percentage of all the files to use
eval_total_train_error: 0 # whether or not to evaluate the total training error on each validation
max_files_eval: 1000 # the number of validation files to use in each eval
# optimization params
loss: sparse
optimizer: momentum
train_l2_regularizer: 0.0005
base_lr: 0.01
decay_step_multiplier: 0.5 # number of times to go through training datapoints before stepping down decay rate (in epochs)
decay_rate: 0.95
momentum_rate: 0.9
max_training_examples_per_load: 128
drop_rate: 0.0
max_global_grad_norm: 100000000000
# input params
training_mode: classification
image_field_name: tf_depth_ims
pose_field_name: grasps
# label params
target_metric_name: grasp_metrics # name of the field to use for the labels
metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh)
# preproc params
num_random_files: 10000 # the number of random files to compute dataset statistics in preprocessing (a lower value speeds up initialization)
preproc_log_frequency: 100 # how often to log preprocessing (in steps)
# denoising / synthetic data params
multiplicative_denoising: 0
gamma_shape: 1000.00
symmetrize: 1
gaussian_process_denoising: 0
gaussian_process_rate: 0.5
gaussian_process_scaling_factor: 4.0
gaussian_process_sigma: 0.005
# tensorboard
tensorboard_port: 6006
# debugging params
debug: 0
debug_num_files: 1000000000
seed: 24098
### GQCNN CONFIG ###
gqcnn:
# basic data metrics
im_height: 96
im_width: 96
im_channels: 1
debug: 0
seed: 24098
# needs to match input data mode that was used for training, determines the pose dimensions for the network
gripper_mode: suction
# method by which to integrate depth into the network
input_depth_mode: im_only
# used for training with multiple angular predictions
angular_bins: 0
# prediction batch size, in training this will be overridden by the val_batch_size in the optimizer's config file
batch_size: *val_batch_size
# architecture
architecture:
im_stream:
conv1_1:
type: conv
filt_dim: ['anchor_filt_dim', 6, 9, 9]
num_filt: ['anchor_num_filts', 8, 12, 16, 20]
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv1_2:
type: conv
filt_dim: ['anchor_filt_dim', 3, 5, 5]
num_filt: ['anchor_num_filts', 8, 12, 16, 20]
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
conv2_1:
type: conv
filt_dim: ['anchor_filt_dim', 3, 5, 3]
num_filt: ['anchor_num_filts', 8, 12, 16, 20]
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv2_2:
type: conv
filt_dim: ['anchor_filt_dim', 3, 5, 3]
num_filt: ['anchor_num_filts', 8, 12, 16, 20]
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
fc3:
type: fc
out_size: ['anchor_fc_out_size', 32, 64, 128]
fc4:
type: fc
out_size: ['anchor_fc_out_size', 32, 64, 128]
fc5:
type: fc
out_size: 2
# architecture normalization constants
radius: 2
alpha: 2.0e-05
beta: 0.75
bias: 1.0
# leaky relu coefficient
relu_coeff: 0.0
================================================
FILE: cfg/hyperparam_search/train_hyperparam_search.yaml
================================================
# Example hyper-parameter search config corresponding to cfg/train.yaml that
# grid-searches through number of training epochs, base learning rate, and
# conv1_1 filter sizes.
# general optimization params
train_batch_size: 64
val_batch_size: &val_batch_size 64
# logging params
num_epochs: [25, 50, 75] # number of epochs to train for
eval_frequency: 10 # how often to get validation error (in epochs)
save_frequency: 10 # how often to save output (in epochs)
vis_frequency: 10000 # how often to visualize filters (in epochs)
log_frequency: 1 # how often to log output (in steps)
# train / val split params
train_pct: 0.9 # percentage of the data to use for training vs validation
total_pct: 1.0 # percentage of all the files to use
eval_total_train_error: 0 # whether or not to evaluate the total training error on each validation
max_files_eval: 1000 # the number of validation files to use in each eval
# optimization params
loss: sparse
optimizer: momentum
train_l2_regularizer: 0.0005
base_lr: [0.001, 0.01, 0.05, 0.1]
decay_step_multiplier: 1.0 # number of times to go through training datapoints before stepping down decay rate (in epochs)
decay_rate: 0.95
momentum_rate: 0.9
max_training_examples_per_load: 128
drop_rate: 0.0
max_global_grad_norm: 100000000000
# input params
training_mode: classification
image_field_name: tf_depth_ims
pose_field_name: grasps
# label params
target_metric_name: grasp_metrics # name of the field to use for the labels
metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh)
# preproc params
num_random_files: 1000 # the number of random files to compute dataset statistics in preprocessing (a lower value speeds up initialization)
preproc_log_frequency: 100 # how often to log preprocessing (in steps)
# denoising / synthetic data params
multiplicative_denoising: 0
gamma_shape: 1000.00
symmetrize: 1
gaussian_process_denoising: 0
gaussian_process_rate: 0.5
gaussian_process_scaling_factor: 4.0
gaussian_process_sigma: 0.005
# tensorboard
tensorboard_port: 6006
# debugging params
debug: 0
debug_num_files: 1000000000
seed: 24098
### GQCNN CONFIG ###
gqcnn:
# basic data metrics
im_height: 96
im_width: 96
im_channels: 1
debug: 0
seed: 24098
# needs to match input data mode that was used for training, determines the pose dimensions for the network
gripper_mode: parallel_jaw
# method by which to integrate depth into the network
input_depth_mode: pose_stream
# used for training with multiple angular predictions
angular_bins: 0
# prediction batch size, in training this will be overridden by the val_batch_size in the optimizer's config file
batch_size: *val_batch_size
# architecture
architecture:
im_stream:
conv1_1:
type: conv
filt_dim: [3, 5, 9]
num_filt: 8
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
conv1_2:
type: conv
filt_dim: 3
num_filt: 8
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
conv2_1:
type: conv
filt_dim: 3
num_filt: 8
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv2_2:
type: conv
filt_dim: 3
num_filt: 8
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
fc3:
type: fc
out_size: 64
pose_stream:
pc1:
type: pc
out_size: 4
pc2:
type: pc
out_size: 0
merge_stream:
fc4:
type: fc_merge
out_size: 64
fc5:
type: fc
out_size: 2
# architecture normalization constants
radius: 2
alpha: 2.0e-05
beta: 0.75
bias: 1.0
# leaky relu coefficient
relu_coeff: 0.0
================================================
FILE: cfg/tools/analyze_gqcnn_performance.yaml
================================================
log_rate: 10
font_size: 15
line_width: 4
dpi: 100
num_bins: 100
num_vis: 10
================================================
FILE: cfg/tools/run_policy.yaml
================================================
# policy params
policy:
# optimization params
num_seed_samples: 250
num_gmm_samples: 100
num_iters: 3
gmm_refit_p: 0.125
gmm_component_frac: 0.8
gmm_reg_covar: 0.01
max_resamples_per_iteration: 10
# general params
deterministic: 0
gripper: yumi_metal_spline
gripper_width: 0.05
max_approach_angle: 60
logging_dir: logs/debug
# sampling params
sampling:
# type
type: antipodal_depth
# antipodality
friction_coef: 0.8
depth_grad_thresh: 0.002
depth_grad_gaussian_sigma: 0.5
min_num_edge_pixels: 25
downsample_rate: 2
max_rejection_samples: 4000
# distance
max_dist_from_center: 100000
min_dist_from_boundary: 45
min_grasp_dist: 1.0
angle_dist_weight: 5.0
# depth sampling
depth_samples_per_grasp: 1
depth_sample_win_height: 1
depth_sample_win_width: 1
depth_sampling_mode: uniform
min_depth_offset: 0.015
max_depth_offset: 0.04
# metric params
metric:
type: gqcnn
gqcnn_model: /mnt/data/diskstation/models/icra2019/gqcnn_mini_dexnet/
crop_height: 96
crop_width: 96
# filter params
filter_collisions: 1
filter_ik: 1
filter_unreachable: 0
max_grasps_filter: 10
collision_free_filter:
approach_dist: 0.075
rescale_factor: 0.125
ik_filter:
approach_dist: 0.075
traj_len: 0
group_name: left_jaws
ik_timeout: 0.1
reachability_filter:
unreachable_pose_dir: /mnt/data/diskstation/unreachable_poses/left/
min_rot_dist: 0.1
min_trans_dist: 0.01
# visualization
vis:
input_images: 0
grasp_sampling: 0
tf_images: 0
grasp_candidates: 0
elite_grasps: 0
grasp_ranking: 0
grasp_plan: 0
seg_point_cloud: 0
filtered_point_cloud: 0
final_grasp: 1
grasp_scale: 1
vmin: 0.6
vmax: 0.9
k: 25
================================================
FILE: cfg/train.yaml
================================================
# general optimization params
train_batch_size: 64
val_batch_size: &val_batch_size 64
# logging params
num_epochs: 50 # number of epochs to train for
eval_frequency: 10 # how often to get validation error (in epochs)
save_frequency: 10 # how often to save output (in epochs)
vis_frequency: 10000 # how often to visualize filters (in epochs)
log_frequency: 1 # how often to log output (in steps)
# train / val split params
train_pct: 0.9 # percentage of the data to use for training vs validation
total_pct: 1.0 # percentage of all the files to use
eval_total_train_error: 0 # whether or not to evaluate the total training error on each validation
max_files_eval: 1000 # the number of validation files to use in each eval
# optimization params
loss: sparse
optimizer: momentum
train_l2_regularizer: 0.0005
base_lr: 0.01
decay_step_multiplier: 1.0 # number of times to go through training datapoints before stepping down decay rate (in epochs)
decay_rate: 0.95
momentum_rate: 0.9
max_training_examples_per_load: 128
drop_rate: 0.0
max_global_grad_norm: 100000000000
# input params
training_mode: classification
image_field_name: tf_depth_ims
pose_field_name: grasps
# label params
target_metric_name: grasp_metrics # name of the field to use for the labels
metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh)
# preproc params
num_random_files: 1000 # the number of random files to compute dataset statistics in preprocessing (a lower value speeds up initialization)
preproc_log_frequency: 100 # how often to log preprocessing (in steps)
# denoising / synthetic data params
multiplicative_denoising: 0
gamma_shape: 1000.00
symmetrize: 1
gaussian_process_denoising: 0
gaussian_process_rate: 0.5
gaussian_process_scaling_factor: 4.0
gaussian_process_sigma: 0.005
# tensorboard
tensorboard_port: 6006
# debugging params
debug: &debug 0
debug_num_files: 10 # speeds up initialization
seed: &seed 24098
### GQCNN CONFIG ###
gqcnn:
# basic data metrics
im_height: 96
im_width: 96
im_channels: 1
debug: *debug
seed: *seed
# needs to match input data mode that was used for training, determines the pose dimensions for the network
gripper_mode: parallel_jaw
# method by which to integrate depth into the network
input_depth_mode: pose_stream
# used for training with multiple angular predictions
angular_bins: 0
  # prediction batch size, in training this will be overridden by the val_batch_size in the optimizer's config file
batch_size: *val_batch_size
# architecture
architecture:
im_stream:
conv1_1:
type: conv
filt_dim: 9
num_filt: 8
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
conv1_2:
type: conv
filt_dim: 3
num_filt: 8
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
conv2_1:
type: conv
filt_dim: 3
num_filt: 8
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv2_2:
type: conv
filt_dim: 3
num_filt: 8
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
fc3:
type: fc
out_size: 64
pose_stream:
pc1:
type: pc
out_size: 4
pc2:
type: pc
out_size: 0
merge_stream:
fc4:
type: fc_merge
out_size: 64
fc5:
type: fc
out_size: 2
# architecture normalization constants
radius: 2
alpha: 2.0e-05
beta: 0.75
bias: 1.0
# leaky relu coefficient
relu_coeff: 0.0
================================================
FILE: cfg/train_dex-net_2.0.yaml
================================================
# general optimization params
train_batch_size: 64
val_batch_size: &val_batch_size 64
# logging params
num_epochs: 50 # number of epochs to train for
eval_frequency: 5 # how often to get validation error (in epochs)
save_frequency: 5 # how often to save output (in epochs)
vis_frequency: 10000 # how often to visualize filters (in epochs)
log_frequency: 1 # how often to log output (in steps)
# train / val split params
train_pct: 0.8 # percentage of the data to use for training vs validation
total_pct: 1.0 # percentage of all the files to use
eval_total_train_error: 0 # whether or not to evaluate the total training error on each validation
max_files_eval: 1000 # the number of validation files to use in each eval
# optimization params
loss: sparse
optimizer: momentum
train_l2_regularizer: 0.0005
base_lr: 0.01
decay_step_multiplier: 0.66 # number of times to go through training datapoints before stepping down decay rate (in epochs)
decay_rate: 0.95
momentum_rate: 0.9
max_training_examples_per_load: 128
drop_rate: 0.0
max_global_grad_norm: 100000000000
# input params
training_mode: classification
image_field_name: depth_ims_tf_table
pose_field_name: hand_poses
# label params
target_metric_name: robust_ferrari_canny # name of the field to use for the labels
metric_thresh: 0.002 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh)
# preproc params
num_random_files: 10000 # the number of random files to compute dataset statistics in preprocessing (a lower value speeds up initialization)
preproc_log_frequency: 100 # how often to log preprocessing (in steps)
# denoising / synthetic data params
multiplicative_denoising: 1
gamma_shape: 1000.00
symmetrize: 1
gaussian_process_denoising: 1
gaussian_process_rate: 0.5
gaussian_process_scaling_factor: 4.0
gaussian_process_sigma: 0.005
# tensorboard
tensorboard_port: 6006
# debugging params
debug: &debug 0
debug_num_files: 10 # speeds up initialization
seed: &seed 24098
### GQCNN CONFIG ###
gqcnn:
# basic data metrics
im_height: 32
im_width: 32
im_channels: 1
debug: *debug
seed: *seed
# needs to match input data mode that was used for training, determines the pose dimensions for the network
gripper_mode: legacy_parallel_jaw
  # prediction batch size, in training this will be overridden by the val_batch_size in the optimizer's config file
batch_size: *val_batch_size
# architecture
architecture:
im_stream:
conv1_1:
type: conv
filt_dim: 7
num_filt: 64
pool_size: 1
pool_stride: 1
pad: SAME
norm: 0
norm_type: local_response
conv1_2:
type: conv
filt_dim: 5
num_filt: 64
pool_size: 2
pool_stride: 2
pad: SAME
norm: 1
norm_type: local_response
conv2_1:
type: conv
filt_dim: 3
num_filt: 64
pool_size: 1
pool_stride: 1
pad: SAME
norm: 0
norm_type: local_response
conv2_2:
type: conv
filt_dim: 3
num_filt: 64
pool_size: 2
pool_stride: 2
pad: SAME
norm: 1
norm_type: local_response
fc3:
type: fc
out_size: 1024
pose_stream:
pc1:
type: pc
out_size: 16
pc2:
type: pc
out_size: 0
merge_stream:
fc4:
type: fc_merge
out_size: 1024
fc5:
type: fc
out_size: 2
# architecture normalization constants
radius: 2
alpha: 2.0e-05
beta: 0.75
bias: 1.0
# leaky relu coefficient
relu_coeff: 0.0
================================================
FILE: cfg/train_dex-net_3.0.yaml
================================================
# general optimization params
train_batch_size: 64
val_batch_size: &val_batch_size 64
# logging params
num_epochs: 25 # number of epochs to train for
eval_frequency: 5 # how often to get validation error (in epochs)
save_frequency: 5 # how often to save output (in epochs)
vis_frequency: 10000 # how often to visualize filters (in epochs)
log_frequency: 1 # how often to log output (in steps)
# train / val split params
train_pct: 0.8 # percentage of the data to use for training vs validation
total_pct: 1.0 # percentage of all the files to use
eval_total_train_error: 0 # whether or not to evaluate the total training error on each validation
max_files_eval: 1000 # the number of validation files to use in each eval
# optimization params
loss: sparse
optimizer: momentum
train_l2_regularizer: 0.0005
base_lr: 0.01
decay_step_multiplier: 0.66 # number of times to go through training datapoints before stepping down decay rate (in epochs)
decay_rate: 0.95
momentum_rate: 0.9
max_training_examples_per_load: 128
drop_rate: 0.0
max_global_grad_norm: 100000000000
# input params
training_mode: classification
image_field_name: depth_ims_tf_table
pose_field_name: hand_poses
# label params
target_metric_name: robust_suction_wrench_resistance # name of the field to use for the labels
metric_thresh: 0.2 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh)
# preproc params
num_random_files: 10000 # the number of random files to compute dataset statistics in preprocessing (a lower value speeds up initialization)
preproc_log_frequency: 100 # how often to log preprocessing (in steps)
# denoising / synthetic data params
multiplicative_denoising: 1
gamma_shape: 1000.00
symmetrize: 1
gaussian_process_denoising: 1
gaussian_process_rate: 0.5
gaussian_process_scaling_factor: 4.0
gaussian_process_sigma: 0.005
# tensorboard
tensorboard_port: 6006
# debugging params
debug: &debug 0
debug_num_files: 10 # speeds up initialization
seed: &seed 24098
### GQCNN CONFIG ###
gqcnn:
# basic data metrics
im_height: 32
im_width: 32
im_channels: 1
debug: *debug
seed: *seed
# needs to match input data mode that was used for training, determines the pose dimensions for the network
gripper_mode: legacy_suction
  # prediction batch size, in training this will be overridden by the val_batch_size in the optimizer's config file
batch_size: *val_batch_size
# architecture
architecture:
im_stream:
conv1_1:
type: conv
filt_dim: 7
num_filt: 64
pool_size: 1
pool_stride: 1
pad: SAME
norm: 0
norm_type: local_response
conv1_2:
type: conv
filt_dim: 5
num_filt: 64
pool_size: 2
pool_stride: 2
pad: SAME
norm: 1
norm_type: local_response
conv2_1:
type: conv
filt_dim: 3
num_filt: 64
pool_size: 1
pool_stride: 1
pad: SAME
norm: 0
norm_type: local_response
conv2_2:
type: conv
filt_dim: 3
num_filt: 64
pool_size: 2
pool_stride: 2
pad: SAME
norm: 1
norm_type: local_response
fc3:
type: fc
out_size: 1024
pose_stream:
pc1:
type: pc
out_size: 128
pc2:
type: pc
out_size: 0
merge_stream:
fc4:
type: fc_merge
out_size: 1024
fc5:
type: fc
out_size: 2
# architecture normalization constants
radius: 2
alpha: 2.0e-05
beta: 0.75
bias: 1.0
# leaky relu coefficient
relu_coeff: 0.0
================================================
FILE: cfg/train_dex-net_4.0_fc_pj.yaml
================================================
# general optimization params
train_batch_size: 64
val_batch_size: &val_batch_size 64
# logging params
num_epochs: 50 # number of epochs to train for
eval_frequency: 10 # how often to get validation error (in epochs)
save_frequency: 10 # how often to save output (in epochs)
vis_frequency: 10000 # how often to visualize filters (in epochs)
log_frequency: 1 # how often to log output (in steps)
# train / val split params
train_pct: 0.8 # percentage of the data to use for training vs validation
total_pct: 1.0 # percentage of all the files to use
eval_total_train_error: 0 # whether or not to evaluate the total training error on each validation
max_files_eval: 1000 # the number of validation files to use in each eval
# optimization params
loss: sparse
optimizer: momentum
train_l2_regularizer: 0.0005
base_lr: 0.01
decay_step_multiplier: 0.5 # number of times to go through training datapoints before stepping down decay rate (in epochs)
decay_rate: 0.95
momentum_rate: 0.9
max_training_examples_per_load: 128
drop_rate: 0.0
max_global_grad_norm: 100000000000
# input params
training_mode: classification
image_field_name: tf_depth_ims
pose_field_name: grasps
# label params
target_metric_name: grasp_metrics # name of the field to use for the labels
metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh)
# preproc params
num_random_files: 10000 # the number of random files to compute dataset statistics in preprocessing (a lower value speeds up initialization)
preproc_log_frequency: 100 # how often to log preprocessing (in steps)
# denoising / synthetic data params
multiplicative_denoising: 0
gamma_shape: 1000.00
symmetrize: 0
gaussian_process_denoising: 0
gaussian_process_rate: 0.5
gaussian_process_scaling_factor: 4.0
gaussian_process_sigma: 0.005
# tensorboard
tensorboard_port: 6006
# debugging params
debug: &debug 0
debug_num_files: 10 # speeds up initialization
seed: &seed 24098
### GQCNN CONFIG ###
gqcnn:
# basic data metrics
im_height: 96
im_width: 96
im_channels: 1
debug: *debug
seed: *seed
# needs to match input data mode that was used for training, determines the pose dimensions for the network
gripper_mode: parallel_jaw
# method by which to integrate depth into the network
input_depth_mode: im_depth_sub
# used for training with multiple angular predictions
angular_bins: 16
  # prediction batch size, in training this will be overridden by the val_batch_size in the optimizer's config file
batch_size: *val_batch_size
# architecture
architecture:
im_stream:
conv1_1:
type: conv
filt_dim: 9
num_filt: 16
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv1_2:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
conv2_1:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv2_2:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
fc3:
type: fc
out_size: 128
fc4:
type: fc
out_size: 128
fc5:
type: fc
out_size: 32
# architecture normalization constants
radius: 2
alpha: 2.0e-05
beta: 0.75
bias: 1.0
# leaky relu coefficient
relu_coeff: 0.0
================================================
FILE: cfg/train_dex-net_4.0_fc_suction.yaml
================================================
# general optimization params
train_batch_size: 64
val_batch_size: &val_batch_size 64
# logging params
num_epochs: 50 # number of epochs to train for
eval_frequency: 10 # how often to get validation error (in epochs)
save_frequency: 10 # how often to save output (in epochs)
vis_frequency: 10000 # how often to visualize filters (in epochs)
log_frequency: 1 # how often to log output (in steps)
# train / val split params
train_pct: 0.8 # percentage of the data to use for training vs validation
total_pct: 1.0 # percentage of all the files to use
eval_total_train_error: 0 # whether or not to evaluate the total training error on each validation
max_files_eval: 1000 # the number of validation files to use in each eval
# optimization params
loss: sparse
optimizer: momentum
train_l2_regularizer: 0.0005
base_lr: 0.01
decay_step_multiplier: 0.5 # number of times to go through training datapoints before stepping down decay rate (in epochs)
decay_rate: 0.95
momentum_rate: 0.9
max_training_examples_per_load: 128
drop_rate: 0.0
max_global_grad_norm: 100000000000
# input params
training_mode: classification
image_field_name: tf_depth_ims
pose_field_name: grasps
# label params
target_metric_name: grasp_metrics # name of the field to use for the labels
metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh)
# preproc params
num_random_files: 10000 # the number of random files to compute dataset statistics in preprocessing (a lower value speeds up initialization)
preproc_log_frequency: 100 # how often to log preprocessing (in steps)
# denoising / synthetic data params
multiplicative_denoising: 0
gamma_shape: 1000.00
symmetrize: 1
gaussian_process_denoising: 0
gaussian_process_rate: 0.5
gaussian_process_scaling_factor: 4.0
gaussian_process_sigma: 0.005
# tensorboard
tensorboard_port: 6006
# debugging params
debug: &debug 0
debug_num_files: 10 # speeds up initialization
seed: &seed 24098
### GQCNN CONFIG ###
gqcnn:
# basic data metrics
im_height: 96
im_width: 96
im_channels: 1
debug: *debug
seed: *seed
# needs to match input data mode that was used for training, determines the pose dimensions for the network
gripper_mode: suction
# method by which to integrate depth into the network
input_depth_mode: im_only
# used for training with multiple angular predictions
angular_bins: 0
  # prediction batch size, in training this will be overridden by the val_batch_size in the optimizer's config file
batch_size: *val_batch_size
# architecture
architecture:
im_stream:
conv1_1:
type: conv
filt_dim: 9
num_filt: 16
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv1_2:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
conv2_1:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv2_2:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
fc3:
type: fc
out_size: 128
fc4:
type: fc
out_size: 128
fc5:
type: fc
out_size: 2
# architecture normalization constants
radius: 2
alpha: 2.0e-05
beta: 0.75
bias: 1.0
# leaky relu coefficient
relu_coeff: 0.0
================================================
FILE: cfg/train_dex-net_4.0_pj.yaml
================================================
# general optimization params
train_batch_size: 64
val_batch_size: &val_batch_size 64
# logging params
num_epochs: 50 # number of epochs to train for
eval_frequency: 10 # how often to get validation error (in epochs)
save_frequency: 10 # how often to save output (in epochs)
vis_frequency: 10000 # how often to visualize filters (in epochs)
log_frequency: 1 # how often to log output (in steps)
# train / val split params
train_pct: 0.8 # percentage of the data to use for training vs validation
total_pct: 1.0 # percentage of all the files to use
eval_total_train_error: 0 # whether or not to evaluate the total training error on each validation
max_files_eval: 1000 # the number of validation files to use in each eval
# optimization params
loss: sparse
optimizer: momentum
train_l2_regularizer: 0.0005
base_lr: 0.01
decay_step_multiplier: 0.5 # number of times to go through training datapoints before stepping down decay rate (in epochs)
decay_rate: 0.95
momentum_rate: 0.9
max_training_examples_per_load: 128
drop_rate: 0.0
max_global_grad_norm: 100000000000
# input params
training_mode: classification
image_field_name: tf_depth_ims
pose_field_name: grasps
# label params
target_metric_name: grasp_metrics # name of the field to use for the labels
metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh)
# preproc params
num_random_files: 10000 # the number of random files to compute dataset statistics in preprocessing (a lower value speeds up initialization)
preproc_log_frequency: 100 # how often to log preprocessing (in steps)
# denoising / synthetic data params
multiplicative_denoising: 0
gamma_shape: 1000.00
symmetrize: 1
gaussian_process_denoising: 0
gaussian_process_rate: 0.5
gaussian_process_scaling_factor: 4.0
gaussian_process_sigma: 0.005
# tensorboard
tensorboard_port: 6006
# debugging params
debug: &debug 0
debug_num_files: 10 # speeds up initialization
seed: &seed 24098
### GQCNN CONFIG ###
gqcnn:
# basic data metrics
im_height: 96
im_width: 96
im_channels: 1
debug: *debug
seed: *seed
# needs to match input data mode that was used for training, determines the pose dimensions for the network
gripper_mode: parallel_jaw
# method by which to integrate depth into the network
input_depth_mode: pose_stream
# used for training with multiple angular predictions
angular_bins: 0
  # prediction batch size, in training this will be overridden by the val_batch_size in the optimizer's config file
batch_size: *val_batch_size
# architecture
architecture:
im_stream:
conv1_1:
type: conv
filt_dim: 9
num_filt: 16
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv1_2:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
conv2_1:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv2_2:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
fc3:
type: fc
out_size: 128
pose_stream:
pc1:
type: pc
out_size: 16
pc2:
type: pc
out_size: 0
merge_stream:
fc4:
type: fc_merge
out_size: 128
fc5:
type: fc
out_size: 2
# architecture normalization constants
radius: 2
alpha: 2.0e-05
beta: 0.75
bias: 1.0
# leaky relu coefficient
relu_coeff: 0.0
================================================
FILE: cfg/train_dex-net_4.0_suction.yaml
================================================
# general optimization params
train_batch_size: 64
val_batch_size: &val_batch_size 64
# logging params
num_epochs: 50 # number of epochs to train for
eval_frequency: 10 # how often to get validation error (in epochs)
save_frequency: 10 # how often to save output (in epochs)
vis_frequency: 10000 # how often to visualize filters (in epochs)
log_frequency: 1 # how often to log output (in steps)
# train / val split params
train_pct: 0.8 # percentage of the data to use for training vs validation
total_pct: 1.0 # percentage of all the files to use
eval_total_train_error: 0 # whether or not to evaluate the total training error on each validation
max_files_eval: 1000 # the number of validation files to use in each eval
# optimization params
loss: sparse
optimizer: momentum
train_l2_regularizer: 0.0005
base_lr: 0.01
decay_step_multiplier: 0.5 # number of times to go through training datapoints before stepping down decay rate (in epochs)
decay_rate: 0.95
momentum_rate: 0.9
max_training_examples_per_load: 128
drop_rate: 0.0
max_global_grad_norm: 100000000000
# input params
training_mode: classification
image_field_name: tf_depth_ims
pose_field_name: grasps
# label params
target_metric_name: grasp_metrics # name of the field to use for the labels
metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh)
# preproc params
num_random_files: 10000 # the number of random files to compute dataset statistics in preprocessing (a lower value speeds up initialization)
preproc_log_frequency: 100 # how often to log preprocessing (in steps)
# denoising / synthetic data params
multiplicative_denoising: 0
gamma_shape: 1000.00
symmetrize: 1
gaussian_process_denoising: 0
gaussian_process_rate: 0.5
gaussian_process_scaling_factor: 4.0
gaussian_process_sigma: 0.005
# tensorboard
tensorboard_port: 6006
# debugging params
debug: &debug 0
debug_num_files: 10 # speeds up initialization
seed: &seed 24098
### GQCNN CONFIG ###
gqcnn:
# basic data metrics
im_height: 96
im_width: 96
im_channels: 1
debug: *debug
seed: *seed
# needs to match input data mode that was used for training, determines the pose dimensions for the network
gripper_mode: suction
# method by which to integrate depth into the network
input_depth_mode: pose_stream
# used for training with multiple angular predictions
angular_bins: 0
  # prediction batch size, in training this will be overridden by the val_batch_size in the optimizer's config file
batch_size: *val_batch_size
# architecture
architecture:
im_stream:
conv1_1:
type: conv
filt_dim: 9
num_filt: 8
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv1_2:
type: conv
filt_dim: 3
num_filt: 8
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
conv2_1:
type: conv
filt_dim: 3
num_filt: 8
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv2_2:
type: conv
filt_dim: 3
num_filt: 8
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
fc3:
type: fc
out_size: 64
pose_stream:
pc1:
type: pc
out_size: 16
pc2:
type: pc
out_size: 0
merge_stream:
fc4:
type: fc_merge
out_size: 64
fc5:
type: fc
out_size: 2
# architecture normalization constants
radius: 2
alpha: 2.0e-05
beta: 0.75
bias: 1.0
# leaky relu coefficient
relu_coeff: 0.0
================================================
FILE: cfg/train_example_pj.yaml
================================================
# general optimization params
train_batch_size: 64
val_batch_size: &val_batch_size 64
# logging params
num_epochs: 100 # number of epochs to train for
eval_frequency: 10 # how often to get validation error (in epochs)
save_frequency: 10 # how often to save output (in epochs)
vis_frequency: 10000 # how often to visualize filters (in epochs)
log_frequency: 1 # how often to log output (in steps)
# train / val split params
train_pct: 0.9 # percentage of the data to use for training vs validation
total_pct: 1.0 # percentage of all the files to use
eval_total_train_error: 0 # whether or not to evaluate the total training error on each validation
max_files_eval: 1000 # the number of validation files to use in each eval
# optimization params
loss: sparse
optimizer: momentum
train_l2_regularizer: 0.0005
base_lr: 0.01
decay_step_multiplier: 1.0 # number of times to go through training datapoints before stepping down decay rate (in epochs)
decay_rate: 0.98
momentum_rate: 0.9
max_training_examples_per_load: 128
drop_rate: 0.0
max_global_grad_norm: 100000000000
# input params
training_mode: classification
image_field_name: tf_depth_ims
pose_field_name: grasps
# label params
target_metric_name: grasp_metrics # name of the field to use for the labels
metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh)
# preproc params
num_random_files: 1000 # the number of random files to compute dataset statistics in preprocessing (a lower value speeds up initialization)
preproc_log_frequency: 100 # how often to log preprocessing (in steps)
# denoising / synthetic data params
multiplicative_denoising: 0
gamma_shape: 1000.00
symmetrize: 1
gaussian_process_denoising: 0
gaussian_process_rate: 0.5
gaussian_process_scaling_factor: 4.0
gaussian_process_sigma: 0.005
# tensorboard
tensorboard_port: 6006
# debugging params
debug: &debug 0
debug_num_files: 10 # speeds up initialization
seed: &seed 24098
### GQCNN CONFIG ###
gqcnn:
# basic data metrics
im_height: 96
im_width: 96
im_channels: 1
debug: *debug
seed: *seed
# needs to match input data mode that was used for training, determines the pose dimensions for the network
gripper_mode: parallel_jaw
# method by which to integrate depth into the network
input_depth_mode: pose_stream
# used for training with multiple angular predictions
angular_bins: 0
  # prediction batch size, in training this will be overridden by the val_batch_size in the optimizer's config file
batch_size: *val_batch_size
# architecture
architecture:
im_stream:
conv1_1:
type: conv
filt_dim: 9
num_filt: 8
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
conv1_2:
type: conv
filt_dim: 3
num_filt: 8
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
conv2_1:
type: conv
filt_dim: 3
num_filt: 8
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv2_2:
type: conv
filt_dim: 3
num_filt: 8
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
fc3:
type: fc
out_size: 64
pose_stream:
pc1:
type: pc
out_size: 4
pc2:
type: pc
out_size: 0
merge_stream:
fc4:
type: fc_merge
out_size: 64
fc5:
type: fc
out_size: 2
# architecture normalization constants
radius: 2
alpha: 2.0e-05
beta: 0.75
bias: 1.0
# leaky relu coefficient
relu_coeff: 0.0
================================================
FILE: cfg/train_example_suction.yaml
================================================
# general optimization params
train_batch_size: 64
val_batch_size: &val_batch_size 64
# logging params
num_epochs: 50 # number of epochs to train for
eval_frequency: 10 # how often to get validation error (in epochs)
save_frequency: 10 # how often to save output (in epochs)
vis_frequency: 10000 # how often to visualize filters (in epochs)
log_frequency: 1 # how often to log output (in steps)
# train / val split params
train_pct: 0.9 # percentage of the data to use for training vs validation
total_pct: 1.0 # percentage of all the files to use
eval_total_train_error: 0 # whether or not to evaluate the total training error on each validation
max_files_eval: 1000 # the number of validation files to use in each eval
# optimization params
loss: sparse
optimizer: momentum
train_l2_regularizer: 0.0005
base_lr: 0.01
decay_step_multiplier: 1.0 # number of times to go through training datapoints before stepping down decay rate (in epochs)
decay_rate: 0.95
momentum_rate: 0.9
max_training_examples_per_load: 128
drop_rate: 0.0
max_global_grad_norm: 100000000000
# input params
training_mode: classification
image_field_name: tf_depth_ims
pose_field_name: grasps
# label params
target_metric_name: grasp_metrics # name of the field to use for the labels
metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh)
# preproc params
num_random_files: 1000 # the number of random files to compute dataset statistics in preprocessing (a lower value speeds up initialization)
preproc_log_frequency: 100 # how often to log preprocessing (in steps)
# denoising / synthetic data params
multiplicative_denoising: 0
gamma_shape: 1000.00
symmetrize: 1
gaussian_process_denoising: 0
gaussian_process_rate: 0.5
gaussian_process_scaling_factor: 4.0
gaussian_process_sigma: 0.005
# tensorboard
tensorboard_port: 6006
# debugging params
debug: &debug 0
debug_num_files: 10 # speeds up initialization
seed: &seed 24098
### GQCNN CONFIG ###
gqcnn:
# basic data metrics
im_height: 96
im_width: 96
im_channels: 1
debug: *debug
seed: *seed
# needs to match input data mode that was used for training, determines the pose dimensions for the network
gripper_mode: suction
# method by which to integrate depth into the network
input_depth_mode: pose_stream
# used for training with multiple angular predictions
angular_bins: 0
  # prediction batch size, in training this will be overridden by the val_batch_size in the optimizer's config file
batch_size: *val_batch_size
# architecture
architecture:
im_stream:
conv1_1:
type: conv
filt_dim: 9
num_filt: 8
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv1_2:
type: conv
filt_dim: 3
num_filt: 8
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
conv2_1:
type: conv
filt_dim: 3
num_filt: 8
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv2_2:
type: conv
filt_dim: 3
num_filt: 8
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
fc3:
type: fc
out_size: 64
pose_stream:
pc1:
type: pc
out_size: 16
pc2:
type: pc
out_size: 0
merge_stream:
fc4:
type: fc_merge
out_size: 64
fc5:
type: fc
out_size: 2
# architecture normalization constants
radius: 2
alpha: 2.0e-05
beta: 0.75
bias: 1.0
# leaky relu coefficient
relu_coeff: 0.0
================================================
FILE: cfg/train_fc.yaml
================================================
# general optimization params
train_batch_size: 64
val_batch_size: &val_batch_size 64
# logging params
num_epochs: 50 # number of epochs to train for
eval_frequency: 10 # how often to get validation error (in epochs)
save_frequency: 10 # how often to save output (in epochs)
vis_frequency: 10000 # how often to visualize filters (in epochs)
log_frequency: 1 # how often to log output (in steps)
# train / val split params
train_pct: 0.8 # percentage of the data to use for training vs validation
total_pct: 1.0 # percentage of all the files to use
eval_total_train_error: 0 # whether or not to evaluate the total training error on each validation
max_files_eval: 1000 # the number of validation files to use in each eval
# optimization params
loss: sparse
optimizer: momentum
train_l2_regularizer: 0.0005
base_lr: 0.01
decay_step_multiplier: 0.5 # number of times to go through training datapoints before stepping down decay rate (in epochs)
decay_rate: 0.95
momentum_rate: 0.9
max_training_examples_per_load: 128
drop_rate: 0.0
max_global_grad_norm: 100000000000
# input params
training_mode: classification
image_field_name: tf_depth_ims
pose_field_name: grasps
# label params
target_metric_name: grasp_metrics # name of the field to use for the labels
metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh)
# preproc params
num_random_files: 10000 # the number of random files to compute dataset statistics in preprocessing (a lower value speeds up initialization)
preproc_log_frequency: 100 # how often to log preprocessing (in steps)
# denoising / synthetic data params
multiplicative_denoising: 0
gamma_shape: 1000.00
symmetrize: 0
gaussian_process_denoising: 0
gaussian_process_rate: 0.5
gaussian_process_scaling_factor: 4.0
gaussian_process_sigma: 0.005
# tensorboard
tensorboard_port: 6006
# debugging params
debug: &debug 0
debug_num_files: 10 # speeds up initialization
seed: &seed 24098
### GQCNN CONFIG ###
gqcnn:
# basic data metrics
im_height: 96
im_width: 96
im_channels: 1
debug: *debug
seed: *seed
# needs to match input data mode that was used for training, determines the pose dimensions for the network
gripper_mode: parallel_jaw
# method by which to integrate depth into the network
input_depth_mode: im_depth_sub
# used for training with multiple angular predictions
angular_bins: 16
# prediction batch size, in training this will be overridden by the val_batch_size in the optimizer's config file
batch_size: *val_batch_size
# architecture
architecture:
im_stream:
conv1_1:
type: conv
filt_dim: 9
num_filt: 16
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv1_2:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
conv2_1:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 1
pool_stride: 1
pad: VALID
norm: 0
norm_type: local_response
conv2_2:
type: conv
filt_dim: 5
num_filt: 16
pool_size: 2
pool_stride: 2
pad: VALID
norm: 0
norm_type: local_response
fc3:
type: fc
out_size: 128
fc4:
type: fc
out_size: 128
fc5:
type: fc
out_size: 32
# architecture normalization constants
radius: 2
alpha: 2.0e-05
beta: 0.75
bias: 1.0
# leaky relu coefficient
relu_coeff: 0.0
================================================
FILE: ci/travis/format.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
# Script for YAPF formatting. Adapted from https://github.com/ray-project/ray/blob/master/ci/travis/format.sh.
# Flags passed to every yapf invocation.
YAPF_FLAGS=(
    '--style' ".style.yapf"
    '--recursive'
    '--parallel'
)

# Paths excluded from formatting (currently none).
YAPF_EXCLUDES=()

# Rewrite the given files in place.
format() {
    yapf --in-place "${YAPF_FLAGS[@]}" -- "$@"
}

# Diff-check the whole tree and print the diff to stdout for Travis.
format_all() {
    yapf --diff "${YAPF_FLAGS[@]}" "${YAPF_EXCLUDES[@]}" .
}

# Dispatch on the first CLI argument:
#   --files F...  format the listed files in place (`--files` must be first)
#   --all         diff-check the entire Python directory; further args ignored
# Anything else is a no-op.
case "$1" in
    --files)
        shift
        format "$@"
        ;;
    --all)
        format_all
        ;;
esac
================================================
FILE: data/calib/phoxi/phoxi.intr
================================================
{"_cy": 191.75, "_cx": 255.5, "_fy": 552.5, "_height": 386, "_fx": 552.5, "_width": 516, "_skew": 0.0, "_K": 0, "_frame": "phoxi"}
================================================
FILE: data/calib/phoxi/phoxi_to_world.tf
================================================
phoxi
world
0.385146 -0.121589 0.808145
0.005659 -0.999983 0.001249
-0.989208 -0.005415 0.146420
-0.146410 -0.002064 -0.989222
================================================
FILE: data/calib/primesense/primesense.intr
================================================
{"_cy": 239.5, "_cx": 319.5, "_fy": 525.0, "_height": 480, "_fx": 525.0, "_width": 640, "_skew": 0.0, "_K": 0, "_frame": "primesense_overhead"}
================================================
FILE: data/calib/primesense/primesense.tf
================================================
primesense_overhead
world
0.214280 0.004186 0.872913
0.021188 -0.975273 0.219984
-0.999765 -0.021698 0.000098
0.004678 -0.219934 -0.975504
================================================
FILE: docker/cpu/Dockerfile
================================================
# CPU-only image for the gqcnn library, based on Ubuntu 16.04 (xenial).
FROM ubuntu:xenial

# NOTE(review): MAINTAINER is deprecated in Docker; consider
# `LABEL maintainer="..."` instead.
MAINTAINER Vishal Satish

# Args.
# Must be an absolute path.
ARG work_dir=/root/Workspace

# Install `apt-get` deps.
RUN apt-get update && apt-get install -y \
    build-essential \
    python3 \
    python3-dev \
    python3-tk \
    python-opengl \
    curl \
    libsm6 \
    libxext6 \
    libglib2.0-0 \
    libxrender1 \
    wget \
    unzip

# Install libspatialindex (required for latest rtree).
RUN curl -L http://download.osgeo.org/libspatialindex/spatialindex-src-1.8.5.tar.gz | tar xz && \
    cd spatialindex-src-1.8.5 && \
    ./configure && \
    make && \
    make install && \
    ldconfig && \
    cd ..

# Install pip (`apt-get install python-pip` causes trouble w/ networkx).
RUN curl -O https://bootstrap.pypa.io/get-pip.py && \
    python3 get-pip.py && \
    rm get-pip.py

# Required for easy_install to find right skimage version for Python 3.5.
RUN pip3 install -U setuptools

# Make working directory.
WORKDIR ${work_dir}

# Copy the library.
ADD docker/gqcnn.tar .

# This is because `python setup.py develop` skips `install_requires` (I think).
RUN python3 -m pip install -r gqcnn/requirements/cpu_requirements.txt

# Install the library in editable mode because it's more versatile (in case we
# want to develop or if users want to modify things).
# Keep the egg outside of the library in site-packages in case we want to mount
# the library (overwriting it) for development with docker.
# NOTE(review): the site-packages path hard-codes Python 3.5 — confirm it
# matches the Python version shipped with the base image.
ENV PYTHONPATH ${work_dir}/gqcnn
WORKDIR /usr/local/lib/python3.5/site-packages/
RUN python3 ${work_dir}/gqcnn/setup.py develop --docker

# Move to the top-level gqcnn package dir.
WORKDIR ${work_dir}/gqcnn
================================================
FILE: docker/gpu/Dockerfile
================================================
# GPU image for the gqcnn library, based on CUDA 10.0 + cuDNN 7 on Ubuntu 16.04.
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04

# NOTE(review): MAINTAINER is deprecated in Docker; consider
# `LABEL maintainer="..."` instead.
MAINTAINER Vishal Satish

# Args
# `work_dir` must be an absolute path.
ARG work_dir=/root/Workspace

# Install `apt-get` deps.
RUN apt-get update && apt-get install -y \
    build-essential \
    python3 \
    python3-dev \
    python3-tk \
    python-opengl \
    curl \
    libsm6 \
    libxext6 \
    libglib2.0-0 \
    libxrender1 \
    wget \
    unzip

# Install libspatialindex (required for latest rtree).
RUN curl -L http://download.osgeo.org/libspatialindex/spatialindex-src-1.8.5.tar.gz | tar xz && \
    cd spatialindex-src-1.8.5 && \
    ./configure && \
    make && \
    make install && \
    ldconfig && \
    cd ..

# Install pip (`apt-get install python-pip` causes trouble w/ networkx).
RUN curl -O https://bootstrap.pypa.io/get-pip.py && \
    python3 get-pip.py && \
    rm get-pip.py

# Required for easy_install to find right skimage version for Python 3.5.
RUN pip3 install -U setuptools

# Make working directory.
WORKDIR ${work_dir}

# Copy the library.
ADD docker/gqcnn.tar .

# This is because `python setup.py develop` skips install_requires (I think) and also because we want to explicitly use the GPU requirements.
RUN python3 -m pip install -r gqcnn/requirements/gpu_requirements.txt

# Install the library in editable mode because it's more versatile (in case we
# want to develop or if users want to modify things).
# Keep the egg outside of the library in site-packages in case we want to mount
# the library (overwriting it) for development with docker.
# NOTE(review): the site-packages path hard-codes Python 3.5 — confirm it
# matches the Python version shipped with the base image.
ENV PYTHONPATH ${work_dir}/gqcnn
WORKDIR /usr/local/lib/python3.5/site-packages/
RUN python3 ${work_dir}/gqcnn/setup.py develop --docker

# Move to the base library dir
WORKDIR ${work_dir}/gqcnn
================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
SPHINXPROJ = GQCNN
SOURCEDIR = source
BUILDDIR = build
# Directories copied from master into (and later purged from) the gh-pages
# branch while building the published docs.
GH_PAGES_SOURCES = docs examples gqcnn tools

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Remove all Sphinx build artifacts.
.PHONY: clean
clean:
	rm -rf $(BUILDDIR)/*

# Build the HTML docs from master and publish them to the gh-pages branch.
# The whole recipe is one shell command chained with `&&` so any failure
# short-circuits the remaining steps; the final `git checkout master` follows
# a `;` so it runs regardless, restoring the working branch.
.PHONY: gh-pages
gh-pages:
	git checkout gh-pages && \
	cd .. && \
	git rm -rf . && git clean -fxd && \
	git checkout master $(GH_PAGES_SOURCES) && \
	git reset HEAD && \
	cd docs && \
	make html && \
	cd .. && \
	mv -fv docs/build/html/* ./ && \
	touch .nojekyll && \
	rm -rf $(GH_PAGES_SOURCES) && \
	git add -A && \
	git commit -m "Generated gh-pages for `git log master -1 --pretty=short --abbrev-commit`" && \
	git push origin --delete gh-pages && \
	git push origin gh-pages ; \
	git checkout master

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
================================================
FILE: docs/gh_deploy.sh
================================================
#!/bin/sh
# Build and publish the Sphinx docs via the Makefile's `gh-pages` target
# (expects to be run from the docs/ directory).
make gh-pages
# NOTE(review): this `cd` only changes the directory of this script's own
# (about-to-exit) shell, so it has no effect on the caller — presumably a
# leftover from running these commands interactively; confirm before removing.
cd ../docs
================================================
FILE: docs/make.bat
================================================
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation
REM (Windows equivalent of the neighboring Makefile.)

REM Fall back to the `sphinx-build` found on PATH when SPHINXBUILD is unset.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
set SPHINXPROJ=GQCNN

REM With no target given, show the Sphinx help text.
if "%1" == "" goto help

REM Probe sphinx-build; errorlevel 9009 means "command was not found".
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

REM Forward the requested target to sphinx-build's "make mode".
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%

:end
popd
================================================
FILE: docs/source/api/analysis.rst
================================================
Analysis
========
GQCNNAnalyzer
~~~~~~~~~~~~~
A tool for analyzing trained GQ-CNNs. Calculates statistics such as training/validation errors and losses. Also plots Precision-Recall Curve and ROC, and saves sample TP/TN/FP/FN training/validation examples.
.. autoclass:: gqcnn.GQCNNAnalyzer
================================================
FILE: docs/source/api/gqcnn.rst
================================================
GQ-CNN
======
GQ-CNN and FC-GQ-CNN classes are **never accessed directly**, but through a lightweight factory function that returns the corresponding class depending on the specified backend. ::
$ from gqcnn import get_gqcnn_model
$
$ backend = 'tf'
$ my_gqcnn = get_gqcnn_model(backend)()
.. autofunction:: gqcnn.get_gqcnn_model
.. autofunction:: gqcnn.get_fc_gqcnn_model
GQCNNTF
~~~~~~~
Tensorflow implementation of GQ-CNN model.
.. autoclass:: gqcnn.model.tf.GQCNNTF
:exclude-members: init_mean_and_std,
set_base_network,
init_weights_file,
initialize_network,
set_batch_size,
set_im_mean,
get_im_mean,
set_im_std,
get_im_std,
set_pose_mean,
get_pose_mean,
set_pose_std,
get_pose_std,
set_im_depth_sub_mean,
set_im_depth_sub_std,
add_softmax_to_output,
add_sigmoid_to_output,
update_batch_size,
FCGQCNNTF
~~~~~~~~~
Tensorflow implementation of FC-GQ-CNN model.
.. autoclass:: gqcnn.model.tf.FCGQCNNTF
:exclude-members: __init__
================================================
FILE: docs/source/api/policies.rst
================================================
Policies
========
All GQ-CNN grasping policies are child classes of the base :ref:`GraspingPolicy` class that implements `__call__()`, which operates on :ref:`RgbdImageStates <RgbdImageState>` and returns a :ref:`GraspAction`. ::
$ from gqcnn import RgbdImageState, CrossEntropyRobustGraspingPolicy
$
$ im = RgbdImageState.load()
$ my_policy = CrossEntropyRobustGraspingPolicy()
$
$ my_grasp_action = my_policy(im)
Primary Policies
~~~~~~~~~~~~~~~~
CrossEntropyRobustGraspingPolicy
--------------------------------
An implementation of the `Cross Entropy Method (CEM)`_ used in `Dex-Net 2.0`_, `Dex-Net 2.1`_, `Dex-Net 3.0`_, and `Dex-Net 4.0`_ to iteratively locate the best grasp.
.. autoclass:: gqcnn.CrossEntropyRobustGraspingPolicy
FullyConvolutionalGraspingPolicyParallelJaw
-------------------------------------------
An implementation of the `FC-GQ-CNN`_ parallel jaw policy that uses dense, parallelized fully convolutional networks.
.. autoclass:: gqcnn.FullyConvolutionalGraspingPolicyParallelJaw
FullyConvolutionalGraspingPolicySuction
---------------------------------------
An implementation of the `FC-GQ-CNN`_ suction policy that uses dense, parallelized fully convolutional networks.
.. autoclass:: gqcnn.FullyConvolutionalGraspingPolicySuction
Grasps and Image Wrappers
~~~~~~~~~~~~~~~~~~~~~~~~~
.. _RgbdImageState:
RgbdImageState
--------------
A wrapper for states containing an RGBD (RGB + Depth) image, camera intrinsics, and segmentation masks.
.. autoclass:: gqcnn.RgbdImageState
.. _GraspAction:
GraspAction
-----------
A wrapper for 2D grasp actions such as :ref:`Grasp2D` or :ref:`SuctionPoint2D`.
.. autoclass:: gqcnn.grasping.policy.policy.GraspAction
.. _Grasp2D:
Grasp2D
-------
A wrapper for 2D parallel jaw grasps.
.. autoclass:: gqcnn.grasping.grasp.Grasp2D
.. _SuctionPoint2D:
SuctionPoint2D
--------------
A wrapper for 2D suction grasps.
.. autoclass:: gqcnn.grasping.grasp.SuctionPoint2D
Miscellaneous and Parent Policies
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Policy
------
.. autoclass:: gqcnn.grasping.policy.policy.Policy
.. _GraspingPolicy:
GraspingPolicy
--------------
.. autoclass:: gqcnn.grasping.policy.policy.GraspingPolicy
FullyConvolutionalGraspingPolicy
--------------------------------
.. autoclass:: gqcnn.grasping.policy.fc_policy.FullyConvolutionalGraspingPolicy
RobustGraspingPolicy
--------------------
.. autoclass:: gqcnn.RobustGraspingPolicy
UniformRandomGraspingPolicy
---------------------------
.. autoclass:: gqcnn.UniformRandomGraspingPolicy
.. _Cross Entropy Method (CEM): https://en.wikipedia.org/wiki/Cross-entropy_method
.. _Dex-Net 2.0: https://berkeleyautomation.github.io/dex-net/#dexnet_2
.. _Dex-Net 2.1: https://berkeleyautomation.github.io/dex-net/#dexnet_21
.. _Dex-Net 3.0: https://berkeleyautomation.github.io/dex-net/#dexnet_3
.. _Dex-Net 4.0: https://berkeleyautomation.github.io/dex-net/#dexnet_4
.. _FC-GQ-CNN: https://berkeleyautomation.github.io/fcgqcnn
================================================
FILE: docs/source/api/training.rst
================================================
Training
========
GQ-CNN training classes are **never accessed directly**, but through a lightweight factory function that returns the corresponding class depending on the specified backend. ::
$ from gqcnn import get_gqcnn_trainer
$
$ backend = 'tf'
$ my_trainer = get_gqcnn_trainer(backend)()
.. autofunction:: gqcnn.get_gqcnn_trainer
GQCNNTrainerTF
~~~~~~~~~~~~~~
.. autoclass:: gqcnn.training.tf.GQCNNTrainerTF
================================================
FILE: docs/source/benchmarks/benchmarks.rst
================================================
Dex-Net 2.0
~~~~~~~~~~~
Below are the highest classification accuracies achieved on the `Dex-Net 2.0`_ dataset on a randomized 80-20 train-validation split using various splitting rules:
.. image:: ../images/gqcnn_leaderboard.png
:width: 100%
The current leader is a ConvNet submitted by nomagic.ai. `GQ` is our best GQ-CNN for `Dex-Net 2.0`_.
We believe grasping performance on the physical robot can be improved if these validation error rates can be further reduced by modifications to the network architecture and optimization.
If you achieve superior numbers on a randomized validation set, please email Jeff Mahler (jmahler@berkeley.edu) with the subject "Dex-Net 2.0 Benchmark Submission" and we will consider testing on our ABB YuMi.
.. _Dex-Net 2.0: https://berkeleyautomation.github.io/dex-net/#dexnet_2
================================================
FILE: docs/source/conf.py
================================================
# -*- coding: utf-8 -*-
#
# core documentation build configuration file, created by
# sphinx-quickstart on Sun Oct 16 14:33:48 2016.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys
import os

import sphinx_rtd_theme

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# Fix: the original used `os.path.dirname(__name__)`, which takes the dirname
# of the module-name *string* ("") and therefore resolves to the current
# working directory, not this file's directory. It only worked because Sphinx
# happens to execute conf.py with the source dir as cwd. `__file__` anchors
# the paths correctly regardless of cwd.
_CONF_DIR = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(_CONF_DIR, "../../"))
sys.path.insert(0, os.path.join(_CONF_DIR, "../../examples"))
sys.path.insert(0, os.path.join(_CONF_DIR, "../../tools"))
# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
# needs_sphinx = "1.0"

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named "sphinx.ext.*") or your custom
# ones.
extensions = ["sphinx.ext.autodoc", "sphinxcontrib.napoleon"]

# autodoc: take class docs from the class docstring only, keep members in
# source order, and always emit members + inheritance information.
autoclass_content = "class"
autodoc_member_order = "bysource"
autodoc_default_flags = ["members", "show-inheritance"]

# napoleon: include special members (e.g. __call__) and __init__ in the
# output when they carry their own docstrings.
napoleon_include_special_with_doc = True
napoleon_include_init_with_doc = True

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
# source_suffix = [".rst", ".md"]
source_suffix = ".rst"

# The encoding of source files.
# source_encoding = "utf-8-sig"

# The master toctree document.
master_doc = "index"

# General information about the project.
project = u"GQCNN"
copyright = u"2019, The Regents of the University of California"
author = u"Vishal Satish, Jeff Mahler"

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = u"1.1"
# The full version, including alpha/beta/rc tags.
release = u"1.1.0"

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
# today = ""
# Else, today_fmt is used as the format for a strftime call.
# today_fmt = "%B %d, %Y"

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = []

# The reST default role (used for this markup: `text`) to use for all
# documents.
# default_role = None

# If true, "()" will be appended to :func: etc. cross-reference text.
# add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
# add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
# show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"

# A list of ignored prefixes for module index sorting.
# modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built documents.
# keep_warnings = False

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False

# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = "sphinx_rtd_theme"
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
# html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
# html_theme_path = []

# The name for this set of Sphinx documents. If None, it defaults to
# " v documentation".
# html_title = None

# A shorter title for the navigation bar. Default is the same as html_title.
# html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
# html_logo = None

# The name of an image file (relative to this directory) to use as a favicon of
# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
# html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ["_static"]

# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
# html_extra_path = []

# If not "", a "Last updated on:" timestamp is inserted at every page bottom,
# using the given strftime format.
# html_last_updated_fmt = "%b %d, %Y"

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
# html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
# html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
# html_additional_pages = {}

# If false, no module index is generated.
# html_domain_indices = True

# If false, no index is generated.
# html_use_index = True

# If true, the index is split into individual pages for each letter.
# html_split_index = False

# If true, links to the reST sources are added to the pages.
# html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
# html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
# html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
# html_use_opensearch = ""

# This is the file name suffix for HTML files (e.g. ".xhtml").
# html_file_suffix = None

# Language to be used for generating the HTML full-text search index.
# Sphinx supports the following languages:
#   "da", "de", "en", "es", "fi", "fr", "hu", "it", "ja"
#   "nl", "no", "pt", "ro", "ru", "sv", "tr"
# html_search_language = "en"

# A dictionary with options for the search language support, empty by default.
# Now only "ja" uses this config value
# html_search_options = {"type": "default"}

# The name of a javascript file (relative to the configuration directory) that
# implements a search results scorer. If empty, the default will be used.
# html_search_scorer = "scorer.js"

# Output file base name for HTML help builder.
htmlhelp_basename = "GQCNNdoc"

# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
    # The paper size ("letterpaper" or "a4paper").
    # "papersize": "letterpaper",

    # The font size ("10pt", "11pt" or "12pt").
    # "pointsize": "10pt",

    # Additional stuff for the LaTeX preamble.
    # "preamble": "",

    # Latex figure (float) alignment
    # "figure_align": "htbp",
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, "GQCNN.tex", u"GQCNN Documentation",
     u"Jeff Mahler, Vishal Satish", "manual"),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
# latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
# latex_use_parts = False

# If true, show page references after internal links.
# latex_show_pagerefs = False

# If true, show URL addresses after external links.
# latex_show_urls = False

# Documents to append as an appendix to all manuals.
# latex_appendices = []

# If false, no module index is generated.
# latex_domain_indices = True

# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(master_doc, "GQCNN", u"GQCNN Documentation", [author], 1)]

# If true, show URL addresses after external links.
# man_show_urls = False

# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, "GQCNN", u"GQCNN Documentation", author, "GQCNN",
     "One line description of project.", "Miscellaneous"),
]

# Documents to append as an appendix to all manuals.
# texinfo_appendices = []

# If false, no module index is generated.
# texinfo_domain_indices = True

# How to display URL addresses: "footnote", "no", or "inline".
# texinfo_show_urls = "footnote"

# If true, do not generate a @detailmenu in the "Top" node's menu.
# texinfo_no_detailmenu = False
================================================
FILE: docs/source/index.rst
================================================
.. GQCNN documentation master file, created by
sphinx-quickstart on Thu May 4 16:09:26 2017.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Berkeley AUTOLAB's GQCNN Package
================================
Overview
--------
The `gqcnn` package is a Python API for training and deploying `Grasp Quality Convolutional Neural Networks (GQ-CNNs)`_ for grasp planning using training datasets from the `Dexterity Network (Dex-Net)`_, developed by the `Berkeley AUTOLAB`_ and introduced in the `Dex-Net 2.0 paper`_.
.. note:: We're excited to announce **version 1.0**, which brings the GQ-CNN package up to date with recent research in `Dex-Net`_.
Version 1.0 introduces support for:
#. **Dex-Net 4.0:** Composite policies that decide whether to use a suction cup or parallel-jaw gripper.
#. **Fully Convolutional GQ-CNNs:** Fully convolutional architectures that efficiently evaluate millions of grasps faster than prior GQ-CNNs.
Links
-----
* `Source Code`_
* `Datasets`_
* `Pretrained Models`_
* `Dex-Net Website`_
* `UC Berkeley AUTOLAB`_
.. _Source Code: https://github.com/BerkeleyAutomation/gqcnn
.. _Datasets: http://bit.ly/2rIM7Jk
.. _Pretrained Models: http://bit.ly/2tAFMko
.. _Dex-Net Website: https://berkeleyautomation.github.io/dex-net
.. _UC Berkeley AUTOLAB: http://autolab.berkeley.edu
.. image:: images/gqcnn.png
:width: 100 %
Project Goals
-------------
The goals of this project are to facilitate:
#. **Replicability** of GQ-CNN training and deployment from:
#. The latest `Dex-Net 4.0`_ results.
#. Older results such as `Dex-Net 2.0`_, `Dex-Net 2.1`_ and `Dex-Net 3.0`_.
#. Experimental results such as `FC-GQ-CNN`_.
#. **Research extensions** on novel GQ-CNN architectures that have higher performance on `Dex-Net 4.0`_ training datasets.
.. _Dex-Net 2.0: https://berkeleyautomation.github.io/dex-net/#dexnet_2
.. _Dex-Net 2.1: https://berkeleyautomation.github.io/dex-net/#dexnet_21
.. _Dex-Net 3.0: https://berkeleyautomation.github.io/dex-net/#dexnet_3
.. _Dex-Net 4.0: https://berkeleyautomation.github.io/dex-net/#dexnet_4
.. _FC-GQ-CNN: https://berkeleyautomation.github.io/fcgqcnn
Our longer-term goal is to encourage development of robust GQ-CNNs that can be used to plan grasps on different hardware setups with different robots and cameras.
Disclaimer
----------
GQ-CNN models are sensitive to the following parameters used during dataset generation:
#. The robot gripper
#. The depth camera
#. The distance between the camera and workspace.
As a result, we cannot guarantee performance of our pre-trained models on other physical setups. If you have a specific use-case in mind, please reach out to us. It might be possible to generate a custom dataset for your particular setup. We are actively researching how to generate more robust datasets that can generalize across robots, cameras, and viewpoints.
Development
-----------
The package is currently under active development. Please raise all bugs, feature requests, and other issues under the `Github Issues`_.
For other questions or concerns, please contact Jeff Mahler (jmahler@berkeley.edu) or Vishal Satish (vsatish@berkeley.edu) with the subject line starting with "GQ-CNN Development".
Academic Use
------------
If you use the code, datasets, or models in a publication, please cite the appropriate paper:
#. **Dex-Net 2.0** `(bibtex) `__
#. **Dex-Net 2.1** `(bibtex) `__
#. **Dex-Net 3.0** `(bibtex) `__
#. **Dex-Net 4.0** `(bibtex) `__
#. **FC-GQ-CNN** `(bibtex) `__
.. _Grasp Quality Convolutional Neural Networks (GQ-CNNs): info/info.html
.. _Dexterity Network (Dex-Net): https://berkeleyautomation.github.io/dex-net
.. _Dex-Net: https://berkeleyautomation.github.io/dex-net
.. _Berkeley AUTOLAB: http://autolab.berkeley.edu/
.. _Dex-Net 2.0 paper: https://github.com/BerkeleyAutomation/dex-net/raw/gh-pages/docs/dexnet_rss2017_final.pdf
.. _Github Issues: https://github.com/BerkeleyAutomation/gqcnn/issues
.. toctree::
:maxdepth: 2
:caption: Background
info/info.rst
.. toctree::
:maxdepth: 2
:caption: Installation Guide
install/install.rst
.. toctree::
:maxdepth: 2
:caption: Getting Started
tutorials/tutorial.rst
.. toctree::
:maxdepth: 2
:caption: Replication
replication/replication.rst
.. toctree::
:maxdepth: 2
:caption: Benchmarks
benchmarks/benchmarks.rst
.. toctree::
:maxdepth: 2
:caption: API Documentation
:glob:
api/gqcnn.rst
api/training.rst
api/analysis.rst
api/policies.rst
.. toctree::
:maxdepth: 2
:caption: License
license/license.rst
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
================================================
FILE: docs/source/info/info.rst
================================================
What are GQ-CNNs?
-----------------
GQ-CNNs are neural network architectures that take as input a depth image and grasp, and output the predicted probability that the grasp will successfully hold the object while lifting, transporting, and shaking the object.
.. figure:: ../images/gqcnn.png
:width: 100%
:align: center
Original GQ-CNN architecture from `Dex-Net 2.0`_.
.. figure:: ../images/fcgqcnn_arch_diagram.png
:width: 100%
:align: center
Alternate faster GQ-CNN architecture from `FC-GQ-CNN`_.
The GQ-CNN weights are trained on datasets of synthetic point clouds, parallel jaw grasps, and grasp metrics generated from physics-based models with domain randomization for sim-to-real transfer. See the ongoing `Dexterity Network (Dex-Net)`_ project for more information.
.. _Dexterity Network (Dex-Net): https://berkeleyautomation.github.io/dex-net
.. _Dex-Net 2.0: https://berkeleyautomation.github.io/dex-net/#dexnet_2
.. _FC-GQ-CNN: https://berkeleyautomation.github.io/fcgqcnn
================================================
FILE: docs/source/install/install.rst
================================================
Prerequisites
~~~~~~~~~~~~~
Python
""""""
The `gqcnn` package has only been tested with `Python 3.5`, `Python 3.6`, and `Python 3.7`.
Ubuntu
""""""
The `gqcnn` package has only been tested with `Ubuntu 12.04`, `Ubuntu 14.04` and `Ubuntu 16.04`.
Virtualenv
""""""""""
We highly recommend using a Python environment management system, in particular `Virtualenv`, with the Pip and ROS installations. **Note: Several users have encountered problems with dependencies when using Conda.**
Pip Installation
~~~~~~~~~~~~~~~~
The pip installation is intended for users who are **only interested in 1) Training GQ-CNNs or 2) Grasp planning on saved RGBD images**, not
interfacing with a physical robot.
If you have intentions of using GQ-CNNs for grasp planning on a physical robot, we suggest you `install as a ROS package`_.
.. _install as a ROS package: https://berkeleyautomation.github.io/gqcnn/install/install.html#ros-installation
1. Clone the repository
"""""""""""""""""""""""
Clone or download the `project`_ from Github. ::
$ git clone https://github.com/BerkeleyAutomation/gqcnn.git
.. _project: https://github.com/BerkeleyAutomation/gqcnn
2. Run pip installation
"""""""""""""""""""""""
Change directories into the `gqcnn` repository and run the pip installation. ::
$ pip install .
This will install `gqcnn` in your current virtual environment.
.. _ros-install:
ROS Installation
~~~~~~~~~~~~~~~~
Installation as a ROS package is intended for users who wish to use GQ-CNNs to plan grasps on a physical robot.
1. Clone the repository
"""""""""""""""""""""""
Clone or download the `project`_ from Github. ::
$ cd <path_to_your_catkin_workspace>/src
$ git clone https://github.com/BerkeleyAutomation/gqcnn.git
2. Build the catkin package
"""""""""""""""""""""""""""
Build the catkin package. ::
$ cd <path_to_your_catkin_workspace>
$ catkin_make
Then re-source `devel/setup.bash` for the package to be available through Python.
Docker Installation
~~~~~~~~~~~~~~~~~~~
We currently do not provide pre-built Docker images, but you can build them yourself. This will require you to have installed `Docker`_ or `Nvidia-Docker`_ if you plan on using GPUs. Note that our provided build for GPUs uses CUDA 10.0 and cuDNN 7.0, so make sure that this is compatible with your GPU hardware. If you wish to use a different CUDA/cuDNN version, change the base image in `docker/gpu/Dockerfile` to the desired `CUDA/cuDNN image distribution`_. **Note that other images have not yet been tested.**
.. _Docker: https://www.docker.com/
.. _Nvidia-Docker: https://github.com/NVIDIA/nvidia-docker
.. _CUDA/cuDNN image distribution: https://hub.docker.com/r/nvidia/cuda/
1. Clone the repository
"""""""""""""""""""""""
Clone or download the `project`_ from Github. ::
$ git clone https://github.com/BerkeleyAutomation/gqcnn.git
.. _project: https://github.com/BerkeleyAutomation/gqcnn
2. Build Docker images
""""""""""""""""""""""
Change directories into the `gqcnn` repository and run the build script. ::
$ ./scripts/docker/build-docker.sh
This will build the images `gqcnn/cpu` and `gqcnn/gpu`.
3. Run Docker image
""""""""""""""""""""
To run `gqcnn/cpu`: ::
$ docker run --rm -it gqcnn/cpu
To run `gqcnn/gpu`: ::
$ nvidia-docker run --rm -it gqcnn/gpu
Note the use of `nvidia-docker` in the latter to enable the Nvidia runtime.
You will then see an interactive shell like this: ::
$ root@a96488604093:~/Workspace/gqcnn#
Now you can proceed to run the examples and tutorial!
================================================
FILE: docs/source/license/license.rst
================================================
License
~~~~~~~
The `gqcnn` library, pre-trained models, and raw datasets are licensed under The Regents of the University of California (Regents). Copyright ©2019. All Rights Reserved.
Permission to use, copy, modify, and distribute this software and its documentation for educational,
research, and not-for-profit purposes, without fee and without a signed licensing agreement, is
hereby granted, provided that the above copyright notice, this paragraph and the following two
paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology
Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-
7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
The raw datasets are generated from 3D object models from `3DNet`_ and `the KIT Object Database`_ that may be subject to copyright.
.. _3DNet: https://repo.acin.tuwien.ac.at/tmp/permanent/3d-net.org/
.. _the KIT Object Database: https://h2t-projects.webarchiv.kit.edu/Projects/ObjectModelsWebUI/
================================================
FILE: docs/source/replication/replication.rst
================================================
Replicating Results
~~~~~~~~~~~~~~~~~~~
Numerous publications in the larger `Dex-Net`_ project utilize GQ-CNNs, particularly `Dex-Net 2.0`_, `Dex-Net 2.1`_, `Dex-Net 3.0`_, `Dex-Net 4.0`_, and `FC-GQ-CNN`_. One of the goals of the `gqcnn` library is to provide the necessary code and instructions for replicating the results of these papers. Thus, we have provided a number of replication scripts under the `scripts/` directory.
There are two ways to replicate results:
#. **Use a pre-trained model:** Download a pre-trained GQ-CNN model and run an example policy.
#. **Train from scratch:** Download the raw dataset, train a GQ-CNN model, and run an example policy using the model you just trained.
**We highly encourage method 1.** Note that method 2 is computationally expensive as training takes roughly 24 hours on a Nvidia Titan Xp GPU. Furthermore, the raw datasets are fairly large in size.
Please keep in mind that GQ-CNN models are sensitive to the following parameters used during dataset generation:
#. The robot gripper
#. The depth camera
#. The distance between the camera and workspace.
As a result, we cannot guarantee performance of our pre-trained models on other physical setups.
For more information about the pre-trained models and sample inputs for the example policy, see :ref:`pre-trained-models` and :ref:`sample-inputs`.
Using a Pre-trained Model
=========================
First download the pre-trained models. ::
$ ./scripts/downloads/models/download_models.sh
Dex-Net 2.0
"""""""""""
Evaluate the pre-trained GQ-CNN model. ::
$ ./scripts/policies/run_all_dex-net_2.0_examples.sh
Dex-Net 2.1
"""""""""""
Evaluate the pre-trained GQ-CNN model. ::
$ ./scripts/policies/run_all_dex-net_2.1_examples.sh
Dex-Net 3.0
"""""""""""
Evaluate the pre-trained GQ-CNN model. ::
$ ./scripts/policies/run_all_dex-net_3.0_examples.sh
Dex-Net 4.0
"""""""""""
To evaluate the pre-trained **parallel jaw** GQ-CNN model. ::
$ ./scripts/policies/run_all_dex-net_4.0_pj_examples.sh
To evaluate the pre-trained **suction** GQ-CNN model. ::
$ ./scripts/policies/run_all_dex-net_4.0_suction_examples.sh
FC-GQ-CNN
"""""""""""
To evaluate the pre-trained **parallel jaw** FC-GQ-CNN model. ::
$ ./scripts/policies/run_all_dex-net_4.0_fc_pj_examples.sh
To evaluate the pre-trained **suction** FC-GQ-CNN model. ::
$ ./scripts/policies/run_all_dex-net_4.0_fc_suction_examples.sh
Training from Scratch
=====================
Dex-Net 2.0
"""""""""""
First download the appropriate dataset. ::
$ ./scripts/downloads/datasets/download_dex-net_2.0.sh
Then train a GQ-CNN from scratch. ::
$ ./scripts/training/train_dex-net_2.0.sh
Finally, evaluate the trained GQ-CNN. ::
$ ./scripts/policies/run_all_dex-net_2.0_examples.sh
Dex-Net 2.1
"""""""""""
First download the appropriate dataset. ::
$ ./scripts/downloads/datasets/download_dex-net_2.1.sh
Then train a GQ-CNN from scratch. ::
$ ./scripts/training/train_dex-net_2.1.sh
Finally, evaluate the trained GQ-CNN. ::
$ ./scripts/policies/run_all_dex-net_2.1_examples.sh
Dex-Net 3.0
"""""""""""
First download the appropriate dataset. ::
$ ./scripts/downloads/datasets/download_dex-net_3.0.sh
Then train a GQ-CNN from scratch. ::
$ ./scripts/training/train_dex-net_3.0.sh
Finally, evaluate the trained GQ-CNN. ::
$ ./scripts/policies/run_all_dex-net_3.0_examples.sh
Dex-Net 4.0
"""""""""""
To replicate the `Dex-Net 4.0`_ **parallel jaw** results, first download the appropriate dataset. ::
$ ./scripts/downloads/datasets/download_dex-net_4.0_pj.sh
Then train a GQ-CNN from scratch. ::
$ ./scripts/training/train_dex-net_4.0_pj.sh
Finally, evaluate the trained GQ-CNN. ::
$ ./scripts/policies/run_all_dex-net_4.0_pj_examples.sh
To replicate the `Dex-Net 4.0`_ **suction** results, first download the appropriate dataset. ::
$ ./scripts/downloads/datasets/download_dex-net_4.0_suction.sh
Then train a GQ-CNN from scratch. ::
$ ./scripts/training/train_dex-net_4.0_suction.sh
Finally, evaluate the trained GQ-CNN. ::
$ ./scripts/policies/run_all_dex-net_4.0_suction_examples.sh
FC-GQ-CNN
"""""""""""
To replicate the `FC-GQ-CNN`_ **parallel jaw** results, first download the appropriate dataset. ::
$ ./scripts/downloads/datasets/download_dex-net_4.0_fc_pj.sh
Then train a FC-GQ-CNN from scratch. ::
$ ./scripts/training/train_dex-net_4.0_fc_pj.sh
Finally, evaluate the trained FC-GQ-CNN. ::
$ ./scripts/policies/run_all_dex-net_4.0_fc_pj_examples.sh
To replicate the `FC-GQ-CNN`_ **suction** results, first download the appropriate dataset. ::
$ ./scripts/downloads/datasets/download_dex-net_4.0_fc_suction.sh
Then train a FC-GQ-CNN from scratch. ::
$ ./scripts/training/train_dex-net_4.0_fc_suction.sh
Finally, evaluate the trained FC-GQ-CNN. ::
$ ./scripts/policies/run_all_dex-net_4.0_fc_suction_examples.sh
.. _Dex-Net: https://berkeleyautomation.github.io/dex-net/
.. _Dex-Net 2.0: https://berkeleyautomation.github.io/dex-net/#dexnet_2
.. _Dex-Net 2.1: https://berkeleyautomation.github.io/dex-net/#dexnet_21
.. _Dex-Net 3.0: https://berkeleyautomation.github.io/dex-net/#dexnet_3
.. _Dex-Net 4.0: https://berkeleyautomation.github.io/dex-net/#dexnet_4
.. _FC-GQ-CNN: https://berkeleyautomation.github.io/fcgqcnn
.. _gqcnn: https://github.com/BerkeleyAutomation/gqcnn
================================================
FILE: docs/source/tutorials/analysis.rst
================================================
Analysis
~~~~~~~~
It is helpful to check the training and validation loss and classification errors to ensure that the network has trained successfully. To analyze the performance of a trained GQ-CNN, run: ::
$ python tools/analyze_gqcnn_performance.py <model_name>
The args are:
#. **model_name**: Name of a trained model.
The script will store a detailed analysis in `analysis/<model_name>/`.
To analyze the networks we just trained, run: ::
$ python tools/analyze_gqcnn_performance.py gqcnn_example_pj
$ python tools/analyze_gqcnn_performance.py gqcnn_example_suction
Below is the expected output for the **parallel jaw** network. Please keep in mind that the exact performance values may change due to randomization in the training dataset and random weight initialization: ::
$ GQCNNAnalyzer INFO TRAIN
$ GQCNNAnalyzer INFO Original error: 36.812
$ GQCNNAnalyzer INFO Final error: 6.061
$ GQCNNAnalyzer INFO Orig loss: 0.763
$ GQCNNAnalyzer INFO Final loss: 0.248
$ GQCNNAnalyzer INFO VAL
$ GQCNNAnalyzer INFO Original error: 32.212
$ GQCNNAnalyzer INFO Final error: 7.509
$ GQCNNAnalyzer INFO Normalized error: 0.233
A set of plots will be saved to `analysis/gqcnn_example_pj/`. The plots `training_error_rates.png` and `precision_recall.png` should look like the following:
.. image:: ../images/plots/pj_error_rate.png
:width: 49 %
.. image:: ../images/plots/pj_roc.png
:width: 49 %
Here is the expected output for the **suction** network: ::
$ GQCNNAnalyzer INFO TRAIN
$ GQCNNAnalyzer INFO Original error: 17.844
$ GQCNNAnalyzer INFO Final error: 6.417
$ GQCNNAnalyzer INFO Orig loss: 0.476
$ GQCNNAnalyzer INFO Final loss: 0.189
$ GQCNNAnalyzer INFO VAL
$ GQCNNAnalyzer INFO Original error: 18.036
$ GQCNNAnalyzer INFO Final error: 6.907
$ GQCNNAnalyzer INFO Normalized error: 0.383
A set of plots will be saved to `analysis/gqcnn_example_suction/`. The plots `training_error_rates.png` and `precision_recall.png` should look like the following:
.. image:: ../images/plots/suction_error_rate.png
:width: 49 %
.. image:: ../images/plots/suction_roc.png
:width: 49 %
================================================
FILE: docs/source/tutorials/planning.rst
================================================
Grasp Planning
~~~~~~~~~~~~~~
Grasp planning involves searching for the grasp with the highest predicted probability of success given a point cloud.
In the `gqcnn` package this is implemented as policies that map an RGBD image to a 6-DOF grasping pose by maximizing the output of a GQ-CNN. The maximization can be implemented with iterative methods such as the `Cross Entropy Method (CEM)`_, which is used in `Dex-Net 2.0`_, `Dex-Net 2.1`_, `Dex-Net 3.0`_, `Dex-Net 4.0`_, or much faster `fully convolutional networks`, which are used in the `FC-GQ-CNN`_.
.. _Cross Entropy Method (CEM): https://en.wikipedia.org/wiki/Cross-entropy_method
.. _Dex-Net 2.0: https://berkeleyautomation.github.io/dex-net/#dexnet_2
.. _Dex-Net 2.1: https://berkeleyautomation.github.io/dex-net/#dexnet_21
.. _Dex-Net 3.0: https://berkeleyautomation.github.io/dex-net/#dexnet_3
.. _Dex-Net 4.0: https://berkeleyautomation.github.io/dex-net/#dexnet_4
.. _FC-GQ-CNN: https://berkeleyautomation.github.io/fcgqcnn
We provide example policies in `examples/`. In particular, we provide both an example Python policy and an example ROS policy. **Note that the ROS policy requires the ROS gqcnn installation**, which can be found :ref:`here `. We highly recommend using the Python policy unless you need to plan grasps on a physical robot using ROS.
.. _sample-inputs:
Sample Inputs
-------------
Sample inputs from our experimental setup are provided with the repo:
#. **data/examples/clutter/phoxi/dex-net_4.0**: Set of example images from a PhotoNeo PhoXi S containing objects used in `Dex-Net 4.0`_ experiments arranged in heaps.
#. **data/examples/clutter/phoxi/fcgqcnn**: Set of example images from a PhotoNeo PhoXi S containing objects used in `FC-GQ-CNN`_ experiments arranged in heaps.
#. **data/examples/single_object/primesense/**: Set of example images from a Primesense Carmine containing objects used in `Dex-Net 2.0`_ experiments in singulation.
#. **data/examples/clutter/primesense/**: Set of example images from a Primesense Carmine containing objects used in `Dex-Net 2.1`_ experiments arranged in heaps.
**\*\*Note that when trying these sample inputs, you must make sure that the GQ-CNN model you are using was trained for the corresponding camera and input type (singulation/clutter). See the following section for more details.\*\***
.. _pre-trained-models:
Pre-trained Models
------------------
Pre-trained parallel jaw and suction models for `Dex-Net 4.0`_ are automatically downloaded with the `gqcnn` package installation. If you do wish to try out models for older results (or for our experimental `FC-GQ-CNN`_), all pre-trained models can be downloaded with: ::
$ ./scripts/downloads/models/download_models.sh
The models are:
#. **GQCNN-2.0**: For `Dex-Net 2.0`_, trained on images of objects in singulation with parameters for a Primesense Carmine.
#. **GQCNN-2.1**: For `Dex-Net 2.1`_, a `Dex-Net 2.0`_ model fine-tuned on images of objects in clutter with parameters for a Primesense Carmine.
#. **GQCNN-3.0**: For `Dex-Net 3.0`_, trained on images of objects in clutter with parameters for a Primesense Carmine.
#. **GQCNN-4.0-PJ**: For `Dex-Net 4.0`_, trained on images of objects in clutter with parameters for a PhotoNeo PhoXi S.
#. **GQCNN-4.0-SUCTION**: For `Dex-Net 4.0`_, trained on images of objects in clutter with parameters for a PhotoNeo PhoXi S.
#. **FC-GQCNN-4.0-PJ**: For `FC-GQ-CNN`_, trained on images of objects in clutter with parameters for a PhotoNeo PhoXi S.
#. **FC-GQCNN-4.0-SUCTION**: For `FC-GQ-CNN`_, trained on images of objects in clutter with parameters for a PhotoNeo PhoXi S.
**\*\*Note that GQ-CNN models are sensitive to the parameters used during dataset generation, specifically 1) Gripper geometry, an ABB YuMi Parallel Jaw Gripper for all our pre-trained models 2) Camera intrinsics, either a Primesense Carmine or PhotoNeo Phoxi S for all our pre-trained models (see above for which one) 3) Distance between camera and workspace during rendering, 50-70cm for all our pre-trained models. Thus we cannot guarantee performance of our pre-trained models on other physical setups. If you have a specific use-case in mind, please reach out to us.\*\*** We are actively researching how to generate more robust datasets that can generalize across robots, cameras, and viewpoints!
Python Policy
-------------
The example Python policy can be queried on saved images using: ::
$ python examples/policy.py <model_name> --depth_image <depth_image_filename> --segmask <segmask_filename> --camera_intr <camera_intr_filename>
The args are:
#. **model_name**: Name of the GQ-CNN model to use.
#. **depth_image_filename**: Path to a depth image (float array in .npy format).
#. **segmask_filename**: Path to an object segmentation mask (binary image in .png format).
#. **camera_intr_filename**: Path to a camera intrinsics file (.intr file generated with `BerkeleyAutomation's`_ `autolab_core`_ package).
.. _BerkeleyAutomation's: https://github.com/BerkeleyAutomation
.. _autolab_core: https://github.com/BerkeleyAutomation/autolab_core
To evaluate the pre-trained `Dex-Net 4.0`_ **parallel jaw** network on sample images of objects in heaps run: ::
$ python examples/policy.py GQCNN-4.0-PJ --depth_image data/examples/clutter/phoxi/dex-net_4.0/depth_0.npy --segmask data/examples/clutter/phoxi/dex-net_4.0/segmask_0.png --camera_intr data/calib/phoxi/phoxi.intr
To evaluate the pre-trained `Dex-Net 4.0`_ **suction** network on sample images of objects in heaps run: ::
$ python examples/policy.py GQCNN-4.0-SUCTION --depth_image data/examples/clutter/phoxi/dex-net_4.0/depth_0.npy --segmask data/examples/clutter/phoxi/dex-net_4.0/segmask_0.png --camera_intr data/calib/phoxi/phoxi.intr
.. _ros-policy:
ROS Policy
----------
First start the grasp planning service: ::
$ roslaunch gqcnn grasp_planning_service.launch model_name:=<model_name>
The args are:
#. **model_name**: Name of the GQ-CNN model to use. Default is **GQCNN-4.0-PJ**.
#. **model_dir**: Path to the directory where the GQ-CNN models are located. Default is **models/**. If you are using the provided **download_models.sh** script, you shouldn't have to modify this.
To start the grasp planning service with the pre-trained `Dex-Net 4.0`_ **parallel jaw** network run: ::
$ roslaunch gqcnn grasp_planning_service.launch model_name:=GQCNN-4.0-PJ
To start the grasp planning service with the pre-trained `Dex-Net 4.0`_ **suction** network run: ::
$ roslaunch gqcnn grasp_planning_service.launch model_name:=GQCNN-4.0-SUCTION
The example ROS policy can then be queried on saved images using: ::
$ python examples/policy_ros.py --depth_image <depth_image_filename> --segmask <segmask_filename> --camera_intr <camera_intr_filename>
The args are:
#. **depth_image_filename**: Path to a depth image (float array in .npy format).
#. **segmask_filename**: Path to an object segmentation mask (binary image in .png format).
#. **camera_intr_filename**: Path to a camera intrinsics file (.intr file generated with `BerkeleyAutomation's`_ `autolab_core`_ package).
To query the policy on sample images of objects in heaps run: ::
$ python examples/policy_ros.py --depth_image data/examples/clutter/phoxi/dex-net_4.0/depth_0.npy --segmask data/examples/clutter/phoxi/dex-net_4.0/segmask_0.png --camera_intr data/calib/phoxi/phoxi.intr
Usage on a Physical Robot with ROS
----------------------------------
To run the GQ-CNN on a physical robot with ROS, you will want to implement your own ROS node to query the grasp planning service similar to what `examples/policy_ros.py` does. If you are interested in replicating this functionality on your own robot, please contact Jeff Mahler (jmahler@berkeley.edu) with the subject line: "Interested in GQ-CNN ROS Service".
FC-GQ-CNN Policy
----------------
Our most recent research result, the `FC-GQ-CNN`_, combines novel fully convolutional network architectures with our prior work on GQ-CNNs to increase policy rate and reliability. Instead of relying on the `Cross Entropy Method (CEM)`_ to iteratively search over the policy action space for the best grasp, the FC-GQ-CNN instead densely and efficiently evaluates the entire action space in parallel. It is thus able to consider 5000x more grasps in 0.625s, resulting in a MPPH (Mean Picks Per Hour) of 296, compared to the prior 250 MPPH of `Dex-Net 4.0`_.
.. figure:: ../images/fcgqcnn_arch_diagram.png
:width: 100 %
:align: center
FC-GQ-CNN architecture.
You can download the pre-trained `FC-GQ-CNN`_ parallel jaw and suction models along with the other pre-trained models: ::
$ ./scripts/downloads/models/download_models.sh
Then run the Python policy with the `\\--fully_conv` flag.
To evaluate the pre-trained `FC-GQ-CNN`_ **parallel jaw** network on sample images of objects in heaps run: ::
$ python examples/policy.py FC-GQCNN-4.0-PJ --fully_conv --depth_image data/examples/clutter/phoxi/fcgqcnn/depth_0.npy --segmask data/examples/clutter/phoxi/fcgqcnn/segmask_0.png --camera_intr data/calib/phoxi/phoxi.intr
To evaluate the pre-trained `FC-GQ-CNN`_ **suction** network on sample images of objects in heaps run: ::
$ python examples/policy.py FC-GQCNN-4.0-SUCTION --fully_conv --depth_image data/examples/clutter/phoxi/fcgqcnn/depth_0.npy --segmask data/examples/clutter/phoxi/fcgqcnn/segmask_0.png --camera_intr data/calib/phoxi/phoxi.intr
With ROS
^^^^^^^^
Review the section on using the normal ROS policy first, which can be found :ref:`here `.
Add the additional arg **fully_conv:=True** when launching the grasp planning service and provide the corresponding network (**FC-GQCNN-4.0-PJ** for **parallel jaw** and **FC-GQCNN-4.0-SUCTION** for **suction**).
If you wish to test on inputs other than those provided in `data/examples/clutter/phoxi/fcgqcnn/`, you will need to edit the input height and width configuration in the appropriate configuration file in `cfg/examples/` under `["policy"]["metric"]["fully_conv_gqcnn_config"]`.
================================================
FILE: docs/source/tutorials/training.rst
================================================
Training
~~~~~~~~
The `gqcnn` package can be used to train a `Dex-Net 4.0`_ GQ-CNN model on a custom offline `Dex-Net`_ dataset. Because training from scratch can be time-consuming, the most efficient way to train a new network is to fine-tune the weights of a pre-trained `Dex-Net 4.0`_ GQ-CNN model, which has already been trained on millions of images.
.. _Dex-Net 4.0: https://berkeleyautomation.github.io/dex-net/#dexnet_4
.. _Dex-Net: https://berkeleyautomation.github.io/dex-net/
To fine-tune a GQ-CNN run: ::
$ python tools/finetune.py <training_dataset_path> <pretrained_network_name> --config_filename <config_filename> --name <model_name>
The args are:
#. **training_dataset_path**: Path to the training dataset.
#. **pretrained_network_name**: Name of pre-trained GQ-CNN.
#. **config_filename**: Name of the config file to use.
#. **model_name**: Name for the model.
To train a GQ-CNN for a **parallel jaw** gripper on a sample dataset, run the fine-tuning script: ::
$ python tools/finetune.py data/training/example_pj/ GQCNN-4.0-PJ --config_filename cfg/finetune_example_pj.yaml --name gqcnn_example_pj
To train a GQ-CNN for a **suction** gripper run: ::
$ python tools/finetune.py data/training/example_suction/ GQCNN-4.0-SUCTION --config_filename cfg/finetune_example_suction.yaml --name gqcnn_example_suction
Visualizing Training
--------------------
The `gqcnn` model contains support for visualizing training progress through Tensorboard. Tensorboard is automatically launched when the training script is run and can be accessed by navigating to **localhost:6006** in a web browser. There you will find something like the following:
.. image:: ../images/tensorboard.png
:width: 100 %
Which displays useful training statistics such as validation error, minibatch loss, and learning rate.
The Tensorflow summaries are stored in `models/<model_name>/tensorboard_summaries/`.
================================================
FILE: docs/source/tutorials/tutorial.rst
================================================
Overview
~~~~~~~~
There are two main use cases of the `gqcnn` package:
#. :ref:`training` a `Dex-Net 4.0`_ GQ-CNN model on an offline `Dex-Net`_ dataset of point clouds, grasps, and grasp success metrics, and then grasp planning on RGBD images.
#. :ref:`grasp planning` on RGBD images using a pre-trained `Dex-Net 4.0`_ GQ-CNN model.
.. _Dex-Net 4.0: https://berkeleyautomation.github.io/dex-net/#dexnet_4
.. _Dex-Net: https://berkeleyautomation.github.io/dex-net/
Click on the links or scroll down to get started!
Prerequisites
-------------
Before running the tutorials please download the example models and datasets: ::
$ cd /path/to/your/gqcnn
$ ./scripts/downloads/download_example_data.sh
$ ./scripts/downloads/models/download_models.sh
Running Python Scripts
----------------------
All `gqcnn` Python scripts are designed to be run from the top-level directory of your `gqcnn` repo by default. This is because every script takes in a YAML file specifying parameters for the script, and this YAML file is stored relative to the repository root directory.
We recommend that you run all scripts using this paradigm: ::
cd /path/to/your/gqcnn
python /path/to/script.py
.. _training:
.. include:: training.rst
.. _analysis:
.. include:: analysis.rst
.. _grasp planning:
.. include:: planning.rst
================================================
FILE: examples/antipodal_grasp_sampling.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Demonstrates image-based antipodal grasp candidate sampling, which is used in
the Cross Entropy Method (CEM)-based GQ-CNN grasping policy. Samples images
from a BerkeleyAutomation/perception `RgbdSensor`.
Author
------
Jeff Mahler
"""
import argparse
import os
from autolab_core import RigidTransform, YamlConfig, Logger, RgbdImage
from perception import RgbdSensorFactory
from visualization import Visualizer2D as vis
from gqcnn.grasping import AntipodalDepthImageGraspSampler
# Set up logger.
logger = Logger.get_logger("examples/antipodal_grasp_sampling.py")

if __name__ == "__main__":
    # Parse args.
    parser = argparse.ArgumentParser(description=(
        "Sample antipodal grasps on a depth image from an RgbdSensor"))
    parser.add_argument("--config_filename",
                        type=str,
                        default="cfg/examples/antipodal_grasp_sampling.yaml",
                        help="path to configuration file to use")
    args = parser.parse_args()
    config_filename = args.config_filename

    # Read config.
    config = YamlConfig(config_filename)
    sensor_type = config["sensor"]["type"]
    sensor_frame = config["sensor"]["frame"]
    num_grasp_samples = config["num_grasp_samples"]
    gripper_width = config["gripper_width"]
    inpaint_rescale_factor = config["inpaint_rescale_factor"]
    visualize_sampling = config["visualize_sampling"]
    sample_config = config["sampling"]

    # Read camera calib.
    # NOTE(review): `T_camera_world` is never used below; loading is kept so
    # a missing calibration file still fails loudly before the sensor starts.
    tf_filename = "%s_to_world.tf" % (sensor_frame)
    T_camera_world = RigidTransform.load(
        os.path.join(config["calib_dir"], sensor_frame, tf_filename))

    # Setup sensor.
    sensor = RgbdSensorFactory.sensor(sensor_type, config["sensor"])
    sensor.start()
    camera_intr = sensor.ir_intrinsics

    # Read images.
    color_im, depth_im, _ = sensor.frames()

    # Release the sensor as soon as the frames are captured so the device
    # handle is not left open for the rest of the script (the original
    # never called `stop()`).
    sensor.stop()

    color_im = color_im.inpaint(rescale_factor=inpaint_rescale_factor)
    depth_im = depth_im.inpaint(rescale_factor=inpaint_rescale_factor)
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)

    # Sample grasps with a fixed seed for reproducibility.
    grasp_sampler = AntipodalDepthImageGraspSampler(sample_config,
                                                    gripper_width)
    grasps = grasp_sampler.sample(rgbd_im,
                                  camera_intr,
                                  num_grasp_samples,
                                  segmask=None,
                                  seed=100,
                                  visualize=visualize_sampling)

    # Visualize.
    vis.figure()
    vis.imshow(depth_im)
    for grasp in grasps:
        vis.grasp(grasp, scale=1.5, show_center=False, show_axis=True)
    vis.title("Sampled grasps")
    vis.show()
================================================
FILE: examples/policy.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Displays robust grasps planned using a GQ-CNN grasping policy on a set of saved
RGB-D images. The default configuration for the standard GQ-CNN policy is
`cfg/examples/gqcnn_pj.yaml`. The default configuration for the
Fully-Convolutional GQ-CNN policy is `cfg/examples/fc_gqcnn_pj.yaml`.
Author
------
Jeff Mahler & Vishal Satish
"""
import argparse
import json
import os
import time
import numpy as np
from autolab_core import (YamlConfig, Logger, BinaryImage, CameraIntrinsics,
ColorImage, DepthImage, RgbdImage)
from visualization import Visualizer2D as vis
from gqcnn.grasping import (RobustGraspingPolicy,
CrossEntropyRobustGraspingPolicy, RgbdImageState,
FullyConvolutionalGraspingPolicyParallelJaw,
FullyConvolutionalGraspingPolicySuction)
from gqcnn.utils import GripperMode
# Set up logger.
logger = Logger.get_logger("examples/policy.py")

if __name__ == "__main__":
    # Parse args.
    parser = argparse.ArgumentParser(
        description="Run a grasping policy on an example image")
    parser.add_argument("model_name",
                        type=str,
                        default=None,
                        help="name of a trained model to run")
    parser.add_argument(
        "--depth_image",
        type=str,
        default=None,
        help="path to a test depth image stored as a .npy file")
    parser.add_argument("--segmask",
                        type=str,
                        default=None,
                        help="path to an optional segmask to use")
    parser.add_argument("--camera_intr",
                        type=str,
                        default=None,
                        help="path to the camera intrinsics")
    parser.add_argument("--model_dir",
                        type=str,
                        default=None,
                        help="path to the folder in which the model is stored")
    parser.add_argument("--config_filename",
                        type=str,
                        default=None,
                        help="path to configuration file to use")
    parser.add_argument(
        "--fully_conv",
        action="store_true",
        help=("run Fully-Convolutional GQ-CNN policy instead of standard"
              " GQ-CNN policy"))
    args = parser.parse_args()
    model_name = args.model_name
    depth_im_filename = args.depth_image
    segmask_filename = args.segmask
    camera_intr_filename = args.camera_intr
    model_dir = args.model_dir
    config_filename = args.config_filename
    fully_conv = args.fully_conv

    assert not (fully_conv and depth_im_filename is not None
                and segmask_filename is None
                ), "Fully-Convolutional policy expects a segmask."

    # Resolve default example inputs relative to this script so the demo
    # works regardless of the current working directory.
    if depth_im_filename is None:
        if fully_conv:
            depth_im_filename = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "..",
                "data/examples/clutter/primesense/depth_0.npy")
        else:
            depth_im_filename = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "..",
                "data/examples/single_object/primesense/depth_0.npy")
    if fully_conv and segmask_filename is None:
        segmask_filename = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..",
            "data/examples/clutter/primesense/segmask_0.png")
    if camera_intr_filename is None:
        camera_intr_filename = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..",
            "data/calib/primesense/primesense.intr")

    # Set model if provided.
    if model_dir is None:
        model_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 "../models")
    model_path = os.path.join(model_dir, model_name)

    # Get configs. Use a context manager so the config file handle is
    # closed (the original `json.load(open(...))` leaked it).
    with open(os.path.join(model_path, "config.json"), "r") as f:
        model_config = json.load(f)
    try:
        gqcnn_config = model_config["gqcnn"]
        gripper_mode = gqcnn_config["gripper_mode"]
    except KeyError:
        # Legacy models store the network config under "gqcnn_config" and
        # encode the gripper via "input_data_mode".
        gqcnn_config = model_config["gqcnn_config"]
        input_data_mode = gqcnn_config["input_data_mode"]
        if input_data_mode == "tf_image":
            gripper_mode = GripperMode.LEGACY_PARALLEL_JAW
        elif input_data_mode == "tf_image_suction":
            gripper_mode = GripperMode.LEGACY_SUCTION
        elif input_data_mode == "suction":
            gripper_mode = GripperMode.SUCTION
        elif input_data_mode == "multi_suction":
            gripper_mode = GripperMode.MULTI_SUCTION
        elif input_data_mode == "parallel_jaw":
            gripper_mode = GripperMode.PARALLEL_JAW
        else:
            raise ValueError(
                "Input data mode {} not supported!".format(input_data_mode))

    # Pick a default policy config matching the gripper mode.
    if config_filename is None:
        if (gripper_mode == GripperMode.LEGACY_PARALLEL_JAW
                or gripper_mode == GripperMode.PARALLEL_JAW):
            if fully_conv:
                config_filename = os.path.join(
                    os.path.dirname(os.path.realpath(__file__)), "..",
                    "cfg/examples/fc_gqcnn_pj.yaml")
            else:
                config_filename = os.path.join(
                    os.path.dirname(os.path.realpath(__file__)), "..",
                    "cfg/examples/gqcnn_pj.yaml")
        elif (gripper_mode == GripperMode.LEGACY_SUCTION
                or gripper_mode == GripperMode.SUCTION):
            if fully_conv:
                config_filename = os.path.join(
                    os.path.dirname(os.path.realpath(__file__)), "..",
                    "cfg/examples/fc_gqcnn_suction.yaml")
            else:
                config_filename = os.path.join(
                    os.path.dirname(os.path.realpath(__file__)), "..",
                    "cfg/examples/gqcnn_suction.yaml")
    if config_filename is None:
        # No default config exists for this gripper mode (e.g.
        # MULTI_SUCTION); fail early with a clear message instead of
        # letting YamlConfig choke on None.
        raise ValueError(
            "No default config for gripper mode {}; please specify"
            " --config_filename.".format(gripper_mode))

    # Read config.
    config = YamlConfig(config_filename)
    inpaint_rescale_factor = config["inpaint_rescale_factor"]
    policy_config = config["policy"]

    # Make relative paths absolute.
    if "gqcnn_model" in policy_config["metric"]:
        policy_config["metric"]["gqcnn_model"] = model_path
        if not os.path.isabs(policy_config["metric"]["gqcnn_model"]):
            policy_config["metric"]["gqcnn_model"] = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "..",
                policy_config["metric"]["gqcnn_model"])

    # Setup sensor.
    camera_intr = CameraIntrinsics.load(camera_intr_filename)

    # Read images. The color image is a dummy (all zeros) since only depth
    # is used by the policy.
    depth_data = np.load(depth_im_filename)
    depth_im = DepthImage(depth_data, frame=camera_intr.frame)
    color_im = ColorImage(np.zeros([depth_im.height, depth_im.width,
                                    3]).astype(np.uint8),
                          frame=camera_intr.frame)

    # Optionally read a segmask; always mask out invalid depth pixels.
    segmask = None
    if segmask_filename is not None:
        segmask = BinaryImage.open(segmask_filename)
    valid_px_mask = depth_im.invalid_pixel_mask().inverse()
    if segmask is None:
        segmask = valid_px_mask
    else:
        segmask = segmask.mask_binary(valid_px_mask)

    # Inpaint.
    depth_im = depth_im.inpaint(rescale_factor=inpaint_rescale_factor)
    if policy_config["vis"].get("input_images"):
        vis.figure(size=(10, 10))
        num_plot = 1
        if segmask is not None:
            num_plot = 2
        vis.subplot(1, num_plot, 1)
        vis.imshow(depth_im)
        if segmask is not None:
            vis.subplot(1, num_plot, 2)
            vis.imshow(segmask)
        vis.show()

    # Create state.
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
    state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)

    # Set input sizes for fully-convolutional policy.
    if fully_conv:
        policy_config["metric"]["fully_conv_gqcnn_config"][
            "im_height"] = depth_im.shape[0]
        policy_config["metric"]["fully_conv_gqcnn_config"][
            "im_width"] = depth_im.shape[1]

    # Init policy.
    if fully_conv:
        # TODO(vsatish): We should really be doing this in some factory policy.
        if policy_config["type"] == "fully_conv_suction":
            policy = FullyConvolutionalGraspingPolicySuction(policy_config)
        elif policy_config["type"] == "fully_conv_pj":
            policy = FullyConvolutionalGraspingPolicyParallelJaw(policy_config)
        else:
            raise ValueError(
                "Invalid fully-convolutional policy type: {}".format(
                    policy_config["type"]))
    else:
        policy_type = "cem"
        if "type" in policy_config:
            policy_type = policy_config["type"]
        if policy_type == "ranking":
            policy = RobustGraspingPolicy(policy_config)
        elif policy_type == "cem":
            policy = CrossEntropyRobustGraspingPolicy(policy_config)
        else:
            raise ValueError("Invalid policy type: {}".format(policy_type))

    # Query policy.
    policy_start = time.time()
    action = policy(state)
    logger.info("Planning took %.3f sec" % (time.time() - policy_start))

    # Vis final grasp.
    if policy_config["vis"]["final_grasp"]:
        vis.figure(size=(10, 10))
        vis.imshow(rgbd_im.depth,
                   vmin=policy_config["vis"]["vmin"],
                   vmax=policy_config["vis"]["vmax"])
        vis.grasp(action.grasp, scale=2.5, show_center=False, show_axis=True)
        vis.title("Planned grasp at depth {0:.3f}m with Q={1:.3f}".format(
            action.grasp.depth, action.q_value))
        vis.show()
================================================
FILE: examples/policy_ros.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Displays robust grasps planned using a GQ-CNN-based policy on a set of saved
RGB-D images. The default configuration is cfg/examples/policy.yaml.
Author
------
Jeff Mahler
"""
import argparse
import logging
import numpy as np
import os
import rosgraph.roslogging as rl
import rospy
import sys
from cv_bridge import CvBridge, CvBridgeError
from autolab_core import (Point, Logger, BinaryImage, CameraIntrinsics,
ColorImage, DepthImage)
from visualization import Visualizer2D as vis
from gqcnn.grasping import Grasp2D, SuctionPoint2D, GraspAction
from gqcnn.msg import GQCNNGrasp
from gqcnn.srv import GQCNNGraspPlanner, GQCNNGraspPlannerSegmask
# Set up logger.
logger = Logger.get_logger("examples/policy_ros.py")


def _str2bool(value):
    """Parse a boolean command-line flag.

    `argparse` with `type=bool` is a classic pitfall: `bool("False")` is
    `True` because every non-empty string is truthy, so `--vis_grasp False`
    would silently keep visualization enabled. Treat common false-y
    spellings as `False` instead.
    """
    return str(value).strip().lower() not in ("false", "f", "no", "n", "0")


if __name__ == "__main__":
    # Parse args.
    parser = argparse.ArgumentParser(
        description="Run a grasping policy on an example image")
    parser.add_argument(
        "--depth_image",
        type=str,
        default=None,
        help="path to a test depth image stored as a .npy file")
    parser.add_argument("--segmask",
                        type=str,
                        default=None,
                        help="path to an optional segmask to use")
    parser.add_argument("--camera_intr",
                        type=str,
                        default=None,
                        help="path to the camera intrinsics")
    parser.add_argument("--gripper_width",
                        type=float,
                        default=0.05,
                        help="width of the gripper to plan for")
    parser.add_argument("--namespace",
                        type=str,
                        default="gqcnn",
                        help="namespace of the ROS grasp planning service")
    parser.add_argument("--vis_grasp",
                        type=_str2bool,
                        default=True,
                        help="whether or not to visualize the grasp")
    args = parser.parse_args()
    depth_im_filename = args.depth_image
    segmask_filename = args.segmask
    camera_intr_filename = args.camera_intr
    gripper_width = args.gripper_width
    namespace = args.namespace
    vis_grasp = args.vis_grasp

    # Initialize the ROS node.
    rospy.init_node("grasp_planning_example")
    logging.getLogger().addHandler(rl.RosStreamHandler())

    # Resolve default example inputs relative to this script.
    if depth_im_filename is None:
        depth_im_filename = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..",
            "data/examples/single_object/primesense/depth_0.npy")
    if camera_intr_filename is None:
        camera_intr_filename = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..",
            "data/calib/primesense/primesense.intr")

    # Wait for grasp planning service and create service proxy.
    rospy.wait_for_service("%s/grasp_planner" % (namespace))
    rospy.wait_for_service("%s/grasp_planner_segmask" % (namespace))
    plan_grasp = rospy.ServiceProxy("%s/grasp_planner" % (namespace),
                                    GQCNNGraspPlanner)
    plan_grasp_segmask = rospy.ServiceProxy(
        "%s/grasp_planner_segmask" % (namespace), GQCNNGraspPlannerSegmask)
    cv_bridge = CvBridge()

    # Set up sensor.
    camera_intr = CameraIntrinsics.load(camera_intr_filename)

    # Read images. The color image is a dummy (all zeros) since only depth
    # is used for planning.
    depth_im = DepthImage.open(depth_im_filename, frame=camera_intr.frame)
    color_im = ColorImage(np.zeros([depth_im.height, depth_im.width,
                                    3]).astype(np.uint8),
                          frame=camera_intr.frame)

    # Plan a grasp, using the segmask service variant if a segmask was given.
    if segmask_filename is not None:
        segmask = BinaryImage.open(segmask_filename, frame=camera_intr.frame)
        grasp_resp = plan_grasp_segmask(color_im.rosmsg, depth_im.rosmsg,
                                        camera_intr.rosmsg, segmask.rosmsg)
    else:
        grasp_resp = plan_grasp(color_im.rosmsg, depth_im.rosmsg,
                                camera_intr.rosmsg)
    grasp = grasp_resp.grasp

    # Convert the ROS grasp message to a grasp action.
    grasp_type = grasp.grasp_type
    if grasp_type == GQCNNGrasp.PARALLEL_JAW:
        center = Point(np.array([grasp.center_px[0], grasp.center_px[1]]),
                       frame=camera_intr.frame)
        grasp_2d = Grasp2D(center,
                           grasp.angle,
                           grasp.depth,
                           width=gripper_width,
                           camera_intr=camera_intr)
    elif grasp_type == GQCNNGrasp.SUCTION:
        center = Point(np.array([grasp.center_px[0], grasp.center_px[1]]),
                       frame=camera_intr.frame)
        grasp_2d = SuctionPoint2D(center,
                                  np.array([0, 0, 1]),
                                  grasp.depth,
                                  camera_intr=camera_intr)
    else:
        raise ValueError("Grasp type %d not recognized!" % (grasp_type))
    try:
        thumbnail = DepthImage(cv_bridge.imgmsg_to_cv2(
            grasp.thumbnail, desired_encoding="passthrough"),
                               frame=camera_intr.frame)
    except CvBridgeError as e:
        logger.error(e)
        logger.error("Failed to convert image")
        sys.exit(1)
    action = GraspAction(grasp_2d, grasp.q_value, thumbnail)

    # Vis final grasp.
    if vis_grasp:
        vis.figure(size=(10, 10))
        vis.imshow(depth_im, vmin=0.6, vmax=0.9)
        vis.grasp(action.grasp, scale=2.5, show_center=False, show_axis=True)
        vis.title("Planned grasp on depth (Q=%.3f)" % (action.q_value))
        vis.show()
================================================
FILE: examples/policy_with_image_proc.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Displays robust grasps planned using a GQ-CNN-based policy on a set of saved
RGB-D images. The default configuration is cfg/examples/policy.yaml.
Author
------
Jeff Mahler
"""
import argparse
import os
import time
import numpy as np
import pcl
import skimage
from autolab_core import (PointCloud, RigidTransform, YamlConfig,
Logger, BinaryImage, CameraIntrinsics,
ColorImage, DepthImage, RgbdImage,
SegmentationImage)
from visualization import Visualizer2D as vis
from gqcnn import (RobustGraspingPolicy, CrossEntropyRobustGraspingPolicy,
RgbdImageState)
CLUSTER_TOL = 0.0015  # Euclidean cluster tolerance (meters, presumably).
MIN_CLUSTER_SIZE = 100  # Clusters smaller than this are treated as noise.
MAX_CLUSTER_SIZE = 1000000

# Set up logger.
logger = Logger.get_logger("tools/policy_with_image_proc.py")

if __name__ == "__main__":
    # Parse args.
    parser = argparse.ArgumentParser(
        description="Run a grasping policy on an example image")
    parser.add_argument(
        "--depth_image",
        type=str,
        default=None,
        help="path to a test depth image stored as a .npy file")
    parser.add_argument("--segmask",
                        type=str,
                        default=None,
                        help="path to an optional segmask to use")
    parser.add_argument("--camera_intrinsics",
                        type=str,
                        default=None,
                        help="path to the camera intrinsics")
    parser.add_argument("--camera_pose",
                        type=str,
                        default=None,
                        help="path to the camera pose")
    parser.add_argument("--model_dir",
                        type=str,
                        default=None,
                        help="path to a trained model to run")
    parser.add_argument("--config_filename",
                        type=str,
                        default=None,
                        help="path to configuration file to use")
    args = parser.parse_args()
    depth_im_filename = args.depth_image
    segmask_filename = args.segmask
    camera_intr_filename = args.camera_intrinsics
    camera_pose_filename = args.camera_pose
    model_dir = args.model_dir
    config_filename = args.config_filename

    # Resolve default example inputs relative to this script.
    if depth_im_filename is None:
        depth_im_filename = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..",
            "data/examples/single_object/depth_0.npy")
    if camera_intr_filename is None:
        camera_intr_filename = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..",
            "data/calib/primesense.intr")
    if camera_pose_filename is None:
        camera_pose_filename = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..",
            "data/calib/primesense.tf")
    if config_filename is None:
        config_filename = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..",
            "cfg/examples/replication/dex-net_2.0.yaml")

    # Read config.
    config = YamlConfig(config_filename)
    inpaint_rescale_factor = config["inpaint_rescale_factor"]
    policy_config = config["policy"]

    # Make relative paths absolute.
    if model_dir is not None:
        policy_config["metric"]["gqcnn_model"] = model_dir
    if "gqcnn_model" in policy_config["metric"] and not os.path.isabs(
            policy_config["metric"]["gqcnn_model"]):
        policy_config["metric"]["gqcnn_model"] = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..",
            policy_config["metric"]["gqcnn_model"])

    # Setup sensor.
    camera_intr = CameraIntrinsics.load(camera_intr_filename)
    T_camera_world = RigidTransform.load(camera_pose_filename)

    # Read images. Raw values are divided by 1000 (presumably mm -> m;
    # confirm against the data source).
    depth_data = np.load(depth_im_filename)
    depth_data = depth_data.astype(np.float32) / 1000.0
    depth_im = DepthImage(depth_data, frame=camera_intr.frame)
    color_im = ColorImage(np.zeros([depth_im.height, depth_im.width,
                                    3]).astype(np.uint8),
                          frame=camera_intr.frame)

    # Build a default rectangular workspace segmask; overridden below if a
    # segmask file was supplied.
    # NOTE: `import skimage` alone is not guaranteed to expose the `draw`
    # submodule in all scikit-image versions, so import it explicitly.
    import skimage.draw
    mask = np.zeros((camera_intr.height, camera_intr.width, 1), dtype=np.uint8)
    c = np.array([165, 460, 500, 135])
    r = np.array([165, 165, 370, 370])
    rr, cc = skimage.draw.polygon(r, c, shape=mask.shape)
    mask[rr, cc, 0] = 255
    segmask = BinaryImage(mask)
    if segmask_filename is not None:
        segmask = BinaryImage.open(segmask_filename)
    valid_px_mask = depth_im.invalid_pixel_mask().inverse()
    if segmask is None:
        segmask = valid_px_mask
    else:
        segmask = segmask.mask_binary(valid_px_mask)

    # Deproject the depth image into a point cloud for object segmentation.
    point_cloud = camera_intr.deproject(depth_im)
    point_cloud.remove_zero_points()
    pcl_cloud = pcl.PointCloud(point_cloud.data.T.astype(np.float32))
    tree = pcl_cloud.make_kdtree()

    # Find large clusters (likely to be real objects instead of noise).
    ec = pcl_cloud.make_EuclideanClusterExtraction()
    ec.set_ClusterTolerance(CLUSTER_TOL)
    ec.set_MinClusterSize(MIN_CLUSTER_SIZE)
    ec.set_MaxClusterSize(MAX_CLUSTER_SIZE)
    ec.set_SearchMethod(tree)
    cluster_indices = ec.Extract()
    obj_segmask_data = np.zeros(depth_im.shape)

    # Label the pixels of each large cluster with a distinct positive id.
    for j, indices in enumerate(cluster_indices):
        num_points = len(indices)
        points = np.zeros([3, num_points])
        for i, index in enumerate(indices):
            points[0, i] = pcl_cloud[index][0]
            points[1, i] = pcl_cloud[index][1]
            points[2, i] = pcl_cloud[index][2]
        segment = PointCloud(points, frame=camera_intr.frame)
        depth_segment = camera_intr.project_to_image(segment)
        obj_segmask_data[depth_segment.data > 0] = j + 1
    obj_segmask = SegmentationImage(obj_segmask_data.astype(np.uint8))
    obj_segmask = obj_segmask.mask_binary(segmask)

    # Inpaint.
    depth_im = depth_im.inpaint(rescale_factor=inpaint_rescale_factor)
    if policy_config["vis"].get("input_images"):
        vis.figure(size=(10, 10))
        num_plot = 3
        vis.subplot(1, num_plot, 1)
        vis.imshow(depth_im)
        vis.subplot(1, num_plot, 2)
        vis.imshow(segmask)
        vis.subplot(1, num_plot, 3)
        vis.imshow(obj_segmask)
        vis.show()

        from visualization import Visualizer3D as vis3d
        point_cloud = camera_intr.deproject(depth_im)
        vis3d.figure()
        vis3d.points(point_cloud,
                     subsample=3,
                     random=True,
                     color=(0, 0, 1),
                     scale=0.001)
        vis3d.pose(RigidTransform())
        vis3d.pose(T_camera_world.inverse())
        vis3d.show()

    # Create state.
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
    state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)

    # Init policy ("cem" unless the config says otherwise).
    policy_type = "cem"
    if "type" in policy_config:
        policy_type = policy_config["type"]
    if policy_type == "ranking":
        policy = RobustGraspingPolicy(policy_config)
    else:
        policy = CrossEntropyRobustGraspingPolicy(policy_config)

    # Query policy.
    policy_start = time.time()
    action = policy(state)
    logger.info("Planning took %.3f sec" % (time.time() - policy_start))

    # Vis final grasp.
    if policy_config["vis"]["final_grasp"]:
        vis.figure(size=(10, 10))
        vis.imshow(rgbd_im.depth,
                   vmin=policy_config["vis"]["vmin"],
                   vmax=policy_config["vis"]["vmax"])
        vis.grasp(action.grasp, scale=2.5, show_center=False, show_axis=True)
        vis.title("Planned grasp on depth (Q=%.3f)" % (action.q_value))
        vis.show()

    # Get the grasp pose in the camera frame, approaching along the world
    # -z axis.
    T_grasp_camera = action.grasp.pose(
        grasp_approach_dir=-T_camera_world.inverse().z_axis)
    grasp_pose_msg = T_grasp_camera.pose_msg
    # TODO: Control to reach the grasp pose.
================================================
FILE: gqcnn/__init__.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
"""
from .model import get_gqcnn_model, get_fc_gqcnn_model
from .training import get_gqcnn_trainer
from .grasping import (RobustGraspingPolicy, UniformRandomGraspingPolicy,
CrossEntropyRobustGraspingPolicy, RgbdImageState,
FullyConvolutionalGraspingPolicyParallelJaw,
FullyConvolutionalGraspingPolicySuction)
from .analysis import GQCNNAnalyzer
from .search import GQCNNSearch
from .utils import NoValidGraspsException, NoAntipodalPairsFoundException
# Public API of the `gqcnn` package; mirrors the names imported above.
__all__ = [
    "get_gqcnn_model", "get_fc_gqcnn_model", "get_gqcnn_trainer",
    "RobustGraspingPolicy", "UniformRandomGraspingPolicy",
    "CrossEntropyRobustGraspingPolicy", "RgbdImageState",
    "FullyConvolutionalGraspingPolicyParallelJaw",
    "FullyConvolutionalGraspingPolicySuction", "GQCNNAnalyzer", "GQCNNSearch",
    "NoValidGraspsException", "NoAntipodalPairsFoundException"
]
================================================
FILE: gqcnn/analysis/__init__.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
"""
from .analyzer import GQCNNAnalyzer
# The analyzer class is the only public export of this subpackage.
__all__ = ["GQCNNAnalyzer"]
================================================
FILE: gqcnn/analysis/analyzer.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Class for analyzing a GQ-CNN model for grasp quality prediction.
Author
------
Jeff Mahler
"""
import json
import logging
import os
import matplotlib.pyplot as plt
import numpy as np
from autolab_core import (BinaryClassificationResult, TensorDataset,
Logger, DepthImage)
from autolab_core.constants import JSON_INDENT
import autolab_core.utils as utils
from visualization import Visualizer2D as vis2d
from ..model import get_gqcnn_model
from ..grasping import Grasp2D, SuctionPoint2D
from ..utils import (GripperMode, GeneralConstants, read_pose_data,
GQCNNFilenames)
# Window size used when processing the loss curve (presumably a moving
# average for smoothing — used by plotting code outside this view; confirm
# in `_plot`).
WINDOW = 100  # For loss.
# Cap applied to loss values (NOTE(review): likely for plot scaling —
# confirm in `_plot`).
MAX_LOSS = 5.0
class GQCNNAnalyzer(object):
    """Analyzes a trained GQ-CNN model.

    Runs the model over its training dataset, computes train/validation
    classification results, and saves example visualizations, summary
    statistics, and analysis plots to disk.
    """

    def __init__(self, config, verbose=True, plot_backend="pdf"):
        """
        Parameters
        ----------
        config : dict
            Dictionary of analysis configuration parameters.
        verbose : bool
            Whether or not to log analysis output to stdout.
        plot_backend : str
            Matplotlib plotting backend to use, default is non-interactive
            "pdf" backend.
        """
        self.cfg = config
        self.verbose = verbose

        # By default we want to use a non-interactive backend (ex. "pdf")
        # because the `GQCNNAnalyzer` anyways only saves plots to disk, and
        # interactive backends sometimes cause issues with `localhost` when
        # run remotely through the Linux `screen` program.
        plt.switch_backend(plot_backend)
        self._parse_config()

    def _parse_config(self):
        """Read params from the config file."""
        # Plotting params.
        self.log_rate = self.cfg["log_rate"]
        self.font_size = self.cfg["font_size"]
        self.line_width = self.cfg["line_width"]
        self.dpi = self.cfg["dpi"]
        self.num_bins = self.cfg["num_bins"]
        self.num_vis = self.cfg["num_vis"]

    def analyze(self, model_dir, output_dir, dataset_config=None):
        """Run analysis.

        Parameters
        ----------
        model_dir : str
            Path to the GQ-CNN model to analyze.
        output_dir : str
            Path to save the analysis.
        dataset_config : dict
            Dictionary to configure dataset used for training evaluation if
            different from one used during training.

        Returns
        -------
        tuple
            A 9-tuple of (train_result, val_result, init_train_error,
            final_train_error, init_train_loss, final_train_loss,
            init_val_error, final_val_error, norm_final_val_error).  The
            first two elements are
            :obj:`autolab_core.BinaryClassificationResult` instances; the
            remaining elements are scalars summarizing the training curves.
        """
        # Determine model output dir: use the last non-empty path component
        # of `model_dir` as the model name.
        model_name = ""
        model_root = model_dir
        while model_name == "" and model_root != "":
            model_root, model_name = os.path.split(model_root)

        model_output_dir = os.path.join(output_dir, model_name)
        if not os.path.exists(model_output_dir):
            os.mkdir(model_output_dir)

        # Set up logger.
        self.logger = Logger.get_logger(self.__class__.__name__,
                                        log_file=os.path.join(
                                            model_output_dir, "analysis.log"),
                                        silence=(not self.verbose),
                                        global_log_file=self.verbose)

        self.logger.info("Analyzing model %s" % (model_name))
        self.logger.info("Saving output to %s" % (output_dir))

        # Run predictions.
        train_result, val_result = self._run_prediction_single_model(
            model_dir, model_output_dir, dataset_config)

        # Finally plot curves.
        # NOTE(review): `_plot` returns None when the training curves cannot
        # be read from disk, in which case this unpacking raises TypeError;
        # behavior preserved from the original implementation.
        (init_train_error, final_train_error, init_train_loss,
         final_train_loss, init_val_error, final_val_error,
         norm_final_val_error) = self._plot(model_dir, model_output_dir,
                                            train_result, val_result)

        return (train_result, val_result, init_train_error, final_train_error,
                init_train_loss, final_train_loss, init_val_error,
                final_val_error, norm_final_val_error)

    def _plot_grasp(self,
                    datapoint,
                    image_field_name,
                    pose_field_name,
                    gripper_mode,
                    angular_preds=None):
        """Plots a single grasp represented as a datapoint.

        Parameters
        ----------
        datapoint : dict-like
            Tensor-dataset datapoint containing the image and pose fields.
        image_field_name : str
            Name of the image field in the datapoint.
        pose_field_name : str
            Name of the pose field in the datapoint.
        gripper_mode : :obj:`GripperMode`
            Gripper mode; selects parallel-jaw vs. suction visualization.
        angular_preds : :obj:`numpy.ndarray`
            Optional flat array of per-angular-bin (negative, positive)
            predictions; when provided, one grasp per bin is drawn colored
            by its positive-class probability.
        """
        image = DepthImage(datapoint[image_field_name][:, :, 0])
        depth = datapoint[pose_field_name][2]
        width = 0
        grasps = []
        if gripper_mode == GripperMode.PARALLEL_JAW or \
                gripper_mode == GripperMode.LEGACY_PARALLEL_JAW:
            if angular_preds is not None:
                # BUGFIX: use integer division; `/` yields a float in
                # Python 3 and `range(num_bins)` below would raise TypeError.
                num_bins = angular_preds.shape[0] // 2
                bin_width = GeneralConstants.PI / num_bins
                for i in range(num_bins):
                    bin_cent_ang = i * bin_width + bin_width / 2
                    grasps.append(
                        Grasp2D(center=image.center,
                                angle=GeneralConstants.PI / 2 - bin_cent_ang,
                                depth=depth,
                                width=0.0))
                # Last grasp drawn is the ground-truth grasp.
                grasps.append(
                    Grasp2D(center=image.center,
                            angle=datapoint[pose_field_name][3],
                            depth=depth,
                            width=0.0))
            else:
                grasps.append(
                    Grasp2D(center=image.center,
                            angle=0,
                            depth=depth,
                            width=0.0))
            width = datapoint[pose_field_name][-1]
        else:
            grasps.append(
                SuctionPoint2D(center=image.center,
                               axis=[1, 0, 0],
                               depth=depth))
        vis2d.imshow(image)
        # All but the last grasp are per-bin grasps colored by their
        # positive-class probability; the last one is drawn in blue.
        for i, grasp in enumerate(grasps[:-1]):
            vis2d.grasp(grasp,
                        width=width,
                        color=plt.cm.RdYlGn(angular_preds[i * 2 + 1]))
        vis2d.grasp(grasps[-1], width=width, color="b")

    def _save_examples(self, dataset, result, indices, image_field_name,
                       pose_field_name, gripper_mode, angular_bins,
                       predictions_raw, example_dir):
        """Save grasp visualizations for up to `self.num_vis` randomly-chosen
        examples of each confusion-matrix category (TP/FP/TN/FN)."""
        categories = [
            ("true_positive", result.true_positive_indices),
            ("false_positive", result.false_positive_indices),
            ("true_negative", result.true_negative_indices),
            ("false_negative", result.false_negative_indices),
        ]
        for category, category_indices in categories:
            # Subsample a random set of examples to visualize.
            np.random.shuffle(category_indices)
            category_indices = category_indices[:self.num_vis]
            for i, j in enumerate(category_indices):
                k = indices[j]
                datapoint = dataset.datapoint(
                    k, field_names=[image_field_name, pose_field_name])
                vis2d.clf()
                if angular_bins > 0:
                    self._plot_grasp(datapoint,
                                     image_field_name,
                                     pose_field_name,
                                     gripper_mode,
                                     angular_preds=predictions_raw[j])
                else:
                    self._plot_grasp(datapoint, image_field_name,
                                     pose_field_name, gripper_mode)
                vis2d.title(
                    "Datapoint %d: Pred: %.3f Label: %.3f" %
                    (k, result.pred_probs[j], result.labels[j]),
                    fontsize=self.font_size)
                vis2d.savefig(
                    os.path.join(example_dir,
                                 "%s_%03d.png" % (category, i)))

    def _run_prediction_single_model(self, model_dir, model_output_dir,
                                     dataset_config):
        """Analyze the performance of a single model.

        Loads the model and its dataset, runs predictions over every tensor,
        computes train/val classification results, saves example
        visualizations and summary statistics, and returns the results.
        """
        # Read in model config.
        model_config_filename = os.path.join(model_dir,
                                             GQCNNFilenames.SAVED_CFG)
        with open(model_config_filename) as data_file:
            model_config = json.load(data_file)

        # Load model, reusing this analyzer's log file if one exists.
        self.logger.info("Loading model %s" % (model_dir))
        log_file = None
        for handler in self.logger.handlers:
            if isinstance(handler, logging.FileHandler):
                log_file = handler.baseFilename
        gqcnn = get_gqcnn_model(verbose=self.verbose).load(
            model_dir, verbose=self.verbose, log_file=log_file)
        gqcnn.open_session()
        gripper_mode = gqcnn.gripper_mode
        angular_bins = gqcnn.angular_bins

        # Read params from the config; `dataset_config` overrides the
        # dataset used during training.
        if dataset_config is None:
            dataset_dir = model_config["dataset_dir"]
            split_name = model_config["split_name"]
            image_field_name = model_config["image_field_name"]
            pose_field_name = model_config["pose_field_name"]
            metric_name = model_config["target_metric_name"]
            metric_thresh = model_config["metric_thresh"]
        else:
            dataset_dir = dataset_config["dataset_dir"]
            split_name = dataset_config["split_name"]
            image_field_name = dataset_config["image_field_name"]
            pose_field_name = dataset_config["pose_field_name"]
            metric_name = dataset_config["target_metric_name"]
            metric_thresh = dataset_config["metric_thresh"]
            gripper_mode = dataset_config["gripper_mode"]

        self.logger.info("Loading dataset %s" % (dataset_dir))
        dataset = TensorDataset.open(dataset_dir)
        train_indices, val_indices, _ = dataset.split(split_name)

        # Visualize conv filters in a square grid.
        conv1_filters = gqcnn.filters
        num_filt = conv1_filters.shape[3]
        d = utils.sqrt_ceil(num_filt)
        vis2d.clf()
        for k in range(num_filt):
            filt = conv1_filters[:, :, 0, k]
            vis2d.subplot(d, d, k + 1)
            vis2d.imshow(DepthImage(filt))
        figname = os.path.join(model_output_dir, "conv1_filters.pdf")
        vis2d.savefig(figname, dpi=self.dpi)

        # Aggregate training and validation true labels and predicted
        # probabilities.
        all_predictions = []
        if angular_bins > 0:
            all_predictions_raw = []
        all_labels = []
        for i in range(dataset.num_tensors):
            # Log progress.
            if i % self.log_rate == 0:
                self.logger.info("Predicting tensor %d of %d" %
                                 (i + 1, dataset.num_tensors))

            # Read in data; threshold the metric to obtain binary labels.
            image_arr = dataset.tensor(image_field_name, i).arr
            pose_arr = read_pose_data(
                dataset.tensor(pose_field_name, i).arr, gripper_mode)
            metric_arr = dataset.tensor(metric_name, i).arr
            label_arr = 1 * (metric_arr > metric_thresh)
            label_arr = label_arr.astype(np.uint8)
            if angular_bins > 0:
                # Form mask to extract predictions from ground-truth angular
                # bins.  Angles are normalized into (-pi/2, pi/2] and shifted
                # to [0, pi) before binning.
                raw_poses = dataset.tensor(pose_field_name, i).arr
                angles = raw_poses[:, 3]
                neg_ind = np.where(angles < 0)
                # TODO(vsatish): These should use the max angle instead.
                angles = np.abs(angles) % GeneralConstants.PI
                angles[neg_ind] *= -1
                g_90 = np.where(angles > (GeneralConstants.PI / 2))
                l_neg_90 = np.where(angles < (-1 * (GeneralConstants.PI / 2)))
                angles[g_90] -= GeneralConstants.PI
                angles[l_neg_90] += GeneralConstants.PI
                # TODO(vsatish): Fix this along with the others.
                angles *= -1  # Hack to fix reverse angle convention.
                angles += (GeneralConstants.PI / 2)
                pred_mask = np.zeros((raw_poses.shape[0], angular_bins * 2),
                                     dtype=bool)
                bin_width = GeneralConstants.PI / angular_bins
                # FIX: the original reused `i` here, shadowing the tensor
                # index for the rest of the iteration.
                for n in range(angles.shape[0]):
                    pred_mask[n, int((angles[n] // bin_width) * 2)] = True
                    pred_mask[n, int((angles[n] // bin_width) * 2 + 1)] = True

            # Predict with GQ-CNN.
            predictions = gqcnn.predict(image_arr, pose_arr)
            if angular_bins > 0:
                raw_predictions = np.array(predictions)
                predictions = predictions[pred_mask].reshape((-1, 2))

            # Aggregate positive-class probabilities and labels.
            all_predictions.extend(predictions[:, 1].tolist())
            if angular_bins > 0:
                all_predictions_raw.extend(raw_predictions.tolist())
            all_labels.extend(label_arr.tolist())

        # Close session.
        gqcnn.close_session()

        # Create arrays and split into train/val.
        all_predictions = np.array(all_predictions)
        all_labels = np.array(all_labels)
        train_predictions = all_predictions[train_indices]
        val_predictions = all_predictions[val_indices]
        train_labels = all_labels[train_indices]
        val_labels = all_labels[val_indices]
        train_predictions_raw = None
        val_predictions_raw = None
        if angular_bins > 0:
            all_predictions_raw = np.array(all_predictions_raw)
            train_predictions_raw = all_predictions_raw[train_indices]
            val_predictions_raw = all_predictions_raw[val_indices]

        # Aggregate results.
        train_result = BinaryClassificationResult(train_predictions,
                                                  train_labels)
        val_result = BinaryClassificationResult(val_predictions, val_labels)
        train_result.save(os.path.join(model_output_dir, "train_result.cres"))
        val_result.save(os.path.join(model_output_dir, "val_result.cres"))

        # Get stats, plot curves.
        self.logger.info("Model %s training error rate: %.3f" %
                         (model_dir, train_result.error_rate))
        self.logger.info("Model %s validation error rate: %.3f" %
                         (model_dir, val_result.error_rate))
        self.logger.info("Model %s training loss: %.3f" %
                         (model_dir, train_result.cross_entropy_loss))
        self.logger.info("Model %s validation loss: %.3f" %
                         (model_dir, val_result.cross_entropy_loss))

        # Save example images for each confusion-matrix category.
        vis2d.figure()
        example_dir = os.path.join(model_output_dir, "examples")
        if not os.path.exists(example_dir):
            os.mkdir(example_dir)

        # Train.
        self.logger.info("Saving training examples")
        train_example_dir = os.path.join(example_dir, "train")
        if not os.path.exists(train_example_dir):
            os.mkdir(train_example_dir)
        self._save_examples(dataset, train_result, train_indices,
                            image_field_name, pose_field_name, gripper_mode,
                            angular_bins, train_predictions_raw,
                            train_example_dir)

        # Val.
        self.logger.info("Saving validation examples")
        val_example_dir = os.path.join(example_dir, "val")
        if not os.path.exists(val_example_dir):
            os.mkdir(val_example_dir)
        self._save_examples(dataset, val_result, val_indices,
                            image_field_name, pose_field_name, gripper_mode,
                            angular_bins, val_predictions_raw,
                            val_example_dir)

        # Save summary stats.  FIX: use `with` so the files are closed
        # deterministically (the original left open file objects to the GC).
        train_summary_stats = {
            "error_rate": train_result.error_rate,
            "ap_score": train_result.ap_score,
            "auc_score": train_result.auc_score,
            "loss": train_result.cross_entropy_loss
        }
        train_stats_filename = os.path.join(model_output_dir,
                                            "train_stats.json")
        with open(train_stats_filename, "w") as f:
            json.dump(train_summary_stats,
                      f,
                      indent=JSON_INDENT,
                      sort_keys=True)

        val_summary_stats = {
            "error_rate": val_result.error_rate,
            "ap_score": val_result.ap_score,
            "auc_score": val_result.auc_score,
            "loss": val_result.cross_entropy_loss
        }
        val_stats_filename = os.path.join(model_output_dir, "val_stats.json")
        with open(val_stats_filename, "w") as f:
            json.dump(val_summary_stats,
                      f,
                      indent=JSON_INDENT,
                      sort_keys=True)
        return train_result, val_result

    def _plot_error_histogram(self, result, label_value, title, figname):
        """Save a histogram of |label - predicted prob| over the datapoints
        of `result` whose label equals `label_value`."""
        num_bins = min(self.num_bins, result.num_datapoints)
        ind = np.where(result.labels == label_value)[0]
        diffs = np.abs(result.labels[ind] - result.pred_probs[ind])
        vis2d.figure()
        utils.histogram(diffs,
                        num_bins,
                        bounds=(0, 1),
                        normalized=False,
                        plot=True)
        vis2d.title(title, fontsize=self.font_size)
        vis2d.xlabel("Abs Prediction Error", fontsize=self.font_size)
        vis2d.ylabel("Count", fontsize=self.font_size)
        vis2d.savefig(figname, dpi=self.dpi)

    def _plot(self, model_dir, model_output_dir, train_result, val_result):
        """Plot analysis curves.

        Saves precision-recall, ROC, error-histogram, and training-curve
        plots to `model_output_dir`.

        Returns
        -------
        tuple
            (init_train_error, final_train_error, init_train_loss,
            final_train_loss, init_val_error, final_val_error,
            norm_final_val_error), or None if the training curves could not
            be read from `model_dir`.
        """
        self.logger.info("Plotting")

        # Set params.
        colors = ["g", "b", "c", "y", "m", "r"]
        styles = ["-", "--", "-.", ":", "-"]

        # PR, ROC.
        vis2d.clf()
        train_result.precision_recall_curve(plot=True,
                                            line_width=self.line_width,
                                            color=colors[0],
                                            style=styles[0],
                                            label="TRAIN")
        val_result.precision_recall_curve(plot=True,
                                          line_width=self.line_width,
                                          color=colors[1],
                                          style=styles[1],
                                          label="VAL")
        vis2d.title("Precision Recall Curves", fontsize=self.font_size)
        handles, labels = vis2d.gca().get_legend_handles_labels()
        vis2d.legend(handles, labels, loc="best")
        figname = os.path.join(model_output_dir, "precision_recall.png")
        vis2d.savefig(figname, dpi=self.dpi)

        vis2d.clf()
        train_result.roc_curve(plot=True,
                               line_width=self.line_width,
                               color=colors[0],
                               style=styles[0],
                               label="TRAIN")
        val_result.roc_curve(plot=True,
                             line_width=self.line_width,
                             color=colors[1],
                             style=styles[1],
                             label="VAL")
        # FIX: corrected "Reciever" -> "Receiver" in the plot title.
        vis2d.title("Receiver Operating Characteristic",
                    fontsize=self.font_size)
        handles, labels = vis2d.gca().get_legend_handles_labels()
        vis2d.legend(handles, labels, loc="best")
        figname = os.path.join(model_output_dir, "roc.png")
        vis2d.savefig(figname, dpi=self.dpi)

        # Histograms of prediction errors for each split/label combination.
        self._plot_error_histogram(
            train_result, 1, "Error on Positive Training Examples",
            os.path.join(model_output_dir, "pos_train_errors_histogram.png"))
        self._plot_error_histogram(
            train_result, 0, "Error on Negative Training Examples",
            os.path.join(model_output_dir, "neg_train_errors_histogram.png"))
        self._plot_error_histogram(
            val_result, 1, "Error on Positive Validation Examples",
            os.path.join(model_output_dir, "pos_val_errors_histogram.png"))
        self._plot_error_histogram(
            val_result, 0, "Error on Negative Validation Examples",
            os.path.join(model_output_dir, "neg_val_errors_histogram.png"))

        # Losses / training curves.  These files may be missing for models
        # trained elsewhere, hence the broad guard around the whole section.
        try:
            train_errors_filename = os.path.join(model_dir,
                                                 GQCNNFilenames.TRAIN_ERRORS)
            val_errors_filename = os.path.join(model_dir,
                                               GQCNNFilenames.VAL_ERRORS)
            val_iters_filename = os.path.join(model_dir,
                                              GQCNNFilenames.VAL_ITERS)
            pct_pos_val_filename = os.path.join(model_dir,
                                                GQCNNFilenames.PCT_POS_VAL)
            train_losses_filename = os.path.join(model_dir,
                                                 GQCNNFilenames.TRAIN_LOSSES)
            raw_train_errors = np.load(train_errors_filename)
            val_errors = np.load(val_errors_filename)
            val_iters = np.load(val_iters_filename)
            pct_pos_val = float(val_errors[0])
            if os.path.exists(pct_pos_val_filename):
                pct_pos_val = 100.0 * np.load(pct_pos_val_filename)
            raw_train_losses = np.load(train_losses_filename)

            # Prepend the initial validation error at iteration 0.
            val_errors = np.r_[pct_pos_val, val_errors]
            val_iters = np.r_[0, val_iters]

            # Window the training error/loss to smooth the curves.
            i = 0
            train_errors = []
            train_losses = []
            train_iters = []
            while i < raw_train_errors.shape[0]:
                train_errors.append(np.mean(raw_train_errors[i:i + WINDOW]))
                train_losses.append(np.mean(raw_train_losses[i:i + WINDOW]))
                train_iters.append(i)
                i += WINDOW
            train_errors = np.array(train_errors)
            train_losses = np.array(train_losses)
            train_iters = np.array(train_iters)
            init_val_error = val_errors[0]
            norm_train_errors = train_errors / init_val_error
            norm_val_errors = val_errors / init_val_error
            norm_final_val_error = val_result.error_rate / val_errors[0]
            if pct_pos_val > 0:
                norm_final_val_error = val_result.error_rate / pct_pos_val

            vis2d.clf()
            vis2d.plot(train_iters,
                       train_errors,
                       linewidth=self.line_width,
                       color="b")
            vis2d.plot(val_iters,
                       val_errors,
                       linewidth=self.line_width,
                       color="g")
            vis2d.ylim(0, 100)
            vis2d.legend(("TRAIN (Minibatch)", "VAL"),
                         fontsize=self.font_size,
                         loc="best")
            vis2d.xlabel("Iteration", fontsize=self.font_size)
            vis2d.ylabel("Error Rate", fontsize=self.font_size)
            vis2d.title("Error Rate vs Training Iteration",
                        fontsize=self.font_size)
            figname = os.path.join(model_output_dir,
                                   "training_error_rates.png")
            vis2d.savefig(figname, dpi=self.dpi)

            vis2d.clf()
            vis2d.plot(train_iters, norm_train_errors, linewidth=4, color="b")
            vis2d.plot(val_iters, norm_val_errors, linewidth=4, color="g")
            vis2d.ylim(0, 2.0)
            vis2d.legend(("TRAIN (Minibatch)", "VAL"),
                         fontsize=self.font_size,
                         loc="best")
            vis2d.xlabel("Iteration", fontsize=self.font_size)
            vis2d.ylabel("Normalized Error Rate", fontsize=self.font_size)
            vis2d.title("Normalized Error Rate vs Training Iteration",
                        fontsize=self.font_size)
            figname = os.path.join(model_output_dir,
                                   "training_norm_error_rates.png")
            vis2d.savefig(figname, dpi=self.dpi)

            # Cap extreme losses so a few outliers don't flatten the plot.
            train_losses[train_losses > MAX_LOSS] = MAX_LOSS

            vis2d.clf()
            vis2d.plot(train_iters,
                       train_losses,
                       linewidth=self.line_width,
                       color="b")
            vis2d.ylim(0, 2.0)
            vis2d.xlabel("Iteration", fontsize=self.font_size)
            vis2d.ylabel("Loss", fontsize=self.font_size)
            vis2d.title("Training Loss vs Iteration", fontsize=self.font_size)
            figname = os.path.join(model_output_dir, "training_losses.png")
            vis2d.savefig(figname, dpi=self.dpi)

            # Log.
            self.logger.info("TRAIN")
            self.logger.info("Original error: %.3f" % (train_errors[0]))
            self.logger.info("Final error: %.3f" % (train_result.error_rate))
            self.logger.info("Orig loss: %.3f" % (train_losses[0]))
            self.logger.info("Final loss: %.3f" % (train_losses[-1]))

            self.logger.info("VAL")
            self.logger.info("Original error: %.3f" % (pct_pos_val))
            self.logger.info("Final error: %.3f" % (val_result.error_rate))
            self.logger.info("Normalized error: %.3f" %
                             (norm_final_val_error))

            return (train_errors[0], train_result.error_rate, train_losses[0],
                    train_losses[-1], pct_pos_val, val_result.error_rate,
                    norm_final_val_error)
        except Exception as e:
            # NOTE(review): on failure this logs and implicitly returns
            # None, which makes the unpacking in `analyze` raise TypeError;
            # preserved from the original implementation.
            self.logger.error("Failed to plot training curves!\n" + str(e))
================================================
FILE: gqcnn/grasping/__init__.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
"""
from .grasp import Grasp2D, SuctionPoint2D, MultiSuctionPoint2D
from .grasp_quality_function import (GraspQualityFunctionFactory,
GQCnnQualityFunction)
from .image_grasp_sampler import (ImageGraspSamplerFactory,
AntipodalDepthImageGraspSampler)
from .constraint_fn import GraspConstraintFnFactory
from .policy import (RobustGraspingPolicy, CrossEntropyRobustGraspingPolicy,
FullyConvolutionalGraspingPolicyParallelJaw,
FullyConvolutionalGraspingPolicySuction,
UniformRandomGraspingPolicy, RgbdImageState, GraspAction)
from .actions import (NoAction, ParallelJawGrasp3D, SuctionGrasp3D,
MultiSuctionGrasp3D)
# Public API of `gqcnn.grasping`: the names re-exported from the submodules
# imported above.
__all__ = [
    "Grasp2D", "SuctionPoint2D", "MultiSuctionPoint2D",
    "GraspQualityFunctionFactory", "GQCnnQualityFunction",
    "ImageGraspSamplerFactory", "AntipodalDepthImageGraspSampler",
    "RobustGraspingPolicy", "CrossEntropyRobustGraspingPolicy",
    "FullyConvolutionalGraspingPolicyParallelJaw",
    "FullyConvolutionalGraspingPolicySuction", "UniformRandomGraspingPolicy",
    "RgbdImageState", "GraspAction", "GraspConstraintFnFactory", "NoAction",
    "ParallelJawGrasp3D", "SuctionGrasp3D", "MultiSuctionGrasp3D"
]
================================================
FILE: gqcnn/grasping/actions.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Action classes for representing 3D grasp actions.
Author
------
Jeff Mahler
"""
from abc import ABC, abstractmethod
import numpy as np
from autolab_core import Point
from .grasp import Grasp2D, SuctionPoint2D, MultiSuctionPoint2D
class Action(object):
    """Base action class.

    Attributes
    ----------
    q_value : float
        Grasp quality.
    id : int
        Integer identifier for the action.
    metadata : dict
        Key-value dict of extra data about the action.
    """

    def __init__(self, q_value=0.0, id=-1, metadata=None):
        """
        Parameters
        ----------
        q_value : float
            Grasp quality.
        id : int
            Integer identifier for the action.
        metadata : dict
            Key-value dict of extra data about the action; defaults to an
            empty dict.
        """
        self._q_value = q_value
        self._id = id
        # FIX: the original default was a mutable `{}` shared across every
        # instance; use a fresh dict per instance instead.
        self._metadata = {} if metadata is None else metadata

    @property
    def q_value(self):
        return self._q_value

    @property
    def id(self):
        return self._id

    @property
    def metadata(self):
        return self._metadata
class NoAction(Action):
    """Sentinel action returned when no valid grasp can be found."""
class GraspAction3D(ABC, Action):
    """Base grasp class with grasp specified as an end-effector pose.

    Attributes
    ----------
    T_grasp_world : :obj:`RigidTransform`
        Pose of the grasp w.r.t. world coordinate frame.
    """

    def __init__(self, T_grasp_world, q_value=0.0, id=-1, metadata=None):
        """
        Parameters
        ----------
        T_grasp_world : :obj:`RigidTransform`
            Pose of the grasp w.r.t. world coordinate frame.
        q_value : float
            Grasp quality.
        id : int
            Integer identifier for the action.
        metadata : dict
            Key-value dict of extra data about the action; defaults to an
            empty dict.
        """
        self.T_grasp_world = T_grasp_world
        # FIX: avoid the shared mutable-default-dict pitfall; normalize here
        # before delegating to `Action.__init__`.
        Action.__init__(self, q_value, id,
                        {} if metadata is None else metadata)

    @abstractmethod
    def project(self, camera_intr, T_camera_world):
        """Project the 3D grasp into a 2D grasp in image space."""
class ParallelJawGrasp3D(GraspAction3D):
    """Grasping with a parallel-jaw gripper.

    Attributes
    ----------
    T_grasp_world : :obj:`RigidTransform`
        Pose of the grasp wrt world coordinate frame.
    """

    def project(self, camera_intr, T_camera_world, gripper_width=0.05):
        """Project the 3D grasp into a 2D image-plane `Grasp2D`."""
        # Express the grasp pose in the camera frame.
        T_grasp_camera = T_camera_world.inverse() * self.T_grasp_world

        # In-plane grasp axis: the image-plane component of the gripper
        # y-axis, normalized when nonzero.
        axis = T_grasp_camera.y_axis[:2]
        axis_norm = np.linalg.norm(axis)
        if axis_norm > 0:
            axis = axis / axis_norm

        # Rotation of the grasp axis in image space, wrapped into [0, 2*pi].
        angle = np.arccos(axis[0])
        if axis[1] < 0:
            angle = -angle
        while angle < 0:
            angle += 2 * np.pi
        while angle > 2 * np.pi:
            angle -= 2 * np.pi

        # Pixel location and depth of the grasp center.
        center_camera = T_grasp_camera.translation
        center_px = camera_intr.project(
            Point(center_camera, frame=camera_intr.frame))
        return Grasp2D(center_px,
                       angle,
                       center_camera[2],
                       width=gripper_width,
                       camera_intr=camera_intr)
class SuctionGrasp3D(GraspAction3D):
    """Grasping with a suction-based gripper.

    Attributes
    ----------
    T_grasp_world : :obj:`RigidTransform`
        Pose of the grasp wrt world coordinate frame.
    """

    def project(self, camera_intr, T_camera_world):
        """Project the 3D suction grasp into a 2D `SuctionPoint2D`."""
        # Express the grasp pose in the camera frame; the x-axis is the
        # suction approach axis.
        T_grasp_camera = T_camera_world.inverse() * self.T_grasp_world
        approach_axis = T_grasp_camera.x_axis

        # Pixel location and depth of the suction point.
        center_camera = T_grasp_camera.translation
        center_px = camera_intr.project(
            Point(center_camera, frame=camera_intr.frame))
        return SuctionPoint2D(center_px,
                              approach_axis,
                              center_camera[2],
                              camera_intr=camera_intr)
class MultiSuctionGrasp3D(GraspAction3D):
    """Grasping with a multi-cup suction-based gripper.

    Attributes
    ----------
    T_grasp_world : :obj:`RigidTransform`
        Pose of the grasp wrt world coordinate frame.
    """

    def project(self, camera_intr, T_camera_world):
        """Project the 3D grasp into a 2D `MultiSuctionPoint2D`; the full
        camera-frame pose is retained by the 2D representation."""
        T_grasp_camera = T_camera_world.inverse() * self.T_grasp_world
        return MultiSuctionPoint2D(T_grasp_camera, camera_intr=camera_intr)
================================================
FILE: gqcnn/grasping/constraint_fn.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Constraint functions for grasp sampling.
Author
------
Jeff Mahler
"""
from abc import ABC, abstractmethod
import numpy as np
class GraspConstraintFn(ABC):
    """Abstract base class for grasp-sampling constraint functions."""

    def __init__(self, config):
        # Keep the configuration around for subclasses to read from.
        self._config = config

    def __call__(self, grasp):
        """Alias for `satisfies_constraints` so instances are callable.

        Parameters
        ----------
        grasp : :obj:`Grasp2D`
            Grasp to evaluate.

        Returns
        -------
        bool
            True if the grasp satisfies constraints, False otherwise.
        """
        return self.satisfies_constraints(grasp)

    @abstractmethod
    def satisfies_constraints(self, grasp):
        """Evaluate whether or not a grasp is valid.

        Parameters
        ----------
        grasp : :obj:`Grasp2D`
            Grasp to evaluate.

        Returns
        -------
        bool
            True if the grasp satisfies constraints, False otherwise.
        """
class DiscreteApproachGraspConstraintFn(GraspConstraintFn):
    """Constrains the grasp approach direction into a discrete set of
    angles from the world z direction."""

    def __init__(self, config):
        # Init superclass.
        GraspConstraintFn.__init__(self, config)

        # Cache the constraint parameters from the config.
        self._max_approach_angle = self._config["max_approach_angle"]
        self._angular_tolerance = self._config["angular_tolerance"]
        self._angular_step = self._config["angular_step"]
        self._T_camera_world = self._config["camera_pose"]

    def satisfies_constraints(self, grasp):
        """Evaluates whether or not a grasp is valid by evaluating the
        angle between the approach axis and the world z direction.

        Parameters
        ----------
        grasp : :obj:`Grasp2D`
            Grasp to evaluate.

        Returns
        -------
        bool
            True if the grasp satisfies constraints, False otherwise.
        """
        # Rotate the approach axis into the world frame and measure its
        # angle from the downward (negative z) world direction.
        approach_world = self._T_camera_world.rotation.dot(
            grasp.approach_axis)
        approach_angle = np.arccos(-approach_world[2])

        # Build the discrete set of allowed angles.
        if self._angular_step > 0:
            candidates = np.arange(start=0.0,
                                   stop=self._max_approach_angle,
                                   step=self._angular_step)
        else:
            candidates = np.array([0.0])

        # Accept iff the nearest allowed angle is within tolerance.
        deviations = np.abs(candidates - approach_angle)
        return bool(np.min(deviations) < self._angular_tolerance)
class GraspConstraintFnFactory(object):
    """Factory mapping constraint-function type names to instances."""

    @staticmethod
    def constraint_fn(fn_type, config):
        # "none" deliberately maps to no constraint at all.
        if fn_type == "none":
            return None
        if fn_type == "discrete_approach_angle":
            return DiscreteApproachGraspConstraintFn(config)
        raise ValueError(
            "Grasp constraint function type {} not supported!".format(
                fn_type))
================================================
FILE: gqcnn/grasping/grasp.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Classes to encapsulate parallel-jaw grasps in image space.
Author
------
Jeff Mahler
"""
import numpy as np
from autolab_core import Point, RigidTransform, CameraIntrinsics
class Grasp2D(object):
    """Parallel-jaw grasp in image space.

    Attributes
    ----------
    center : :obj:`autolab_core.Point`
        Point in image space.
    angle : float
        Grasp axis angle with the camera x-axis.
    depth : float
        Depth of the grasp center in 3D space.
    width : float
        Distance between the jaws in meters.
    camera_intr : :obj:`autolab_core.CameraIntrinsics`
        Frame of reference for camera that the grasp corresponds to.
    contact_points : list of :obj:`numpy.ndarray`
        Pair of contact points in image space.
    contact_normals : list of :obj:`numpy.ndarray`
        Pair of contact normals in image space.
    """

    def __init__(self,
                 center,
                 angle=0.0,
                 depth=1.0,
                 width=0.0,
                 camera_intr=None,
                 contact_points=None,
                 contact_normals=None):
        self.center = center
        self.angle = angle
        self.depth = depth
        self.width = width
        # If `camera_intr` is `None` use default primesense camera
        # intrinsics.
        if not camera_intr:
            self.camera_intr = CameraIntrinsics("primesense_overhead",
                                                fx=525,
                                                fy=525,
                                                cx=319.5,
                                                cy=239.5,
                                                width=640,
                                                height=480)
        else:
            self.camera_intr = camera_intr
        self.contact_points = contact_points
        self.contact_normals = contact_normals
        # Allow `center` to be passed as a raw pixel array by wrapping it
        # in a `Point` in the camera frame (or a generic "image" frame).
        frame = "image"
        if camera_intr is not None:
            frame = camera_intr.frame
        if isinstance(center, np.ndarray):
            self.center = Point(center, frame=frame)

    @property
    def axis(self):
        """Returns the grasp axis (unit 2-vector in image space)."""
        return np.array([np.cos(self.angle), np.sin(self.angle)])

    @property
    def approach_axis(self):
        """Approach direction in the camera basis (the optical axis)."""
        return np.array([0, 0, 1])

    @property
    def approach_angle(self):
        """The angle between the grasp approach axis and camera optical axis.

        Always zero for a parallel-jaw grasp, which approaches along the
        camera optical axis.
        """
        return 0.0

    @property
    def frame(self):
        """The name of the frame of reference for the grasp."""
        if self.camera_intr is None:
            raise ValueError("Must specify camera intrinsics")
        return self.camera_intr.frame

    @property
    def width_px(self):
        """Returns the width in pixels."""
        if self.camera_intr is None:
            missing_camera_intr_msg = ("Must specify camera intrinsics to"
                                       " compute gripper width in 3D space.")
            raise ValueError(missing_camera_intr_msg)
        # Form the jaw locations in 3D space at the given depth.
        p1 = Point(np.array([0, 0, self.depth]), frame=self.frame)
        p2 = Point(np.array([self.width, 0, self.depth]), frame=self.frame)
        # Project into pixel space; the distance between the projections is
        # the gripper width in pixels at this depth.
        u1 = self.camera_intr.project(p1)
        u2 = self.camera_intr.project(p2)
        return np.linalg.norm(u1.data - u2.data)

    @property
    def endpoints(self):
        """Returns the grasp endpoints (jaw centers) in pixel space."""
        p1 = self.center.data - (self.width_px / 2) * self.axis
        p2 = self.center.data + (self.width_px / 2) * self.axis
        return p1, p2

    @property
    def feature_vec(self):
        """Returns the feature vector for the grasp.

        `v = [p1, p2, depth]` where `p1` and `p2` are the jaw locations in
        image space.
        """
        p1, p2 = self.endpoints
        return np.r_[p1, p2, self.depth]

    @staticmethod
    def from_feature_vec(v, width=0.0, camera_intr=None):
        """Creates a `Grasp2D` instance from a feature vector and additional
        parameters.

        Parameters
        ----------
        v : :obj:`numpy.ndarray`
            Feature vector, see `Grasp2D.feature_vec`.
        width : float
            Grasp opening width, in meters.
        camera_intr : :obj:`autolab_core.CameraIntrinsics`
            Frame of reference for camera that the grasp corresponds to.
        """
        # Read feature vec.
        p1 = v[:2]
        p2 = v[2:4]
        depth = v[4]
        # Compute center and angle. BUG FIX: use true division (`/`) so the
        # center is the exact (possibly sub-pixel) midpoint of the jaws;
        # floor division (`//`) rounded the center toward the origin.
        center_px = (p1 + p2) / 2
        center = Point(center_px, camera_intr.frame)
        axis = p2 - p1
        if np.linalg.norm(axis) > 0:
            axis = axis / np.linalg.norm(axis)
        # Recover the signed angle of the axis with the image x-axis.
        if axis[1] > 0:
            angle = np.arccos(axis[0])
        else:
            angle = -np.arccos(axis[0])
        return Grasp2D(center,
                       angle,
                       depth,
                       width=width,
                       camera_intr=camera_intr)

    def pose(self, grasp_approach_dir=None):
        """Computes the 3D pose of the grasp relative to the camera.

        If an approach direction is not specified then the camera
        optical axis is used.

        Parameters
        ----------
        grasp_approach_dir : :obj:`numpy.ndarray`
            Approach direction for the grasp in camera basis (e.g. opposite
            to table normal).

        Returns
        -------
        :obj:`autolab_core.RigidTransform`
            The transformation from the grasp to the camera frame of
            reference.
        """
        # Check intrinsics.
        if self.camera_intr is None:
            raise ValueError(
                "Must specify camera intrinsics to compute 3D grasp pose")
        # Compute 3D grasp center in camera basis.
        grasp_center_im = self.center.data
        center_px_im = Point(grasp_center_im, frame=self.camera_intr.frame)
        grasp_center_camera = self.camera_intr.deproject_pixel(
            self.depth, center_px_im)
        grasp_center_camera = grasp_center_camera.data
        # Compute 3D grasp axis in camera basis (in the image plane).
        grasp_axis_im = self.axis
        grasp_axis_im = grasp_axis_im / np.linalg.norm(grasp_axis_im)
        grasp_axis_camera = np.array([grasp_axis_im[0], grasp_axis_im[1], 0])
        grasp_axis_camera = grasp_axis_camera / np.linalg.norm(
            grasp_axis_camera)
        # Convert to 3D pose by building an orthonormal basis with x along
        # the approach direction and y along the grasp axis. (A previous
        # SVD-based rotation estimate was dead code — its result was
        # immediately overwritten — and has been removed.)
        grasp_x_camera = grasp_approach_dir
        if grasp_approach_dir is None:
            grasp_x_camera = np.array([0, 0, 1])  # Align with camera Z axis.
        grasp_y_camera = grasp_axis_camera
        grasp_z_camera = np.cross(grasp_x_camera, grasp_y_camera)
        grasp_z_camera = grasp_z_camera / np.linalg.norm(grasp_z_camera)
        # Re-orthogonalize y so the basis is exactly orthonormal.
        grasp_y_camera = np.cross(grasp_z_camera, grasp_x_camera)
        grasp_rot_camera = np.array(
            [grasp_x_camera, grasp_y_camera, grasp_z_camera]).T
        if np.linalg.det(grasp_rot_camera) < 0:  # Ensure right-handedness.
            grasp_rot_camera[:, 0] = -grasp_rot_camera[:, 0]
        T_grasp_camera = RigidTransform(rotation=grasp_rot_camera,
                                        translation=grasp_center_camera,
                                        from_frame="grasp",
                                        to_frame=self.camera_intr.frame)
        return T_grasp_camera

    @staticmethod
    def image_dist(g1, g2, alpha=1.0):
        """Computes the distance between grasps in image space.

        Uses Euclidean distance with alpha weighting of angles.

        Parameters
        ----------
        g1 : :obj:`Grasp2D`
            First grasp.
        g2 : :obj:`Grasp2D`
            Second grasp.
        alpha : float
            Weight of angle distance (rad to meters).

        Returns
        -------
        float
            Distance between grasps.
        """
        # Point to point distances.
        point_dist = np.linalg.norm(g1.center.data - g2.center.data)
        # Axis distances (abs of dot: axis direction is sign-invariant).
        dot = max(min(np.abs(g1.axis.dot(g2.axis)), 1.0), -1.0)
        axis_dist = np.arccos(dot)
        return point_dist + alpha * axis_dist
class SuctionPoint2D(object):
    """Suction grasp in image space.

    Attributes
    ----------
    center : :obj:`autolab_core.Point`
        Point in image space.
    axis : :obj:`numpy.ndarray`
        Normalized 3-vector representing the direction of the suction tip.
    depth : float
        Depth of the suction point in 3D space.
    camera_intr : :obj:`autolab_core.CameraIntrinsics`
        Frame of reference for camera that the suction point corresponds to.
    """

    def __init__(self, center, axis=None, depth=1.0, camera_intr=None):
        # Default approach axis is the camera optical axis.
        if axis is None:
            axis = np.array([0, 0, 1])
        self.center = center
        self.axis = axis
        # Allow `center` to be a raw pixel array (wrapped in a `Point`) and
        # `axis` to be a plain list (promoted to an ndarray).
        frame = "image"
        if camera_intr is not None:
            frame = camera_intr.frame
        if isinstance(center, np.ndarray):
            self.center = Point(center, frame=frame)
        if isinstance(axis, list):
            self.axis = np.array(axis)
        # The approach axis must be (approximately) a unit vector.
        if np.abs(np.linalg.norm(self.axis) - 1.0) > 1e-3:
            raise ValueError("Illegal axis. Must be norm 1.")
        self.depth = depth
        # If `camera_intr` is `None` use default primesense camera intrinsics.
        if not camera_intr:
            self.camera_intr = CameraIntrinsics("primesense_overhead",
                                                fx=525,
                                                fy=525,
                                                cx=319.5,
                                                cy=239.5,
                                                width=640,
                                                height=480)
        else:
            self.camera_intr = camera_intr

    @property
    def frame(self):
        """The name of the frame of reference for the grasp."""
        if self.camera_intr is None:
            raise ValueError("Must specify camera intrinsics")
        return self.camera_intr.frame

    @property
    def angle(self):
        """The angle that the grasp pivot axis makes in image space."""
        # The pivot axis is perpendicular to both the approach axis and the
        # optical axis; measure its in-plane angle from the image x-axis.
        rotation_axis = np.cross(self.axis, np.array([0, 0, 1]))
        rotation_axis_image = np.array([rotation_axis[0], rotation_axis[1]])
        angle = 0
        if np.linalg.norm(rotation_axis) > 0:
            rotation_axis_image = rotation_axis_image / np.linalg.norm(
                rotation_axis_image)
            angle = np.arccos(rotation_axis_image[0])
        # Disambiguate the sign of the arccos result by the y component.
        if rotation_axis[1] < 0:
            angle = -angle
        return angle

    @property
    def approach_angle(self):
        """The angle between the grasp approach axis and camera optical axis.
        """
        # Clamp the dot product to the valid arccos domain to guard
        # against floating-point drift.
        dot = max(min(self.axis.dot(np.array([0, 0, 1])), 1.0), -1.0)
        return np.arccos(dot)

    @property
    def approach_axis(self):
        # The approach direction is the suction axis itself.
        return self.axis

    @property
    def feature_vec(self):
        """Returns the feature vector for the suction point.

        Note
        ----
        Currently only the center pixel `v = [center]` is returned; the
        axis and depth are NOT included, despite older notes suggesting
        `v = [center, axis, depth]`.
        """
        return self.center.data

    @staticmethod
    def from_feature_vec(v, camera_intr=None, depth=None, axis=None):
        """Creates a `SuctionPoint2D` instance from a feature vector and
        additional parameters.

        Parameters
        ----------
        v : :obj:`numpy.ndarray`
            Feature vector, see `Grasp2D.feature_vec`.
        camera_intr : :obj:`autolab_core.CameraIntrinsics`
            Frame of reference for camera that the grasp corresponds to.
        depth : float
            Hard-set the depth for the suction grasp.
        axis : :obj:`numpy.ndarray`
            Normalized 3-vector specifying the approach direction.
        """
        # Read feature vec.
        center_px = v[:2]
        # Approach axis: default -z, overridden by v[2:5] (normalized) or
        # by the explicit `axis` argument.
        grasp_axis = np.array([0, 0, -1])
        if v.shape[0] > 2 and axis is None:
            grasp_axis = v[2:5]
            grasp_axis = grasp_axis / np.linalg.norm(grasp_axis)
        elif axis is not None:
            grasp_axis = axis
        # Depth: default 0.5, overridden by v[5] or the `depth` argument.
        grasp_depth = 0.5
        if v.shape[0] > 5 and depth is None:
            grasp_depth = v[5]
        elif depth is not None:
            grasp_depth = depth
        # Compute center and angle.
        center = Point(center_px, camera_intr.frame)
        return SuctionPoint2D(center,
                              grasp_axis,
                              grasp_depth,
                              camera_intr=camera_intr)

    def pose(self):
        """Computes the 3D pose of the grasp relative to the camera.

        Returns
        -------
        :obj:`autolab_core.RigidTransform`
            The transformation from the grasp to the camera frame of
            reference.
        """
        # Check intrinsics.
        if self.camera_intr is None:
            raise ValueError(
                "Must specify camera intrinsics to compute 3D grasp pose")
        # Compute 3D grasp center in camera basis.
        suction_center_im = self.center.data
        center_px_im = Point(suction_center_im, frame=self.camera_intr.frame)
        suction_center_camera = self.camera_intr.deproject_pixel(
            self.depth, center_px_im)
        suction_center_camera = suction_center_camera.data
        # Compute 3D grasp axis in camera basis.
        suction_axis_camera = self.axis
        # Convert to 3D pose: x-axis is the approach direction; pick a
        # z-axis perpendicular to it within the image plane.
        suction_x_camera = suction_axis_camera
        suction_z_camera = np.array(
            [-suction_x_camera[1], suction_x_camera[0], 0])
        # Degenerate case: approach axis is parallel to the optical axis,
        # so any in-plane direction is a valid z-axis.
        if np.linalg.norm(suction_z_camera) < 1e-12:
            suction_z_camera = np.array([1.0, 0.0, 0.0])
        suction_z_camera = suction_z_camera / np.linalg.norm(suction_z_camera)
        suction_y_camera = np.cross(suction_z_camera, suction_x_camera)
        suction_rot_camera = np.c_[suction_x_camera, suction_y_camera,
                                   suction_z_camera]
        T_suction_camera = RigidTransform(rotation=suction_rot_camera,
                                          translation=suction_center_camera,
                                          from_frame="grasp",
                                          to_frame=self.camera_intr.frame)
        return T_suction_camera

    @staticmethod
    def image_dist(g1, g2, alpha=1.0):
        """Computes the distance between grasps in image space.

        Uses Euclidean distance with alpha weighting of angles.

        Parameters
        ----------
        g1 : :obj:`SuctionPoint2D`
            First suction point.
        g2 : :obj:`SuctionPoint2D`
            Second suction point.
        alpha : float
            Weight of angle distance (rad to meters).

        Returns
        -------
        float
            Distance between grasps.
        """
        # Point to point distances.
        point_dist = np.linalg.norm(g1.center.data - g2.center.data)
        # Axis distances (abs of dot: axis direction is sign-invariant).
        dot = max(min(np.abs(g1.axis.dot(g2.axis)), 1.0), -1.0)
        axis_dist = np.arccos(dot)
        return point_dist + alpha * axis_dist
class MultiSuctionPoint2D(object):
    """Multi-Cup Suction grasp in image space.

    Equivalent to projecting a 6D pose to image space.

    Attributes
    ----------
    pose : :obj:`autolab_core.RigidTransform`
        Pose in 3D camera space.
    camera_intr : :obj:`autolab_core.CameraIntrinsics`
        Frame of reference for camera that the suction point corresponds to.
    """

    def __init__(self, pose, camera_intr=None):
        self._pose = pose
        # TODO(vsatish): Confirm that this is really not needed.
        # frame = "image"
        # if camera_intr is not None:
        #     frame = camera_intr.frame
        # If `camera_intr` is `None` use default primesense camera intrinsics.
        if not camera_intr:
            self.camera_intr = CameraIntrinsics("primesense_overhead",
                                                fx=525,
                                                fy=525,
                                                cx=319.5,
                                                cy=239.5,
                                                width=640,
                                                height=480)
        else:
            self.camera_intr = camera_intr

    def pose(self):
        """Returns the stored 6D pose in 3D camera space."""
        return self._pose

    @property
    def frame(self):
        """The name of the frame of reference for the grasp."""
        if self.camera_intr is None:
            raise ValueError("Must specify camera intrinsics")
        return self.camera_intr.frame

    @property
    def center(self):
        """Grasp center projected into pixel space as a `Point`."""
        center_camera = Point(self._pose.translation,
                              frame=self.camera_intr.frame)
        center_px = self.camera_intr.project(center_camera)
        return center_px

    @property
    def axis(self):
        # The approach direction is the x-axis of the grasp pose.
        return self._pose.x_axis

    @property
    def approach_axis(self):
        return self.axis

    @property
    def approach_angle(self):
        """The angle between the grasp approach axis and camera optical axis.
        """
        # Clamp the dot product to the valid arccos domain to guard
        # against floating-point drift.
        dot = max(min(self.axis.dot(np.array([0, 0, 1])), 1.0), -1.0)
        return np.arccos(dot)

    @property
    def angle(self):
        """In-plane angle of the grasp y-axis in image space."""
        g_axis = self._pose.y_axis
        g_axis_im = np.array([g_axis[0], g_axis[1], 0])
        # Degenerate: y-axis parallel to the optical axis has no in-plane
        # direction.
        if np.linalg.norm(g_axis_im) == 0:
            return 0
        theta = np.arctan2(g_axis[1], g_axis[0])
        return theta

    @property
    def depth(self):
        """Depth of the grasp center along the camera optical axis."""
        return self._pose.translation[2]

    @property
    def orientation(self):
        """Rotation about the approach axis, in [0, 2*pi).

        Builds a canonical rotation whose x-axis is the approach axis and
        measures the residual rotation of the stored pose relative to it.
        """
        x_axis = self.axis
        y_axis = np.array([x_axis[1], -x_axis[0], 0])
        if np.linalg.norm(y_axis) == 0:
            y_axis = np.array([1, 0, 0])
        y_axis = y_axis / np.linalg.norm(y_axis)
        z_axis = np.cross(x_axis, y_axis)
        R = np.array([x_axis, y_axis, z_axis]).T
        # Residual rotation between the canonical frame and the pose.
        delta_R = R.T.dot(self._pose.rotation)
        orientation = np.arccos(delta_R[1, 1])
        # Disambiguate the arccos sign to cover the full [0, 2*pi) range.
        if delta_R[1, 2] > 0:
            orientation = 2 * np.pi - orientation
        return orientation

    @property
    def feature_vec(self):
        """Returns the feature vector for the suction point.

        Note
        ----
        `v = [center_px, cos(orientation), sin(orientation)]`.
        """
        return np.r_[self.center.data,
                     np.cos(self.orientation),
                     np.sin(self.orientation)]

    @staticmethod
    def from_feature_vec(v,
                         camera_intr=None,
                         angle=None,
                         depth=None,
                         axis=None):
        """Creates a `MultiSuctionPoint2D` instance from a feature vector
        and additional parameters.

        Parameters
        ----------
        v : :obj:`numpy.ndarray`
            Feature vector, see `feature_vec`.
        camera_intr : :obj:`autolab_core.CameraIntrinsics`
            Frame of reference for camera that the grasp corresponds to.
        angle : float
            Hard-set the rotation about the approach axis.
        depth : float
            Hard-set the depth for the suction grasp.
        axis : :obj:`numpy.ndarray`
            Normalized 3-vector specifying the approach direction.
        """
        # Read feature vec.
        center_px = v[:2]
        # Recover the orientation angle from its (cos, sin) encoding,
        # unless an explicit angle is given.
        grasp_angle = 0
        if v.shape[0] > 2 and angle is None:
            # grasp_angle = v[2]
            grasp_vec = v[2:]
            grasp_vec = grasp_vec / np.linalg.norm(grasp_vec)
            grasp_angle = np.arctan2(grasp_vec[1], grasp_vec[0])
        elif angle is not None:
            grasp_angle = angle
        grasp_axis = np.array([1, 0, 0])
        if axis is not None:
            grasp_axis = axis
        grasp_depth = 0.5
        if depth is not None:
            grasp_depth = depth
        # Build a canonical rotation with x along the approach axis, then
        # rotate about x by the recovered angle.
        x_axis = grasp_axis
        y_axis = np.array([grasp_axis[1], -grasp_axis[0], 0])
        if np.linalg.norm(y_axis) == 0:
            y_axis = np.array([1, 0, 0])
        y_axis = y_axis / np.linalg.norm(y_axis)
        z_axis = np.cross(x_axis, y_axis)
        R = np.array([x_axis, y_axis, z_axis]).T
        R = R.dot(RigidTransform.x_axis_rotation(grasp_angle))
        # Deproject the center pixel at the given depth to position the
        # grasp in 3D.
        t = camera_intr.deproject_pixel(
            grasp_depth, Point(center_px, frame=camera_intr.frame)).data
        T = RigidTransform(rotation=R,
                           translation=t,
                           from_frame="grasp",
                           to_frame=camera_intr.frame)
        # Compute center and angle.
        return MultiSuctionPoint2D(T, camera_intr=camera_intr)

    @staticmethod
    def image_dist(g1, g2, alpha=1.0):
        """Computes the distance between grasps in image space.

        Uses Euclidean distance with alpha weighting of angles.

        Parameters
        ----------
        g1 : :obj:`MultiSuctionPoint2D`
            First suction point.
        g2 : :obj:`MultiSuctionPoint2D`
            Second suction point.
        alpha : float
            Weight of angle distance (rad to meters).

        Returns
        -------
        float
            Distance between grasps.
        """
        # Point to point distances.
        point_dist = np.linalg.norm(g1.center.data - g2.center.data)
        # Axis distances (abs of dot: axis direction is sign-invariant).
        dot = max(min(np.abs(g1.axis.dot(g2.axis)), 1.0), -1.0)
        axis_dist = np.arccos(dot)
        return point_dist + alpha * axis_dist
================================================
FILE: gqcnn/grasping/grasp_quality_function.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Grasp quality functions: suction quality function and parallel jaw grasping
quality function.
Authors
-------
Jason Liu & Jeff Mahler
"""
from abc import ABC, abstractmethod
from time import time
import cv2
import matplotlib.pyplot as plt
import numpy as np
import autolab_core.utils as utils
from autolab_core import Point, PointCloud, RigidTransform, Logger, DepthImage
from ..model import get_gqcnn_model, get_fc_gqcnn_model
from .grasp import SuctionPoint2D
from ..utils import GeneralConstants, GripperMode
class GraspQualityFunction(ABC):
    """Abstract grasp quality class.

    Instances are callable: calling the object delegates to `quality`,
    which concrete subclasses must implement.
    """

    def __init__(self):
        # Logger is named after the concrete subclass for clearer output.
        self._logger = Logger.get_logger(self.__class__.__name__)

    def __call__(self, state, actions, params=None):
        """Evaluates grasp quality for a set of actions given a state."""
        return self.quality(state, actions, params)

    @abstractmethod
    def quality(self, state, actions, params=None):
        """Evaluates grasp quality for a set of actions given a state.

        Parameters
        ----------
        state : :obj:`object`
            State of the world e.g. image.
        actions : :obj:`list`
            List of actions to evaluate e.g. parallel-jaw or suction grasps.
        params : :obj:`dict`
            Optional parameters for the evaluation.

        Returns
        -------
        :obj:`numpy.ndarray`
            Vector containing the real-valued grasp quality
            for each candidate.
        """
class ZeroGraspQualityFunction(object):
    """Null quality function: assigns zero quality to every grasp."""

    def quality(self, state, actions, params=None):
        """Returns zero for all grasps.

        Parameters
        ----------
        state : :obj:`object`
            State of the world e.g. image.
        actions : :obj:`list`
            List of actions to evaluate e.g. parallel-jaw or suction grasps.
        params : :obj:`dict`
            Optional parameters for the evaluation.

        Returns
        -------
        :obj:`numpy.ndarray`
            Vector containing the real-valued grasp quality
            for each candidate.
        """
        # All candidates receive identical (zero) quality.
        return 0.0
class ParallelJawQualityFunction(GraspQualityFunction):
    """Abstract wrapper class for parallel jaw quality functions (only image
    based metrics for now)."""

    def __init__(self, config):
        GraspQualityFunction.__init__(self)

        # Read parameters: the friction cone half-angle is arctan of the
        # friction coefficient.
        self._friction_coef = config["friction_coef"]
        self._max_friction_cone_angle = np.arctan(self._friction_coef)

    def friction_cone_angle(self, action):
        """Compute the angle between the axis and the boundaries of the
        friction cone."""
        if action.contact_points is None or action.contact_normals is None:
            raise ValueError("Cannot compute friction cone angle"
                             " without precomputed contact points"
                             " and normals.")
        # Angle of each contact normal against the grasp axis (negated for
        # the first contact), clamped to the valid arccos domain.
        cos1 = np.clip(action.contact_normals[0].dot(-action.axis), -1.0, 1.0)
        cos2 = np.clip(action.contact_normals[1].dot(action.axis), -1.0, 1.0)
        return max(np.arccos(cos1), np.arccos(cos2))

    def force_closure(self, action):
        """Determine if the grasp is in force closure."""
        # In force closure iff the worst contact angle lies strictly inside
        # the friction cone.
        return self.friction_cone_angle(action) < self._max_friction_cone_angle
class ComForceClosureParallelJawQualityFunction(ParallelJawQualityFunction):
    """Measures the distance to the estimated center of mass for antipodal
    parallel-jaw grasps."""

    def __init__(self, config):
        """Create a center-of-mass force closure parallel-jaw metric.

        Parameters
        ----------
        config : dict
            Must contain `antipodality_pctile` in addition to the keys
            required by `ParallelJawQualityFunction`.
        """
        self._antipodality_pctile = config["antipodality_pctile"]
        ParallelJawQualityFunction.__init__(self, config)

    def quality(self, state, actions, params=None):
        """Given a parallel-jaw grasp, compute the distance to the center of
        mass of the grasped object.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            An RgbdImageState instance that encapsulates rgbd_im, camera_intr,
            segmask, full_observed.
        actions : list of :obj:`Grasp2D`
            Parallel-jaw grasps in image space, each with precomputed
            contact points and normals.
        params : dict
            Stores params used in computing quality.

        Returns
        -------
        :obj:`numpy.ndarray`
            Array of the quality for each grasp.
        """
        # Compute antipodality (friction cone angle) for every candidate.
        antipodality_q = [
            ParallelJawQualityFunction.friction_cone_angle(self, action)
            for action in actions
        ]
        # Object centroid: mean of segmask pixels when available, otherwise
        # the image center.
        object_com = state.rgbd_im.center
        if state.segmask is not None:
            nonzero_px = state.segmask.nonzero_pixels()
            object_com = np.mean(nonzero_px, axis=0)
        # Keep only the most antipodal `antipodality_pctile` percent of
        # candidates (or those in force closure).
        antipodality_thresh = abs(
            np.percentile(antipodality_q, 100 - self._antipodality_pctile))
        qualities = []
        # Worst-case pixel distance, used for normalization below.
        max_q = max(state.rgbd_im.height, state.rgbd_im.width)
        for i, action in enumerate(actions):
            q = max_q
            friction_cone_angle = antipodality_q[i]
            force_closure = ParallelJawQualityFunction.force_closure(
                self, action)
            if force_closure or friction_cone_angle < antipodality_thresh:
                grasp_center = np.array([action.center.y, action.center.x])
                # With a per-object segmask, use the center of mass of the
                # particular object under the grasp center.
                if state.obj_segmask is not None:
                    grasp_obj_id = state.obj_segmask[grasp_center[0],
                                                     grasp_center[1]]
                    obj_mask = state.obj_segmask.segment_mask(grasp_obj_id)
                    nonzero_px = obj_mask.nonzero_pixels()
                    object_com = np.mean(nonzero_px, axis=0)
                q = np.linalg.norm(grasp_center - object_com)
                # NOTE(review): id 0 presumably denotes background, so the
                # grasp is penalized with the max distance — confirm
                # against the segmask producer.
                if state.obj_segmask is not None and grasp_obj_id == 0:
                    q = max_q
            # Map distance to (0, 1]: smaller distance -> higher quality.
            q = (np.exp(-q / max_q) - np.exp(-1)) / (1 - np.exp(-1))
            qualities.append(q)
        return np.array(qualities)
class SuctionQualityFunction(GraspQualityFunction):
    """Abstract wrapper class for suction quality functions (only image based
    metrics for now)."""

    def __init__(self, config):
        # BUG FIX: was `GraspQualityFunction.__init(self)` (missing
        # underscores), which raises AttributeError on construction and
        # skips superclass initialization (e.g. the logger).
        GraspQualityFunction.__init__(self)

        # Read parameters.
        self._window_size = config["window_size"]
        self._sample_rate = config["sample_rate"]

    def _points_in_window(self, point_cloud_image, action, segmask=None):
        """Retrieve all points on the object in a box of size
        `self._window_size`.

        Parameters
        ----------
        point_cloud_image : :obj:`numpy.ndarray`
            Per-pixel 3D points (assumed H x W x 3 — confirm with
            `deproject_to_image` callers).
        action : :obj:`SuctionPoint2D`
            Grasp whose center pixel anchors the window.
        segmask : optional
            Currently unused; kept for interface compatibility.

        Returns
        -------
        :obj:`numpy.ndarray`
            N x 3 array of subsampled points from the window.
        """
        # Read indices, clamping the window to the image bounds.
        im_shape = point_cloud_image.shape
        i_start = int(max(action.center.y - self._window_size // 2,
                          0))  # TODO: Confirm div.
        j_start = int(max(action.center.x - self._window_size // 2, 0))
        i_end = int(min(i_start + self._window_size, im_shape[0]))
        j_end = int(min(j_start + self._window_size, im_shape[1]))
        # Subsample at the configured rate.
        step = int(1 / self._sample_rate)
        # Read 3D points in the window.
        points = point_cloud_image[i_start:i_end:step, j_start:j_end:step]
        stacked_points = points.reshape(points.shape[0] * points.shape[1], -1)
        # Form the matrices for plane-fitting.
        return stacked_points

    def _points_to_matrices(self, points):
        """Convert a set of 3D points to an A and b matrix for regression.

        A holds [x, y, 1] rows and b the z values, so a plane
        `z = w[0]*x + w[1]*y + w[2]` can be fit by least squares.
        """
        A = points[:, [0, 1]]
        ones = np.ones((A.shape[0], 1))
        A = np.hstack((A, ones))
        b = points[:, 2]
        return A, b

    def _best_fit_plane(self, A, b):
        """Find a best-fit plane of points via least squares.

        Raises
        ------
        numpy.linalg.LinAlgError
            If the least-squares fit does not converge.
        """
        try:
            # NOTE(review): no `rcond` passed; newer NumPy warns about the
            # default — consider `rcond=None` after validating results.
            w, _, _, _ = np.linalg.lstsq(A, b)
        except np.linalg.LinAlgError:
            self._logger.warning("Could not find a best-fit plane!")
            raise
        return w

    def _sum_of_squared_residuals(self, w, A, z):
        """Returns the mean squared residual of the points from the plane."""
        return (1.0 / A.shape[0]) * np.square(np.linalg.norm(np.dot(A, w) - z))
class BestFitPlanaritySuctionQualityFunction(SuctionQualityFunction):
    """A best-fit planarity suction metric."""

    def __init__(self, config):
        SuctionQualityFunction.__init__(self, config)

    def quality(self, state, actions, params=None):
        """Given a suction point, compute a score based on a best-fit 3D plane
        of the neighboring points.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            An RgbdImageState instance that encapsulates rgbd_im, camera_intr,
            segmask, full_observed.
        actions : list of :obj:`SuctionPoint2D`
            Suction grasps in image space that encapsulate center, approach
            direction, depth, camera_intr.
        params : dict
            Stores params used in computing suction quality.

        Returns
        -------
        :obj:`numpy.ndarray`
            Array of the quality for each grasp.

        Raises
        ------
        ValueError
            If any action is not a `SuctionPoint2D`.
        """
        qualities = []

        # Deproject the depth image into a point-cloud image (one 3D point
        # per pixel).
        point_cloud_image = state.camera_intr.deproject_to_image(
            state.rgbd_im.depth)

        # Score each grasp by how well a least-squares plane fits the
        # points in a window around the grasp center.
        for i, action in enumerate(actions):
            if not isinstance(action, SuctionPoint2D):
                not_suction_msg = ("This function can only be used to evaluate"
                                   " suction quality.")
                raise ValueError(not_suction_msg)
            # x,y in matrix A and z is vector z.
            points = self._points_in_window(point_cloud_image,
                                            action,
                                            segmask=state.segmask)
            A, b = self._points_to_matrices(points)
            # vector w w/ a bias term represents a best-fit plane.
            w = self._best_fit_plane(A, b)

            # Optional debug visualization of the fitted plane.
            if params is not None and params["vis"]["plane"]:
                from visualization import Visualizer2D as vis2d
                from visualization import Visualizer3D as vis3d
                mid_i = A.shape[0] // 2
                pred_z = A.dot(w)
                p0 = np.array([A[mid_i, 0], A[mid_i, 1], pred_z[mid_i]])
                # Plane normal in general form is (w[0], w[1], -1).
                n = np.array([w[0], w[1], -1])
                n = n / np.linalg.norm(n)
                tx = np.array([n[1], -n[0], 0])
                tx = tx / np.linalg.norm(tx)
                ty = np.cross(n, tx)
                R = np.array([tx, ty, n]).T
                T_table_world = RigidTransform(rotation=R,
                                               translation=p0,
                                               from_frame="patch",
                                               to_frame="world")
                vis3d.figure()
                vis3d.points(point_cloud_image.to_point_cloud(),
                             scale=0.0025,
                             subsample=10,
                             random=True,
                             color=(0, 0, 1))
                vis3d.points(PointCloud(points.T),
                             scale=0.0025,
                             color=(1, 0, 0))
                vis3d.table(T_table_world, dim=0.01)
                vis3d.show()
                vis2d.figure()
                vis2d.imshow(state.rgbd_im.depth)
                vis2d.scatter(action.center.x, action.center.y, s=50, c="b")
                vis2d.show()

            # Evaluate how well best-fit plane describes all points in
            # window: quality decays exponentially with the residual.
            quality = np.exp(-self._sum_of_squared_residuals(w, A, b))
            qualities.append(quality)
        return np.array(qualities)
class ApproachPlanaritySuctionQualityFunction(SuctionQualityFunction):
    """An approach planarity suction metric."""

    def __init__(self, config):
        """Create approach planarity suction metric."""
        SuctionQualityFunction.__init__(self, config)

    def _action_to_plane(self, point_cloud_image, action):
        """Convert a plane from point-normal form to general form.

        The plane passes through the 3D point under the grasp center with
        normal along the negated approach axis; the result `w` satisfies
        `z = w[0]*x + w[1]*y + w[2]`, matching `_points_to_matrices`.
        """
        x = int(action.center.x)
        y = int(action.center.y)
        p_0 = point_cloud_image[y, x]
        n = -action.axis
        # TODO: Confirm divs.
        w = np.array([-n[0] / n[2], -n[1] / n[2], np.dot(n, p_0) / n[2]])
        return w

    def quality(self, state, actions, params=None):
        """Given a suction point, compute a score based on the planarity of
        neighboring points relative to the plane induced by the approach axis.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            An RgbdImageState instance that encapsulates rgbd_im, camera_intr,
            segmask, full_observed.
        actions : list of :obj:`SuctionPoint2D`
            Suction grasps in image space that encapsulate center, approach
            direction, depth, camera_intr.
        params : dict
            Stores params used in computing suction quality.

        Returns
        -------
        :obj:`numpy.ndarray`
            Array of the quality for each grasp.

        Raises
        ------
        ValueError
            If any action is not a `SuctionPoint2D`.
        """
        qualities = []

        # Deproject the depth image into a point-cloud image (one 3D point
        # per pixel).
        point_cloud_image = state.camera_intr.deproject_to_image(
            state.rgbd_im.depth)

        # Score each grasp by the residual of the window points from the
        # plane perpendicular to the approach axis (NOT a best-fit plane).
        for i, action in enumerate(actions):
            if not isinstance(action, SuctionPoint2D):
                not_suction_msg = ("This function can only be used to evaluate"
                                   " suction quality.")
                raise ValueError(not_suction_msg)
            # x,y in matrix A and z is vector z.
            points = self._points_in_window(point_cloud_image,
                                            action,
                                            segmask=state.segmask)
            A, b = self._points_to_matrices(points)
            # vector w w/ a bias term represents the approach-induced plane.
            w = self._action_to_plane(point_cloud_image, action)

            # Optional debug visualization of the plane and approach axis.
            if params is not None and params["vis"]["plane"]:
                from visualization import Visualizer2D as vis2d
                from visualization import Visualizer3D as vis3d
                mid_i = A.shape[0] // 2
                pred_z = A.dot(w)
                p0 = np.array([A[mid_i, 0], A[mid_i, 1], pred_z[mid_i]])
                # Plane normal in general form is (w[0], w[1], -1).
                n = np.array([w[0], w[1], -1])
                n = n / np.linalg.norm(n)
                tx = np.array([n[1], -n[0], 0])
                tx = tx / np.linalg.norm(tx)
                ty = np.cross(n, tx)
                R = np.array([tx, ty, n]).T
                # Grasp contact point and a point offset along the axis.
                c = state.camera_intr.deproject_pixel(action.depth,
                                                      action.center)
                d = Point(c.data - 0.01 * action.axis, frame=c.frame)
                T_table_world = RigidTransform(rotation=R,
                                               translation=p0,
                                               from_frame="patch",
                                               to_frame="world")
                vis3d.figure()
                vis3d.points(point_cloud_image.to_point_cloud(),
                             scale=0.0025,
                             subsample=10,
                             random=True,
                             color=(0, 0, 1))
                vis3d.points(PointCloud(points.T),
                             scale=0.0025,
                             color=(1, 0, 0))
                vis3d.points(c, scale=0.005, color=(1, 1, 0))
                vis3d.points(d, scale=0.005, color=(1, 1, 0))
                vis3d.table(T_table_world, dim=0.01)
                vis3d.show()
                vis2d.figure()
                vis2d.imshow(state.rgbd_im.depth)
                vis2d.scatter(action.center.x, action.center.y, s=50, c="b")
                vis2d.show()

            # Evaluate how well the plane describes all points in the
            # window: quality decays exponentially with the residual.
            quality = np.exp(-self._sum_of_squared_residuals(w, A, b))
            qualities.append(quality)
        return np.array(qualities)
class DiscApproachPlanaritySuctionQualityFunction(SuctionQualityFunction):
    """An approach planarity suction metric using a disc-shaped window."""

    def __init__(self, config):
        """Create approach planarity suction metric.

        Parameters
        ----------
        config : dict
            Must contain `radius` (disc radius used to select points) in
            addition to the base `SuctionQualityFunction` parameters.
        """
        # Radius of the disc of points used for plane evaluation.
        self._radius = config["radius"]
        SuctionQualityFunction.__init__(self, config)

    def _action_to_plane(self, point_cloud_image, action):
        """Convert a plane from point-normal form to general form.

        The plane passes through the deprojected grasp center with normal
        opposite the approach axis; the returned `w` satisfies
        z = w[0]*x + w[1]*y + w[2] on the plane.
        """
        x = int(action.center.x)
        y = int(action.center.y)
        p_0 = point_cloud_image[y, x]
        n = -action.axis
        # TODO: Confirm divs.
        # NOTE(review): divides by n[2]; assumes the approach axis is never
        # parallel to the image plane -- confirm upstream guarantees this.
        w = np.array([-n[0] / n[2], -n[1] / n[2], np.dot(n, p_0) / n[2]])
        return w

    def _points_in_window(self, point_cloud_image, action, segmask=None):
        """Retrieve all points on the object in a disc of size
        `self._window_size`."""
        # Compute plane. The SVD of the (3x1) approach direction yields two
        # remaining columns of U spanning the plane orthogonal to it.
        n = -action.axis
        U, _, _ = np.linalg.svd(n.reshape((3, 1)))
        tangents = U[:, 1:]

        # Read indices of a square window clipped to the image bounds.
        im_shape = point_cloud_image.shape
        i_start = int(max(action.center.y - self._window_size // 2, 0))
        j_start = int(max(action.center.x - self._window_size // 2, 0))
        i_end = int(min(i_start + self._window_size, im_shape[0]))
        j_end = int(min(j_start + self._window_size, im_shape[1]))
        step = int(1 / self._sample_rate)

        # Read 3D points in the window (subsampled by `step`).
        points = point_cloud_image[i_start:i_end:step, j_start:j_end:step]
        stacked_points = points.reshape(points.shape[0] * points.shape[1], -1)

        # Compute the center point.
        contact_point = point_cloud_image[int(action.center.y),
                                          int(action.center.x)]

        # Project onto approach plane.
        residuals = stacked_points - contact_point
        coords = residuals.dot(tangents)
        proj_residuals = coords.dot(tangents.T)

        # Check distance from the center point along the approach plane;
        # keep only points whose in-plane distance lies within the disc.
        dists = np.linalg.norm(proj_residuals, axis=1)
        stacked_points = stacked_points[dists <= self._radius]

        # Form the matrices for plane-fitting.
        return stacked_points

    def quality(self, state, actions, params=None):
        """Given a suction point, compute a score based on a best-fit 3D plane
        of the neighboring points.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            An RgbdImageState instance that encapsulates rgbd_im, camera_intr,
            segmask, full_observed.
        action: :obj:`SuctionPoint2D`
            A suction grasp in image space that encapsulates center,
            approach direction, depth, camera_intr.
        params: dict
            Stores params used in computing suction quality.

        Returns
        -------
        :obj:`numpy.ndarray`
            Array of the quality for each grasp.
        """
        qualities = []

        # Deproject points once; reused for every action.
        point_cloud_image = state.camera_intr.deproject_to_image(
            state.rgbd_im.depth)

        # Compute negative SSE from the best fit plane for each grasp.
        for i, action in enumerate(actions):
            if not isinstance(action, SuctionPoint2D):
                not_suction_msg = ("This function can only be used to evaluate"
                                   " suction quality.")
                raise ValueError(not_suction_msg)

            # x,y in matrix A and z is vector z.
            points = self._points_in_window(point_cloud_image,
                                            action,
                                            segmask=state.segmask)
            A, b = self._points_to_matrices(points)
            # vector w w/ a bias term represents a best-fit plane.
            # (Here the plane comes from the approach axis, not regression.)
            w = self._action_to_plane(point_cloud_image, action)
            sse = self._sum_of_squared_residuals(w, A, b)

            if params is not None and params["vis"]["plane"]:
                # Debug visualization of the plane, window points, and the
                # grasp center/approach direction.
                from visualization import Visualizer2D as vis2d
                from visualization import Visualizer3D as vis3d
                mid_i = A.shape[0] // 2
                pred_z = A.dot(w)
                p0 = np.array([A[mid_i, 0], A[mid_i, 1], pred_z[mid_i]])
                n = np.array([w[0], w[1], -1])
                n = n / np.linalg.norm(n)
                # Build an orthonormal frame (tx, ty, n) for rendering the
                # fitted plane as a small table patch.
                tx = np.array([n[1], -n[0], 0])
                tx = tx / np.linalg.norm(tx)
                ty = np.cross(n, tx)
                R = np.array([tx, ty, n]).T
                c = state.camera_intr.deproject_pixel(action.depth,
                                                      action.center)
                # Point slightly off the surface along the approach axis.
                d = Point(c.data - 0.01 * action.axis, frame=c.frame)
                T_table_world = RigidTransform(rotation=R,
                                               translation=p0,
                                               from_frame="patch",
                                               to_frame="world")
                vis3d.figure()
                vis3d.points(point_cloud_image.to_point_cloud(),
                             scale=0.0025,
                             subsample=10,
                             random=True,
                             color=(0, 0, 1))
                vis3d.points(PointCloud(points.T),
                             scale=0.0025,
                             color=(1, 0, 0))
                vis3d.points(c, scale=0.005, color=(1, 1, 0))
                vis3d.points(d, scale=0.005, color=(1, 1, 0))
                vis3d.table(T_table_world, dim=0.01)
                vis3d.show()

                vis2d.figure()
                vis2d.imshow(state.rgbd_im.depth)
                vis2d.scatter(action.center.x, action.center.y, s=50, c="b")
                vis2d.show()

            # Evaluate how well best-fit plane describes all points in window.
            quality = np.exp(-sse)
            qualities.append(quality)

        return np.array(qualities)
class ComApproachPlanaritySuctionQualityFunction(
        ApproachPlanaritySuctionQualityFunction):
    """An approach planarity suction metric that ranks sufficiently planar
    points by their distance to the object COM."""

    def __init__(self, config):
        """Create approach planarity suction metric.

        Parameters
        ----------
        config : dict
            Must contain `planarity_thresh` in addition to the base
            `ApproachPlanaritySuctionQualityFunction` parameters.
        """
        # Maximum |planarity score| for a grasp to be ranked by COM
        # distance instead of receiving the worst-case score.
        self._planarity_thresh = config["planarity_thresh"]
        ApproachPlanaritySuctionQualityFunction.__init__(self, config)

    def quality(self, state, actions, params=None):
        """Given a suction point, compute a score based on a best-fit 3D plane
        of the neighboring points.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            An RgbdImageState instance that encapsulates rgbd_im, camera_intr,
            segmask, full_observed.
        action: :obj:`SuctionPoint2D`
            A suction grasp in image space that encapsulates center, approach
            direction, depth, camera_intr.
        params: dict
            Stores params used in computing suction quality.

        Returns
        -------
        :obj:`numpy.ndarray`
            Array of the quality for each grasp.
        """
        # Compute planarity scores (the parent returns exp(-SSE) per grasp).
        sse = ApproachPlanaritySuctionQualityFunction.quality(self,
                                                              state,
                                                              actions,
                                                              params=params)

        # FIX: guard against `params=None` (the default), consistent with
        # the `params is not None` checks in the other suction metrics.
        if params is not None and params["vis"]["hist"]:
            plt.figure()
            utils.histogram(sse,
                            100, (np.min(sse), np.max(sse)),
                            normalized=False,
                            plot=True)
            plt.show()

        # Compute object centroid (fallback: image center when no segmask).
        object_com = state.rgbd_im.center
        if state.segmask is not None:
            nonzero_px = state.segmask.nonzero_pixels()
            object_com = np.mean(nonzero_px, axis=0)

        # Threshold: sufficiently planar grasps are scored by distance to
        # the COM; all others get the worst-case distance.
        qualities = []
        for k, action in enumerate(actions):
            q = max(state.rgbd_im.height, state.rgbd_im.width)
            if np.abs(sse[k]) < self._planarity_thresh:
                grasp_center = np.array([action.center.y, action.center.x])
                q = np.linalg.norm(grasp_center - object_com)
            # Smaller distance -> larger quality.
            qualities.append(np.exp(-q))
        return np.array(qualities)
class ComDiscApproachPlanaritySuctionQualityFunction(
        DiscApproachPlanaritySuctionQualityFunction):
    """An approach planarity suction metric that ranks sufficiently planar
    points by their distance to the object COM."""

    def __init__(self, config):
        """Create approach planarity suction metric.

        Parameters
        ----------
        config : dict
            Must contain `planarity_pctile`; may contain
            `planarity_abs_thresh`.
        """
        # Percentile of planarity scores used as a relative threshold.
        self._planarity_pctile = config["planarity_pctile"]
        # Absolute threshold on the exp(-SSE) planarity score; defaults to
        # 0 so the relative (percentile) threshold dominates when unset.
        self._planarity_abs_thresh = 0
        if "planarity_abs_thresh" in config:
            self._planarity_abs_thresh = np.exp(
                -config["planarity_abs_thresh"])
        DiscApproachPlanaritySuctionQualityFunction.__init__(self, config)

    def quality(self, state, actions, params=None):
        """Given a suction point, compute a score based on a best-fit 3D plane
        of the neighboring points.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            An RgbdImageState instance that encapsulates rgbd_im, camera_intr,
            segmask, full_observed.
        action: :obj:`SuctionPoint2D`
            A suction grasp in image space that encapsulates center, approach
            direction, depth, camera_intr.
        params: dict
            Stores params used in computing suction quality.

        Returns
        -------
        :obj:`numpy.ndarray`
            Array of the quality for each grasp.
        """
        # Compute planarity scores (the parent returns exp(-SSE) per grasp).
        sse_q = DiscApproachPlanaritySuctionQualityFunction.quality(
            self, state, actions, params=params)

        # FIX: guard against `params=None` (the default), consistent with
        # the `params is not None` checks in the other suction metrics.
        if params is not None and params["vis"]["hist"]:
            plt.figure()
            utils.histogram(sse_q,
                            100, (np.min(sse_q), np.max(sse_q)),
                            normalized=False,
                            plot=True)
            plt.show()

        # Compute object centroid (fallback: image center when no segmask).
        object_com = state.rgbd_im.center
        if state.segmask is not None:
            nonzero_px = state.segmask.nonzero_pixels()
            object_com = np.mean(nonzero_px, axis=0)

        # A grasp is "planar enough" if its score beats either the
        # percentile threshold or the absolute threshold.
        planarity_thresh = abs(
            np.percentile(sse_q, 100 - self._planarity_pctile))
        qualities = []
        max_q = max(state.rgbd_im.height, state.rgbd_im.width)
        for k, action in enumerate(actions):
            q = max_q
            if sse_q[k] > planarity_thresh or sse_q[
                    k] > self._planarity_abs_thresh:
                grasp_center = np.array([action.center.y, action.center.x])

                # Prefer the COM of the specific object under the grasp
                # when a per-object segmask is available.
                if state.obj_segmask is not None:
                    grasp_obj_id = state.obj_segmask[grasp_center[0],
                                                     grasp_center[1]]
                    obj_mask = state.obj_segmask.segment_mask(grasp_obj_id)
                    nonzero_px = obj_mask.nonzero_pixels()
                    object_com = np.mean(nonzero_px, axis=0)
                q = np.linalg.norm(grasp_center - object_com)

            # Map pixel distance to [0, 1]: distance 0 -> 1, max_q -> 0.
            q = (np.exp(-q / max_q) - np.exp(-1)) / (1 - np.exp(-1))
            qualities.append(q)
        return np.array(qualities)
class GaussianCurvatureSuctionQualityFunction(SuctionQualityFunction):
    """Suction metric scoring the Gaussian curvature of a quadratic
    surface fit to the points around the contact."""

    def __init__(self, config):
        """Create approach planarity suction metric."""
        SuctionQualityFunction.__init__(self, config)

    def _points_to_matrices(self, points):
        """Convert a set of 3D points to an A and b matrix for regression.

        The design matrix columns are [x, y, x^2, x*y, y^2, 1] so the
        regression fits z as a quadratic in x and y.
        """
        xs = points[:, 0]
        ys = points[:, 1]
        design = np.column_stack(
            (xs, ys, xs * xs, xs * ys, ys * ys, np.ones(xs.shape[0])))
        return design, points[:, 2]

    def quality(self, state, actions, params=None):
        """Given a suction point, compute a score based on a best-fit 3D plane
        of the neighboring points.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            An RgbdImageState instance that encapsulates rgbd_im, camera_intr,
            segmask, full_observed.
        action: :obj:`SuctionPoint2D`
            A suction grasp in image space that encapsulates center, approach
            direction, depth, camera_intr.
        params: dict
            Stores params used in computing suction quality.

        Returns
        -------
        :obj:`numpy.ndarray`
            Array of the quality for each grasp.
        """
        # Deproject the depth image once; reused for every action.
        point_cloud_image = state.camera_intr.deproject_to_image(
            state.rgbd_im.depth)

        qualities = []
        for action in actions:
            if not isinstance(action, SuctionPoint2D):
                raise ValueError("This function can only be used to evaluate"
                                 " suction quality.")

            # Fit a quadratic surface z = f(x, y) to the window points.
            window_points = self._points_in_window(point_cloud_image,
                                                   action,
                                                   segmask=state.segmask)
            design, zs = self._points_to_matrices(window_points)
            coeffs = self._best_fit_plane(design, zs)

            # Gaussian curvature of the fitted Monge patch.
            fx = coeffs[0]
            fy = coeffs[1]
            fxx = 2 * coeffs[2]
            fxy = coeffs[3]
            fyy = 2 * coeffs[4]
            gauss_curvature = ((fxx * fyy - fxy**2) /
                               ((1 + fx**2 + fy**2)**2))

            # Flatter surfaces (|curvature| near 0) score near 1.
            qualities.append(np.exp(-np.abs(gauss_curvature)))

        return np.array(qualities)
class DiscCurvatureSuctionQualityFunction(
        GaussianCurvatureSuctionQualityFunction):
    """Gaussian curvature suction metric evaluated over a disc-shaped
    window of points around the contact."""

    def __init__(self, config):
        """Create approach planarity suction metric.

        Parameters
        ----------
        config : dict
            Must contain `radius` (disc radius for point selection) in
            addition to the base `SuctionQualityFunction` parameters.
        """
        self._radius = config["radius"]
        SuctionQualityFunction.__init__(self, config)

    def _points_in_window(self, point_cloud_image, action, segmask=None):
        """Retrieve all points on the object in a disc of size
        `self._window_size`."""
        # Clip a square window around the grasp center to image bounds.
        im_shape = point_cloud_image.shape
        half = self._window_size // 2
        row_lo = int(max(action.center.y - half, 0))
        col_lo = int(max(action.center.x - half, 0))
        row_hi = int(min(row_lo + self._window_size, im_shape[0]))
        col_hi = int(min(col_lo + self._window_size, im_shape[1]))
        stride = int(1 / self._sample_rate)

        # Subsample the 3D points inside the window and flatten them.
        window = point_cloud_image[row_lo:row_hi:stride, col_lo:col_hi:stride]
        flat = window.reshape(window.shape[0] * window.shape[1], -1)

        # Keep only points within `radius` of the contact point (3D dist).
        center_pt = point_cloud_image[int(action.center.y),
                                      int(action.center.x)]
        keep = np.linalg.norm(flat - center_pt, axis=1) <= self._radius
        return flat[keep]
class ComDiscCurvatureSuctionQualityFunction(
        DiscCurvatureSuctionQualityFunction):
    """Disc curvature suction metric that ranks sufficiently flat points
    by their distance to the object COM."""

    def __init__(self, config):
        """Create approach planarity suction metric.

        Parameters
        ----------
        config : dict
            Must contain `curvature_pctile` in addition to the base
            `DiscCurvatureSuctionQualityFunction` parameters.
        """
        # Percentile of curvature scores used as the threshold.
        self._curvature_pctile = config["curvature_pctile"]
        DiscCurvatureSuctionQualityFunction.__init__(self, config)

    def quality(self, state, actions, params=None):
        """Given a suction point, compute a score based on the Gaussian
        curvature.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            An RgbdImageState instance that encapsulates rgbd_im, camera_intr,
            segmask, full_observed.
        action: :obj:`SuctionPoint2D`
            A suction grasp in image space that encapsulates center, approach
            direction, depth, camera_intr.
        params: dict
            Stores params used in computing suction quality.

        Returns
        -------
        :obj:`numpy.ndarray`
            Array of the quality for each grasp.
        """
        # Compute curvature scores (parent returns exp(-|curvature|)).
        curvature_q = DiscCurvatureSuctionQualityFunction.quality(
            self, state, actions, params=params)

        # FIX: guard against `params=None` (the default), consistent with
        # the `params is not None` checks in the other suction metrics.
        if params is not None and params["vis"]["hist"]:
            plt.figure()
            utils.histogram(curvature_q,
                            100, (np.min(curvature_q), np.max(curvature_q)),
                            normalized=False,
                            plot=True)
            plt.show()

        # Compute object centroid (fallback: image center when no segmask).
        object_com = state.rgbd_im.center
        if state.segmask is not None:
            nonzero_px = state.segmask.nonzero_pixels()
            object_com = np.mean(nonzero_px, axis=0)

        # Grasps above the curvature-score threshold are ranked by COM
        # distance; all others get the worst-case distance.
        curvature_q_thresh = abs(
            np.percentile(curvature_q, 100 - self._curvature_pctile))
        qualities = []
        max_q = max(state.rgbd_im.height, state.rgbd_im.width)
        for k, action in enumerate(actions):
            q = max_q
            if curvature_q[k] > curvature_q_thresh:
                grasp_center = np.array([action.center.y, action.center.x])
                q = np.linalg.norm(grasp_center - object_com)

            # Map pixel distance to [0, 1]: distance 0 -> 1, max_q -> 0.
            q = (np.exp(-q / max_q) - np.exp(-1)) / (1 - np.exp(-1))
            qualities.append(q)
        return np.array(qualities)
class GQCnnQualityFunction(GraspQualityFunction):
    """Grasp quality function backed by a GQ-CNN."""

    def __init__(self, config):
        """Create a GQCNN suction quality function.

        Parameters
        ----------
        config : dict
            Must contain `gqcnn_model` (model directory) plus
            `crop_height` and `crop_width` (size of the crop that is
            rescaled to the network input).
        """
        GraspQualityFunction.__init__(self)

        # Store parameters.
        self._config = config
        self._gqcnn_model_dir = config["gqcnn_model"]
        self._crop_height = config["crop_height"]
        self._crop_width = config["crop_width"]

        # Init GQ-CNN.
        self._gqcnn = get_gqcnn_model().load(self._gqcnn_model_dir)

        # Open Tensorflow session for gqcnn.
        self._gqcnn.open_session()

    def __del__(self):
        # Best-effort session cleanup; session may already be closed.
        try:
            self._gqcnn.close_session()
        except Exception:
            # TODO(vsatish): Except specific exception.
            pass

    @property
    def gqcnn(self):
        """Returns the GQ-CNN."""
        return self._gqcnn

    @property
    def gqcnn_recep_height(self):
        """GQ-CNN input image height in pixels."""
        return self._gqcnn.im_height

    @property
    def gqcnn_recep_width(self):
        """GQ-CNN input image width in pixels."""
        return self._gqcnn.im_width

    @property
    def gqcnn_stride(self):
        """Stride of the GQ-CNN."""
        return self._gqcnn.stride

    @property
    def config(self):
        """Returns the GQCNN quality function parameters."""
        return self._config

    def grasps_to_tensors(self, grasps, state):
        """Converts a list of grasps to an image and pose tensor
        for fast grasp quality evaluation.

        Attributes
        ----------
        grasps : :obj:`list` of :obj:`object`
            List of image grasps to convert.
        state : :obj:`RgbdImageState`
            RGB-D image to plan grasps on.

        Returns
        -------
        image_arr : :obj:`numpy.ndarray`
            4D numpy tensor of image to be predicted.
        pose_arr : :obj:`numpy.ndarray`
            2D numpy tensor of depth values.
        """
        # Parse params.
        gqcnn_im_height = self.gqcnn.im_height
        gqcnn_im_width = self.gqcnn.im_width
        gqcnn_num_channels = self.gqcnn.num_channels
        gqcnn_pose_dim = self.gqcnn.pose_dim
        gripper_mode = self.gqcnn.gripper_mode
        num_grasps = len(grasps)
        depth_im = state.rgbd_im.depth

        # Allocate tensors.
        tensor_start = time()
        image_tensor = np.zeros(
            [num_grasps, gqcnn_im_height, gqcnn_im_width, gqcnn_num_channels])
        pose_tensor = np.zeros([num_grasps, gqcnn_pose_dim])
        scale = gqcnn_im_height / self._crop_height
        depth_im_scaled = depth_im.resize(scale)
        for i, grasp in enumerate(grasps):
            # Shift the grasp center to the image center (in the scaled
            # image), rotate by the grasp angle, then crop to the network
            # input size. (FIX: removed a dead `im_tf = depth_im_scaled`
            # assignment that was immediately overwritten.)
            translation = scale * np.array([
                depth_im.center[0] - grasp.center.data[1],
                depth_im.center[1] - grasp.center.data[0]
            ])
            im_tf = depth_im_scaled.transform(translation, grasp.angle)
            im_tf = im_tf.crop(gqcnn_im_height, gqcnn_im_width)
            image_tensor[i, ...] = im_tf.raw_data

            # Fill the pose tensor; suction modes additionally carry the
            # approach angle. (Merged the previously duplicated branches.)
            if gripper_mode in (GripperMode.PARALLEL_JAW,
                                GripperMode.MULTI_SUCTION,
                                GripperMode.LEGACY_PARALLEL_JAW):
                pose_tensor[i] = grasp.depth
            elif gripper_mode in (GripperMode.SUCTION,
                                  GripperMode.LEGACY_SUCTION):
                pose_tensor[i, ...] = np.array(
                    [grasp.depth, grasp.approach_angle])
            else:
                raise ValueError("Gripper mode %s not supported" %
                                 (gripper_mode))
        self._logger.debug("Tensor conversion took %.3f sec" %
                           (time() - tensor_start))
        return image_tensor, pose_tensor

    def quality(self, state, actions, params):
        """Evaluate the quality of a set of actions according to a GQ-CNN.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            State of the world described by an RGB-D image.
        actions: :obj:`object`
            Set of grasping actions to evaluate.
        params: dict
            Optional parameters for quality evaluation.

        Returns
        -------
        :obj:`list` of float
            Real-valued grasp quality predictions for each
            action, between 0 and 1.
        """
        # Form tensors.
        tensor_start = time()
        image_tensor, pose_tensor = self.grasps_to_tensors(actions, state)
        self._logger.info("Image transformation took %.3f sec" %
                          (time() - tensor_start))

        if params is not None and params["vis"]["tf_images"]:
            # Read vis params.
            k = params["vis"]["k"]
            d = utils.sqrt_ceil(k)

            # Display grasp transformed images.
            from visualization import Visualizer2D as vis2d
            vis2d.figure(size=(GeneralConstants.FIGSIZE,
                               GeneralConstants.FIGSIZE))
            for i, image_tf in enumerate(image_tensor[:k, ...]):
                depth = pose_tensor[i][0]
                vis2d.subplot(d, d, i + 1)
                vis2d.imshow(DepthImage(image_tf))
                vis2d.title("Image %d: d=%.3f" % (i, depth))
            vis2d.show()

        # Predict grasps.
        predict_start = time()
        output_arr = self.gqcnn.predict(image_tensor, pose_tensor)
        q_values = output_arr[:, -1]
        self._logger.info("Inference took %.3f sec" % (time() - predict_start))
        return q_values.tolist()
class NoMagicQualityFunction(GraspQualityFunction):
    """Grasp quality function backed by a `nomagic_net` classifier."""

    def __init__(self, config):
        """Create a quality that uses `nomagic_net` as a quality function.

        Parameters
        ----------
        config : dict
            Must contain the model path, batch size, crop/input sizes,
            pose dim, gripper mode and data normalization statistics.
        """
        from nomagic_submission import ConvNetModel
        from tensorpack import SaverRestore
        from tensorpack.predict import OfflinePredictor
        from tensorpack.predict.config import PredictConfig

        # FIX: was `GraspQualityFunction.__init(self)` (missing `__`),
        # which raised AttributeError on construction.
        GraspQualityFunction.__init__(self)

        # Store parameters.
        self._model_path = config["gqcnn_model"]
        self._batch_size = config["batch_size"]
        self._crop_height = config["crop_height"]
        self._crop_width = config["crop_width"]
        self._im_height = config["im_height"]
        self._im_width = config["im_width"]
        self._num_channels = config["num_channels"]
        self._pose_dim = config["pose_dim"]
        self._gripper_mode = config["gripper_mode"]
        self._data_mean = config["data_mean"]
        self._data_std = config["data_std"]

        # Init config.
        model = ConvNetModel()
        self._config = PredictConfig(model=model,
                                     session_init=SaverRestore(
                                         self._model_path),
                                     output_names=["prob"])
        self._predictor = OfflinePredictor(self._config)

    @property
    def gqcnn(self):
        """Returns the network predictor."""
        return self._predictor

    @property
    def config(self):
        """Returns the GQCNN suction quality function parameters."""
        return self._config

    def grasps_to_tensors(self, grasps, state):
        """Converts a list of grasps to an image and pose tensor
        for fast grasp quality evaluation.

        Attributes
        ----------
        grasps : :obj:`list` of :obj:`object`
            List of image grasps to convert.
        state : :obj:`RgbdImageState`
            RGB-D image to plan grasps on.

        Returns
        -------
        :obj:`numpy.ndarray`
            4D numpy tensor of image to be predicted.
        :obj:`numpy.ndarray`
            2D numpy tensor of depth values.
        """
        # Parse params.
        gqcnn_im_height = self._im_height
        gqcnn_im_width = self._im_width
        gqcnn_num_channels = self._num_channels
        gqcnn_pose_dim = self._pose_dim
        gripper_mode = self._gripper_mode
        num_grasps = len(grasps)
        depth_im = state.rgbd_im.depth

        # Allocate tensors.
        tensor_start = time()
        image_tensor = np.zeros(
            [num_grasps, gqcnn_im_height, gqcnn_im_width, gqcnn_num_channels])
        pose_tensor = np.zeros([num_grasps, gqcnn_pose_dim])
        scale = gqcnn_im_height / self._crop_height
        depth_im_scaled = depth_im.resize(scale)
        for i, grasp in enumerate(grasps):
            # Shift the grasp center to the image center (in the scaled
            # image), rotate by the grasp angle, then crop to the network
            # input size. (FIX: removed a dead `im_tf = depth_im_scaled`
            # assignment that was immediately overwritten.)
            translation = scale * np.array([
                depth_im.center[0] - grasp.center.data[1],
                depth_im.center[1] - grasp.center.data[0]
            ])
            im_tf = depth_im_scaled.transform(translation, grasp.angle)
            im_tf = im_tf.crop(gqcnn_im_height, gqcnn_im_width)

            # Round-trip through PNG encoding, then normalize.
            # NOTE(review): presumably this mirrors the training-data
            # pipeline -- confirm against the nomagic_net training code.
            # `tobytes` replaces the deprecated `tostring` (same bytes).
            im_encoded = cv2.imencode(".png", np.uint8(im_tf.raw_data *
                                                       255))[1].tobytes()
            im_decoded = cv2.imdecode(np.frombuffer(im_encoded, np.uint8),
                                      0) / 255.0
            image_tensor[i, :, :,
                         0] = ((im_decoded - self._data_mean) / self._data_std)

            # Fill the pose tensor; suction modes additionally carry the
            # approach angle. (Merged the previously duplicated branches.)
            if gripper_mode in (GripperMode.PARALLEL_JAW,
                                GripperMode.LEGACY_PARALLEL_JAW):
                pose_tensor[i] = grasp.depth
            elif gripper_mode in (GripperMode.SUCTION,
                                  GripperMode.LEGACY_SUCTION):
                pose_tensor[i, ...] = np.array(
                    [grasp.depth, grasp.approach_angle])
            else:
                raise ValueError("Gripper mode %s not supported" %
                                 (gripper_mode))
        self._logger.debug("Tensor conversion took %.3f sec" %
                           (time() - tensor_start))
        return image_tensor, pose_tensor

    def quality(self, state, actions, params):
        """Evaluate the quality of a set of actions according to a GQ-CNN.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            State of the world described by an RGB-D image.
        actions: :obj:`object`
            Set of grasping actions to evaluate.
        params: dict
            Optional parameters for quality evaluation.

        Returns
        -------
        :obj:`list` of float
            Real-valued grasp quality predictions for each action, between 0
            and 1.
        """
        # Form tensors.
        image_tensor, pose_tensor = self.grasps_to_tensors(actions, state)

        if params is not None and params["vis"]["tf_images"]:
            # Read vis params.
            k = params["vis"]["k"]
            d = utils.sqrt_ceil(k)

            # Display grasp transformed images.
            from visualization import Visualizer2D as vis2d
            vis2d.figure(size=(GeneralConstants.FIGSIZE,
                               GeneralConstants.FIGSIZE))
            for i, image_tf in enumerate(image_tensor[:k, ...]):
                depth = pose_tensor[i][0]
                vis2d.subplot(d, d, i + 1)
                vis2d.imshow(DepthImage(image_tf))
                vis2d.title("Image %d: d=%.3f" % (i, depth))
            vis2d.show()

        # Predict grasps in fixed-size batches.
        # NOTE(review): `null_arr` appears to be a placeholder third input
        # for the predictor -- confirm against ConvNetModel's signature.
        num_actions = len(actions)
        null_arr = -1 * np.ones(self._batch_size)
        predict_start = time()
        output_arr = np.zeros([num_actions, 2])
        cur_i = 0
        end_i = cur_i + min(self._batch_size, num_actions - cur_i)
        while cur_i < num_actions:
            output_arr[cur_i:end_i, :] = self.gqcnn(
                image_tensor[cur_i:end_i, :, :, 0],
                pose_tensor[cur_i:end_i, 0], null_arr)[0]
            cur_i = end_i
            end_i = cur_i + min(self._batch_size, num_actions - cur_i)
        q_values = output_arr[:, -1]
        self._logger.debug("Prediction took %.3f sec" %
                           (time() - predict_start))
        return q_values.tolist()
class FCGQCnnQualityFunction(GraspQualityFunction):
    """Grasp quality function backed by a fully-convolutional GQ-CNN."""

    def __init__(self, config):
        """Grasp quality function using the fully-convolutional gqcnn.

        Parameters
        ----------
        config : dict
            Must contain `gqcnn_model`, `gqcnn_backend` and
            `fully_conv_gqcnn_config`.
        """
        GraspQualityFunction.__init__(self)

        # Store parameters.
        self._config = config
        self._model_dir = config["gqcnn_model"]
        self._backend = config["gqcnn_backend"]
        self._fully_conv_config = config["fully_conv_gqcnn_config"]

        # Load the fully-convolutional network and open its TF session.
        model_cls = get_fc_gqcnn_model(backend=self._backend)
        self._fcgqcnn = model_cls.load(self._model_dir,
                                       self._fully_conv_config)
        self._fcgqcnn.open_session()

    def __del__(self):
        # Best-effort session cleanup; session may already be closed.
        try:
            self._fcgqcnn.close_session()
        except Exception:
            # TODO(vsatish): Except specific exception.
            pass

    @property
    def gqcnn(self):
        """Returns the FC-GQCNN."""
        return self._fcgqcnn

    @property
    def config(self):
        """Returns the FC-GQCNN quality function parameters."""
        return self._config

    def quality(self, images, depths, params=None):
        """Run the FC-GQCNN forward pass on `images` at `depths`."""
        return self._fcgqcnn.predict(images, depths)
class GraspQualityFunctionFactory(object):
    """Factory for grasp quality functions."""

    @staticmethod
    def quality_function(metric_type, config):
        """Instantiate the grasp quality function named by `metric_type`.

        Lambdas defer name resolution so only the selected class is
        touched; `zero` ignores the config, matching the original
        dispatch chain.
        """
        constructors = {
            "zero":
                lambda cfg: ZeroGraspQualityFunction(),
            "parallel_jaw_com_force_closure":
                lambda cfg: ComForceClosureParallelJawQualityFunction(cfg),
            "suction_best_fit_planarity":
                lambda cfg: BestFitPlanaritySuctionQualityFunction(cfg),
            "suction_approach_planarity":
                lambda cfg: ApproachPlanaritySuctionQualityFunction(cfg),
            "suction_com_approach_planarity":
                lambda cfg: ComApproachPlanaritySuctionQualityFunction(cfg),
            "suction_disc_approach_planarity":
                lambda cfg: DiscApproachPlanaritySuctionQualityFunction(cfg),
            "suction_com_disc_approach_planarity":
                lambda cfg: ComDiscApproachPlanaritySuctionQualityFunction(
                    cfg),
            "suction_gaussian_curvature":
                lambda cfg: GaussianCurvatureSuctionQualityFunction(cfg),
            "suction_disc_curvature":
                lambda cfg: DiscCurvatureSuctionQualityFunction(cfg),
            "suction_com_disc_curvature":
                lambda cfg: ComDiscCurvatureSuctionQualityFunction(cfg),
            "gqcnn":
                lambda cfg: GQCnnQualityFunction(cfg),
            "nomagic":
                lambda cfg: NoMagicQualityFunction(cfg),
            "fcgqcnn":
                lambda cfg: FCGQCnnQualityFunction(cfg),
        }
        try:
            make = constructors[metric_type]
        except KeyError:
            raise ValueError("Grasp function type %s not supported!" %
                             (metric_type))
        return make(config)
================================================
FILE: gqcnn/grasping/image_grasp_sampler.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Classes for sampling a set of grasps directly from images to generate data for
a neural network.
Author
------
Jeff Mahler & Sherdil Niyaz
"""
from abc import ABC, abstractmethod
import random
from time import time
import matplotlib.pyplot as plt
import numpy as np
import scipy.ndimage.filters as snf
import scipy.spatial.distance as ssd
import scipy.stats as ss
from autolab_core import (Point, RigidTransform, Logger,
DepthImage, RgbdImage, GdImage)
from visualization import Visualizer2D as vis
from .grasp import Grasp2D, SuctionPoint2D, MultiSuctionPoint2D
def force_closure(p1, p2, n1, n2, mu):
    """Computes whether or not the point and normal pairs are in force
    closure.

    Parameters
    ----------
    p1, p2 : :obj:`numpy.ndarray`
        Contact points.
    n1, n2 : :obj:`numpy.ndarray`
        Surface normals at the contacts.
    mu : float
        Coulomb friction coefficient.
    """
    # Unit vector pointing from the first contact to the second.
    line = p2 - p1
    line = line / np.linalg.norm(line)

    # Half-angle of the friction cone.
    cone_half_angle = np.arctan(mu)

    # Clamp dot products to [-1, 1] so arccos never sees FP overshoot.
    cos_1 = np.clip(n1.dot(-line), -1.0, 1.0)
    cos_2 = np.clip(n2.dot(line), -1.0, 1.0)

    # Both normals must lie inside their friction cones.
    in_cone_1 = np.arccos(cos_1) < cone_half_angle
    in_cone_2 = np.arccos(cos_2) < cone_half_angle
    return in_cone_1 and in_cone_2
class DepthSamplingMode(object):
    """Modes for sampling grasp depth.

    String constants used as the `depth_sampling_mode` config value.
    """
    UNIFORM = "uniform"
    MIN = "min"
    MAX = "max"
class ImageGraspSampler(ABC):
    """Wraps image to crane grasp candidate generation for easy deployment of
    GQ-CNN.

    Attributes
    ----------
    config : :obj:`autolab_core.YamlConfig`
        A dictionary-like object containing the parameters of the sampler.
    """

    def __init__(self, config):
        # Keep the sampler parameters and a per-class logger around.
        self._config = config
        self._logger = Logger.get_logger(self.__class__.__name__)

    def sample(self,
               rgbd_im,
               camera_intr,
               num_samples,
               segmask=None,
               seed=None,
               visualize=False,
               constraint_fn=None):
        """Samples a set of 2D grasps from a given RGB-D image.

        Parameters
        ----------
        rgbd_im : :obj:`autolab_core.RgbdImage`
            RGB-D image to sample from.
        camera_intr : :obj:`autolab_core.CameraIntrinsics`
            Intrinsics of the camera that captured the images.
        num_samples : int
            Number of grasps to sample.
        segmask : :obj:`autolab_core.BinaryImage`
            Binary image segmenting out the object of interest.
        seed : int
            Number to use in random seed (`None` if no seed).
        visualize : bool
            Whether or not to show intermediate samples (for debugging).
        constraint_fn : :obj:`GraspConstraintFn`
            Constraint function to apply to grasps.

        Returns
        -------
        :obj:`list` of :obj:`Grasp2D`
            The list of grasps in image space.
        """
        # Seed both RNGs so candidate generation is reproducible.
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)

        # Delegate candidate generation to the subclass and time it.
        self._logger.debug("Sampling 2d candidates")
        start_time = time()
        candidates = self._sample(rgbd_im,
                                  camera_intr,
                                  num_samples,
                                  segmask=segmask,
                                  visualize=visualize,
                                  constraint_fn=constraint_fn)
        elapsed = time() - start_time
        self._logger.debug("Sampled %d grasps from image" % (len(candidates)))
        self._logger.debug("Sampling grasps took %.3f sec" % (elapsed))
        return candidates

    @abstractmethod
    def _sample(self,
                rgbd_im,
                camera_intr,
                num_samples,
                segmask=None,
                visualize=False,
                constraint_fn=None):
        """Sample a set of 2D grasp candidates from a depth image.

        Subclasses must override.

        Parameters
        ----------
        rgbd_im : :obj:`autolab_core.RgbdImage`
            RGB-D image to sample from.
        camera_intr : :obj:`autolab_core.CameraIntrinsics`
            Intrinsics of the camera that captured the images.
        num_samples : int
            Number of grasps to sample.
        segmask : :obj:`autolab_core.BinaryImage`
            Binary image segmenting out the object of interest.
        visualize : bool
            Whether or not to show intermediate samples (for debugging).
        constraint_fn : :obj:`GraspConstraintFn`
            Constraint function to apply to grasps.

        Returns
        -------
        :obj:`list` of :obj:`Grasp2D`
            List of 2D grasp candidates.
        """
        pass
class AntipodalDepthImageGraspSampler(ImageGraspSampler):
    """Grasp sampler for antipodal point pairs from depth image gradients.
    Notes
    -----
    Required configuration parameters are specified in Other Parameters.
    Other Parameters
    ----------------
    gripper_width : float
        Width of the gripper, in meters.
    friction_coef : float
        Friction coefficient for 2D force closure.
    depth_grad_thresh : float
        Threshold for depth image gradients to determine edge points for
        sampling.
    depth_grad_gaussian_sigma : float
        Sigma used for pre-smoothing the depth image for better gradients.
    downsample_rate : float
        Factor to downsample the depth image by before sampling grasps.
    max_rejection_samples : int
        Ceiling on the number of grasps to check in antipodal grasp rejection
        sampling.
    max_dist_from_center : int
        Maximum allowable distance of a grasp from the image center.
    min_grasp_dist : float
        Threshold on the grasp distance.
    angle_dist_weight : float
        Amount to weight the angle difference in grasp distance computation.
    depth_samples_per_grasp : int
        Number of depth samples to take per grasp.
    min_depth_offset : float
        Offset from the minimum depth at the grasp center pixel to use in depth
        sampling.
    max_depth_offset : float
        Offset from the maximum depth across all edges.
    depth_sample_win_height : float
        Height of a window around the grasp center pixel used to determine min
        depth.
    depth_sample_win_width : float
        Width of a window around the grasp center pixel used to determine min
        depth.
    depth_sampling_mode : str
        Name of depth sampling mode (uniform, min, max).
    """

    def __init__(self, config, gripper_width=np.inf):
        # NOTE(review): the `gripper_width` keyword is never read; the width
        # always comes from `config["gripper_width"]` below — confirm before
        # relying on the argument.
        # Init superclass.
        ImageGraspSampler.__init__(self, config)
        # Antipodality params.
        self._gripper_width = self._config["gripper_width"]
        self._friction_coef = self._config["friction_coef"]
        self._depth_grad_thresh = self._config["depth_grad_thresh"]
        self._depth_grad_gaussian_sigma = self._config[
            "depth_grad_gaussian_sigma"]
        self._downsample_rate = self._config["downsample_rate"]
        # Inverse of the downsample rate; used both to resize the image
        # before edge detection and to map edge pixels back to full
        # resolution.
        self._rescale_factor = 1.0 / self._downsample_rate
        self._max_rejection_samples = self._config["max_rejection_samples"]
        # Optional: minimum number of edge pixels required before falling
        # back to edge detection on the full-resolution image.
        self._min_num_edge_pixels = 0
        if "min_num_edge_pixels" in self._config:
            self._min_num_edge_pixels = self._config["min_num_edge_pixels"]
        # Distance thresholds for rejection sampling.
        self._max_dist_from_center = self._config["max_dist_from_center"]
        self._min_dist_from_boundary = self._config["min_dist_from_boundary"]
        self._min_grasp_dist = self._config["min_grasp_dist"]
        self._angle_dist_weight = self._config["angle_dist_weight"]
        # Depth sampling params.
        self._depth_samples_per_grasp = max(
            self._config["depth_samples_per_grasp"], 1)
        self._min_depth_offset = self._config["min_depth_offset"]
        self._max_depth_offset = self._config["max_depth_offset"]
        # Half-height and half-width (in pixels) of the window used to find
        # the minimum depth around a grasp center.
        self._h = self._config["depth_sample_win_height"]
        self._w = self._config["depth_sample_win_width"]
        self._depth_sampling_mode = self._config["depth_sampling_mode"]
        # Perturbation (optional; a sigma of zero disables it).
        self._grasp_center_sigma = 0.0
        if "grasp_center_sigma" in self._config:
            self._grasp_center_sigma = self._config["grasp_center_sigma"]
        self._grasp_angle_sigma = 0.0
        if "grasp_angle_sigma" in self._config:
            self._grasp_angle_sigma = np.deg2rad(
                self._config["grasp_angle_sigma"])

    def _surface_normals(self, depth_im, edge_pixels):
        """Return an array of the surface normals at the edge pixels."""
        # Compute the gradients.
        grad = np.gradient(depth_im.data.astype(np.float32))
        # Compute surface normals.
        # Each normal is the 2D depth gradient (dy, dx) at the pixel,
        # normalized to unit length; it points across the depth edge in
        # (row, col) image coordinates.
        normals = np.zeros([edge_pixels.shape[0], 2])
        for i, pix in enumerate(edge_pixels):
            dx = grad[1][pix[0], pix[1]]
            dy = grad[0][pix[0], pix[1]]
            normal_vec = np.array([dy, dx])
            # Degenerate (zero) gradient: fall back to an arbitrary unit
            # vector so the normalization below is well-defined.
            if np.linalg.norm(normal_vec) == 0:
                normal_vec = np.array([1, 0])
            normal_vec = normal_vec / np.linalg.norm(normal_vec)
            normals[i, :] = normal_vec
        return normals

    def _sample_depth(self, min_depth, max_depth):
        """Samples a depth value between the min and max."""
        # Default (covers DepthSamplingMode.MAX) is the max depth.
        depth_sample = max_depth
        if self._depth_sampling_mode == DepthSamplingMode.UNIFORM:
            depth_sample = min_depth + (max_depth -
                                        min_depth) * np.random.rand()
        elif self._depth_sampling_mode == DepthSamplingMode.MIN:
            depth_sample = min_depth
        # NOTE(review): `_sample_antipodal_grasps` samples its depths inline
        # and never calls this helper, so `depth_sampling_mode` appears to
        # have no effect on the grasps it produces — confirm intended.
        return depth_sample

    def _sample(self,
                image,
                camera_intr,
                num_samples,
                segmask=None,
                visualize=False,
                constraint_fn=None):
        """Sample a set of 2D grasp candidates from a depth image.
        Parameters
        ----------
        image : :obj:`autolab_core.RgbdImage` or :obj:`autolab_core.DepthImage` or :obj:`autolab_core.GdImage` # noqa: E501
            RGB-D or Depth image to sample from.
        camera_intr : :obj:`autolab_core.CameraIntrinsics`
            Intrinsics of the camera that captured the images.
        num_samples : int
            Number of grasps to sample.
        segmask : :obj:`autolab_core.BinaryImage`
            Binary image segmenting out the object of interest.
        visualize : bool
            Whether or not to show intermediate samples (for debugging).
        constraint_fn : :obj:`GraspConstraintFn`
            Constraint function to apply to grasps.
        Returns
        -------
        :obj:`list` of :obj:`Grasp2D`
            List of 2D grasp candidates.
        """
        # Extract the depth channel regardless of input image type.
        if isinstance(image, RgbdImage) or isinstance(image, GdImage):
            depth_im = image.depth
        elif isinstance(image, DepthImage):
            depth_im = image
        else:
            raise ValueError(
                "image type must be one of [RgbdImage, DepthImage, GdImage]")
        # Sample antipodal pairs in image space.
        grasps = self._sample_antipodal_grasps(depth_im,
                                               camera_intr,
                                               num_samples,
                                               segmask=segmask,
                                               visualize=visualize,
                                               constraint_fn=constraint_fn)
        return grasps

    def _sample_antipodal_grasps(self,
                                 depth_im,
                                 camera_intr,
                                 num_samples,
                                 segmask=None,
                                 visualize=False,
                                 constraint_fn=None):
        """Sample a set of 2D grasp candidates from a depth image by finding
        depth edges, then uniformly sampling point pairs and keeping only
        antipodal grasps with width less than the maximum allowable.
        Parameters
        ----------
        depth_im : :obj:`autolab_core.DepthImage`
            Depth image to sample from.
        camera_intr : :obj:`autolab_core.CameraIntrinsics`
            Intrinsics of the camera that captured the images.
        num_samples : int
            Number of grasps to sample.
        segmask : :obj:`autolab_core.BinaryImage`
            Binary image segmenting out the object of interest.
        visualize : bool
            Whether or not to show intermediate samples (for debugging).
        constraint_fn : :obj:`GraspConstraintFn`
            Constraint function to apply to grasps.
        Returns
        -------
        :obj:`list` of :obj:`Grasp2D`
            List of 2D grasp candidates.
        """
        # NOTE(review): `constraint_fn` is accepted but never applied in this
        # method — confirm whether candidates should be filtered through it.
        # Compute edge pixels.
        edge_start = time()
        # Smooth the depth image, downsample, and threshold gradients; the
        # surviving (zeroed) pixels are depth discontinuities ("edges").
        depth_im = depth_im.apply(snf.gaussian_filter,
                                  sigma=self._depth_grad_gaussian_sigma)
        scale_factor = self._rescale_factor
        depth_im_downsampled = depth_im.resize(scale_factor)
        depth_im_threshed = depth_im_downsampled.threshold_gradients(
            self._depth_grad_thresh)
        # Map edge pixels back to full-resolution (row, col) coordinates.
        edge_pixels = (1.0 / scale_factor) * depth_im_threshed.zero_pixels()
        edge_pixels = edge_pixels.astype(np.int16)
        depth_im_mask = depth_im.copy()
        if segmask is not None:
            # Keep only edge pixels inside the segmask.
            edge_pixels = np.array(
                [p for p in edge_pixels if np.any(segmask[p[0], p[1]] > 0)])
            depth_im_mask = depth_im.mask_binary(segmask)
        # Re-threshold edges if there are too few.
        if edge_pixels.shape[0] < self._min_num_edge_pixels:
            self._logger.info("Too few edge pixels!")
            # Retry edge detection on the full-resolution image.
            depth_im_threshed = depth_im.threshold_gradients(
                self._depth_grad_thresh)
            edge_pixels = depth_im_threshed.zero_pixels()
            edge_pixels = edge_pixels.astype(np.int16)
            depth_im_mask = depth_im.copy()
            if segmask is not None:
                edge_pixels = np.array([
                    p for p in edge_pixels if np.any(segmask[p[0], p[1]] > 0)
                ])
                depth_im_mask = depth_im.mask_binary(segmask)
        num_pixels = edge_pixels.shape[0]
        self._logger.debug("Depth edge detection took %.3f sec" %
                           (time() - edge_start))
        self._logger.debug("Found %d edge pixels" % (num_pixels))
        # Compute point cloud.
        point_cloud_im = camera_intr.deproject_to_image(depth_im_mask)
        # Compute the min and max depth over the (masked) image.
        depth_data = depth_im_mask.data[depth_im_mask.data > 0]
        if depth_data.shape[0] == 0:
            return []
        min_depth = np.min(depth_data) + self._min_depth_offset
        max_depth = np.max(depth_data) + self._max_depth_offset
        # Compute surface normals.
        normal_start = time()
        edge_normals = self._surface_normals(depth_im, edge_pixels)
        self._logger.debug("Normal computation took %.3f sec" %
                           (time() - normal_start))
        if visualize:
            # Subsample for a readable plot.
            edge_pixels = edge_pixels[::2, :]
            edge_normals = edge_normals[::2, :]
            vis.figure()
            vis.subplot(1, 3, 1)
            vis.imshow(depth_im)
            if num_pixels > 0:
                vis.scatter(edge_pixels[:, 1], edge_pixels[:, 0], s=2, c="b")
                X = [pix[1] for pix in edge_pixels]
                Y = [pix[0] for pix in edge_pixels]
                U = [3 * pix[1] for pix in edge_normals]
                V = [-3 * pix[0] for pix in edge_normals]
                plt.quiver(X,
                           Y,
                           U,
                           V,
                           units="x",
                           scale=0.25,
                           width=0.5,
                           zorder=2,
                           color="r")
            vis.title("Edge pixels and normals")
            vis.subplot(1, 3, 2)
            vis.imshow(depth_im_threshed)
            vis.title("Edge map")
            vis.subplot(1, 3, 3)
            vis.imshow(segmask)
            vis.title("Segmask")
            vis.show()
        # Exit if no edge pixels.
        if num_pixels == 0:
            return []
        # Form set of valid candidate point pairs.
        pruning_start = time()
        # Maximum gripper opening in pixels at the minimum depth.
        max_grasp_width_px = Grasp2D(Point(np.zeros(2)),
                                     0.0,
                                     min_depth,
                                     width=self._gripper_width,
                                     camera_intr=camera_intr).width_px
        # Pairwise inner products of edge normals and pixel distances; a
        # candidate pair must have nearly opposing normals (within the
        # friction cone) and lie within the gripper opening.
        normal_ip = edge_normals.dot(edge_normals.T)
        dists = ssd.squareform(ssd.pdist(edge_pixels))
        valid_indices = np.where(
            (normal_ip < -np.cos(np.arctan(self._friction_coef)))
            & (dists < max_grasp_width_px) & (dists > 0.0))
        valid_indices = np.c_[valid_indices[0], valid_indices[1]]
        self._logger.debug("Normal pruning %.3f sec" %
                           (time() - pruning_start))
        # Exit if no candidate pairs survived pruning.
        num_pairs = valid_indices.shape[0]
        if num_pairs == 0:
            return []
        # Prune out grasps.
        contact_points1 = edge_pixels[valid_indices[:, 0], :]
        contact_points2 = edge_pixels[valid_indices[:, 1], :]
        contact_normals1 = edge_normals[valid_indices[:, 0], :]
        contact_normals2 = edge_normals[valid_indices[:, 1], :]
        # Angles between each contact normal and the grasp axis; both must
        # fall inside the friction-cone half-angle alpha for antipodality.
        v = contact_points1 - contact_points2
        v_norm = np.linalg.norm(v, axis=1)
        v = v / np.tile(v_norm[:, np.newaxis], [1, 2])
        ip1 = np.sum(contact_normals1 * v, axis=1)
        ip2 = np.sum(contact_normals2 * (-v), axis=1)
        # Clip inner products into [-1, 1] before arccos.
        ip1[ip1 > 1.0] = 1.0
        ip1[ip1 < -1.0] = -1.0
        ip2[ip2 > 1.0] = 1.0
        ip2[ip2 < -1.0] = -1.0
        beta1 = np.arccos(ip1)
        beta2 = np.arccos(ip2)
        alpha = np.arctan(self._friction_coef)
        antipodal_indices = np.where((beta1 < alpha) & (beta2 < alpha))[0]
        # Exit if no antipodal pairs.
        num_pairs = antipodal_indices.shape[0]
        if num_pairs == 0:
            return []
        # Rejection-sample candidate pairs up to the configured ceiling.
        sample_size = min(self._max_rejection_samples, num_pairs)
        grasp_indices = np.random.choice(antipodal_indices,
                                         size=sample_size,
                                         replace=False)
        self._logger.debug("Grasp comp took %.3f sec" %
                           (time() - pruning_start))
        # Compute grasps.
        sample_start = time()
        k = 0
        grasps = []
        while k < sample_size and len(grasps) < num_samples:
            grasp_ind = grasp_indices[k]
            p1 = contact_points1[grasp_ind, :]
            p2 = contact_points2[grasp_ind, :]
            n1 = contact_normals1[grasp_ind, :]
            n2 = contact_normals2[grasp_ind, :]
            # width = np.linalg.norm(p1 - p2)
            k += 1
            # Compute center and axis.
            # Integer midpoint in (row, col) pixel coordinates.
            grasp_center = (p1 + p2) // 2
            grasp_axis = p2 - p1
            grasp_axis = grasp_axis / np.linalg.norm(grasp_axis)
            grasp_theta = np.pi / 2
            if grasp_axis[1] != 0:
                grasp_theta = np.arctan2(grasp_axis[0], grasp_axis[1])
            # `Point` takes (x, y), so swap from (row, col).
            grasp_center_pt = Point(np.array(
                [grasp_center[1], grasp_center[0]]),
                                    frame=camera_intr.frame)
            # Compute grasp points in 3D.
            x1 = point_cloud_im[p1[0], p1[1]]
            x2 = point_cloud_im[p2[0], p2[1]]
            # Reject pairs wider than the physical gripper opening.
            if np.linalg.norm(x2 - x1) > self._gripper_width:
                continue
            # Perturb.
            if self._grasp_center_sigma > 0.0:
                grasp_center_pt = grasp_center_pt + ss.multivariate_normal.rvs(
                    cov=self._grasp_center_sigma * np.diag(np.ones(2)))
            if self._grasp_angle_sigma > 0.0:
                grasp_theta = grasp_theta + ss.norm.rvs(
                    scale=self._grasp_angle_sigma)
            # Check center px dist from boundary.
            if (grasp_center[0] < self._min_dist_from_boundary
                    or grasp_center[1] < self._min_dist_from_boundary
                    or grasp_center[0] >
                    depth_im.height - self._min_dist_from_boundary
                    or grasp_center[1] >
                    depth_im.width - self._min_dist_from_boundary):
                continue
            # Sample depths.
            for i in range(self._depth_samples_per_grasp):
                # Get depth in the neighborhood of the center pixel.
                depth_win = depth_im.data[grasp_center[0] -
                                          self._h:grasp_center[0] + self._h,
                                          grasp_center[1] -
                                          self._w:grasp_center[1] + self._w]
                center_depth = np.min(depth_win)
                if center_depth == 0 or np.isnan(center_depth):
                    continue
                # Sample depth between the min and max.
                # NOTE(review): the depth is always drawn uniformly here;
                # `self._sample_depth` (and thus `depth_sampling_mode`) is
                # bypassed — confirm intended.
                min_depth = center_depth + self._min_depth_offset
                max_depth = center_depth + self._max_depth_offset
                sample_depth = min_depth + (max_depth -
                                            min_depth) * np.random.rand()
                candidate_grasp = Grasp2D(grasp_center_pt,
                                          grasp_theta,
                                          sample_depth,
                                          width=self._gripper_width,
                                          camera_intr=camera_intr,
                                          contact_points=[p1, p2],
                                          contact_normals=[n1, n2])
                if visualize:
                    vis.figure()
                    vis.imshow(depth_im)
                    vis.grasp(candidate_grasp)
                    vis.scatter(p1[1], p1[0], c="b", s=25)
                    vis.scatter(p2[1], p2[0], c="b", s=25)
                    vis.show()
                grasps.append(candidate_grasp)
        # Return sampled grasps.
        self._logger.debug("Loop took %.3f sec" % (time() - sample_start))
        return grasps
class DepthImageSuctionPointSampler(ImageGraspSampler):
    """Grasp sampler for suction points from depth images.
    Notes
    -----
    Required configuration parameters are specified in Other Parameters.
    Other Parameters
    ----------------
    max_suction_dir_optical_axis_angle : float
        Maximum angle, in degrees, between the suction approach axis and the
        camera optical axis.
    delta_theta : float
        Maximum deviation from zero for the azimuth angle of a rotational
        perturbation to the surface normal (for sample diversity).
    delta_phi : float
        Maximum deviation from zero for the elevation angle of a rotational
        perturbation to the surface normal (for sample diversity).
    sigma_depth : float
        Standard deviation for a normal distribution over depth values (for
        sample diversity).
    min_suction_dist : float
        Minimum admissible distance between suction points (for sample
        diversity).
    angle_dist_weight : float
        Amount to weight the angle difference in suction point distance
        computation.
    depth_gaussian_sigma : float
        Sigma used for pre-smoothing the depth image for better gradients.
    """

    def __init__(self, config):
        # Init superclass.
        ImageGraspSampler.__init__(self, config)
        # Read params.
        self._max_suction_dir_optical_axis_angle = np.deg2rad(
            self._config["max_suction_dir_optical_axis_angle"])
        self._max_dist_from_center = self._config["max_dist_from_center"]
        self._min_dist_from_boundary = self._config["min_dist_from_boundary"]
        self._max_num_samples = self._config["max_num_samples"]
        # Uniform random variables for rotational perturbations.
        # NOTE(review): `_theta_rv`, `_phi_rv`, `_max_dist_from_center`,
        # `_min_suction_dist` and `_angle_dist_weight` are initialized but not
        # used by the sampling code in this class — confirm.
        self._min_theta = -np.deg2rad(self._config["delta_theta"])
        self._max_theta = np.deg2rad(self._config["delta_theta"])
        self._theta_rv = ss.uniform(loc=self._min_theta,
                                    scale=self._max_theta - self._min_theta)
        self._min_phi = -np.deg2rad(self._config["delta_phi"])
        self._max_phi = np.deg2rad(self._config["delta_phi"])
        self._phi_rv = ss.uniform(loc=self._min_phi,
                                  scale=self._max_phi - self._min_phi)
        self._mean_depth = 0.0
        if "mean_depth" in self._config:
            self._mean_depth = self._config["mean_depth"]
        self._sigma_depth = self._config["sigma_depth"]
        # NOTE(review): `scipy.stats.norm(loc, scale)` expects a standard
        # deviation as its second argument; passing `sigma_depth**2` makes
        # the perturbation scale equal to the variance — confirm intended.
        self._depth_rv = ss.norm(self._mean_depth, self._sigma_depth**2)
        self._min_suction_dist = self._config["min_suction_dist"]
        self._angle_dist_weight = self._config["angle_dist_weight"]
        self._depth_gaussian_sigma = self._config["depth_gaussian_sigma"]

    def _sample(self,
                image,
                camera_intr,
                num_samples,
                segmask=None,
                visualize=False,
                constraint_fn=None):
        """Sample a set of 2D grasp candidates from a depth image.
        Parameters
        ----------
        image : :obj:`autolab_core.RgbdImage` or :obj:`autolab_core.DepthImage`
            RGB-D or D image to sample from.
        camera_intr : :obj:`autolab_core.CameraIntrinsics`
            Intrinsics of the camera that captured the images.
        num_samples : int
            Number of grasps to sample.
        segmask : :obj:`autolab_core.BinaryImage`
            Binary image segmenting out the object of interest.
        visualize : bool
            Whether or not to show intermediate samples (for debugging).
        constraint_fn : :obj:`GraspConstraintFn`
            Constraint function to apply to grasps.
        Returns
        -------
        :obj:`list` of :obj:`SuctionPoint2D`
            List of 2D suction point candidates.
        """
        # Extract the depth channel regardless of input image type.
        if isinstance(image, RgbdImage) or isinstance(image, GdImage):
            depth_im = image.depth
        elif isinstance(image, DepthImage):
            depth_im = image
        else:
            raise ValueError(
                "image type must be one of [RgbdImage, DepthImage, GdImage]")
        # Sample suction points in image space.
        grasps = self._sample_suction_points(depth_im,
                                             camera_intr,
                                             num_samples,
                                             segmask=segmask,
                                             visualize=visualize,
                                             constraint_fn=constraint_fn)
        return grasps

    def _sample_suction_points(self,
                               depth_im,
                               camera_intr,
                               num_samples,
                               segmask=None,
                               visualize=False,
                               constraint_fn=None):
        """Sample a set of 2D suction point candidates from a depth image by
        choosing points on an object surface uniformly at random
        and then sampling around the surface normal.
        Parameters
        ----------
        depth_im : :obj:`autolab_core.DepthImage`
            Depth image to sample from.
        camera_intr : :obj:`autolab_core.CameraIntrinsics`
            Intrinsics of the camera that captured the images.
        num_samples : int
            Number of grasps to sample.
        segmask : :obj:`autolab_core.BinaryImage`
            Binary image segmenting out the object of interest.
        visualize : bool
            Whether or not to show intermediate samples (for debugging).
        constraint_fn : :obj:`GraspConstraintFn`
            Constraint function to apply to grasps.
        Returns
        -------
        :obj:`list` of :obj:`SuctionPoint2D`
            List of 2D suction point candidates.
        """
        # Pre-smooth and mask the depth image.
        filter_start = time()
        if self._depth_gaussian_sigma > 0:
            depth_im_mask = depth_im.apply(snf.gaussian_filter,
                                           sigma=self._depth_gaussian_sigma)
        else:
            depth_im_mask = depth_im.copy()
        if segmask is not None:
            # NOTE(review): this masks the *unsmoothed* `depth_im`,
            # discarding the Gaussian smoothing above — confirm intended.
            depth_im_mask = depth_im.mask_binary(segmask)
        self._logger.debug("Filtering took %.3f sec" % (time() - filter_start))
        if visualize:
            vis.figure()
            vis.subplot(1, 2, 1)
            vis.imshow(depth_im)
            vis.subplot(1, 2, 2)
            vis.imshow(depth_im_mask)
            vis.show()
        # Project to get the point cloud.
        cloud_start = time()
        point_cloud_im = camera_intr.deproject_to_image(depth_im_mask)
        normal_cloud_im = point_cloud_im.normal_cloud_im()
        nonzero_px = depth_im_mask.nonzero_pixels()
        num_nonzero_px = nonzero_px.shape[0]
        if num_nonzero_px == 0:
            return []
        self._logger.debug("Normal cloud took %.3f sec" %
                           (time() - cloud_start))
        # Randomly sample points and add to image.
        sample_start = time()
        suction_points = []
        k = 0
        sample_size = min(self._max_num_samples, num_nonzero_px)
        indices = np.random.choice(num_nonzero_px,
                                   size=sample_size,
                                   replace=False)
        while k < sample_size and len(suction_points) < num_samples:
            # Sample a point uniformly at random.
            ind = indices[k]
            # `nonzero_px` rows are (row, col); `Point` takes (x, y).
            center_px = np.array([nonzero_px[ind, 1], nonzero_px[ind, 0]])
            center = Point(center_px, frame=camera_intr.frame)
            # Approach axis is the negated surface normal at the point.
            axis = -normal_cloud_im[center.y, center.x]
            depth = point_cloud_im[center.y, center.x][2]
            # Update number of tries.
            k += 1
            # Check center px dist from boundary.
            if (center_px[0] < self._min_dist_from_boundary
                    or center_px[1] < self._min_dist_from_boundary
                    or center_px[1] >
                    depth_im.height - self._min_dist_from_boundary
                    or center_px[0] >
                    depth_im.width - self._min_dist_from_boundary):
                continue
            # Perturb depth.
            delta_depth = self._depth_rv.rvs(size=1)[0]
            depth = depth + delta_depth
            # Keep if the angle between the camera optical axis and the suction
            # direction is less than a threshold.
            dot = max(min(axis.dot(np.array([0, 0, 1])), 1.0), -1.0)
            psi = np.arccos(dot)
            if psi < self._max_suction_dir_optical_axis_angle:
                # Create candidate grasp.
                candidate = SuctionPoint2D(center,
                                           axis,
                                           depth,
                                           camera_intr=camera_intr)
                # Check constraint satisfaction.
                if constraint_fn is None or constraint_fn(candidate):
                    if visualize:
                        vis.figure()
                        vis.imshow(depth_im)
                        vis.scatter(center.x, center.y)
                        vis.show()
                    suction_points.append(candidate)
        self._logger.debug("Loop took %.3f sec" % (time() - sample_start))
        return suction_points
class DepthImageMultiSuctionPointSampler(ImageGraspSampler):
    """Grasp sampler for suction points from depth images.
    Notes
    -----
    Required configuration parameters are specified in Other Parameters.
    Other Parameters
    ----------------
    max_suction_dir_optical_axis_angle : float
        Maximum angle, in degrees, between the suction approach axis and the
        camera optical axis.
    delta_theta : float
        Maximum deviation from zero for the azimuth angle of a rotational
        perturbation to the surface normal (for sample diversity).
    delta_phi : float
        Maximum deviation from zero for the elevation angle of a rotational
        perturbation to the surface normal (for sample diversity).
    sigma_depth : float
        Standard deviation for a normal distribution over depth values (for
        sample diversity).
    min_suction_dist : float
        Minimum admissible distance between suction points (for sample
        diversity).
    angle_dist_weight : float
        Amount to weight the angle difference in suction point distance
        computation.
    depth_gaussian_sigma : float
        Sigma used for pre-smoothing the depth image for better gradients.
    """

    def __init__(self, config):
        # Init superclass.
        ImageGraspSampler.__init__(self, config)
        # Read params.
        self._max_suction_dir_optical_axis_angle = np.deg2rad(
            self._config["max_suction_dir_optical_axis_angle"])
        self._max_dist_from_center = self._config["max_dist_from_center"]
        self._min_dist_from_boundary = self._config["min_dist_from_boundary"]
        self._max_num_samples = self._config["max_num_samples"]
        # Uniform random variables for rotational perturbations.
        # NOTE(review): `_theta_rv`, `_phi_rv`, `_depth_rv`,
        # `_max_dist_from_center`, `_min_suction_dist` and
        # `_angle_dist_weight` are initialized but not used by the sampling
        # code in this class — confirm.
        self._min_theta = -np.deg2rad(self._config["delta_theta"])
        self._max_theta = np.deg2rad(self._config["delta_theta"])
        self._theta_rv = ss.uniform(loc=self._min_theta,
                                    scale=self._max_theta - self._min_theta)
        self._min_phi = -np.deg2rad(self._config["delta_phi"])
        self._max_phi = np.deg2rad(self._config["delta_phi"])
        self._phi_rv = ss.uniform(loc=self._min_phi,
                                  scale=self._max_phi - self._min_phi)
        self._mean_depth = 0.0
        if "mean_depth" in self._config:
            self._mean_depth = self._config["mean_depth"]
        self._sigma_depth = self._config["sigma_depth"]
        # NOTE(review): `scipy.stats.norm(loc, scale)` expects a standard
        # deviation as its second argument; passing `sigma_depth**2` makes
        # the perturbation scale equal to the variance — confirm intended.
        self._depth_rv = ss.norm(self._mean_depth, self._sigma_depth**2)
        self._min_suction_dist = self._config["min_suction_dist"]
        self._angle_dist_weight = self._config["angle_dist_weight"]
        self._depth_gaussian_sigma = self._config["depth_gaussian_sigma"]

    def _sample(self,
                image,
                camera_intr,
                num_samples,
                segmask=None,
                visualize=False,
                constraint_fn=None):
        """Sample a set of 2D grasp candidates from a depth image.
        Parameters
        ----------
        image : :obj:`autolab_core.RgbdImage` or :obj:`autolab_core.DepthImage`
            RGB-D or D image to sample from.
        camera_intr : :obj:`autolab_core.CameraIntrinsics`
            Intrinsics of the camera that captured the images.
        num_samples : int
            Number of grasps to sample.
        segmask : :obj:`autolab_core.BinaryImage`
            Binary image segmenting out the object of interest.
        visualize : bool
            Whether or not to show intermediate samples (for debugging).
        constraint_fn : :obj:`GraspConstraintFn`
            Constraint function to apply to grasps.
        Returns
        -------
        :obj:`list` of :obj:`MultiSuctionPoint2D`
            List of 2D suction point candidates.
        """
        # Extract the depth channel regardless of input image type.
        if isinstance(image, RgbdImage) or isinstance(image, GdImage):
            depth_im = image.depth
        elif isinstance(image, DepthImage):
            depth_im = image
        else:
            raise ValueError(
                "image type must be one of [RgbdImage, DepthImage, GdImage]")
        # Sample suction points in image space.
        grasps = self._sample_suction_points(depth_im,
                                             camera_intr,
                                             num_samples,
                                             segmask=segmask,
                                             visualize=visualize,
                                             constraint_fn=constraint_fn)
        return grasps

    def _sample_suction_points(self,
                               depth_im,
                               camera_intr,
                               num_samples,
                               segmask=None,
                               visualize=False,
                               constraint_fn=None):
        """Sample a set of 2D suction point candidates from a depth image by
        choosing points on an object surface uniformly at random
        and then sampling around the surface normal.
        Parameters
        ----------
        depth_im : :obj:`autolab_core.DepthImage`
            Depth image to sample from.
        camera_intr : :obj:`autolab_core.CameraIntrinsics`
            Intrinsics of the camera that captured the images.
        num_samples : int
            Number of grasps to sample.
        segmask : :obj:`autolab_core.BinaryImage`
            Binary image segmenting out the object of interest.
        visualize : bool
            Whether or not to show intermediate samples (for debugging).
        constraint_fn : :obj:`GraspConstraintFn`
            Constraint function to apply to grasps.
        Returns
        -------
        :obj:`list` of :obj:`MultiSuctionPoint2D`
            List of 2D suction point candidates.
        """
        # Pre-smooth and mask the depth image.
        filter_start = time()
        if self._depth_gaussian_sigma > 0:
            depth_im_mask = depth_im.apply(snf.gaussian_filter,
                                           sigma=self._depth_gaussian_sigma)
        else:
            depth_im_mask = depth_im.copy()
        if segmask is not None:
            # NOTE(review): this masks the *unsmoothed* `depth_im`,
            # discarding the Gaussian smoothing above — confirm intended.
            depth_im_mask = depth_im.mask_binary(segmask)
        self._logger.debug("Filtering took %.3f sec" % (time() - filter_start))
        if visualize:
            vis.figure()
            vis.subplot(1, 2, 1)
            vis.imshow(depth_im)
            vis.subplot(1, 2, 2)
            vis.imshow(depth_im_mask)
            vis.show()
        # Project to get the point cloud.
        cloud_start = time()
        point_cloud_im = camera_intr.deproject_to_image(depth_im_mask)
        normal_cloud_im = point_cloud_im.normal_cloud_im()
        nonzero_px = depth_im_mask.nonzero_pixels()
        num_nonzero_px = nonzero_px.shape[0]
        if num_nonzero_px == 0:
            return []
        self._logger.debug("Normal cloud took %.3f sec" %
                           (time() - cloud_start))
        # Randomly sample points and add to image.
        sample_start = time()
        suction_points = []
        k = 0
        sample_size = min(self._max_num_samples, num_nonzero_px)
        indices = np.random.choice(num_nonzero_px,
                                   size=sample_size,
                                   replace=False)
        while k < sample_size and len(suction_points) < num_samples:
            # Sample a point uniformly at random.
            ind = indices[k]
            # `nonzero_px` rows are (row, col); `Point` takes (x, y).
            center_px = np.array([nonzero_px[ind, 1], nonzero_px[ind, 0]])
            center = Point(center_px, frame=camera_intr.frame)
            # Approach axis is the negated surface normal at the point.
            axis = -normal_cloud_im[center.y, center.x]
            # depth = point_cloud_im[center.y, center.x][2]
            # Random roll about the approach axis.
            orientation = 2 * np.pi * np.random.rand()
            # Update number of tries.
            k += 1
            # Skip bad axes.
            if np.linalg.norm(axis) == 0:
                continue
            # Rotation matrix: the approach axis is the grasp x-axis; pick a
            # y-axis perpendicular to its image-plane projection, complete
            # the right-handed frame with the cross product, then spin the
            # frame about the approach axis by `orientation`.
            x_axis = axis
            y_axis = np.array([axis[1], -axis[0], 0])
            if np.linalg.norm(y_axis) == 0:
                y_axis = np.array([1, 0, 0])
            y_axis = y_axis / np.linalg.norm(y_axis)
            z_axis = np.cross(x_axis, y_axis)
            R = np.array([x_axis, y_axis, z_axis]).T
            # R_orig = np.copy(R)
            R = R.dot(RigidTransform.x_axis_rotation(orientation))
            t = point_cloud_im[center.y, center.x]
            pose = RigidTransform(rotation=R,
                                  translation=t,
                                  from_frame="grasp",
                                  to_frame=camera_intr.frame)
            # Check center px dist from boundary.
            if (center_px[0] < self._min_dist_from_boundary
                    or center_px[1] < self._min_dist_from_boundary
                    or center_px[1] >
                    depth_im.height - self._min_dist_from_boundary
                    or center_px[0] >
                    depth_im.width - self._min_dist_from_boundary):
                continue
            # Keep if the angle between the camera optical axis and the suction
            # direction is less than a threshold.
            dot = max(min(axis.dot(np.array([0, 0, 1])), 1.0), -1.0)
            psi = np.arccos(dot)
            if psi < self._max_suction_dir_optical_axis_angle:
                # Check distance to ensure sample diversity.
                candidate = MultiSuctionPoint2D(pose, camera_intr=camera_intr)
                # Check constraint satisfaction.
                if constraint_fn is None or constraint_fn(candidate):
                    if visualize:
                        vis.figure()
                        vis.imshow(depth_im)
                        vis.scatter(center.x, center.y)
                        vis.show()
                    suction_points.append(candidate)
        self._logger.debug("Loop took %.3f sec" % (time() - sample_start))
        return suction_points
class ImageGraspSamplerFactory(object):
    """Factory for image grasp samplers."""

    @staticmethod
    def sampler(sampler_type, config):
        """Instantiate the image grasp sampler named by `sampler_type`.

        Parameters
        ----------
        sampler_type : str
            One of "antipodal_depth", "suction", or "multi_suction".
        config : dict
            Configuration dictionary forwarded to the sampler constructor.

        Returns
        -------
        :obj:`ImageGraspSampler`
            The constructed sampler.

        Raises
        ------
        ValueError
            If `sampler_type` is not a supported sampler name.
        """
        if sampler_type == "antipodal_depth":
            return AntipodalDepthImageGraspSampler(config)
        if sampler_type == "suction":
            return DepthImageSuctionPointSampler(config)
        if sampler_type == "multi_suction":
            return DepthImageMultiSuctionPointSampler(config)
        raise ValueError("Image grasp sampler type %s not supported!" %
                         (sampler_type))
================================================
FILE: gqcnn/grasping/policy/__init__.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
"""
from .fc_policy import (FullyConvolutionalGraspingPolicyParallelJaw,
FullyConvolutionalGraspingPolicySuction)
from .policy import (RobustGraspingPolicy, CrossEntropyRobustGraspingPolicy,
RgbdImageState, GraspAction, UniformRandomGraspingPolicy)
# Public API of `gqcnn.grasping.policy`.
__all__ = [
    "FullyConvolutionalGraspingPolicyParallelJaw",
    "FullyConvolutionalGraspingPolicySuction", "RobustGraspingPolicy",
    "CrossEntropyRobustGraspingPolicy", "UniformRandomGraspingPolicy",
    "RgbdImageState", "GraspAction"
]
================================================
FILE: gqcnn/grasping/policy/enums.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Enums for GQ-CNN policies.
Author
------
Vishal Satish
"""
class SamplingMethod(object):
    """String constants naming the supported prediction-sampling methods."""

    # Take the k highest-valued predictions.
    TOP_K = "top_k"
    # Sample predictions uniformly at random.
    UNIFORM = "uniform"
================================================
FILE: gqcnn/grasping/policy/fc_policy.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Fully-Convolutional GQ-CNN grasping policies.
Author
------
Vishal Satish
"""
from abc import abstractmethod
import os
import matplotlib.pyplot as plt
import numpy as np
from autolab_core import Point, Logger, DepthImage
from visualization import Visualizer2D as vis
from ...grasping import Grasp2D, SuctionPoint2D
from ...utils import GeneralConstants, NoValidGraspsException
from .enums import SamplingMethod
from .policy import GraspingPolicy, GraspAction
class FullyConvolutionalGraspingPolicy(GraspingPolicy):
    """Abstract grasp sampling policy class using Fully-Convolutional GQ-CNN
    network."""

    def __init__(self, cfg, filters=None):
        """
        Parameters
        ----------
        cfg : dict
            Python dictionary of policy configuration parameters.
        filters : dict
            Python dictionary of kinematic filters to apply.
        """
        GraspingPolicy.__init__(self, cfg, init_sampler=False)

        # Init logger.
        self._logger = Logger.get_logger(self.__class__.__name__)

        self._cfg = cfg
        self._sampling_method = self._cfg["sampling_method"]

        # GQ-CNN parameters: stride and receptive field are needed to map
        # locations in the network output back to pixels in the input image.
        self._gqcnn_stride = self._cfg["gqcnn_stride"]
        self._gqcnn_recep_h = self._cfg["gqcnn_recep_h"]
        self._gqcnn_recep_w = self._cfg["gqcnn_recep_w"]

        # Grasp filtering.
        self._filters = filters
        self._max_grasps_to_filter = self._cfg["max_grasps_to_filter"]
        self._filter_grasps = self._cfg["filter_grasps"]

        # Visualization parameters.
        self._vis_config = self._cfg["policy_vis"]
        self._vis_scale = self._vis_config["scale"]
        self._vis_show_axis = self._vis_config["show_axis"]
        self._num_vis_samples = self._vis_config["num_samples"]
        self._vis_actions_2d = self._vis_config["actions_2d"]
        self._vis_actions_3d = self._vis_config["actions_3d"]
        self._vis_affordance_map = self._vis_config["affordance_map"]

        self._vis_output_dir = None
        # If this exists in the config then all visualizations will be logged
        # here instead of displayed.
        if "output_dir" in self._vis_config:
            self._vis_output_dir = self._vis_config["output_dir"]
        self._state_counter = 0

    def _unpack_state(self, state):
        """Unpack information from the provided `RgbdImageState`."""
        # TODO(vsatish): Don't access raw depth data like this.
        return (state.rgbd_im.depth, state.rgbd_im.depth._data,
                state.segmask.raw_data, state.camera_intr)

    def _mask_predictions(self, preds, raw_segmask):
        """Mask the given predictions with the given segmask, setting the rest
        to 0.0."""
        preds_masked = np.zeros_like(preds)
        # Crop the segmask to the region covered by the network output (the
        # receptive field shrinks the output relative to the input image) and
        # downsample it by the network stride.
        raw_segmask_cropped = raw_segmask[self._gqcnn_recep_h //
                                          2:raw_segmask.shape[0] -
                                          self._gqcnn_recep_h // 2,
                                          self._gqcnn_recep_w //
                                          2:raw_segmask.shape[1] -
                                          self._gqcnn_recep_w // 2, 0]
        raw_segmask_downsampled = raw_segmask_cropped[::self._gqcnn_stride, ::
                                                      self._gqcnn_stride]
        # Zero-pad if the downsampled segmask is smaller than the prediction
        # map due to rounding.
        if raw_segmask_downsampled.shape[0] != preds.shape[1]:
            raw_segmask_downsampled_new = np.zeros(preds.shape[1:3])
            raw_segmask_downsampled_new[:raw_segmask_downsampled.
                                        shape[0], :raw_segmask_downsampled.
                                        shape[1]] = raw_segmask_downsampled
            raw_segmask_downsampled = raw_segmask_downsampled_new
        # Keep predictions only where the segmask is nonzero.
        nonzero_mask_ind = np.where(raw_segmask_downsampled > 0)
        preds_masked[:, nonzero_mask_ind[0],
                     nonzero_mask_ind[1]] = preds[:, nonzero_mask_ind[0],
                                                  nonzero_mask_ind[1]]
        return preds_masked

    def _sample_predictions(self, preds, num_actions):
        """Sample predictions.

        Samples flat indices from the 4D prediction tensor and converts them
        back to (image, row, column, angle) indices.
        """
        dim2 = preds.shape[2]
        dim1 = preds.shape[1]
        dim3 = preds.shape[3]
        preds_flat = np.ravel(preds)
        pred_ind_flat = self._sample_predictions_flat(preds_flat, num_actions)
        pred_ind = np.zeros((num_actions, len(preds.shape)), dtype=np.int32)
        for idx in range(num_actions):
            # Unravel each flat index into its 4D coordinates.
            pred_ind[idx, 0] = pred_ind_flat[idx] // (dim2 * dim1 * dim3)
            pred_ind[idx, 1] = (pred_ind_flat[idx] -
                                (pred_ind[idx, 0] *
                                 (dim2 * dim1 * dim3))) // (dim2 * dim3)
            pred_ind[idx, 2] = (pred_ind_flat[idx] - (pred_ind[idx, 0] *
                                                      (dim2 * dim1 * dim3)) -
                                (pred_ind[idx, 1] * (dim2 * dim3))) // dim3
            pred_ind[idx, 3] = (pred_ind_flat[idx] - (pred_ind[idx, 0] *
                                                      (dim2 * dim1 * dim3)) -
                                (pred_ind[idx, 1] * (dim2 * dim3))) % dim3
        return pred_ind

    def _sample_predictions_flat(self, preds_flat, num_samples):
        """Helper function to do the actual sampling.

        Returns a sequence of `num_samples` flat indices into `preds_flat`.

        Raises
        ------
        NoValidGraspsException
            If uniform sampling is requested but no prediction has nonzero
            quality.
        ValueError
            If the configured sampling method is unknown.
        """
        if num_samples == 1:
            # `argmax` is faster than `argpartition` for special case of single
            # sample.
            if self._sampling_method == SamplingMethod.TOP_K:
                return [np.argmax(preds_flat)]
            elif self._sampling_method == SamplingMethod.UNIFORM:
                nonzero_ind = np.where(preds_flat > 0)[0]
                if nonzero_ind.shape[0] == 0:
                    raise NoValidGraspsException(
                        "No grasps with nonzero quality")
                # Fixed: wrap the sampled index in a list so callers can index
                # the result uniformly with the multi-sample case (a bare
                # numpy scalar is not subscriptable).
                return [np.random.choice(nonzero_ind)]
            else:
                raise ValueError("Invalid sampling method: {}".format(
                    self._sampling_method))
        else:
            # Fixed: compare against the `SamplingMethod` constants instead of
            # raw string literals for consistency with the branch above.
            if self._sampling_method == SamplingMethod.TOP_K:
                return np.argpartition(preds_flat,
                                       -1 * num_samples)[-1 * num_samples:]
            elif self._sampling_method == SamplingMethod.UNIFORM:
                nonzero_ind = np.where(preds_flat > 0)[0]
                if nonzero_ind.shape[0] == 0:
                    raise NoValidGraspsException(
                        "No grasps with nonzero quality")
                return np.random.choice(nonzero_ind, size=num_samples)
            else:
                raise ValueError("Invalid sampling method: {}".format(
                    self._sampling_method))

    @abstractmethod
    def _get_actions(self, preds, ind, images, depths, camera_intr,
                     num_actions):
        """Generate the actions to be returned."""
        pass

    @abstractmethod
    def _visualize_3d(self, actions, wrapped_depth_im, camera_intr,
                      num_actions):
        """Visualize the actions in 3D."""
        pass

    @abstractmethod
    def _visualize_affordance_map(self,
                                  preds,
                                  depth_im,
                                  scale,
                                  plot_max=True,
                                  output_dir=None):
        """Visualize an affordance map of the network predictions overlayed on
        the depth image."""
        pass

    def _visualize_2d(self,
                      actions,
                      preds,
                      wrapped_depth_im,
                      num_actions,
                      scale,
                      show_axis,
                      output_dir=None):
        """Visualize the actions in 2D.

        If `output_dir` is provided, the figure is saved there instead of
        being displayed.
        """
        self._logger.info("Visualizing actions in 2d...")

        # Plot actions in 2D, colored by predicted quality.
        vis.figure()
        vis.imshow(wrapped_depth_im)
        for i in range(num_actions):
            vis.grasp(actions[i].grasp,
                      scale=scale,
                      show_axis=show_axis,
                      color=plt.cm.RdYlGn(actions[i].q_value))
        vis.title("Top {} Grasps".format(num_actions))
        if output_dir is not None:
            vis.savefig(os.path.join(output_dir, "top_grasps.png"))
        else:
            vis.show()

    def _filter(self, actions):
        """Filter actions.

        Returns the first action that passes every filter, scanning `actions`
        in order.

        Raises
        ------
        NoValidGraspsException
            If no action passes all filters.
        """
        for action in actions:
            valid = True
            for filter_name, is_valid in self._filters.items():
                if not is_valid(action.grasp):
                    self._logger.info(
                        "Grasp {} is not valid with filter {}".format(
                            action.grasp, filter_name))
                    valid = False
                    break
            if valid:
                return action
        raise NoValidGraspsException("No grasps found after filtering!")

    @abstractmethod
    def _gen_images_and_depths(self, depth, segmask):
        """Generate inputs for the grasp quality function."""
        pass

    def _action(self, state, num_actions=1):
        """Plan action(s)."""
        if self._filter_grasps:
            assert self._filters is not None, ("Trying to filter grasps but no"
                                               " filters were provided!")
            assert num_actions == 1, ("Filtering support is only implemented"
                                      " for single actions!")
            # Sample extra candidates so filtering has grasps to reject.
            num_actions = self._max_grasps_to_filter

        # Set up log dir for state visualizations.
        state_output_dir = None
        if self._vis_output_dir is not None:
            state_output_dir = os.path.join(
                self._vis_output_dir,
                "state_{}".format(str(self._state_counter).zfill(5)))
            if not os.path.exists(state_output_dir):
                os.makedirs(state_output_dir)
            self._state_counter += 1

        # Unpack the `RgbdImageState`.
        wrapped_depth, raw_depth, raw_seg, camera_intr = self._unpack_state(
            state)

        # Predict.
        images, depths = self._gen_images_and_depths(raw_depth, raw_seg)
        preds = self._grasp_quality_fn.quality(images, depths)

        # Get success probablility predictions only (this is needed because the
        # output of the network is pairs of (p_failure, p_success)).
        preds_success_only = preds[:, :, :, 1::2]

        # Mask predicted success probabilities with the cropped and downsampled
        # object segmask so we only sample grasps on the objects.
        preds_success_only = self._mask_predictions(preds_success_only,
                                                    raw_seg)

        # If we want to visualize more than one action, we have to sample more.
        # TODO(vsatish): If this is used with the "top_k" sampling method, the
        # final returned action is not the best because the argpartition does
        # not sort the partitioned indices. Fix this.
        num_actions_to_sample = self._num_vis_samples if (
            self._vis_actions_2d or self._vis_actions_3d) else num_actions

        if (self._sampling_method == SamplingMethod.TOP_K
                and self._num_vis_samples):
            self._logger.warning("FINAL GRASP RETURNED IS NOT THE BEST!")

        # Sample num_actions_to_sample indices from the success predictions.
        sampled_ind = self._sample_predictions(preds_success_only,
                                               num_actions_to_sample)

        # Wrap actions to be returned.
        actions = self._get_actions(preds_success_only, sampled_ind, images,
                                    depths, camera_intr, num_actions_to_sample)

        # Filter grasps: sort by quality (descending) and keep the best one
        # that passes every filter.
        if self._filter_grasps:
            actions = sorted(actions,
                             reverse=True,
                             key=lambda action: action.q_value)
            actions = [self._filter(actions)]

        # Visualize.
        if self._vis_actions_3d:
            self._logger.info("Generating 3D Visualization...")
            self._visualize_3d(actions, wrapped_depth, camera_intr,
                               num_actions_to_sample)
        if self._vis_actions_2d:
            self._logger.info("Generating 2D visualization...")
            self._visualize_2d(actions,
                               preds_success_only,
                               wrapped_depth,
                               num_actions_to_sample,
                               self._vis_scale,
                               self._vis_show_axis,
                               output_dir=state_output_dir)
        if self._vis_affordance_map:
            self._visualize_affordance_map(preds_success_only,
                                           wrapped_depth,
                                           self._vis_scale,
                                           output_dir=state_output_dir)

        # NOTE(review): the slice below returns `num_actions + 1` actions —
        # confirm whether `actions[-num_actions:]` was intended.
        return actions[-1] if (self._filter_grasps or num_actions
                               == 1) else actions[-(num_actions + 1):]

    def action_set(self, state, num_actions):
        """Plan a set of actions.

        Parameters
        ----------
        state : :obj:`gqcnn.RgbdImageState`
            The RGBD image state.
        num_actions : int
            The number of actions to plan.

        Returns
        -------
        list of :obj:`gqcnn.grasping.Grasp2D` or
        :obj:`gqcnn.grasping.SuctionPoint2D`
            The planned grasps.
        """
        return [
            action.grasp
            for action in self._action(state, num_actions=num_actions)
        ]
class FullyConvolutionalGraspingPolicyParallelJaw(
        FullyConvolutionalGraspingPolicy):
    """Parallel jaw grasp sampling policy using the FC-GQ-CNN."""

    def __init__(self, cfg, filters=None):
        """
        Parameters
        ----------
        cfg : dict
            Python dictionary of policy configuration parameters.
        filters : dict
            Python dictionary of functions to apply to filter invalid grasps.
        """
        FullyConvolutionalGraspingPolicy.__init__(self, cfg, filters=filters)

        self._gripper_width = self._cfg["gripper_width"]

        # Depth sampling parameters.
        self._num_depth_bins = self._cfg["num_depth_bins"]

        self._depth_offset = 0.0  # Hard offset from the workspace.
        if "depth_offset" in self._cfg:
            self._depth_offset = self._cfg["depth_offset"]

    def _sample_depths(self, raw_depth_im, raw_seg):
        """Sample depths from the raw depth image.

        Returns the centers of `num_depth_bins` uniformly-spaced depth bins
        between the min depth inside the segmask and the max depth of the
        image, each shifted by `depth_offset`.
        """
        max_depth = np.max(raw_depth_im) + self._depth_offset

        # For sampling the min depth, we only sample from the portion of the
        # depth image in the object segmask because sometimes the rim of the
        # bin is not properly subtracted out of the depth image.
        # NOTE(review): pixels outside the segmask are set to 1.0, so this
        # assumes object depths are below 1.0 m — confirm for far cameras.
        raw_depth_im_segmented = np.ones_like(raw_depth_im)
        raw_depth_im_segmented[np.where(raw_seg > 0)] = raw_depth_im[np.where(
            raw_seg > 0)]
        min_depth = np.min(raw_depth_im_segmented) + self._depth_offset

        depth_bin_width = (max_depth - min_depth) / self._num_depth_bins
        depths = np.zeros((self._num_depth_bins, 1))
        for i in range(self._num_depth_bins):
            # Bin center: half a bin width past the bin's lower edge.
            depths[i][0] = min_depth + (i * depth_bin_width +
                                        depth_bin_width / 2)
        return depths

    def _get_actions(self, preds, ind, images, depths, camera_intr,
                     num_actions):
        """Generate the actions to be returned."""
        actions = []
        # TODO(vsatish): These should use the max angle instead.
        ang_bin_width = GeneralConstants.PI / preds.shape[-1]
        for i in range(num_actions):
            im_idx = ind[i, 0]
            h_idx = ind[i, 1]
            w_idx = ind[i, 2]
            ang_idx = ind[i, 3]
            # Map prediction-map coordinates back to image pixels using the
            # network stride and receptive field.
            center = Point(
                np.asarray([
                    w_idx * self._gqcnn_stride + self._gqcnn_recep_w // 2,
                    h_idx * self._gqcnn_stride + self._gqcnn_recep_h // 2
                ]))
            # Convert the angular bin index to a grasp angle (bin center).
            ang = GeneralConstants.PI / 2 - (ang_idx * ang_bin_width +
                                             ang_bin_width / 2)
            depth = depths[im_idx, 0]
            grasp = Grasp2D(center,
                            ang,
                            depth,
                            width=self._gripper_width,
                            camera_intr=camera_intr)
            grasp_action = GraspAction(grasp, preds[im_idx, h_idx, w_idx,
                                                    ang_idx],
                                       DepthImage(images[im_idx]))
            actions.append(grasp_action)
        return actions

    def _gen_images_and_depths(self, depth, segmask):
        """Replicate the depth image and sample corresponding depths."""
        depths = self._sample_depths(depth, segmask)
        images = np.tile(np.asarray([depth]), (self._num_depth_bins, 1, 1, 1))
        return images, depths

    def _visualize_3d(self, actions, wrapped_depth_im, camera_intr,
                      num_actions):
        """Visualize the actions in 3D."""
        raise NotImplementedError

    def _visualize_affordance_map(self,
                                  preds,
                                  depth_im,
                                  scale,
                                  plot_max=True,
                                  output_dir=None):
        """Visualize an affordance map of the network predictions overlayed on
        the depth image."""
        # Fixed: the signature now matches the abstract method in
        # `FullyConvolutionalGraspingPolicy`, so callers passing the
        # base-class arguments get `NotImplementedError` instead of a
        # `TypeError`.
        raise NotImplementedError
class FullyConvolutionalGraspingPolicySuction(FullyConvolutionalGraspingPolicy
                                              ):
    """Suction grasp sampling policy using the FC-GQ-CNN."""

    def _get_actions(self, preds, ind, images, depths, camera_intr,
                     num_actions):
        """Generate the actions to be returned."""
        # Deproject the depth image into a point cloud and estimate surface
        # normals; the suction axis is the negated surface normal.
        depth_im = DepthImage(images[0], frame=camera_intr.frame)
        point_cloud_im = camera_intr.deproject_to_image(depth_im)
        normal_cloud_im = point_cloud_im.normal_cloud_im()
        actions = []
        for i in range(num_actions):
            im_idx = ind[i, 0]
            h_idx = ind[i, 1]
            w_idx = ind[i, 2]
            # Map prediction-map coordinates back to image pixels using the
            # network stride and receptive field.
            center = Point(
                np.asarray([
                    w_idx * self._gqcnn_stride + self._gqcnn_recep_w // 2,
                    h_idx * self._gqcnn_stride + self._gqcnn_recep_h // 2
                ]))
            axis = -normal_cloud_im[center.y, center.x]
            # Skip points with a degenerate (zero) surface normal.
            if np.linalg.norm(axis) == 0:
                continue
            depth = depth_im[center.y, center.x, 0]
            # Skip points with missing depth data.
            if depth == 0.0:
                continue
            grasp = SuctionPoint2D(center,
                                   axis=axis,
                                   depth=depth,
                                   camera_intr=camera_intr)
            grasp_action = GraspAction(grasp, preds[im_idx, h_idx, w_idx, 0],
                                       DepthImage(images[im_idx]))
            actions.append(grasp_action)
        # NOTE: fewer than `num_actions` actions may be returned when invalid
        # points were skipped above.
        return actions

    def _visualize_affordance_map(self,
                                  preds,
                                  depth_im,
                                  scale,
                                  plot_max=True,
                                  output_dir=None):
        """Visualize an affordance map of the network predictions overlayed on
        the depth image."""
        self._logger.info("Visualizing affordance map...")
        # Quality map of the first image; channel 0 of the masked success
        # predictions.
        affordance_map = preds[0, ..., 0]
        # Crop away the receptive-field border and downsample by the network
        # stride so the depth image aligns with the prediction map.
        tf_depth_im = depth_im.crop(
            depth_im.shape[0] - self._gqcnn_recep_h, depth_im.shape[1] -
            self._gqcnn_recep_w).resize(1.0 / self._gqcnn_stride)

        # Plot the affordance map semi-transparently over the depth image.
        vis.figure()
        vis.imshow(tf_depth_im)
        plt.imshow(affordance_map,
                   cmap=plt.cm.RdYlGn,
                   alpha=0.3,
                   vmin=0.0,
                   vmax=1.0)
        if plot_max:
            # Mark the pixel with the highest predicted quality.
            affordance_argmax = np.unravel_index(np.argmax(affordance_map),
                                                 affordance_map.shape)
            plt.scatter(affordance_argmax[1],
                        affordance_argmax[0],
                        c="black",
                        marker=".",
                        s=scale * 25)
        vis.title("Grasp Affordance Map")
        # Save to disk when an output dir is given, otherwise display.
        if output_dir is not None:
            vis.savefig(os.path.join(output_dir, "grasp_affordance_map.png"))
        else:
            vis.show()

    def _gen_images_and_depths(self, depth, segmask):
        """Extend the image to a 4D tensor."""
        # TODO(vsatish): Depth should be made an optional input to the network.
        # The -1 depth is a placeholder (see the TODO above).
        return np.expand_dims(depth, 0), np.array([-1])

    def _visualize_3d(self, actions, wrapped_depth_im, camera_intr,
                      num_actions):
        """Visualize the actions in 3D."""
        # 3D visualization is not supported for this policy.
        raise NotImplementedError
================================================
FILE: gqcnn/grasping/policy/policy.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Grasping policies.
Author
------
Jeff Mahler
"""
from abc import ABC, abstractmethod
import copy
import pickle as pkl
import math
import os
from time import time
import matplotlib.pyplot as plt
import numpy as np
import scipy.ndimage.filters as snf
import scipy.stats as ss
from sklearn.mixture import GaussianMixture
import autolab_core.utils as utils
from autolab_core import (Point, Logger, BinaryImage, ColorImage,
DepthImage, RgbdImage, SegmentationImage,
CameraIntrinsics)
from visualization import Visualizer2D as vis
from ..constraint_fn import GraspConstraintFnFactory
from ..grasp import Grasp2D, SuctionPoint2D, MultiSuctionPoint2D
from ..grasp_quality_function import (GraspQualityFunctionFactory,
GQCnnQualityFunction)
from ..image_grasp_sampler import ImageGraspSamplerFactory
from ...utils import GeneralConstants, NoValidGraspsException
class RgbdImageState(object):
    """State to encapsulate RGB-D images."""

    def __init__(self,
                 rgbd_im,
                 camera_intr,
                 segmask=None,
                 obj_segmask=None,
                 fully_observed=None):
        """
        Parameters
        ----------
        rgbd_im : :obj:`autolab_core.RgbdImage`
            An RGB-D image to plan grasps on.
        camera_intr : :obj:`autolab_core.CameraIntrinsics`
            Intrinsics of the RGB-D camera.
        segmask : :obj:`autolab_core.BinaryImage`
            Segmentation mask for the image.
        obj_segmask : :obj:`autolab_core.SegmentationImage`
            Segmentation mask for the different objects in the image.
        fully_observed : :obj:`object`
            Representation of the fully observed state.
        """
        self.rgbd_im = rgbd_im
        self.camera_intr = camera_intr
        self.segmask = segmask
        self.obj_segmask = obj_segmask
        self.fully_observed = fully_observed

    def save(self, save_dir):
        """Save to a directory.

        Parameters
        ----------
        save_dir : str
            The directory to save to.
        """
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        color_image_filename = os.path.join(save_dir, "color.png")
        depth_image_filename = os.path.join(save_dir, "depth.npy")
        camera_intr_filename = os.path.join(save_dir, "camera.intr")
        segmask_filename = os.path.join(save_dir, "segmask.npy")
        obj_segmask_filename = os.path.join(save_dir, "obj_segmask.npy")
        state_filename = os.path.join(save_dir, "state.pkl")
        self.rgbd_im.color.save(color_image_filename)
        self.rgbd_im.depth.save(depth_image_filename)
        self.camera_intr.save(camera_intr_filename)
        # Optional components are saved only when present.
        if self.segmask is not None:
            self.segmask.save(segmask_filename)
        if self.obj_segmask is not None:
            self.obj_segmask.save(obj_segmask_filename)
        if self.fully_observed is not None:
            # Fixed: use a context manager so the file handle is closed
            # deterministically (previously leaked).
            with open(state_filename, "wb") as f:
                pkl.dump(self.fully_observed, f)

    @staticmethod
    def load(save_dir):
        """Load an :obj:`RGBDImageState`.

        Parameters
        ----------
        save_dir : str
            The directory to load from.

        Raises
        ------
        ValueError
            If `save_dir` does not exist.
        """
        if not os.path.exists(save_dir):
            raise ValueError("Directory %s does not exist!" % (save_dir))
        color_image_filename = os.path.join(save_dir, "color.png")
        depth_image_filename = os.path.join(save_dir, "depth.npy")
        camera_intr_filename = os.path.join(save_dir, "camera.intr")
        segmask_filename = os.path.join(save_dir, "segmask.npy")
        obj_segmask_filename = os.path.join(save_dir, "obj_segmask.npy")
        state_filename = os.path.join(save_dir, "state.pkl")
        camera_intr = CameraIntrinsics.load(camera_intr_filename)
        color = ColorImage.open(color_image_filename, frame=camera_intr.frame)
        depth = DepthImage.open(depth_image_filename, frame=camera_intr.frame)
        # Optional components are loaded only when their files exist.
        segmask = None
        if os.path.exists(segmask_filename):
            segmask = BinaryImage.open(segmask_filename,
                                       frame=camera_intr.frame)
        obj_segmask = None
        if os.path.exists(obj_segmask_filename):
            obj_segmask = SegmentationImage.open(obj_segmask_filename,
                                                 frame=camera_intr.frame)
        fully_observed = None
        if os.path.exists(state_filename):
            # Fixed: use a context manager so the file handle is closed
            # deterministically (previously leaked).
            with open(state_filename, "rb") as f:
                fully_observed = pkl.load(f)
        return RgbdImageState(RgbdImage.from_color_and_depth(color, depth),
                              camera_intr,
                              segmask=segmask,
                              obj_segmask=obj_segmask,
                              fully_observed=fully_observed)
class GraspAction(object):
    """Action to encapsulate grasps.
    """

    def __init__(self, grasp, q_value, image=None, policy_name=None):
        """
        Parameters
        ----------
        grasp : :obj`Grasp2D` or :obj:`SuctionPoint2D`
            2D grasp to wrap.
        q_value : float
            Grasp quality.
        image : :obj:`autolab_core.DepthImage`
            Depth image corresponding to grasp.
        policy_name : str
            Policy name.
        """
        self.grasp = grasp
        self.q_value = q_value
        self.image = image
        self.policy_name = policy_name

    def save(self, save_dir):
        """Save grasp action.

        Parameters
        ----------
        save_dir : str
            Directory to save the grasp action to.
        """
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        grasp_filename = os.path.join(save_dir, "grasp.pkl")
        q_value_filename = os.path.join(save_dir, "pred_robustness.pkl")
        image_filename = os.path.join(save_dir, "tf_image.npy")
        # Fixed: use context managers so the pickle file handles are closed
        # deterministically (previously leaked).
        with open(grasp_filename, "wb") as f:
            pkl.dump(self.grasp, f)
        with open(q_value_filename, "wb") as f:
            pkl.dump(self.q_value, f)
        if self.image is not None:
            self.image.save(image_filename)

    @staticmethod
    def load(save_dir):
        """Load a saved grasp action.

        Parameters
        ----------
        save_dir : str
            Directory of the saved grasp action.

        Returns
        -------
        :obj:`GraspAction`
            Loaded grasp action.

        Raises
        ------
        ValueError
            If `save_dir` does not exist.
        """
        if not os.path.exists(save_dir):
            raise ValueError("Directory %s does not exist!" % (save_dir))
        grasp_filename = os.path.join(save_dir, "grasp.pkl")
        q_value_filename = os.path.join(save_dir, "pred_robustness.pkl")
        image_filename = os.path.join(save_dir, "tf_image.npy")
        # Fixed: use context managers so the pickle file handles are closed
        # deterministically (previously leaked).
        with open(grasp_filename, "rb") as f:
            grasp = pkl.load(f)
        with open(q_value_filename, "rb") as f:
            q_value = pkl.load(f)
        # The image is optional; only load it when it was saved.
        image = None
        if os.path.exists(image_filename):
            image = DepthImage.open(image_filename)
        return GraspAction(grasp, q_value, image)
class Policy(ABC):
    """Base class for all policies.

    A policy instance is callable: invoking it on a state is equivalent to
    calling :meth:`action` on that state.
    """

    def __call__(self, state):
        """Make the policy callable by delegating to `action`."""
        return self.action(state)

    @abstractmethod
    def action(self, state):
        """Return an action for the given state.

        Subclasses must implement this.
        """
class GraspingPolicy(Policy):
    """Policy for robust grasping with Grasp Quality Convolutional Neural
    Networks (GQ-CNN)."""

    def __init__(self, config, init_sampler=True):
        """
        Parameters
        ----------
        config : dict
            Python dictionary of parameters for the policy.
        init_sampler : bool
            Whether or not to initialize the grasp sampler.

        Notes
        -----
        Required configuration parameters are specified in Other Parameters.

        Other Parameters
        ----------------
        sampling : dict
            Dictionary of parameters for grasp sampling, see
            `gqcnn/grasping/image_grasp_sampler.py`.
        gqcnn_model : str
            String path to a trained GQ-CNN model.
        """
        # Store parameters.
        self._config = config
        # Default gripper width, overridden by the config when provided.
        self._gripper_width = 0.05
        if "gripper_width" in config:
            self._gripper_width = config["gripper_width"]

        # Set the logging dir and possibly log file.
        self._logging_dir = None
        log_file = None
        if "logging_dir" in self.config:
            self._logging_dir = self.config["logging_dir"]
            if not os.path.exists(self._logging_dir):
                os.makedirs(self._logging_dir)
            log_file = os.path.join(self._logging_dir, "policy.log")

        # Setup logger.
        self._logger = Logger.get_logger(self.__class__.__name__,
                                         log_file=log_file,
                                         global_log_file=True)

        # Init grasp sampler.
        if init_sampler:
            self._sampling_config = config["sampling"]
            self._sampling_config["gripper_width"] = self._gripper_width
            # Keep sampled grasps far enough from the image boundary that
            # rotated metric crops stay inside the image.
            if "crop_width" in config["metric"] and "crop_height" in config[
                    "metric"]:
                pad = max(
                    math.ceil(
                        np.sqrt(2) * (config["metric"]["crop_width"] / 2)),
                    math.ceil(
                        np.sqrt(2) * (config["metric"]["crop_height"] / 2)))
                self._sampling_config["min_dist_from_boundary"] = pad
            # NOTE(review): duplicate of the `gripper_width` assignment a few
            # lines above; harmless but redundant.
            self._sampling_config["gripper_width"] = self._gripper_width
            sampler_type = self._sampling_config["type"]
            self._grasp_sampler = ImageGraspSamplerFactory.sampler(
                sampler_type, self._sampling_config)

        # Init constraint function (optional; only when configured).
        self._grasp_constraint_fn = None
        if "constraints" in self._config:
            self._constraint_config = self._config["constraints"]
            constraint_type = self._constraint_config["type"]
            self._grasp_constraint_fn = GraspConstraintFnFactory.constraint_fn(
                constraint_type, self._constraint_config)

        # Init grasp quality function.
        self._metric_config = config["metric"]
        metric_type = self._metric_config["type"]
        self._grasp_quality_fn = GraspQualityFunctionFactory.quality_function(
            metric_type, self._metric_config)

    @property
    def config(self):
        """Returns the policy configuration parameters.

        Returns
        -------
        dict
            Python dictionary of the policy configuration parameters.
        """
        return self._config

    @property
    def grasp_sampler(self):
        """Returns the grasp sampler.

        Returns
        -------
        :obj:`gqcnn.grasping.image_grasp_sampler.ImageGraspSampler`
            The grasp sampler.
        """
        # NOTE(review): only assigned when constructed with
        # `init_sampler=True` — accessing it otherwise raises AttributeError.
        return self._grasp_sampler

    @property
    def grasp_quality_fn(self):
        """Returns the grasp quality function.

        Returns
        -------
        :obj:`gqcnn.grasping.grasp_quality_function.GraspQualityFunction`
            The grasp quality function.
        """
        return self._grasp_quality_fn

    @property
    def grasp_constraint_fn(self):
        """Returns the grasp constraint function.

        Returns
        -------
        :obj:`gqcnn.grasping.constraint_fn.GraspConstraintFn`
            The grasp constraint function.
        """
        return self._grasp_constraint_fn

    @property
    def gqcnn(self):
        """Returns the GQ-CNN.

        Returns
        -------
        :obj:`gqcnn.model.tf.GQCNNTF`
            The GQ-CNN model.
        """
        # NOTE(review): `_gqcnn` is never assigned anywhere in this class;
        # confirm a subclass or external caller sets it before this property
        # is read, otherwise it raises AttributeError.
        return self._gqcnn

    def set_constraint_fn(self, constraint_fn):
        """Sets the grasp constraint function.

        Parameters
        ----------
        constraint_fn : :obj`gqcnn.grasping.constraint_fn.GraspConstraintFn`
            The grasp constraint function.
        """
        self._grasp_constraint_fn = constraint_fn

    def action(self, state):
        """Returns an action for a given state.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            The RGB-D image state to plan grasps on.

        Returns
        -------
        :obj:`GraspAction`
            The planned grasp action.
        """
        # Save state.
        if self._logging_dir is not None:
            # Draw experiment ids until an unused output dir is found.
            policy_id = utils.gen_experiment_id()
            policy_dir = os.path.join(self._logging_dir,
                                      "policy_output_%s" % (policy_id))
            while os.path.exists(policy_dir):
                policy_id = utils.gen_experiment_id()
                policy_dir = os.path.join(self._logging_dir,
                                          "policy_output_%s" % (policy_id))
            self._policy_dir = policy_dir
            os.mkdir(self._policy_dir)
            state_dir = os.path.join(self._policy_dir, "state")
            state.save(state_dir)

        # Plan action.
        action = self._action(state)

        # Save action.
        if self._logging_dir is not None:
            action_dir = os.path.join(self._policy_dir, "action")
            action.save(action_dir)
        return action

    @abstractmethod
    def _action(self, state):
        """Returns an action for a given state.
        """
        pass

    def show(self, filename=None, dpi=100):
        """Show a figure.

        Parameters
        ----------
        filename : str
            File to save figure to.
        dpi : int
            Dpi of figure.
        """
        if self._logging_dir is None:
            vis.show()
        else:
            # NOTE(review): `_policy_dir` is only set inside `action()` when
            # logging is enabled — calling `show` before `action` raises
            # AttributeError.
            filename = os.path.join(self._policy_dir, filename)
            vis.savefig(filename, dpi=dpi)
class UniformRandomGraspingPolicy(GraspingPolicy):
    """Returns a grasp uniformly at random."""

    def __init__(self, config):
        """
        Parameters
        ----------
        config : dict
            Python dictionary of policy configuration parameters.
        """
        GraspingPolicy.__init__(self, config)
        # Only a single grasp is sampled per action.
        self._num_grasp_samples = 1

        # Optional Gaussian perturbation of the sampled grasp center
        # (standard deviation in pixels; 0.0 disables perturbation).
        self._grasp_center_std = 0.0
        if "grasp_center_std" in config:
            self._grasp_center_std = config["grasp_center_std"]

    def _action(self, state):
        """Plans the grasp with the highest probability of success on
        the given RGB-D image.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            Image to plan grasps on.

        Returns
        -------
        :obj:`GraspAction`
            Grasp to execute.
        """
        # Check valid input.
        if not isinstance(state, RgbdImageState):
            raise ValueError("Must provide an RGB-D image state.")

        # Parse state.
        rgbd_im = state.rgbd_im
        camera_intr = state.camera_intr
        segmask = state.segmask

        # Sample grasps.
        grasps = self._grasp_sampler.sample(
            rgbd_im,
            camera_intr,
            self._num_grasp_samples,
            segmask=segmask,
            visualize=self.config["vis"]["grasp_sampling"],
            constraint_fn=self._grasp_constraint_fn,
            seed=None)
        num_grasps = len(grasps)
        if num_grasps == 0:
            self._logger.warning("No valid grasps could be found")
            raise NoValidGraspsException()

        # Set grasp.
        grasp = grasps[0]

        # Perturb grasp center with a Gaussian of the configured std dev.
        if self._grasp_center_std > 0.0:
            grasp_center_rv = ss.multivariate_normal(
                grasp.center.data, cov=self._grasp_center_std**2)
            grasp.center.data = grasp_center_rv.rvs(size=1)[0]

        # Form tensors.
        # The q-value is reported as 0.0 since no quality metric is evaluated
        # by this policy.
        return GraspAction(grasp, 0.0, state.rgbd_im.depth)
class RobustGraspingPolicy(GraspingPolicy):
    """Samples a set of grasp candidates in image space,
    ranks the grasps by the predicted probability of success from a GQ-CNN
    and returns the grasp with the highest probability of success."""

    def __init__(self, config, filters=None):
        """
        Parameters
        ----------
        config : dict
            Python dictionary of policy configuration parameters.
        filters : dict
            Python dictionary of functions to apply to filter invalid grasps.

        Notes
        -----
        Required configuration dictionary parameters are specified in
        Other Parameters.

        Other Parameters
        ----------------
        num_grasp_samples : int
            Number of grasps to sample.
        gripper_width : float, optional
            Width of the gripper in meters.
        logging_dir : str, optional
            Directory in which to save the sampled grasps and input images.
        """
        GraspingPolicy.__init__(self, config)
        self._parse_config()
        self._filters = filters

    def _parse_config(self):
        """Parses the parameters of the policy."""
        self._num_grasp_samples = self.config["sampling"]["num_grasp_samples"]
        # Maximum number of top-ranked grasps to test against the filters.
        self._max_grasps_filter = 1
        if "max_grasps_filter" in self.config:
            self._max_grasps_filter = self.config["max_grasps_filter"]
        self._gripper_width = np.inf
        if "gripper_width" in self.config:
            self._gripper_width = self.config["gripper_width"]

    def select(self, grasps, q_value):
        """Selects the grasp with the highest probability of success.
        Can override for alternate policies (e.g. epsilon greedy).

        Parameters
        ----------
        grasps : list
            Python list of :obj:`gqcnn.grasping.Grasp2D` or
            :obj:`gqcnn.grasping.SuctionPoint2D` grasps to select from.
        q_values : list
            Python list of associated q-values.

        Returns
        -------
        int
            Index (into `grasps`) of the grasp with the highest probability
            of success.
        """
        # Sort grasp indices by q-value, best first.
        num_grasps = len(grasps)
        grasps_and_predictions = zip(np.arange(num_grasps), q_value)
        grasps_and_predictions = sorted(grasps_and_predictions,
                                        key=lambda x: x[1],
                                        reverse=True)

        # Return top grasps.
        if self._filters is None:
            return grasps_and_predictions[0][0]

        # Filter grasps: test at most `_max_grasps_filter` of the top-ranked
        # grasps and return the first that passes every filter.
        self._logger.info("Filtering grasps")
        i = 0
        while i < self._max_grasps_filter and i < len(grasps_and_predictions):
            index = grasps_and_predictions[i][0]
            grasp = grasps[index]
            valid = True
            for filter_name, is_valid in self._filters.items():
                valid = is_valid(grasp)
                self._logger.debug("Grasp {} filter {} valid: {}".format(
                    i, filter_name, valid))
                if not valid:
                    # NOTE(review): `valid` is already False here; this
                    # reassignment is redundant.
                    valid = False
                    break
            if valid:
                return index
            i += 1
        raise NoValidGraspsException("No grasps satisfied filters")

    def _action(self, state):
        """Plans the grasp with the highest probability of success on
        the given RGB-D image.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            Image to plan grasps on.

        Returns
        -------
        :obj:`GraspAction`
            Grasp to execute.
        """
        # Check valid input.
        if not isinstance(state, RgbdImageState):
            raise ValueError("Must provide an RGB-D image state.")

        # Parse state.
        rgbd_im = state.rgbd_im
        camera_intr = state.camera_intr
        segmask = state.segmask

        # Sample grasps.
        grasps = self._grasp_sampler.sample(
            rgbd_im,
            camera_intr,
            self._num_grasp_samples,
            segmask=segmask,
            visualize=self.config["vis"]["grasp_sampling"],
            constraint_fn=self._grasp_constraint_fn,
            seed=None)
        num_grasps = len(grasps)
        if num_grasps == 0:
            self._logger.warning("No valid grasps could be found")
            raise NoValidGraspsException()

        # Compute grasp quality.
        compute_start = time()
        q_values = self._grasp_quality_fn(state, grasps, params=self._config)
        self._logger.debug("Grasp evaluation took %.3f sec" %
                           (time() - compute_start))

        if self.config["vis"]["grasp_candidates"]:
            # Display each grasp on the original image, colored by predicted
            # success.
            # NOTE(review): this normalization divides by zero when all
            # q-values are equal — confirm upstream guarantees a spread.
            norm_q_values = (q_values - np.min(q_values)) / (np.max(q_values) -
                                                             np.min(q_values))
            vis.figure(size=(GeneralConstants.FIGSIZE,
                             GeneralConstants.FIGSIZE))
            vis.imshow(rgbd_im.depth,
                       vmin=self.config["vis"]["vmin"],
                       vmax=self.config["vis"]["vmax"])
            for grasp, q in zip(grasps, norm_q_values):
                vis.grasp(grasp,
                          scale=1.0,
                          grasp_center_size=10,
                          show_center=False,
                          show_axis=True,
                          color=plt.cm.RdYlBu(q))
            vis.title("Sampled grasps")
            filename = None
            if self._logging_dir is not None:
                filename = os.path.join(self._logging_dir,
                                        "grasp_candidates.png")
            vis.show(filename)

        # Select grasp.
        index = self.select(grasps, q_values)
        grasp = grasps[index]
        q_value = q_values[index]
        if self.config["vis"]["grasp_plan"]:
            # Show the selected grasp with its depth and q-value.
            vis.figure()
            vis.imshow(rgbd_im.depth,
                       vmin=self.config["vis"]["vmin"],
                       vmax=self.config["vis"]["vmax"])
            vis.grasp(grasp, scale=2.0, show_axis=True)
            vis.title("Best Grasp: d=%.3f, q=%.3f" % (grasp.depth, q_value))
            vis.show()
        return GraspAction(grasp, q_value, state.rgbd_im.depth)
class CrossEntropyRobustGraspingPolicy(GraspingPolicy):
"""Optimizes a set of grasp candidates in image space using the
cross entropy method.
Cross entropy method (CEM):
(1) sample an initial set of candidates
(2) sort the candidates
(3) fit a GMM to the top P%
(4) re-sample grasps from the distribution
(5) repeat steps 2-4 for K iters
(6) return the best candidate from the final sample set
"""
    def __init__(self, config, filters=None):
        """
        Parameters
        ----------
        config : dict
            Python dictionary of policy configuration parameters.
        filters : dict
            Python dictionary of functions to apply to filter invalid grasps.

        Notes
        -----
        Required configuration dictionary parameters are specified in Other
        Parameters.

        Other Parameters
        ----------------
        num_seed_samples : int
            Number of candidate to sample in the initial set.
        num_gmm_samples : int
            Number of candidates to sample on each resampling from the GMMs.
        num_iters : int
            Number of sample-and-refit iterations of CEM.
        gmm_refit_p : float
            Top p-% of grasps used for refitting.
        gmm_component_frac : float
            Percentage of the elite set size used to determine number of GMM
            components.
        gmm_reg_covar : float
            Regularization parameters for GMM covariance matrix, enforces
            diversity of fitted distributions.
        deterministic : bool, optional
            Whether to set the random seed to enforce deterministic behavior.
        gripper_width : float, optional
            Width of the gripper in meters.
        """
        GraspingPolicy.__init__(self, config)
        self._parse_config()
        self._filters = filters
        # Counter used to tag data saved across successive policy executions.
        self._case_counter = 0
def _parse_config(self):
"""Parses the parameters of the policy."""
# Cross entropy method parameters.
self._num_seed_samples = self.config["num_seed_samples"]
self._num_gmm_samples = self.config["num_gmm_samples"]
self._num_iters = self.config["num_iters"]
self._gmm_refit_p = self.config["gmm_refit_p"]
self._gmm_component_frac = self.config["gmm_component_frac"]
self._gmm_reg_covar = self.config["gmm_reg_covar"]
self._depth_gaussian_sigma = 0.0
if "depth_gaussian_sigma" in self.config:
self._depth_gaussian_sigma = self.config["depth_gaussian_sigma"]
self._max_grasps_filter = 1
if "max_grasps_filter" in self.config:
self._max_grasps_filter = self.config["max_grasps_filter"]
self._max_resamples_per_iteration = 100
if "max_resamples_per_iteration" in self.config:
self._max_resamples_per_iteration = self.config[
"max_resamples_per_iteration"]
self._max_approach_angle = np.inf
if "max_approach_angle" in self.config:
self._max_approach_angle = np.deg2rad(
self.config["max_approach_angle"])
# Gripper parameters.
self._seed = None
if self.config["deterministic"]:
self._seed = GeneralConstants.SEED
self._gripper_width = np.inf
if "gripper_width" in self.config:
self._gripper_width = self.config["gripper_width"]
# Affordance map visualization.
self._vis_grasp_affordance_map = False
if "grasp_affordance_map" in self.config["vis"]:
self._vis_grasp_affordance_map = self.config["vis"][
"grasp_affordance_map"]
self._state_counter = 0 # Used for logging state data.
def select(self, grasps, q_values):
"""Selects the grasp with the highest probability of success.
Can override for alternate policies (e.g. epsilon greedy).
Parameters
----------
grasps : list
Python list of :obj:`gqcnn.grasping.Grasp2D` or
:obj:`gqcnn.grasping.SuctionPoint2D` grasps to select from.
q_values : list
Python list of associated q-values.
Returns
-------
:obj:`gqcnn.grasping.Grasp2D` or :obj:`gqcnn.grasping.SuctionPoint2D`
Grasp with highest probability of success.
"""
# Sort.
self._logger.info("Sorting grasps")
num_grasps = len(grasps)
if num_grasps == 0:
raise NoValidGraspsException("Zero grasps")
grasps_and_predictions = zip(np.arange(num_grasps), q_values)
grasps_and_predictions = sorted(grasps_and_predictions,
key=lambda x: x[1],
reverse=True)
# Return top grasps.
if self._filters is None:
return grasps_and_predictions[0][0]
# Filter grasps.
self._logger.info("Filtering grasps")
i = 0
while i < self._max_grasps_filter and i < len(grasps_and_predictions):
index = grasps_and_predictions[i][0]
grasp = grasps[index]
valid = True
for filter_name, is_valid in self._filters.items():
valid = is_valid(grasp)
self._logger.debug("Grasp {} filter {} valid: {}".format(
i, filter_name, valid))
if not valid:
valid = False
break
if valid:
return index
i += 1
raise NoValidGraspsException("No grasps satisfied filters")
def _mask_predictions(self, pred_map, segmask):
self._logger.info("Masking predictions...")
seg_pred_mismatch_msg = ("Prediction map shape {} does not match shape"
" of segmask {}.")
assert pred_map.shape == segmask.shape, seg_pred_mismatch_msg.format(
pred_map.shape, segmask.shape)
preds_masked = np.zeros_like(pred_map)
nonzero_ind = np.where(segmask > 0)
preds_masked[nonzero_ind] = pred_map[nonzero_ind]
return preds_masked
def _gen_grasp_affordance_map(self, state, stride=1):
self._logger.info("Generating grasp affordance map...")
# Generate grasps at points to evaluate (this is just the interface to
# `GraspQualityFunction`).
crop_candidate_start_time = time()
point_cloud_im = state.camera_intr.deproject_to_image(
state.rgbd_im.depth)
normal_cloud_im = point_cloud_im.normal_cloud_im()
q_vals = []
gqcnn_recep_h_half = self._grasp_quality_fn.gqcnn_recep_height // 2
gqcnn_recep_w_half = self._grasp_quality_fn.gqcnn_recep_width // 2
im_h = state.rgbd_im.height
im_w = state.rgbd_im.width
for i in range(gqcnn_recep_h_half - 1, im_h - gqcnn_recep_h_half,
stride):
grasps = []
for j in range(gqcnn_recep_w_half - 1, im_w - gqcnn_recep_w_half,
stride):
# TODO(vsatish): Find a better way to find policy type.
if self.config["sampling"]["type"] == "suction":
grasps.append(
SuctionPoint2D(Point(np.array([j, i])),
axis=-normal_cloud_im[i, j],
depth=state.rgbd_im.depth[i, j],
camera_intr=state.camera_intr))
else:
raise NotImplementedError(
"Parallel Jaw Grasp Affordance Maps Not Supported!")
q_vals.extend(self._grasp_quality_fn(state, grasps))
self._logger.info(
"Generating crop grasp candidates took {} sec.".format(
time() - crop_candidate_start_time))
# Mask out predictions not in the segmask (we don't really care about
# them).
pred_map = np.array(q_vals).reshape(
(im_h - gqcnn_recep_h_half * 2) // stride + 1,
(im_w - gqcnn_recep_w_half * 2) // stride + 1)
# TODO(vsatish): Don't access the raw data like this!
tf_segmask = state.segmask.crop(im_h - gqcnn_recep_h_half * 2,
im_w - gqcnn_recep_w_half * 2).resize(
1.0 / stride,
interp="nearest")._data.squeeze()
if tf_segmask.shape != pred_map.shape:
new_tf_segmask = np.zeros_like(pred_map)
smaller_i = min(pred_map.shape[0], tf_segmask.shape[0])
smaller_j = min(pred_map.shape[1], tf_segmask.shape[1])
new_tf_segmask[:smaller_i, :smaller_j] = tf_segmask[:smaller_i, :
smaller_j]
tf_segmask = new_tf_segmask
pred_map_masked = self._mask_predictions(pred_map, tf_segmask)
return pred_map_masked
def _plot_grasp_affordance_map(self,
state,
affordance_map,
stride=1,
grasps=None,
q_values=None,
plot_max=True,
title=None,
scale=1.0,
save_fname=None,
save_path=None):
gqcnn_recep_h_half = self._grasp_quality_fn.gqcnn_recep_height // 2
gqcnn_recep_w_half = self._grasp_quality_fn.gqcnn_recep_width // 2
im_h = state.rgbd_im.height
im_w = state.rgbd_im.width
# Plot.
vis.figure()
tf_depth_im = state.rgbd_im.depth.crop(
im_h - gqcnn_recep_h_half * 2,
im_w - gqcnn_recep_w_half * 2).resize(1.0 / stride,
interp="nearest")
vis.imshow(tf_depth_im)
plt.imshow(affordance_map, cmap=plt.cm.RdYlGn, alpha=0.3)
if grasps is not None:
grasps = copy.deepcopy(grasps)
for grasp, q in zip(grasps, q_values):
grasp.center.data[0] -= gqcnn_recep_w_half
grasp.center.data[1] -= gqcnn_recep_h_half
vis.grasp(grasp,
scale=scale,
show_center=False,
show_axis=True,
color=plt.cm.RdYlGn(q))
if plot_max:
affordance_argmax = np.unravel_index(np.argmax(affordance_map),
affordance_map.shape)
plt.scatter(affordance_argmax[1],
affordance_argmax[0],
c="black",
marker=".",
s=scale * 25)
if title is not None:
vis.title(title)
if save_path is not None:
save_path = os.path.join(save_path, save_fname)
vis.show(save_path)
def action_set(self, state):
"""Plan a set of grasps with the highest probability of success on
the given RGB-D image.
Parameters
----------
state : :obj:`RgbdImageState`
Image to plan grasps on.
Returns
-------
python list of :obj:`gqcnn.grasping.Grasp2D` or :obj:`gqcnn.grasping.SuctionPoint2D` # noqa E501
Grasps to execute.
"""
# Check valid input.
if not isinstance(state, RgbdImageState):
raise ValueError("Must provide an RGB-D image state.")
state_output_dir = None
if self._logging_dir is not None:
state_output_dir = os.path.join(
self._logging_dir,
"state_{}".format(str(self._state_counter).zfill(5)))
if not os.path.exists(state_output_dir):
os.makedirs(state_output_dir)
self._state_counter += 1
# Parse state.
seed_set_start = time()
rgbd_im = state.rgbd_im
depth_im = rgbd_im.depth
camera_intr = state.camera_intr
segmask = state.segmask
if self._depth_gaussian_sigma > 0:
depth_im_filtered = depth_im.apply(
snf.gaussian_filter, sigma=self._depth_gaussian_sigma)
else:
depth_im_filtered = depth_im
point_cloud_im = camera_intr.deproject_to_image(depth_im_filtered)
normal_cloud_im = point_cloud_im.normal_cloud_im()
# Visualize grasp affordance map.
if self._vis_grasp_affordance_map:
grasp_affordance_map = self._gen_grasp_affordance_map(state)
self._plot_grasp_affordance_map(state,
grasp_affordance_map,
title="Grasp Affordance Map",
save_fname="affordance_map.png",
save_path=state_output_dir)
if "input_images" in self.config["vis"] and self.config["vis"][
"input_images"]:
vis.figure()
vis.subplot(1, 2, 1)
vis.imshow(depth_im)
vis.title("Depth")
vis.subplot(1, 2, 2)
vis.imshow(segmask)
vis.title("Segmask")
filename = None
if self._logging_dir is not None:
filename = os.path.join(self._logging_dir, "input_images.png")
vis.show(filename)
# Sample grasps.
self._logger.info("Sampling seed set")
grasps = self._grasp_sampler.sample(
rgbd_im,
camera_intr,
self._num_seed_samples,
segmask=segmask,
visualize=self.config["vis"]["grasp_sampling"],
constraint_fn=self._grasp_constraint_fn,
seed=self._seed)
num_grasps = len(grasps)
if num_grasps == 0:
self._logger.warning("No valid grasps could be found")
raise NoValidGraspsException()
grasp_type = "parallel_jaw"
if isinstance(grasps[0], SuctionPoint2D):
grasp_type = "suction"
elif isinstance(grasps[0], MultiSuctionPoint2D):
grasp_type = "multi_suction"
self._logger.info("Sampled %d grasps" % (len(grasps)))
self._logger.info("Computing the seed set took %.3f sec" %
(time() - seed_set_start))
# Iteratively refit and sample.
for j in range(self._num_iters):
self._logger.info("CEM iter %d" % (j))
# Predict grasps.
predict_start = time()
q_values = self._grasp_quality_fn(state,
grasps,
params=self._config)
self._logger.info("Prediction took %.3f sec" %
(time() - predict_start))
# Sort grasps.
resample_start = time()
q_values_and_indices = zip(q_values, np.arange(num_grasps))
q_values_and_indices = sorted(q_values_and_indices,
key=lambda x: x[0],
reverse=True)
if self.config["vis"]["grasp_candidates"]:
# Display each grasp on the original image, colored by
# predicted success.
# norm_q_values = ((q_values - np.min(q_values)) /
# (np.max(q_values) - np.min(q_values)))
norm_q_values = q_values
title = "Sampled Grasps Iter %d" % (j)
if self._vis_grasp_affordance_map:
self._plot_grasp_affordance_map(
state,
grasp_affordance_map,
grasps=grasps,
q_values=norm_q_values,
scale=2.0,
title=title,
save_fname="cem_iter_{}.png".format(j),
save_path=state_output_dir)
display_grasps_and_q_values = zip(grasps, q_values)
display_grasps_and_q_values = sorted(
display_grasps_and_q_values, key=lambda x: x[1])
vis.figure(size=(GeneralConstants.FIGSIZE,
GeneralConstants.FIGSIZE))
vis.imshow(rgbd_im.depth,
vmin=self.config["vis"]["vmin"],
vmax=self.config["vis"]["vmax"])
for grasp, q in display_grasps_and_q_values:
vis.grasp(grasp,
scale=2.0,
jaw_width=2.0,
show_center=False,
show_axis=True,
color=plt.cm.RdYlBu(q))
vis.title("Sampled grasps iter %d" % (j))
filename = None
if self._logging_dir is not None:
filename = os.path.join(self._logging_dir,
"cem_iter_%d.png" % (j))
vis.show(filename)
# Fit elite set.
elite_start = time()
num_refit = max(int(np.ceil(self._gmm_refit_p * num_grasps)), 1)
elite_q_values = [i[0] for i in q_values_and_indices[:num_refit]]
elite_grasp_indices = [
i[1] for i in q_values_and_indices[:num_refit]
]
elite_grasps = [grasps[i] for i in elite_grasp_indices]
elite_grasp_arr = np.array([g.feature_vec for g in elite_grasps])
if self.config["vis"]["elite_grasps"]:
# Display each grasp on the original image, colored by
# predicted success.
norm_q_values = (elite_q_values - np.min(elite_q_values)) / (
np.max(elite_q_values) - np.min(elite_q_values))
vis.figure(size=(GeneralConstants.FIGSIZE,
GeneralConstants.FIGSIZE))
vis.imshow(rgbd_im.depth,
vmin=self.config["vis"]["vmin"],
vmax=self.config["vis"]["vmax"])
for grasp, q in zip(elite_grasps, norm_q_values):
vis.grasp(grasp,
scale=1.5,
show_center=False,
show_axis=True,
color=plt.cm.RdYlBu(q))
vis.title("Elite grasps iter %d" % (j))
filename = None
if self._logging_dir is not None:
filename = os.path.join(self._logging_dir,
"elite_set_iter_%d.png" % (j))
vis.show(filename)
# Normalize elite set.
elite_grasp_mean = np.mean(elite_grasp_arr, axis=0)
elite_grasp_std = np.std(elite_grasp_arr, axis=0)
elite_grasp_std[elite_grasp_std == 0] = 1e-6
elite_grasp_arr = (elite_grasp_arr -
elite_grasp_mean) / elite_grasp_std
self._logger.info("Elite set computation took %.3f sec" %
(time() - elite_start))
# Fit a GMM to the top samples.
num_components = max(
int(np.ceil(self._gmm_component_frac * num_refit)), 1)
uniform_weights = (1.0 / num_components) * np.ones(num_components)
gmm = GaussianMixture(n_components=num_components,
weights_init=uniform_weights,
reg_covar=self._gmm_reg_covar)
train_start = time()
gmm.fit(elite_grasp_arr)
self._logger.info("GMM fitting with %d components took %.3f sec" %
(num_components, time() - train_start))
# Sample the next grasps.
grasps = []
loop_start = time()
num_tries = 0
while (len(grasps) < self._num_gmm_samples
and num_tries < self._max_resamples_per_iteration):
# Sample from GMM.
sample_start = time()
grasp_vecs, _ = gmm.sample(n_samples=self._num_gmm_samples)
grasp_vecs = elite_grasp_std * grasp_vecs + elite_grasp_mean
self._logger.info("GMM sampling took %.3f sec" %
(time() - sample_start))
# Convert features to grasps and store if in segmask.
for k, grasp_vec in enumerate(grasp_vecs):
feature_start = time()
if grasp_type == "parallel_jaw":
# Form grasp object.
grasp = Grasp2D.from_feature_vec(
grasp_vec,
width=self._gripper_width,
camera_intr=camera_intr)
elif grasp_type == "suction":
# Read depth and approach axis.
u = int(min(max(grasp_vec[1], 0), depth_im.height - 1))
v = int(min(max(grasp_vec[0], 0), depth_im.width - 1))
grasp_depth = depth_im[u, v, 0]
# Approach axis.
grasp_axis = -normal_cloud_im[u, v]
# Form grasp object.
grasp = SuctionPoint2D.from_feature_vec(
grasp_vec,
camera_intr=camera_intr,
depth=grasp_depth,
axis=grasp_axis)
elif grasp_type == "multi_suction":
# Read depth and approach axis.
u = int(min(max(grasp_vec[1], 0), depth_im.height - 1))
v = int(min(max(grasp_vec[0], 0), depth_im.width - 1))
grasp_depth = depth_im[u, v]
# Approach_axis.
grasp_axis = -normal_cloud_im[u, v]
# Form grasp object.
grasp = MultiSuctionPoint2D.from_feature_vec(
grasp_vec,
camera_intr=camera_intr,
depth=grasp_depth,
axis=grasp_axis)
self._logger.debug("Feature vec took %.5f sec" %
(time() - feature_start))
bounds_start = time()
# Check in bounds.
if (state.segmask is None or
(grasp.center.y >= 0
and grasp.center.y < state.segmask.height
and grasp.center.x >= 0
and grasp.center.x < state.segmask.width
and np.any(state.segmask[int(grasp.center.y),
int(grasp.center.x)] != 0)
and grasp.approach_angle < self._max_approach_angle)
and (self._grasp_constraint_fn is None
or self._grasp_constraint_fn(grasp))):
# Check validity according to filters.
grasps.append(grasp)
self._logger.debug("Bounds took %.5f sec" %
(time() - bounds_start))
num_tries += 1
# Check num grasps.
num_grasps = len(grasps)
if num_grasps == 0:
self._logger.warning("No valid grasps could be found")
raise NoValidGraspsException()
self._logger.info("Resample loop took %.3f sec" %
(time() - loop_start))
self._logger.info("Resampling took %.3f sec" %
(time() - resample_start))
# Predict final set of grasps.
predict_start = time()
q_values = self._grasp_quality_fn(state, grasps, params=self._config)
self._logger.info("Final prediction took %.3f sec" %
(time() - predict_start))
if self.config["vis"]["grasp_candidates"]:
# Display each grasp on the original image, colored by predicted
# success.
# norm_q_values = ((q_values - np.min(q_values)) /
# (np.max(q_values) - np.min(q_values)))
norm_q_values = q_values
title = "Final Sampled Grasps"
if self._vis_grasp_affordance_map:
self._plot_grasp_affordance_map(
state,
grasp_affordance_map,
grasps=grasps,
q_values=norm_q_values,
scale=2.0,
title=title,
save_fname="final_sampled_grasps.png",
save_path=state_output_dir)
display_grasps_and_q_values = zip(grasps, q_values)
display_grasps_and_q_values = sorted(display_grasps_and_q_values,
key=lambda x: x[1])
vis.figure(size=(GeneralConstants.FIGSIZE,
GeneralConstants.FIGSIZE))
vis.imshow(rgbd_im.depth,
vmin=self.config["vis"]["vmin"],
vmax=self.config["vis"]["vmax"])
for grasp, q in display_grasps_and_q_values:
vis.grasp(grasp,
scale=2.0,
jaw_width=2.0,
show_center=False,
show_axis=True,
color=plt.cm.RdYlBu(q))
vis.title("Sampled grasps iter %d" % (j))
filename = None
if self._logging_dir is not None:
filename = os.path.join(self._logging_dir,
"cem_iter_%d.png" % (j))
vis.show(filename)
return grasps, q_values
def _action(self, state):
"""Plans the grasp with the highest probability of success on
the given RGB-D image.
Attributes
----------
state : :obj:`RgbdImageState`
Image to plan grasps on.
Returns
-------
:obj:`GraspAction`
Grasp to execute.
"""
# Parse state.
rgbd_im = state.rgbd_im
depth_im = rgbd_im.depth
# camera_intr = state.camera_intr
# segmask = state.segmask
# Plan grasps.
grasps, q_values = self.action_set(state)
# Select grasp.
index = self.select(grasps, q_values)
grasp = grasps[index]
q_value = q_values[index]
if self.config["vis"]["grasp_plan"]:
title = "Best Grasp: d=%.3f, q=%.3f" % (grasp.depth, q_value)
vis.figure()
vis.imshow(depth_im,
vmin=self.config["vis"]["vmin"],
vmax=self.config["vis"]["vmax"])
vis.grasp(grasp,
scale=5.0,
show_center=False,
show_axis=True,
jaw_width=1.0,
grasp_axis_width=0.2)
vis.title(title)
filename = None
if self._logging_dir is not None:
filename = os.path.join(self._logging_dir, "planned_grasp.png")
vis.show(filename)
# Form return image.
image = depth_im
if isinstance(self._grasp_quality_fn, GQCnnQualityFunction):
image_arr, _ = self._grasp_quality_fn.grasps_to_tensors([grasp],
state)
image = DepthImage(image_arr[0, ...], frame=rgbd_im.frame)
# Return action.
action = GraspAction(grasp, q_value, image)
return action
class QFunctionRobustGraspingPolicy(CrossEntropyRobustGraspingPolicy):
    """CEM grasping policy whose GQ-CNN estimates a Q-function for
    Q-learning rather than a probability of success.

    Antipodal grasp candidates are optimized in image space with the cross
    entropy method, exactly as in the parent class; the backing GQ-CNN is
    additionally reconfigured so that selected layers can be reinitialized
    and the output is a single scalar Q-value.

    Notes
    -----
    Required configuration parameters are specified in Other Parameters.

    Other Parameters
    ----------------
    reinit_pc1 : bool
        Whether or not to reinitialize the pc1 layer of the GQ-CNN.
    reinit_fc3: bool
        Whether or not to reinitialize the fc3 layer of the GQ-CNN.
    reinit_fc4: bool
        Whether or not to reinitialize the fc4 layer of the GQ-CNN.
    reinit_fc5: bool
        Whether or not to reinitialize the fc5 layer of the GQ-CNN.
    num_seed_samples : int
        Number of candidate to sample in the initial set.
    num_gmm_samples : int
        Number of candidates to sample on each resampling from the GMMs.
    num_iters : int
        Number of sample-and-refit iterations of CEM.
    gmm_refit_p : float
        Top p-% of grasps used for refitting.
    gmm_component_frac : float
        Percentage of the elite set size used to determine number of GMM
        components.
    gmm_reg_covar : float
        Regularization parameters for GMM covariance matrix, enforces
        diversity of fitted distributions.
    deterministic : bool, optional
        Whether to set the random seed to enforce deterministic behavior.
    gripper_width : float, optional
        Width of the gripper in meters.
    """

    def __init__(self, config):
        """
        Parameters
        ----------
        config : dict
            Python dictionary of policy configuration parameters.
        """
        CrossEntropyRobustGraspingPolicy.__init__(self, config)
        # Call this class's parser explicitly so a subclass override of
        # `_parse_config` does not shadow it.
        QFunctionRobustGraspingPolicy._parse_config(self)
        self._setup_gqcnn()

    def _parse_config(self):
        """Reads the layer-reinitialization flags from the policy config."""
        for layer in ("pc1", "fc3", "fc4", "fc5"):
            setattr(self, "_reinit_" + layer, self.config["reinit_" + layer])

    def _setup_gqcnn(self):
        """Reconfigures the GQ-CNN to act as a Q-function estimator."""
        # Close existing session (from superclass initializer).
        self.gqcnn.close_session()

        # A Q-function must emit a single scalar per input; reject networks
        # with a wider fc5 unless that layer will be reinitialized.
        if self.gqcnn.fc5_out_size != 1 and not self._reinit_fc5:
            raise ValueError("Q function must return scalar values")

        # Reinitialize layers.
        if self._reinit_fc5:
            self.gqcnn.fc5_out_size = 1

        # TODO(jmahler): Implement reinitialization of pc0.
        self.gqcnn.reinitialize_layers(self._reinit_fc3, self._reinit_fc4,
                                       self._reinit_fc5)
        self.gqcnn.initialize_network(add_softmax=False)
class EpsilonGreedyQFunctionRobustGraspingPolicy(QFunctionRobustGraspingPolicy
                                                 ):
    """Optimizes a set of antipodal grasp candidates in image space
    using the cross entropy method with a GQ-CNN that estimates the
    Q-function for use in Q-learning, and chooses a random antipodal
    grasp with probability epsilon.

    Notes
    -----
    Required configuration parameters are specified in Other Parameters.

    Other Parameters
    ----------------
    epsilon : float
    """

    def __init__(self, config):
        """
        Parameters
        ----------
        config : dict
            Python dictionary of policy configuration parameters.
        """
        QFunctionRobustGraspingPolicy.__init__(self, config)
        self._parse_config()

    def _parse_config(self):
        """Parses the parameters of the policy."""
        # Probability of taking a random (exploratory) action.
        self._epsilon = self.config["epsilon"]

    @property
    def epsilon(self):
        # Exploration probability used by `_action`.
        return self._epsilon

    @epsilon.setter
    def epsilon(self, val):
        self._epsilon = val

    def greedy_action(self, state):
        """Plans the grasp with the highest probability of success on
        the given RGB-D image.

        Attributes
        ----------
        state : :obj:`RgbdImageState`
            Image to plan grasps on.

        Returns
        -------
        :obj:`GraspAction`
            Grasp to execute.
        """
        return CrossEntropyRobustGraspingPolicy.action(self, state)

    def _action(self, state):
        """Plans the grasp with the highest probability of success on
        the given RGB-D image (greedy), or a uniformly random sampled
        grasp with probability epsilon.

        Attributes
        ----------
        state : :obj:`RgbdImageState`
            Image to plan grasps on.

        Returns
        -------
        :obj:`GraspAction`
            Grasp to execute.
        """
        # Take the greedy action with prob 1-epsilon.
        if np.random.rand() > self.epsilon:
            self._logger.debug("Taking greedy action")
            return CrossEntropyRobustGraspingPolicy.action(self, state)

        # Otherwise take a random action.
        self._logger.debug("Taking random action")

        # Check valid input.
        if not isinstance(state, RgbdImageState):
            raise ValueError("Must provide an RGB-D image state.")

        # Parse state.
        rgbd_im = state.rgbd_im
        camera_intr = state.camera_intr
        segmask = state.segmask

        # Sample random antipodal grasps.
        grasps = self._grasp_sampler.sample(
            rgbd_im,
            camera_intr,
            self._num_seed_samples,
            segmask=segmask,
            visualize=self.config["vis"]["grasp_sampling"],
            constraint_fn=self._grasp_constraint_fn,
            seed=self._seed)

        num_grasps = len(grasps)
        if num_grasps == 0:
            self._logger.warning("No valid grasps could be found")
            raise NoValidGraspsException()

        # Choose a grasp uniformly at random.
        grasp_ind = np.random.choice(num_grasps, size=1)[0]
        grasp = grasps[grasp_ind]
        depth = grasp.depth

        # Create transformed image.
        # NOTE(review): `grasps_to_tensors` is not defined on this class or
        # its visible ancestors — presumably provided elsewhere; verify.
        image_tensor, pose_tensor = self.grasps_to_tensors([grasp], state)
        image = DepthImage(image_tensor[0, ...])

        # Predict prob success.
        output_arr = self.gqcnn.predict(image_tensor, pose_tensor)
        q_value = output_arr[0, -1]

        # Visualize planned grasp.
        if self.config["vis"]["grasp_plan"]:
            # NOTE(review): `self._crop_width` is not assigned in any visible
            # initializer — confirm it is set before this branch runs.
            scale_factor = self.gqcnn.im_width / self._crop_width
            scaled_camera_intr = camera_intr.resize(scale_factor)
            vis_grasp = Grasp2D(Point(image.center),
                                0.0,
                                depth,
                                width=self._gripper_width,
                                camera_intr=scaled_camera_intr)
            vis.figure()
            vis.imshow(image)
            vis.grasp(vis_grasp, scale=1.5, show_center=False, show_axis=True)
            vis.title("Best Grasp: d=%.3f, q=%.3f" % (depth, q_value))
            vis.show()

        # Return action.
        return GraspAction(grasp, q_value, image)
class CompositeGraspingPolicy(Policy):
    """Grasping policy composed of multiple sub-policies.

    Attributes
    ----------
    policies : dict mapping str to `gqcnn.GraspingPolicy`
        Key-value dict mapping policy names to grasping policies.
    """

    def __init__(self, policies):
        """
        Parameters
        ----------
        policies : dict
            Key-value dict mapping policy names to grasping policies.
        """
        self._policies = policies
        self._logger = Logger.get_logger(self.__class__.__name__,
                                         log_file=None,
                                         global_log_file=True)

    @property
    def policies(self):
        """dict : The sub-policies keyed by name."""
        return self._policies

    def subpolicy(self, name):
        """Returns the sub-policy registered under `name`."""
        return self._policies[name]

    def set_constraint_fn(self, constraint_fn):
        """Sets the grasp constraint function on every sub-policy."""
        # Iterate the dict *values*: iterating the dict directly yields the
        # policy name strings, which have no `set_constraint_fn` method.
        for policy in self._policies.values():
            policy.set_constraint_fn(constraint_fn)
class PriorityCompositeGraspingPolicy(CompositeGraspingPolicy):
    """Composite policy that queries its sub-policies in a fixed priority
    order, keeping the first result whose q-value exceeds a threshold."""

    def __init__(self, policies, priority_list):
        """
        Parameters
        ----------
        policies : dict
            Key-value dict mapping policy names to grasping policies.
        priority_list : list of str
            Sub-policy names in the order they should be tried.

        Raises
        ------
        ValueError
            If a name in `priority_list` has no corresponding policy.
        """
        # Check validity.
        for name in priority_list:
            if str(name) not in policies:
                raise ValueError(
                    "Policy named %s is not in the list of policies!" % (name))
        self._priority_list = priority_list
        CompositeGraspingPolicy.__init__(self, policies)

    @property
    def priority_list(self):
        """list of str : Sub-policy names in priority order."""
        return self._priority_list

    def action(self, state, policy_subset=None, min_q_value=-1.0):
        """Returns an action for a given state.

        Tries each sub-policy in priority order until one produces an
        action with q-value above `min_q_value`.

        Raises
        ------
        :obj:`NoValidGraspsException`
            If no sub-policy produces a valid action.
        """
        action = None
        i = 0
        max_q = min_q_value
        # Keep searching while no acceptable action has been found AND there
        # are policies left to try. The index bound must apply to the whole
        # condition: with `action is None or (... and i < len(...))` the loop
        # would index past the end of the list when every sub-policy fails.
        while ((action is None or max_q <= min_q_value)
               and i < len(self._priority_list)):
            name = self._priority_list[i]
            if policy_subset is not None and name not in policy_subset:
                i += 1
                continue
            self._logger.info("Planning action for sub-policy {}".format(name))
            try:
                action = self.policies[name].action(state)
                action.policy_name = name
                max_q = action.q_value
            except NoValidGraspsException:
                # This sub-policy found nothing; fall through to the next.
                pass
            i += 1
        if action is None:
            raise NoValidGraspsException()
        return action

    def action_set(self, state, policy_subset=None, min_q_value=-1.0):
        """Returns an action set for a given state.

        Same search strategy as `action`, but returns the full action set
        and q-values of the first acceptable sub-policy.

        Raises
        ------
        :obj:`NoValidGraspsException`
            If no sub-policy produces valid actions.
        """
        actions = None
        q_values = None
        i = 0
        max_q = min_q_value
        # See `action` for why the index bound applies to the whole condition.
        while ((actions is None or max_q <= min_q_value)
               and i < len(self._priority_list)):
            name = self._priority_list[i]
            if policy_subset is not None and name not in policy_subset:
                i += 1
                continue
            self._logger.info(
                "Planning action set for sub-policy {}".format(name))
            try:
                actions, q_values = self.policies[name].action_set(state)
                for action in actions:
                    action.policy_name = name
                max_q = np.max(q_values)
            except NoValidGraspsException:
                pass
            i += 1
        if actions is None:
            raise NoValidGraspsException()
        return actions, q_values
class GreedyCompositeGraspingPolicy(CompositeGraspingPolicy):
    """Composite policy that queries every sub-policy and greedily keeps
    the result with the highest q-value."""

    def __init__(self, policies):
        """
        Parameters
        ----------
        policies : dict
            Key-value dict mapping policy names to grasping policies.
        """
        CompositeGraspingPolicy.__init__(self, policies)

    def action(self, state, policy_subset=None, min_q_value=-1.0):
        """Returns the highest-q-value action over all sub-policies.

        Raises
        ------
        :obj:`NoValidGraspsException`
            If no sub-policy produces a valid action.
        """
        # Compute all possible actions.
        actions = []
        for name, policy in self.policies.items():
            if policy_subset is not None and name not in policy_subset:
                continue
            try:
                action = policy.action(state)
                action.policy_name = name
                # Store the computed action (the original `actions.append()`
                # was called with no argument and raised a TypeError).
                actions.append(action)
            except NoValidGraspsException:
                pass
        if len(actions) == 0:
            raise NoValidGraspsException()

        # Rank based on q value.
        actions = sorted(actions, key=lambda x: x.q_value, reverse=True)
        return actions[0]

    def action_set(self, state, policy_subset=None, min_q_value=-1.0):
        """Returns the union of all sub-policy action sets.

        Raises
        ------
        :obj:`NoValidGraspsException`
            If no sub-policy produces valid actions.
        """
        actions = []
        q_values = []
        for name, policy in self.policies.items():
            if policy_subset is not None and name not in policy_subset:
                continue
            try:
                action_set, q_vals = policy.action_set(state)
                for action in action_set:
                    action.policy_name = name
                actions.extend(action_set)
                q_values.extend(q_vals)
            except NoValidGraspsException:
                continue
        # `actions` is a list and never None, so test for emptiness (the
        # original `actions is None` check could never fire and an empty
        # result would have been returned silently).
        if len(actions) == 0:
            raise NoValidGraspsException()
        return actions, q_values
================================================
FILE: gqcnn/model/__init__.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Factory functions to obtain `GQCNN`/`FCGQCNN` class based on backend.
Author: Vishal Satish
"""
from .tf import GQCNNTF, FCGQCNNTF
from autolab_core import Logger
def get_gqcnn_model(backend="tf", verbose=True):
    """Get the GQ-CNN model for the provided backend.

    Note:
        Currently only TensorFlow is supported.

    Parameters
    ----------
    backend : str
        The backend to use, currently only "tf" is supported.
    verbose : bool
        Whether or not to log initialization output to `stdout`.

    Returns
    -------
    :obj:`gqcnn.model.tf.GQCNNTF`
        GQ-CNN model with TensorFlow backend.
    """
    # Set up logger.
    logger = Logger.get_logger("GQCNNModelFactory", silence=(not verbose))

    # Reject anything but the TensorFlow backend up front.
    if backend != "tf":
        raise ValueError("Invalid backend: {}".format(backend))

    logger.info("Initializing GQ-CNN with Tensorflow as backend...")
    return GQCNNTF
def get_fc_gqcnn_model(backend="tf", verbose=True):
    """Get the FC-GQ-CNN model for the provided backend.

    Note:
        Currently only TensorFlow is supported.

    Parameters
    ----------
    backend : str
        The backend to use, currently only "tf" is supported.
    verbose : bool
        Whether or not to log initialization output to `stdout`.

    Returns
    -------
    :obj:`gqcnn.model.tf.FCGQCNNTF`
        FC-GQ-CNN model with TensorFlow backend.
    """
    # Set up logger.
    logger = Logger.get_logger("FCGQCNNModelFactory", silence=(not verbose))

    # Reject anything but the TensorFlow backend up front.
    if backend != "tf":
        raise ValueError("Invalid backend: {}".format(backend))

    logger.info("Initializing FC-GQ-CNN with Tensorflow as backend...")
    return FCGQCNNTF
================================================
FILE: gqcnn/model/tf/__init__.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
"""
from .network_tf import GQCNNTF
from .fc_network_tf import FCGQCNNTF
__all__ = ["GQCNNTF", "FCGQCNNTF"]
================================================
FILE: gqcnn/model/tf/fc_network_tf.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
FC-GQ-CNN network implemented in Tensorflow.
Author
------
Vishal Satish
"""
from collections import OrderedDict
import json
import os
import tensorflow as tf
from .network_tf import GQCNNTF
from ...utils import TrainingMode, InputDepthMode, GQCNNFilenames
class FCGQCNNTF(GQCNNTF):
    """FC-GQ-CNN network implemented in Tensorflow.

    Note
    ----
    FC-GQ-CNNs are never directly trained, but instead a pre-trained GQ-CNN
    is converted to an FC-GQ-CNN at inference time.
    """

    def __init__(self, gqcnn_config, fc_config, verbose=True, log_file=None):
        """
        Parameters
        ----------
        gqcnn_config : dict
            Python dictionary of pre-trained GQ-CNN model configuration
            parameters.
        fc_config : dict
            Python dictionary of FC-GQ-CNN model configuration parameters.
        verbose : bool
            Whether or not to log model output to `stdout`.
        log_file : str
            If provided, model output will also be logged to this file.
        """
        # Forward `verbose` to the parent constructor so that
        # `verbose=False` actually silences logging (previously the argument
        # was accepted but silently dropped).
        super(FCGQCNNTF, self).__init__(gqcnn_config,
                                        verbose=verbose,
                                        log_file=log_file)
        super(FCGQCNNTF, self)._parse_config(gqcnn_config)

        # We call the child `_parse_config` again (even though it gets called
        # in the parent `GQCNNTF` constructor) because the call to the parent
        # `_parse_config` overwrites the first call.
        self._parse_config(fc_config)

        # Check that conv layers of GQ-CNN were trained with VALID padding.
        # Fully-convolutional inference on larger images is only equivalent
        # to running the original GQ-CNN on crops when no zero-padding was
        # used during training.
        im_stream = self._architecture["im_stream"]
        for layer_name, layer_config in im_stream.items():
            if layer_config["type"] == "conv":
                invalid_pad_msg = ("GQ-CNN used for FC-GQ-CNN must have VALID"
                                   " padding for conv layers. Found layer: {}"
                                   " with padding: {}")
                assert layer_config["pad"] == "VALID", invalid_pad_msg.format(
                    layer_name, layer_config["pad"])

    @staticmethod
    def load(model_dir, fc_config, log_file=None):
        """Load an FC-GQ-CNN from a pre-trained GQ-CNN.

        Parameters
        ----------
        model_dir : str
            Path to pre-trained GQ-CNN model.
        fc_config : dict
            Python dictionary of FC-GQ-CNN model configuration parameters.
        log_file : str
            If provided, model output will also be logged to this file.

        Returns
        -------
        :obj:`FCGQCNNTF`
            Initialized FC-GQ-CNN.
        """
        config_file = os.path.join(model_dir, GQCNNFilenames.SAVED_CFG)
        with open(config_file) as data_file:
            train_config = json.load(data_file, object_pairs_hook=OrderedDict)
        gqcnn_config = train_config["gqcnn"]

        # Initialize weights and Tensorflow network.
        fcgqcnn = FCGQCNNTF(gqcnn_config, fc_config, log_file=log_file)
        fcgqcnn.init_weights_file(
            os.path.join(model_dir, GQCNNFilenames.FINAL_MODEL))
        fcgqcnn.init_mean_and_std(model_dir)
        # Match the output head to how the GQ-CNN was trained.
        training_mode = train_config["training_mode"]
        if training_mode == TrainingMode.CLASSIFICATION:
            fcgqcnn.initialize_network(add_softmax=True)
        elif training_mode == TrainingMode.REGRESSION:
            fcgqcnn.initialize_network()
        else:
            raise ValueError("Invalid training mode: {}".format(training_mode))
        return fcgqcnn

    def _parse_config(self, cfg):
        """Parse the FC-GQ-CNN-specific config.

        Only overrides the GQ-CNN input image height/width (the
        fully-convolutional network runs on the full image instead of
        crops); everything else comes from the pre-trained GQ-CNN config.
        """
        self._im_width = cfg["im_width"]
        self._im_height = cfg["im_height"]

    def _pack(self, dim_h, dim_w, data, vector=False):
        """Spatially tile `data` into a `dim_h` x `dim_w` map so it can be
        added elementwise to a convolutional feature map.

        Parameters
        ----------
        dim_h : :obj:`tf.Tensor`
            Spatial height to tile to.
        dim_w : :obj:`tf.Tensor`
            Spatial width to tile to.
        data : :obj:`tf.Tensor`
            1D tensor (when `vector=True`, e.g. a bias) or a 2D batch of
            vectors (when `vector=False`).
        vector : bool
            Whether `data` is a single vector or a batch of vectors.
        """
        if vector:
            # First reshape vector into 3-dimensional tensor.
            reshaped = tf.reshape(data, tf.concat([[1, 1], tf.shape(data)], 0))

            # Then tile into tensor of shape dim_h x dim_w x `data.dim0`.
            packed = tf.tile(reshaped, [dim_h, dim_w, 1])
        else:
            # First reshape second dimension of tensor into 3-dimensional
            # tensor.
            reshaped = tf.reshape(
                data,
                tf.concat([tf.shape(data)[0:1], [1, 1],
                           tf.shape(data)[1:]], 0))

            # Then tile into tensor of shape
            # bsize=1 x dim_h x dim_w x `data.dim1`.
            packed = tf.tile(reshaped, [1, dim_h, dim_w, 1])
        return packed

    def _build_fully_conv_layer(self,
                                input_node,
                                filter_dim,
                                fc_name,
                                final_fc_layer=False):
        """Convert a pre-trained fully-connected layer into an equivalent
        1x1-output convolution and apply it to `input_node`.

        Parameters
        ----------
        input_node : :obj:`tf.Tensor`
            Input feature map.
        filter_dim : int
            Spatial extent of the receptive field of the original fc layer.
        fc_name : str
            Name of the fc layer whose weights are converted.
        final_fc_layer : bool
            If `True`, no activation is applied (the output feeds a
            softmax/sigmoid head instead).
        """
        self._logger.info(
            "Converting fc layer {} to fully convolutional...".format(fc_name))

        # Create new set of weights by reshaping fully connected layer
        # weights into a filter_dim x filter_dim conv kernel.
        fcW = self._weights.weights["{}_weights".format(fc_name)]
        convW = tf.Variable(tf.reshape(
            fcW,
            tf.concat([[filter_dim, filter_dim],
                       [tf.shape(fcW)[0] // (filter_dim * filter_dim)],
                       tf.shape(fcW)[1:]], 0)),
                            name="{}_fully_conv_weights".format(fc_name))
        self._weights.weights["{}_fully_conv_weights".format(fc_name)] = convW

        # Get bias.
        convb = self._weights.weights["{}_bias".format(fc_name)]

        # Compute conv out (note that we use padding="VALID" here because we
        # want an output size of 1 x 1 x num_filts for the original input
        # size).
        convh = tf.nn.conv2d(input_node,
                             convW,
                             strides=[1, 1, 1, 1],
                             padding="VALID")

        # Pack bias into tensor of shape `tf.shape(convh)`.
        # TODO(vsatish): This is unnecessary and can be removed for potential
        # performance improvements.
        bias_packed = self._pack(tf.shape(convh)[1],
                                 tf.shape(convh)[2],
                                 convb,
                                 vector=True)

        # Add bias term.
        convh = convh + bias_packed

        # Apply activation.
        if not final_fc_layer:
            convh = self._leaky_relu(convh, alpha=self._relu_coeff)

        # Add output to feature_dict.
        self._feature_tensors[fc_name] = convh

        return convh

    def _build_fully_conv_merge_layer(self, input_node_im, input_node_pose,
                                      filter_dim, fc_name):
        """Convert a pre-trained fc merge layer (image + pose streams) into
        its fully-convolutional equivalent.

        Parameters
        ----------
        input_node_im : :obj:`tf.Tensor`
            Image stream feature map.
        input_node_pose : :obj:`tf.Tensor`
            Pose stream features.
        filter_dim : int
            Spatial extent of the receptive field of the original fc layer.
        fc_name : str
            Name of the fc merge layer whose weights are converted.
        """
        self._logger.info(
            "Converting fc merge layer {} to fully convolutional...".format(
                fc_name))

        # Create new set of weights for image stream by reshaping
        # fully-connected layer weights.
        fcW_im = self._weights.weights["{}_input_1_weights".format(fc_name)]
        convW = tf.Variable(tf.reshape(
            fcW_im,
            tf.concat([[filter_dim, filter_dim],
                       [tf.shape(fcW_im)[0] // (filter_dim * filter_dim)],
                       tf.shape(fcW_im)[1:]], 0)),
                            name="{}_im_fully_conv_weights".format(fc_name))
        self._weights.weights["{}_im_fully_conv_weights".format(
            fc_name)] = convW

        # Compute im stream conv out (note that we use padding="VALID" here
        # because we want an output size of 1 x 1 x num_filts for the
        # original input size).
        convh_im = tf.nn.conv2d(input_node_im,
                                convW,
                                strides=[1, 1, 1, 1],
                                padding="VALID")

        # Get pose stream fully-connected weights.
        fcW_pose = self._weights.weights["{}_input_2_weights".format(fc_name)]

        # Compute matmul for pose stream.
        pose_out = tf.matmul(input_node_pose, fcW_pose)

        # Pack pose_out into a tensor of shape=tf.shape(convh_im).
        # TODO(vsatish): This is unnecessary and can be removed for potential
        # performance improvements.
        pose_packed = self._pack(
            tf.shape(convh_im)[1],
            tf.shape(convh_im)[2], pose_out)

        # Add the im and pose tensors.
        convh = convh_im + pose_packed

        # Pack bias.
        # TODO(vsatish): This is unnecessary and can be removed for potential
        # performance improvements.
        fc_bias = self._weights.weights["{}_bias".format(fc_name)]
        bias_packed = self._pack(tf.shape(convh_im)[1],
                                 tf.shape(convh_im)[2],
                                 fc_bias,
                                 vector=True)

        # Add bias and apply activation.
        convh = self._leaky_relu(convh + bias_packed, alpha=self._relu_coeff)

        return convh

    def _build_im_stream(self,
                         input_node,
                         input_pose_node,
                         input_height,
                         input_width,
                         input_channels,
                         drop_rate,
                         layers,
                         only_stream=False):
        """Build the fully-convolutional image stream.

        Conv layers are built as in the parent class; fc layers are replaced
        by their fully-convolutional equivalents. `filter_dim` tracks the
        receptive-field size each fc layer would have seen at the original
        training image size.
        """
        self._logger.info("Building Image Stream...")

        if self._input_depth_mode == InputDepthMode.SUB:
            # Subtract the gripper depth from the image and normalize with
            # the stored mean/std.
            sub_mean = tf.constant(self._im_depth_sub_mean, dtype=tf.float32)
            sub_std = tf.constant(self._im_depth_sub_std, dtype=tf.float32)
            sub_im = tf.subtract(
                input_node,
                tf.tile(
                    tf.reshape(input_pose_node, tf.constant((-1, 1, 1, 1))),
                    tf.constant((1, input_height, input_width, 1))))
            norm_sub_im = tf.div(tf.subtract(sub_im, sub_mean), sub_std)
            input_node = norm_sub_im

        output_node = input_node
        prev_layer = "start"  # Dummy placeholder.
        filter_dim = self._train_im_width
        last_index = len(layers) - 1
        for layer_index, (layer_name,
                          layer_config) in enumerate(layers.items()):
            layer_type = layer_config["type"]
            if layer_type == "conv":
                if prev_layer == "fc":
                    raise ValueError("Cannot have conv layer after fc layer!")
                output_node, input_height, input_width, input_channels = \
                    self._build_conv_layer(
                        output_node,
                        input_height,
                        input_width,
                        input_channels,
                        layer_config["filt_dim"],
                        layer_config["filt_dim"],
                        layer_config["num_filt"],
                        layer_config["pool_stride"],
                        layer_config["pool_stride"],
                        layer_config["pool_size"],
                        layer_name,
                        norm=layer_config["norm"],
                        pad=layer_config["pad"])
                prev_layer = layer_type
                # Track how the conv/pool shrinks the receptive field of the
                # downstream fc layers.
                if layer_config["pad"] == "SAME":
                    filter_dim //= layer_config["pool_stride"]
                else:
                    filter_dim = ((filter_dim - layer_config["filt_dim"]) //
                                  layer_config["pool_stride"]) + 1
            elif layer_type == "fc":
                if layer_index == last_index and only_stream:
                    output_node = self._build_fully_conv_layer(
                        output_node,
                        filter_dim,
                        layer_name,
                        final_fc_layer=True)
                else:
                    output_node = self._build_fully_conv_layer(
                        output_node, filter_dim, layer_name)
                prev_layer = layer_type

                # Because fully-convolutional layers at this point in the
                # network have a filter_dim of 1.
                filter_dim = 1
            elif layer_type == "pc":
                raise ValueError(
                    "Cannot have pose connected layer in image stream!")
            elif layer_type == "fc_merge":
                raise ValueError("Cannot have merge layer in image stream!")
            else:
                raise ValueError(
                    "Unsupported layer type: {}".format(layer_type))
        # Fan-out is not tracked for FC networks; -1 is a sentinel.
        return output_node, -1

    def _build_merge_stream(self, input_stream_1, input_stream_2, fan_in_1,
                            fan_in_2, drop_rate, layers):
        """Build the fully-convolutional merge stream (image + pose).

        The first layer must be an `fc_merge` layer; subsequent fc layers
        are converted to fully-convolutional layers with a 1x1 filter.
        """
        self._logger.info("Building Merge Stream...")

        # First check if first layer is a merge layer.
        if layers[list(layers)[0]]["type"] != "fc_merge":
            raise ValueError(
                "First layer in merge stream must be of type fc_merge!")
        last_index = len(layers) - 1

        # Because fully-convolutional layers at this point in the network
        # have a filter_dim of 1.
        filter_dim = 1
        fan_in = -1
        output_node = None  # Will be overridden.
        for layer_index, (layer_name,
                          layer_config) in enumerate(layers.items()):
            layer_type = layer_config["type"]
            if layer_type == "conv":
                raise ValueError("Cannot have conv layer in merge stream!")
            elif layer_type == "fc":
                if layer_index == last_index:
                    output_node = self._build_fully_conv_layer(
                        output_node,
                        filter_dim,
                        layer_name,
                        final_fc_layer=True)
                else:
                    output_node = self._build_fully_conv_layer(
                        output_node, filter_dim, layer_name)
            elif layer_type == "pc":
                raise ValueError(
                    "Cannot have pose connected layer in merge stream!")
            elif layer_type == "fc_merge":
                output_node = self._build_fully_conv_merge_layer(
                    input_stream_1, input_stream_2, filter_dim, layer_name)
            else:
                raise ValueError(
                    "Unsupported layer type: {}".format(layer_type))
        return output_node, fan_in
================================================
FILE: gqcnn/model/tf/network_tf.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
GQ-CNN network implemented in Tensorflow.
Author
------
Vishal Satish & Jeff Mahler
"""
from collections import OrderedDict
import errno
from functools import reduce
import json
import math
import operator
import os
import time
import numpy as np
import tensorflow as tf
import tensorflow.contrib.framework as tcf
from autolab_core import Logger
from ...utils import (reduce_shape, read_pose_data, pose_dim,
weight_name_to_layer_name, GripperMode, TrainingMode,
InputDepthMode, GQCNNFilenames)
class GQCNNWeights(object):
    """Lightweight container for network weights.

    Holds a single `weights` dict mapping short variable names to the
    corresponding TF weight variables.
    """

    def __init__(self):
        # Starts empty; populated when a checkpoint is loaded.
        self.weights = dict()
class GQCNNTF(object):
"""GQ-CNN network implemented in Tensorflow."""
    def __init__(self, gqcnn_config, verbose=True, log_file=None):
        """
        Parameters
        ----------
        gqcnn_config : dict
            Python dictionary of model configuration parameters.
        verbose : bool
            Whether or not to log model output to `stdout`.
        log_file : str
            If provided, model output will also be logged to this file.
        """
        # Session is opened lazily via `open_session`; all ops live in a
        # private graph so multiple networks can coexist in one process.
        self._sess = None
        self._graph = tf.Graph()

        # Set up logger.
        # NOTE(review): `global_log_file=verbose` looks suspicious (a bool
        # passed where a file flag is expected) — confirm against
        # `autolab_core.Logger.get_logger`.
        self._logger = Logger.get_logger(self.__class__.__name__,
                                         log_file=log_file,
                                         silence=(not verbose),
                                         global_log_file=verbose)
        # Weights container; populated later by `init_weights_file` or
        # `set_base_network`.
        self._weights = GQCNNWeights()
        # Parse config last so `_logger` exists for legacy-mode warnings.
        self._parse_config(gqcnn_config)
@staticmethod
def load(model_dir, verbose=True, log_file=None):
"""Instantiate a trained GQ-CNN for fine-tuning or inference.
Parameters
----------
model_dir : str
Path to trained GQ-CNN model.
verbose : bool
Whether or not to log model output to `stdout`.
log_file : str
If provided, model output will also be logged to this file.
Returns
-------
:obj:`GQCNNTF`
Initialized GQ-CNN.
"""
config_file = os.path.join(model_dir, GQCNNFilenames.SAVED_CFG)
with open(config_file) as data_file:
train_config = json.load(data_file, object_pairs_hook=OrderedDict)
# Support for legacy configs.
try:
gqcnn_config = train_config["gqcnn"]
except KeyError:
gqcnn_config = train_config["gqcnn_config"]
# Convert old networks to new flexible arch format.
gqcnn_config["debug"] = 0
gqcnn_config["seed"] = 0
# Legacy networks had no angular support.
gqcnn_config["num_angular_bins"] = 0
# Legacy networks only supported depth integration through pose
# stream.
gqcnn_config["input_depth_mode"] = InputDepthMode.POSE_STREAM
arch_config = gqcnn_config["architecture"]
if "im_stream" not in arch_config:
new_arch_config = OrderedDict()
new_arch_config["im_stream"] = OrderedDict()
new_arch_config["pose_stream"] = OrderedDict()
new_arch_config["merge_stream"] = OrderedDict()
layer_name = "conv1_1"
new_arch_config["im_stream"][layer_name] = arch_config[
layer_name]
new_arch_config["im_stream"][layer_name]["type"] = "conv"
new_arch_config["im_stream"][layer_name]["pad"] = "SAME"
if "padding" in arch_config[layer_name]:
new_arch_config["im_stream"][layer_name][
"pad"] = arch_config[layer_name]["padding"]
layer_name = "conv1_2"
new_arch_config["im_stream"][layer_name] = arch_config[
layer_name]
new_arch_config["im_stream"][layer_name]["type"] = "conv"
new_arch_config["im_stream"][layer_name]["pad"] = "SAME"
if "padding" in arch_config[layer_name]:
new_arch_config["im_stream"][layer_name][
"pad"] = arch_config[layer_name]["padding"]
layer_name = "conv2_1"
new_arch_config["im_stream"][layer_name] = arch_config[
layer_name]
new_arch_config["im_stream"][layer_name]["type"] = "conv"
new_arch_config["im_stream"][layer_name]["pad"] = "SAME"
if "padding" in arch_config[layer_name]:
new_arch_config["im_stream"][layer_name][
"pad"] = arch_config[layer_name]["padding"]
layer_name = "conv2_2"
new_arch_config["im_stream"][layer_name] = arch_config[
layer_name]
new_arch_config["im_stream"][layer_name]["type"] = "conv"
new_arch_config["im_stream"][layer_name]["pad"] = "SAME"
if "padding" in arch_config[layer_name]:
new_arch_config["im_stream"][layer_name][
"pad"] = arch_config[layer_name]["padding"]
layer_name = "conv3_1"
if layer_name in arch_config:
new_arch_config["im_stream"][layer_name] = arch_config[
layer_name]
new_arch_config["im_stream"][layer_name]["type"] = "conv"
new_arch_config["im_stream"][layer_name]["pad"] = "SAME"
if "padding" in arch_config[layer_name]:
new_arch_config["im_stream"][layer_name][
"pad"] = arch_config[layer_name]["padding"]
layer_name = "conv3_2"
if layer_name in arch_config:
new_arch_config["im_stream"][layer_name] = arch_config[
layer_name]
new_arch_config["im_stream"][layer_name]["type"] = "conv"
new_arch_config["im_stream"][layer_name]["pad"] = "SAME"
if "padding" in arch_config[layer_name]:
new_arch_config["im_stream"][layer_name][
"pad"] = arch_config[layer_name]["padding"]
layer_name = "fc3"
new_arch_config["im_stream"][layer_name] = arch_config[
layer_name]
new_arch_config["im_stream"][layer_name]["type"] = "fc"
layer_name = "pc1"
new_arch_config["pose_stream"][layer_name] = arch_config[
layer_name]
new_arch_config["pose_stream"][layer_name]["type"] = "pc"
layer_name = "pc2"
if layer_name in arch_config:
new_arch_config["pose_stream"][layer_name] = arch_config[
layer_name]
new_arch_config["pose_stream"][layer_name]["type"] = "pc"
layer_name = "fc4"
new_arch_config["merge_stream"][layer_name] = arch_config[
layer_name]
new_arch_config["merge_stream"][layer_name][
"type"] = "fc_merge"
layer_name = "fc5"
new_arch_config["merge_stream"][layer_name] = arch_config[
layer_name]
new_arch_config["merge_stream"][layer_name]["type"] = "fc"
gqcnn_config["architecture"] = new_arch_config
# Initialize weights and Tensorflow network.
gqcnn = GQCNNTF(gqcnn_config, verbose=verbose, log_file=log_file)
gqcnn.init_weights_file(
os.path.join(model_dir, GQCNNFilenames.FINAL_MODEL))
gqcnn.init_mean_and_std(model_dir)
training_mode = train_config["training_mode"]
if training_mode == TrainingMode.CLASSIFICATION:
gqcnn.initialize_network(add_softmax=True)
elif training_mode == TrainingMode.REGRESSION:
gqcnn.initialize_network()
else:
raise ValueError("Invalid training mode: {}".format(training_mode))
return gqcnn
    def init_mean_and_std(self, model_dir):
        """Loads the means and stds of a trained GQ-CNN to use for data
        normalization during inference.

        Which files are loaded depends on `self._input_depth_mode`
        (set by `_parse_config`).

        Parameters
        ----------
        model_dir : str
            Path to trained GQ-CNN model where means and standard deviations
            are stored.

        Raises
        ------
        ValueError
            If the input depth mode is not one of the supported modes.
        """
        # Load in means and stds.
        if self._input_depth_mode == InputDepthMode.POSE_STREAM:
            try:
                self._im_mean = np.load(
                    os.path.join(model_dir, GQCNNFilenames.IM_MEAN))
                self._im_std = np.load(
                    os.path.join(model_dir, GQCNNFilenames.IM_STD))
            except IOError as e:  # Python 2.6,7/3+ compatibility.
                if e.errno == errno.ENOENT:  # File not found.
                    # Support for legacy file naming convention.
                    self._im_mean = np.load(
                        os.path.join(model_dir, GQCNNFilenames.LEG_MEAN))
                    self._im_std = np.load(
                        os.path.join(model_dir, GQCNNFilenames.LEG_STD))
                else:
                    # Some other IOError.
                    raise e
            self._pose_mean = np.load(
                os.path.join(model_dir, GQCNNFilenames.POSE_MEAN))
            self._pose_std = np.load(
                os.path.join(model_dir, GQCNNFilenames.POSE_STD))

            # Read the certain parts of the pose mean/std that we desire.
            # Stored pose stats may contain more fields than the current
            # gripper mode uses; slice them down to `self._pose_dim`.
            if len(self._pose_mean.shape
                   ) > 0 and self._pose_mean.shape[0] != self._pose_dim:
                # Handle multi-dim storage.
                if len(self._pose_mean.shape
                       ) > 1 and self._pose_mean.shape[1] == self._pose_dim:
                    self._pose_mean = self._pose_mean[0, :]
                    self._pose_std = self._pose_std[0, :]
                else:
                    # Fall back to gripper-mode-specific field selection.
                    self._pose_mean = read_pose_data(self._pose_mean,
                                                     self._gripper_mode)
                    self._pose_std = read_pose_data(self._pose_std,
                                                    self._gripper_mode)
        elif self._input_depth_mode == InputDepthMode.SUB:
            # Depth-subtraction mode stores a single scalar mean/std pair.
            self._im_depth_sub_mean = np.load(
                os.path.join(model_dir, GQCNNFilenames.IM_DEPTH_SUB_MEAN))
            self._im_depth_sub_std = np.load(
                os.path.join(model_dir, GQCNNFilenames.IM_DEPTH_SUB_STD))
        elif self._input_depth_mode == InputDepthMode.IM_ONLY:
            # Image-only mode: no pose stats are needed.
            self._im_mean = np.load(
                os.path.join(model_dir, GQCNNFilenames.IM_MEAN))
            self._im_std = np.load(
                os.path.join(model_dir, GQCNNFilenames.IM_STD))
        else:
            raise ValueError("Unsupported input depth mode: {}".format(
                self._input_depth_mode))
    def set_base_network(self, model_dir):
        """Initialize network weights for the base network. Used during
        fine-tuning.

        Reads the base model's architecture to find all layers up to and
        including the configured `output_layer`, then loads only those
        layers' weights from the base checkpoint. Replaces `self._weights`.

        Parameters
        ----------
        model_dir : str
            Path to pre-trained GQ-CNN model.

        Returns
        -------
        bool
            `False` if the architecture has no `base_model` entry (nothing
            is modified); otherwise `None`.
        """
        # Check architecture.
        # NOTE(review): "Architecuture" is a typo in this log message.
        if "base_model" not in self._architecture:
            self._logger.warning(
                "Architecuture has no base model. The network has not been"
                " modified.")
            return False
        base_model_config = self._architecture["base_model"]
        output_layer = base_model_config["output_layer"]

        # Read model.
        ckpt_file = os.path.join(model_dir, GQCNNFilenames.FINAL_MODEL)
        config_file = os.path.join(model_dir, GQCNNFilenames.SAVED_ARCH)
        base_arch = json.load(open(config_file, "r"),
                              object_pairs_hook=OrderedDict)

        # Read base layer names: walk the architecture in order, collecting
        # layer names until `output_layer` is reached.
        # NOTE(review): if `output_layer` is not present in `base_arch`,
        # `next()` will eventually raise an uncaught StopIteration here —
        # confirm configs always name a valid layer.
        self._base_layer_names = []
        found_base_layer = False
        use_legacy = not ("im_stream" in base_arch)
        if use_legacy:
            # Legacy format: flat mapping of layer name -> config.
            layer_iter = iter(base_arch)
            while not found_base_layer:
                layer_name = next(layer_iter)
                self._base_layer_names.append(layer_name)
                if layer_name == output_layer:
                    found_base_layer = True
        else:
            # Stream format: iterate streams, then layers within each stream.
            stream_iter = iter(base_arch)
            while not found_base_layer:
                stream_name = next(stream_iter)
                stream_arch = base_arch[stream_name]
                layer_iter = iter(stream_arch)
                stop = False
                while not found_base_layer and not stop:
                    try:
                        layer_name = next(layer_iter)
                        self._base_layer_names.append(layer_name)
                        if layer_name == output_layer:
                            found_base_layer = True
                    except StopIteration:
                        stop = True

        with self._graph.as_default():
            # Create new tf checkpoint reader.
            reader = tf.train.NewCheckpointReader(ckpt_file)

            # Create empty weights object.
            self._weights = GQCNNWeights()

            # Read/generate weight/bias variable names.
            ckpt_vars = tcf.list_variables(ckpt_file)
            full_var_names = []
            short_names = []
            for variable, shape in ckpt_vars:
                full_var_names.append(variable)
                short_names.append(variable.split("/")[-1])

            # Load variables.
            for full_var_name, short_name in zip(full_var_names, short_names):
                # Check valid weights.
                layer_name = weight_name_to_layer_name(short_name)

                # Add weights only for layers that belong to the base model.
                if layer_name in self._base_layer_names:
                    self._weights.weights[short_name] = tf.Variable(
                        reader.get_tensor(full_var_name), name=full_var_name)
def init_weights_file(self, ckpt_file):
"""Load trained GQ-CNN weights.
Parameters
----------
ckpt_file : str
Tensorflow checkpoint file from which to load model weights.
"""
with self._graph.as_default():
# Create new tf checkpoint reader.
reader = tf.train.NewCheckpointReader(ckpt_file)
# Create empty weight object.
self._weights = GQCNNWeights()
# Read/generate weight/bias variable names.
ckpt_vars = tcf.list_variables(ckpt_file)
full_var_names = []
short_names = []
for variable, shape in ckpt_vars:
full_var_names.append(variable)
short_names.append(variable.split("/")[-1])
# Load variables.
for full_var_name, short_name in zip(full_var_names, short_names):
self._weights.weights[short_name] = tf.Variable(
reader.get_tensor(full_var_name), name=full_var_name)
    def _parse_config(self, gqcnn_config):
        """Parse configuration file.

        Populates all model hyperparameters from `gqcnn_config`, with
        fallbacks for several legacy config formats.

        Parameters
        ----------
        gqcnn_config : dict
            Python dictionary of model configuration parameters.
        """
        # Parse GQ-CNN config.
        # Load tensor params.
        self._batch_size = gqcnn_config["batch_size"]
        self._train_im_height = gqcnn_config["im_height"]
        self._train_im_width = gqcnn_config["im_width"]
        # Inference dims default to training dims (FC-GQ-CNN overrides them).
        self._im_height = self._train_im_height
        self._im_width = self._train_im_width
        self._num_channels = gqcnn_config["im_channels"]
        try:
            self._gripper_mode = gqcnn_config["gripper_mode"]
        except KeyError:
            # Legacy support: map old "input_data_mode" strings to gripper
            # modes.
            self._input_data_mode = gqcnn_config["input_data_mode"]
            if self._input_data_mode == "tf_image":
                self._gripper_mode = GripperMode.LEGACY_PARALLEL_JAW
            elif self._input_data_mode == "tf_image_suction":
                self._gripper_mode = GripperMode.LEGACY_SUCTION
            elif self._input_data_mode == "suction":
                self._gripper_mode = GripperMode.SUCTION
            elif self._input_data_mode == "multi_suction":
                self._gripper_mode = GripperMode.MULTI_SUCTION
            elif self._input_data_mode == "parallel_jaw":
                self._gripper_mode = GripperMode.PARALLEL_JAW
            else:
                raise ValueError(
                    "Legacy input data mode: {} not supported!".format(
                        self._input_data_mode))
            self._logger.warning("Could not read gripper mode. Attempting"
                                 " legacy conversion to: {}".format(
                                     self._gripper_mode))

        # Setup gripper pose dimension depending on gripper mode.
        self._pose_dim = pose_dim(self._gripper_mode)

        # Load architecture.
        self._architecture = gqcnn_config["architecture"]

        # Get input depth mode.
        self._input_depth_mode = InputDepthMode.POSE_STREAM  # Legacy support.
        if "input_depth_mode" in gqcnn_config:
            self._input_depth_mode = gqcnn_config["input_depth_mode"]

        # Load network local response normalization layer constants.
        self._normalization_radius = gqcnn_config["radius"]
        self._normalization_alpha = gqcnn_config["alpha"]
        self._normalization_beta = gqcnn_config["beta"]
        self._normalization_bias = gqcnn_config["bias"]

        # Get ReLU coefficient (0.0 = standard ReLU).
        self._relu_coeff = 0.0  # Legacy support.
        if "relu_coeff" in gqcnn_config:
            self._relu_coeff = gqcnn_config["relu_coeff"]

        # Debugging.
        self._debug = gqcnn_config["debug"]
        self._rand_seed = gqcnn_config["seed"]

        # Initialize means and standard deviations to be 0 and 1,
        # respectively, so the network is usable before
        # `init_mean_and_std` loads the trained statistics.
        if self._input_depth_mode == InputDepthMode.POSE_STREAM:
            self._im_mean = 0
            self._im_std = 1
            self._pose_mean = np.zeros(self._pose_dim)
            self._pose_std = np.ones(self._pose_dim)
        elif self._input_depth_mode == InputDepthMode.SUB:
            self._im_depth_sub_mean = 0
            self._im_depth_sub_std = 1
        elif self._input_depth_mode == InputDepthMode.IM_ONLY:
            self._im_mean = 0
            self._im_std = 1

        # Get number of angular bins.
        self._angular_bins = 0  # Legacy support.
        if "angular_bins" in gqcnn_config:
            self._angular_bins = gqcnn_config["angular_bins"]

        # Get max angle (stored in degrees in the config, used in radians).
        self._max_angle = np.pi
        if "max_angle" in gqcnn_config:
            self._max_angle = np.deg2rad(gqcnn_config["max_angle"])

        # If using angular bins, make sure output size of final fully
        # connected layer is 2x number of angular bins (because of
        # failure/success probs for each bin).
        if self._angular_bins > 0:
            final_out_size = list(
                list(self._architecture.values())[-1].values())[-1]["out_size"]
            ang_mismatch_msg = ("When predicting angular outputs, output"
                                " size of final fully connected layer must"
                                " be 2x number of angular bins.")
            assert final_out_size == 2 * self._angular_bins, ang_mismatch_msg

        # Intermediate network feature handles.
        self._feature_tensors = {}

        # Base layer names for fine-tuning.
        self._base_layer_names = []
    def initialize_network(self,
                           train_im_node=None,
                           train_pose_node=None,
                           add_softmax=False,
                           add_sigmoid=False):
        """Set up input placeholders and build network.

        Parameters
        ----------
        train_im_node : :obj:`tf.placeholder`
            Images for training.
        train_pose_node : :obj:`tf.placeholder`
            Poses for training.
        add_softmax : bool
            Whether or not to add a softmax layer to output of network.
        add_sigmoid : bool
            Whether or not to add a sigmoid layer to output of network.
        """
        with self._graph.as_default():
            # Set TF random seed if debugging.
            if self._debug:
                tf.set_random_seed(self._rand_seed)

            # Setup input placeholders.
            if train_im_node is not None:
                # Training: defaults to the training tensors, but can still
                # be fed explicitly (variable batch size).
                self._input_im_node = tf.placeholder_with_default(
                    train_im_node, (None, self._im_height, self._im_width,
                                    self._num_channels))
                self._input_pose_node = tf.placeholder_with_default(
                    train_pose_node, (None, self._pose_dim))
            else:
                # Inference only using GQ-CNN instantiated from
                # `GQCNNTF.load`; fixed batch size.
                self._input_im_node = tf.placeholder(
                    tf.float32, (self._batch_size, self._im_height,
                                 self._im_width, self._num_channels))
                self._input_pose_node = tf.placeholder(
                    tf.float32, (self._batch_size, self._pose_dim))
            # Drop rate defaults to 0.0 (no dropout) unless explicitly fed.
            self._input_drop_rate_node = tf.placeholder_with_default(
                tf.constant(0.0), ())

            # Build network.
            self._output_tensor = self._build_network(
                self._input_im_node, self._input_pose_node,
                self._input_drop_rate_node)

            # Add softmax function to output of network (this is optional
            # because 1) we might be doing regression or 2) we are training
            # and Tensorflow has an optimized cross-entropy loss with the
            # softmax already built-in).
            if add_softmax:
                self.add_softmax_to_output()

            # Add sigmoid function to output of network (for weighted
            # cross-entropy loss).
            if add_sigmoid:
                self.add_sigmoid_to_output()

        # Create feed tensors for prediction (reused across predict calls).
        self._input_im_arr = np.zeros((self._batch_size, self._im_height,
                                       self._im_width, self._num_channels))
        self._input_pose_arr = np.zeros((self._batch_size, self._pose_dim))
def open_session(self):
"""Open Tensorflow session."""
if self._sess is not None:
self._logger.warning("Found already initialized TF Session...")
return self._sess
self._logger.info("Initializing TF Session...")
with self._graph.as_default():
init = tf.global_variables_initializer()
self.tf_config = tf.ConfigProto()
# Allow Tensorflow gpu growth so Tensorflow does not lock-up all
# GPU memory.
self.tf_config.gpu_options.allow_growth = True
self._sess = tf.Session(graph=self._graph, config=self.tf_config)
self._sess.run(init)
return self._sess
def close_session(self):
"""Close Tensorflow session."""
if self._sess is None:
self._logger.warning("No TF Session to close...")
return
self._logger.info("Closing TF Session...")
with self._graph.as_default():
self._sess.close()
self._sess = None
    def __del__(self):
        """Destructor that basically just makes sure the Tensorflow session
        has been closed."""
        # NOTE(review): `__del__` may run during interpreter shutdown when
        # globals are partially torn down — best-effort cleanup only.
        if self._sess is not None:
            self.close_session()
    @property
    def input_depth_mode(self):
        """Input depth integration mode (see `InputDepthMode`)."""
        return self._input_depth_mode

    @property
    def batch_size(self):
        """Inference batch size."""
        return self._batch_size

    @property
    def im_height(self):
        """Input image height in pixels."""
        return self._im_height

    @property
    def im_width(self):
        """Input image width in pixels."""
        return self._im_width

    @property
    def num_channels(self):
        """Number of input image channels."""
        return self._num_channels

    @property
    def pose_dim(self):
        """Dimension of the gripper pose vector."""
        return self._pose_dim

    @property
    def gripper_mode(self):
        """Gripper mode (see `GripperMode`)."""
        return self._gripper_mode

    @property
    def input_im_node(self):
        """Image input placeholder."""
        return self._input_im_node

    @property
    def input_pose_node(self):
        """Pose input placeholder."""
        return self._input_pose_node

    @property
    def input_drop_rate_node(self):
        """Dropout rate placeholder (defaults to 0.0)."""
        return self._input_drop_rate_node

    @property
    def output(self):
        """Network output tensor."""
        return self._output_tensor

    @property
    def weights(self):
        """Dict of network weight variables keyed by short name."""
        return self._weights.weights

    @property
    def tf_graph(self):
        """The private Tensorflow graph holding all network ops."""
        return self._graph

    @property
    def sess(self):
        """The open Tensorflow session, or `None`."""
        return self._sess

    @property
    def angular_bins(self):
        """Number of angular bins (0 when angular prediction is disabled)."""
        return self._angular_bins

    @property
    def max_angle(self):
        """Maximum grasp angle in radians."""
        return self._max_angle
@property
def stride(self):
return reduce(operator.mul, [
layer["pool_stride"]
for layer in self._architecture["im_stream"].values()
if layer["type"] == "conv"
])
    @property
    def filters(self):
        """Evaluate the filters of the first convolution layer.

        Opens a temporary session if none is open, and closes it again
        afterwards.

        Returns
        -------
        :obj:`numpy.ndarray`
            Filters (weights) from first convolution layer of the network.
        """
        close_sess = False
        if self._sess is None:
            close_sess = True
            self.open_session()
        # The first layer of the image stream is the first conv layer.
        first_layer_name = list(self._architecture["im_stream"])[0]
        try:
            filters = self._sess.run(
                self._weights.weights["{}_weights".format(first_layer_name)])
        except KeyError:
            # Legacy support: old checkpoints used the "<layer>W" naming.
            filters = self._sess.run(
                self._weights.weights["{}W".format(first_layer_name)])
        if close_sess:
            self.close_session()
        return filters
def set_batch_size(self, batch_size):
"""Update the batch size to be used for during inference.
Parameters
----------
batch_size : int
The new batch size.
"""
self._batch_size = batch_size
def set_im_mean(self, im_mean):
"""Update image mean to be used for normalization during inference.
Parameters
----------
im_mean : float
The new image mean.
"""
self._im_mean = im_mean
def get_im_mean(self):
"""Get the current image mean used for normalization during inference.
Returns
-------
: float
The image mean.
"""
return self.im_mean
def set_im_std(self, im_std):
"""Update image standard deviation to be used for normalization during
inference.
Parameters
----------
im_std : float
The new image standard deviation.
"""
self._im_std = im_std
def get_im_std(self):
"""Get the current image standard deviation to be used for
normalization during inference.
Returns
-------
: float
The image standard deviation.
"""
return self.im_std
def set_pose_mean(self, pose_mean):
"""Update pose mean to be used for normalization during inference.
Parameters
----------
pose_mean : :obj:`numpy.ndarray`
The new pose mean.
"""
self._pose_mean = pose_mean
def get_pose_mean(self):
"""Get the current pose mean to be used for normalization during
inference.
Returns
-------
:obj:`numpy.ndarray`
The pose mean.
"""
return self._pose_mean
def set_pose_std(self, pose_std):
"""Update pose standard deviation to be used for normalization during
inference.
Parameters
----------
pose_std : :obj:`numpy.ndarray`
The new pose standard deviation.
"""
self._pose_std = pose_std
def get_pose_std(self):
"""Get the current pose standard deviation to be used for normalization
during inference.
Returns
-------
:obj:`numpy.ndarray`
The pose standard deviation.
"""
return self._pose_std
def set_im_depth_sub_mean(self, im_depth_sub_mean):
    """Set the mean of the depth-subtracted image used for normalization
    at inference time (input depth mode "sub").

    Parameters
    ----------
    im_depth_sub_mean : float
        The new mean of subtracted image and gripper depth.
    """
    self._im_depth_sub_mean = im_depth_sub_mean
def set_im_depth_sub_std(self, im_depth_sub_std):
    """Set the standard deviation of the depth-subtracted image used for
    normalization at inference time (input depth mode "sub").

    Parameters
    ----------
    im_depth_sub_std : float
        The standard deviation of subtracted image and gripper depth.
    """
    self._im_depth_sub_std = im_depth_sub_std
def add_softmax_to_output(self):
    """Adds softmax to output of network."""
    with tf.name_scope("softmax"):
        if self._angular_bins > 0:
            self._logger.info("Building Pair-wise Softmax Layer...")
            # Apply an independent softmax to each angular bin's slice of
            # the logits, then stitch the slices back together.
            per_bin_logits = tf.split(self._output_tensor,
                                      self._angular_bins,
                                      axis=-1)
            per_bin_probs = [
                tf.nn.softmax(logits) for logits in per_bin_logits
            ]
            self._output_tensor = tf.concat(per_bin_probs, -1)
        else:
            self._logger.info("Building Softmax Layer...")
            self._output_tensor = tf.nn.softmax(self._output_tensor)
def add_sigmoid_to_output(self):
    """Append a sigmoid activation to the network's output tensor."""
    with tf.name_scope("sigmoid"):
        self._logger.info("Building Sigmoid Layer...")
        self._output_tensor = tf.nn.sigmoid(self._output_tensor)
def update_batch_size(self, batch_size):
    """Update the inference batch size.

    Duplicate of `set_batch_size`; both write `self._batch_size`.

    Parameters
    ----------
    batch_size : int
        The new batch size.
    """
    # Docstring fix: the batch size is an integer count (it is used with
    # `min()` against image counts in `_predict`), not a float as the
    # previous docstring claimed.
    self._batch_size = batch_size
def _predict(self, image_arr, pose_arr, verbose=False):
    """Query predictions from the network.

    Runs inference in batches of at most `self._batch_size`, writing
    normalized inputs into the preallocated `self._input_im_arr` /
    `self._input_pose_arr` buffers and collecting the per-batch outputs
    into one array.

    Parameters
    ----------
    image_arr : :obj:`numpy.ndarray`
        Input images.
    pose_arr : :obj:`numpy.ndarray`
        Input gripper poses.
    verbose : bool
        Whether or not to log progress to `stdout`, useful to turn off
        during training.

    Returns
    -------
    :obj:`numpy.ndarray`
        Network output for all inputs, stacked along the first axis.

    Raises
    ------
    ValueError
        If the number of images and poses differ.
    RuntimeError
        If no TF session has been opened.
    """
    # Get prediction start time.
    start_time = time.time()

    if verbose:
        self._logger.info("Predicting...")

    # Setup for prediction.
    num_batches = math.ceil(image_arr.shape[0] / self._batch_size)
    num_images = image_arr.shape[0]
    num_poses = pose_arr.shape[0]

    output_arr = None
    if num_images != num_poses:
        raise ValueError("Must provide same number of images as poses!")

    # Predict in batches.
    with self._graph.as_default():
        if self._sess is None:
            raise RuntimeError(
                "No TF Session open. Please call open_session() first.")
        i = 0
        batch_idx = 0
        while i < num_images:
            if verbose:
                self._logger.info("Predicting batch {} of {}...".format(
                    batch_idx, num_batches))
            batch_idx += 1
            dim = min(self._batch_size, num_images - i)
            cur_ind = i
            end_ind = cur_ind + dim
            if self._input_depth_mode == InputDepthMode.POSE_STREAM:
                # Normalize both streams with dataset statistics on the
                # host before feeding.
                self._input_im_arr[:dim, ...] = (
                    image_arr[cur_ind:end_ind, ...] -
                    self._im_mean) / self._im_std
                self._input_pose_arr[:dim, :] = (
                    pose_arr[cur_ind:end_ind, :] -
                    self._pose_mean) / self._pose_std
            elif self._input_depth_mode == InputDepthMode.SUB:
                # Feed raw values: in this mode the graph itself performs
                # the depth subtraction and normalization (see the SUB
                # branch of `_build_im_stream`).
                self._input_im_arr[:dim, ...] = image_arr[cur_ind:end_ind,
                                                          ...]
                self._input_pose_arr[:dim, :] = pose_arr[
                    cur_ind:end_ind, :]
            elif self._input_depth_mode == InputDepthMode.IM_ONLY:
                # Only the image stream is used; normalize it on the host.
                self._input_im_arr[:dim, ...] = (
                    image_arr[cur_ind:end_ind, ...] -
                    self._im_mean) / self._im_std
            gqcnn_output = self._sess.run(
                self._output_tensor,
                feed_dict={
                    self._input_im_node: self._input_im_arr,
                    self._input_pose_node: self._input_pose_arr
                })

            # Allocate output tensor lazily, once the first batch reveals
            # the output shape.
            if output_arr is None:
                output_arr = np.zeros([num_images] +
                                      list(gqcnn_output.shape[1:]))

            output_arr[cur_ind:end_ind, :] = gqcnn_output[:dim, :]
            i = end_ind

    # Get total prediction time.
    pred_time = time.time() - start_time
    if verbose:
        self._logger.info("Prediction took {} seconds.".format(pred_time))

    return output_arr
def predict(self, image_arr, pose_arr, verbose=False):
    """Predict the probability of grasp success given a depth image and
    gripper pose.

    Parameters
    ----------
    image_arr : :obj:`numpy ndarray`
        4D tensor of depth images.
    pose_arr : :obj:`numpy ndarray`
        Tensor of gripper poses.
    verbose : bool
        Whether or not to log progress to stdout, useful to turn off during
        training.
    """
    # Thin public wrapper over the shared batched-inference helper.
    return self._predict(image_arr, pose_arr, verbose=verbose)
def featurize(self,
image_arr,
pose_arr=None,
feature_layer="conv1_1",
verbose=False):
"""Featurize a set of inputs.
Parameters
----------
image_arr : :obj:`numpy ndarray`
4D tensor of depth images.
pose_arr : :obj:`numpy ndarray`
Optional tensor of gripper poses.
feature_layer : str
The network layer to featurize.
verbose : bool
Whether or not to log progress to `stdout`.
"""
# Get featurization start time.
start_time = time.time()
if verbose:
self._logger.info("Featurizing...")
if feature_layer not in self._feature_tensors:
raise ValueError(
"Feature layer: {} not recognized.".format(feature_layer))
# Setup for featurization.
num_images = image_arr.shape[0]
if pose_arr is not None:
num_poses = pose_arr.shape[0]
if num_images != num_poses:
raise ValueError(
"Must provide same number of images as poses!")
output_arr = None
# Featurize in batches.
with self._graph.as_default():
if self._sess is None:
raise RuntimeError(
"No TF Session open. Please call open_session() first.")
i = 0
while i < num_images:
if verbose:
self._logger.info("Featurizing {} of {}...".format(
i, num_images))
dim = min(self._batch_size, num_images - i)
cur_ind = i
end_ind = cur_ind + dim
self._input_im_arr[:dim, :, :, :] = (
image_arr[cur_ind:end_ind, :, :, :] -
self._im_mean) / self._im_std
if pose_arr is not None:
self._input_pose_arr[:dim, :] = (
pose_arr[cur_ind:end_ind, :] -
self._pose_mean) / self._pose_std
if pose_arr is not None:
gqcnn_output = self._sess.run(
self._feature_tensors[feature_layer],
feed_dict={
self._input_im_node: self._input_im_arr,
self._input_pose_node: self._input_pose_arr
})
else:
gqcnn_output = self._sess.run(
self._feature_tensors[feature_layer],
feed_dict={self._input_im_node: self._input_im_arr})
if output_arr is None:
output_arr = np.zeros([num_images] +
list(gqcnn_output.shape[1:]))
output_arr[cur_ind:end_ind, :] = gqcnn_output[:dim, :]
i = end_ind
if verbose:
self._logger.info(
"Featurization took {} seconds".format(time.time() -
start_time))
# Truncate extraneous values off of end of `output_arr`.
# TODO(vsatish): This isn't needed, right?
output_arr = output_arr[:num_images]
return output_arr
def _leaky_relu(self, x, alpha=.1):
    # Leaky ReLU as an elementwise max of the input and its scaled copy:
    # identity for x >= 0, slope `alpha` for x < 0 (assuming 0 < alpha < 1).
    scaled = alpha * x
    return tf.maximum(scaled, x)
def _build_conv_layer(self,
                      input_node,
                      input_height,
                      input_width,
                      input_channels,
                      filter_h,
                      filter_w,
                      num_filt,
                      pool_stride_h,
                      pool_stride_w,
                      pool_size,
                      name,
                      norm=False,
                      pad="SAME"):
    """Build one conv + leaky-ReLU (+ optional LRN) + max-pool block.

    Weights are reused from `self._weights` when keys for `name` exist
    (including the legacy `<name>W`/`<name>b` key format); otherwise new
    variables are created with truncated-normal initialization
    (std = sqrt(2 / fan_in)) and registered in `self._weights`.

    Parameters
    ----------
    input_node : :obj:`tf.Tensor`
        4D input tensor.
    input_height : int
        Spatial height of the input.
    input_width : int
        Spatial width of the input.
    input_channels : int
        Number of input channels.
    filter_h : int
        Filter height.
    filter_w : int
        Filter width.
    num_filt : int
        Number of filters (output channels).
    pool_stride_h : int
        Vertical pooling stride.
    pool_stride_w : int
        Horizontal pooling stride.
    pool_size : int
        Pooling window size.
    name : str
        Layer name, used for weight keys and the TF name scope.
    norm : bool
        Whether to apply local response normalization after the
        activation.
    pad : str
        Conv padding mode ("SAME" or "VALID"); pooling always uses
        "SAME".

    Returns
    -------
    :obj:`tf.Tensor`
        Pooled layer output.
    int
        Output height.
    int
        Output width.
    int
        Output channel count.
    """
    self._logger.info("Building convolutional layer: {}...".format(name))
    with tf.name_scope(name):
        # Initialize weights.
        if "{}_weights".format(name) in self._weights.weights:
            convW = self._weights.weights["{}_weights".format(name)]
            convb = self._weights.weights["{}_bias".format(name)]
        elif "{}W".format(
                name) in self._weights.weights:  # Legacy support.
            self._logger.info(
                "Using old format for layer {}.".format(name))
            convW = self._weights.weights["{}W".format(name)]
            convb = self._weights.weights["{}b".format(name)]
        else:
            self._logger.info("Reinitializing layer {}.".format(name))
            convW_shape = [filter_h, filter_w, input_channels, num_filt]
            fan_in = filter_h * filter_w * input_channels
            std = np.sqrt(2 / fan_in)
            convW = tf.Variable(tf.truncated_normal(convW_shape,
                                                    stddev=std),
                                name="{}_weights".format(name))
            convb = tf.Variable(tf.truncated_normal([num_filt],
                                                    stddev=std),
                                name="{}_bias".format(name))
            self._weights.weights["{}_weights".format(name)] = convW
            self._weights.weights["{}_bias".format(name)] = convb

        # Output spatial size: conv itself is stride 1, so with "SAME"
        # padding only the pooling stride shrinks the dims; otherwise the
        # conv window also reduces them before pooling.
        if pad == "SAME":
            out_height = input_height // pool_stride_h
            out_width = input_width // pool_stride_w
        else:
            out_height = math.ceil(
                (input_height - filter_h + 1) / pool_stride_h)
            out_width = math.ceil(
                (input_width - filter_w + 1) / pool_stride_w)
        out_channels = num_filt

        # Build layer.
        convh = tf.nn.conv2d(
            input_node, convW, strides=[1, 1, 1, 1], padding=pad) + convb
        convh = self._leaky_relu(convh, alpha=self._relu_coeff)
        if norm:
            convh = tf.nn.local_response_normalization(
                convh,
                depth_radius=self._normalization_radius,
                alpha=self._normalization_alpha,
                beta=self._normalization_beta,
                bias=self._normalization_bias)
        pool = tf.nn.max_pool(convh,
                              ksize=[1, pool_size, pool_size, 1],
                              strides=[1, pool_stride_h, pool_stride_w, 1],
                              padding="SAME")

        # Add output to feature dict.
        self._feature_tensors[name] = pool

        return pool, out_height, out_width, out_channels
def _build_fc_layer(self,
                    input_node,
                    fan_in,
                    out_size,
                    name,
                    input_is_multi,
                    drop_rate,
                    final_fc_layer=False):
    """Build a fully connected layer.

    Weights are reused from `self._weights` when present (including the
    legacy `<name>W`/`<name>b` key format); otherwise they are created
    with truncated-normal initialization (std = sqrt(2 / fan_in)) and
    registered. The final layer's bias is zero-initialized and the layer
    emits raw logits (no activation, no dropout).

    Parameters
    ----------
    input_node : :obj:`tf.Tensor`
        Input tensor.
    fan_in : int
        Flattened input dimension.
    out_size : int
        Number of output units.
    name : str
        Layer name, used for weight keys and logging.
    input_is_multi : bool
        Whether the input is multi-dimensional (e.g. a conv output) and
        must be flattened to 2D first.
    drop_rate : :obj:`tf.Tensor` or float
        Dropout rate applied to non-final layers.
    final_fc_layer : bool
        Whether this is the network's final fully connected layer.

    Returns
    -------
    :obj:`tf.Tensor`
        Layer output tensor.
    int
        Output dimension (fan-in of the next layer).
    """
    self._logger.info("Building fully connected layer: {}...".format(name))

    # Initialize weights.
    if "{}_weights".format(name) in self._weights.weights:
        fcW = self._weights.weights["{}_weights".format(name)]
        fcb = self._weights.weights["{}_bias".format(name)]
    elif "{}W".format(name) in self._weights.weights:  # Legacy support.
        self._logger.info("Using old format for layer {}.".format(name))
        fcW = self._weights.weights["{}W".format(name)]
        fcb = self._weights.weights["{}b".format(name)]
    else:
        self._logger.info("Reinitializing layer {}.".format(name))
        std = np.sqrt(2 / fan_in)
        fcW = tf.Variable(tf.truncated_normal([fan_in, out_size],
                                              stddev=std),
                          name="{}_weights".format(name))
        if final_fc_layer:
            fcb = tf.Variable(tf.constant(0.0, shape=[out_size]),
                              name="{}_bias".format(name))
        else:
            fcb = tf.Variable(tf.truncated_normal([out_size], stddev=std),
                              name="{}_bias".format(name))

        self._weights.weights["{}_weights".format(name)] = fcW
        self._weights.weights["{}_bias".format(name)] = fcb

    # Build layer.
    if input_is_multi:
        # Flatten a conv-style output to 2D. NOTE(review): `reduce_shape`
        # is a package helper defined elsewhere; presumably it returns
        # the product of the non-batch dims -- confirm against its
        # definition.
        reduced_dim1 = reduce_shape(input_node.get_shape())
        input_node = tf.reshape(input_node, [-1, reduced_dim1])
    if final_fc_layer:
        fc = tf.matmul(input_node, fcW) + fcb
    else:
        fc = self._leaky_relu(tf.matmul(input_node, fcW) + fcb,
                              alpha=self._relu_coeff)
        fc = tf.nn.dropout(fc, 1 - drop_rate)

    # Add output to feature dict.
    self._feature_tensors[name] = fc

    return fc, out_size
# TODO(vsatish): This really doesn't need to it's own layer type...it does
# the same thing as `_build_fc_layer`.
def _build_pc_layer(self, input_node, fan_in, out_size, name):
    """Build a fully connected layer for the pose stream.

    Weights are reused from `self._weights` when present (including the
    legacy `<name>W`/`<name>b` key format); otherwise they are created
    with truncated-normal initialization (std = sqrt(2 / fan_in)) and
    registered.

    Parameters
    ----------
    input_node : :obj:`tf.Tensor`
        2D input tensor.
    fan_in : int
        Input dimension.
    out_size : int
        Number of output units.
    name : str
        Layer name, used for weight keys and logging.

    Returns
    -------
    :obj:`tf.Tensor`
        Layer output tensor.
    int
        Output dimension (fan-in of the next layer).
    """
    self._logger.info(
        "Building Fully Connected Pose Layer: {}...".format(name))

    # Initialize weights.
    if "{}_weights".format(name) in self._weights.weights:
        pcW = self._weights.weights["{}_weights".format(name)]
        pcb = self._weights.weights["{}_bias".format(name)]
    elif "{}W".format(name) in self._weights.weights:  # Legacy support.
        self._logger.info("Using old format for layer {}".format(name))
        pcW = self._weights.weights["{}W".format(name)]
        pcb = self._weights.weights["{}b".format(name)]
    else:
        self._logger.info("Reinitializing layer {}".format(name))
        std = np.sqrt(2 / fan_in)
        pcW = tf.Variable(tf.truncated_normal([fan_in, out_size],
                                              stddev=std),
                          name="{}_weights".format(name))
        pcb = tf.Variable(tf.truncated_normal([out_size], stddev=std),
                          name="{}_bias".format(name))

        self._weights.weights["{}_weights".format(name)] = pcW
        self._weights.weights["{}_bias".format(name)] = pcb

    # Build layer. Unlike `_build_fc_layer`, no dropout is applied here.
    pc = self._leaky_relu(tf.matmul(input_node, pcW) + pcb,
                          alpha=self._relu_coeff)

    # Add output to feature dict.
    self._feature_tensors[name] = pc

    return pc, out_size
def _build_fc_merge(self, input_fc_node_1, input_fc_node_2, fan_in_1,
                    fan_in_2, out_size, drop_rate, name):
    """Build the layer that merges the image and pose streams.

    Computes leaky_relu(x1 @ W1 + x2 @ W2 + b) followed by dropout.
    Weights are reused from `self._weights` when present (including the
    legacy `<name>W_im`/`<name>W_pose`/`<name>b` key format); otherwise
    they are created with truncated-normal initialization
    (std = sqrt(2 / (fan_in_1 + fan_in_2))) and registered.

    Parameters
    ----------
    input_fc_node_1 : :obj:`tf.Tensor`
        First (image stream) input tensor.
    input_fc_node_2 : :obj:`tf.Tensor`
        Second (pose stream) input tensor.
    fan_in_1 : int
        Dimension of the first input.
    fan_in_2 : int
        Dimension of the second input.
    out_size : int
        Number of output units.
    drop_rate : :obj:`tf.Tensor` or float
        Dropout rate.
    name : str
        Layer name, used for weight keys and logging.

    Returns
    -------
    :obj:`tf.Tensor`
        Layer output tensor.
    int
        Output dimension (fan-in of the next layer).
    """
    self._logger.info("Building Merge Layer: {}...".format(name))

    # Initialize weights.
    if "{}_input_1_weights".format(name) in self._weights.weights:
        input1W = self._weights.weights["{}_input_1_weights".format(name)]
        input2W = self._weights.weights["{}_input_2_weights".format(name)]
        fcb = self._weights.weights["{}_bias".format(name)]
    elif "{}W_im".format(name) in self._weights.weights:  # Legacy support.
        self._logger.info("Using old format for layer {}.".format(name))
        input1W = self._weights.weights["{}W_im".format(name)]
        input2W = self._weights.weights["{}W_pose".format(name)]
        fcb = self._weights.weights["{}b".format(name)]
    else:
        self._logger.info("Reinitializing layer {}.".format(name))
        std = np.sqrt(2 / (fan_in_1 + fan_in_2))
        input1W = tf.Variable(tf.truncated_normal([fan_in_1, out_size],
                                                  stddev=std),
                              name="{}_input_1_weights".format(name))
        input2W = tf.Variable(tf.truncated_normal([fan_in_2, out_size],
                                                  stddev=std),
                              name="{}_input_2_weights".format(name))
        fcb = tf.Variable(tf.truncated_normal([out_size], stddev=std),
                          name="{}_bias".format(name))

        self._weights.weights["{}_input_1_weights".format(name)] = input1W
        self._weights.weights["{}_input_2_weights".format(name)] = input2W
        self._weights.weights["{}_bias".format(name)] = fcb

    # Build layer.
    fc = self._leaky_relu(tf.matmul(input_fc_node_1, input1W) +
                          tf.matmul(input_fc_node_2, input2W) + fcb,
                          alpha=self._relu_coeff)
    fc = tf.nn.dropout(fc, 1 - drop_rate)

    # Add output to feature dict.
    self._feature_tensors[name] = fc

    return fc, out_size
def _build_im_stream(self,
                     input_node,
                     input_pose_node,
                     input_height,
                     input_width,
                     input_channels,
                     drop_rate,
                     layers,
                     only_stream=False):
    """Build the image stream of the network from its layer config.

    Iterates over `layers` (an ordered dict of layer name -> config) and
    chains conv and fc layers. In input depth mode SUB, the gripper depth
    is first subtracted from the image and the result normalized inside
    the graph.

    Parameters
    ----------
    input_node : :obj:`tf.Tensor`
        Image input tensor.
    input_pose_node : :obj:`tf.Tensor`
        Gripper pose (depth) input tensor; only used in SUB mode.
    input_height : int
        Input image height.
    input_width : int
        Input image width.
    input_channels : int
        Input channel count.
    drop_rate : :obj:`tf.Tensor` or float
        Dropout rate for fc layers.
    layers : :obj:`dict`
        Ordered mapping of layer name to layer config.
    only_stream : bool
        True when this stream is the whole network, in which case its
        last fc layer emits raw logits (`final_fc_layer=True`).

    Returns
    -------
    :obj:`tf.Tensor`
        Stream output tensor.
    int
        Output dimension of the last layer.

    Raises
    ------
    ValueError
        On invalid layer ordering or unsupported layer types.
    """
    self._logger.info("Building Image Stream...")

    if self._input_depth_mode == InputDepthMode.SUB:
        # Subtract the scalar gripper depth (broadcast over the image via
        # reshape + tile) from the depth image, then normalize with the
        # precomputed subtracted-depth statistics.
        sub_mean = tf.constant(self._im_depth_sub_mean, dtype=tf.float32)
        sub_std = tf.constant(self._im_depth_sub_std, dtype=tf.float32)
        sub_im = tf.subtract(
            input_node,
            tf.tile(
                tf.reshape(input_pose_node, tf.constant((-1, 1, 1, 1))),
                tf.constant((1, input_height, input_width, 1))))
        norm_sub_im = tf.div(tf.subtract(sub_im, sub_mean), sub_std)
        input_node = norm_sub_im

    output_node = input_node
    prev_layer = "start"  # Dummy placeholder.
    last_index = len(layers) - 1
    for layer_index, (layer_name,
                      layer_config) in enumerate(layers.items()):
        layer_type = layer_config["type"]
        if layer_type == "conv":
            if prev_layer == "fc":
                raise ValueError("Cannot have conv layer after fc layer!")
            output_node, input_height, input_width, input_channels = \
                self._build_conv_layer(
                    output_node,
                    input_height,
                    input_width,
                    input_channels,
                    layer_config["filt_dim"],
                    layer_config["filt_dim"],
                    layer_config["num_filt"],
                    layer_config["pool_stride"],
                    layer_config["pool_stride"],
                    layer_config["pool_size"],
                    layer_name,
                    norm=layer_config["norm"],
                    pad=layer_config["pad"])
            prev_layer = layer_type
        elif layer_type == "fc":
            # A zero out_size disables the layer.
            if layer_config["out_size"] == 0:
                continue
            prev_layer_is_conv = False
            if prev_layer == "conv":
                prev_layer_is_conv = True
                # Fan-in of the first fc layer is the flattened conv
                # output volume; for subsequent fc layers `fan_in` is the
                # previous layer's out_size.
                fan_in = input_height * input_width * input_channels
            if layer_index == last_index and only_stream:
                output_node, fan_in = self._build_fc_layer(
                    output_node,
                    fan_in,
                    layer_config["out_size"],
                    layer_name,
                    prev_layer_is_conv,
                    drop_rate,
                    final_fc_layer=True)
            else:
                output_node, fan_in = self._build_fc_layer(
                    output_node, fan_in, layer_config["out_size"],
                    layer_name, prev_layer_is_conv, drop_rate)
            prev_layer = layer_type
        elif layer_type == "pc":
            raise ValueError(
                "Cannot have pose connected layer in image stream!")
        elif layer_type == "fc_merge":
            raise ValueError("Cannot have merge layer in image stream!")
        else:
            raise ValueError(
                "Unsupported layer type: {}".format(layer_type))
    return output_node, fan_in
def _build_pose_stream(self, input_node, fan_in, layers):
self._logger.info("Building Pose Stream...")
output_node = input_node
for layer_name, layer_config in layers.items():
layer_type = layer_config["type"]
if layer_type == "conv":
raise ValueError("Cannot have conv layer in pose stream")
elif layer_type == "fc":
raise ValueError(
"Cannot have fully connected layer in pose stream")
elif layer_type == "pc":
if layer_config["out_size"] == 0:
continue
output_node, fan_in = self._build_pc_layer(
output_node, fan_in, layer_config["out_size"], layer_name)
elif layer_type == "fc_merge":
raise ValueError("Cannot have merge layer in pose stream")
else:
raise ValueError(
"Unsupported layer type: {}".format(layer_type))
return output_node, fan_in
def _build_merge_stream(self, input_stream_1, input_stream_2, fan_in_1,
                        fan_in_2, drop_rate, layers):
    """Build the merge stream that joins the image and pose streams.

    The first layer must be of type "fc_merge"; subsequent layers must be
    "fc". The last fc layer emits raw logits (`final_fc_layer=True`).

    Parameters
    ----------
    input_stream_1 : :obj:`tf.Tensor`
        Image stream output.
    input_stream_2 : :obj:`tf.Tensor`
        Pose stream output.
    fan_in_1 : int
        Image stream output dimension.
    fan_in_2 : int
        Pose stream output dimension.
    drop_rate : :obj:`tf.Tensor` or float
        Dropout rate.
    layers : :obj:`dict`
        Ordered mapping of layer name to layer config.

    Returns
    -------
    :obj:`tf.Tensor`
        Stream output tensor.
    int
        Output dimension of the last layer.

    Raises
    ------
    ValueError
        If the first layer is not "fc_merge" or an unsupported layer
        type is encountered.
    """
    self._logger.info("Building Merge Stream...")

    # First check if first layer is a merge layer.
    # TODO(vsatish): Can't we just get the first value because it's
    # ordered?
    if layers[list(layers)[0]]["type"] != "fc_merge":
        raise ValueError(
            "First layer in merge stream must be a fc_merge layer!")

    last_index = len(layers) - 1
    fan_in = -1
    output_node = None  # Will be overridden.
    for layer_index, (layer_name,
                      layer_config) in enumerate(layers.items()):
        layer_type = layer_config["type"]
        if layer_type == "conv":
            raise ValueError("Cannot have conv layer in merge stream!")
        elif layer_type == "fc":
            # A zero out_size disables the layer.
            if layer_config["out_size"] == 0:
                continue
            if layer_index == last_index:
                # Final layer: raw logits (no activation or dropout in
                # `_build_fc_layer` when `final_fc_layer=True`).
                output_node, fan_in = self._build_fc_layer(
                    output_node,
                    fan_in,
                    layer_config["out_size"],
                    layer_name,
                    False,
                    drop_rate,
                    final_fc_layer=True)
            else:
                output_node, fan_in = self._build_fc_layer(
                    output_node, fan_in, layer_config["out_size"],
                    layer_name, False, drop_rate)
        elif layer_type == "pc":
            raise ValueError(
                "Cannot have pose connected layer in merge stream!")
        elif layer_type == "fc_merge":
            if layer_config["out_size"] == 0:
                continue
            output_node, fan_in = self._build_fc_merge(
                input_stream_1, input_stream_2, fan_in_1, fan_in_2,
                layer_config["out_size"], drop_rate, layer_name)
        else:
            raise ValueError(
                "Unsupported layer type: {}".format(layer_type))
    return output_node, fan_in
def _build_network(self, input_im_node, input_pose_node,
                   input_drop_rate_node):
    """Build GQ-CNN.

    In POSE_STREAM mode the architecture must define an image stream, a
    pose stream, and a merge stream; in SUB or IM_ONLY mode only the
    image stream is allowed and it produces the final output directly.

    Parameters
    ----------
    input_im_node :obj:`tf.placeholder`
        Image placeholder.
    input_pose_node :obj:`tf.placeholder`
        Gripper pose placeholder.
    input_drop_rate_node :obj:`tf.placeholder`
        Drop rate placeholder.

    Returns
    -------
    :obj:`tf.Tensor`
        Tensor output of network.
    """
    self._logger.info("Building Network...")
    if self._input_depth_mode == InputDepthMode.POSE_STREAM:
        missing_stream_msg = ("When using input depth mode"
                              " 'pose_stream', both pose stream and"
                              " merge stream must be present!")
        assert "pose_stream" in self._architecture and \
            "merge_stream" in self._architecture, missing_stream_msg
        with tf.name_scope("im_stream"):
            output_im_stream, fan_out_im = self._build_im_stream(
                input_im_node, input_pose_node, self._im_height,
                self._im_width, self._num_channels, input_drop_rate_node,
                self._architecture["im_stream"])
        with tf.name_scope("pose_stream"):
            output_pose_stream, fan_out_pose = self._build_pose_stream(
                input_pose_node, self._pose_dim,
                self._architecture["pose_stream"])
        with tf.name_scope("merge_stream"):
            # Only the output tensor is needed; the fan-out is dropped.
            return self._build_merge_stream(
                output_im_stream, output_pose_stream, fan_out_im,
                fan_out_pose, input_drop_rate_node,
                self._architecture["merge_stream"])[0]
    elif self._input_depth_mode == InputDepthMode.SUB or \
            self._input_depth_mode == InputDepthMode.IM_ONLY:
        extraneous_stream_msg = ("When using input depth mode '{}', only"
                                 " im stream is allowed!")
        assert not ("pose_stream" in self._architecture or "merge_stream"
                    in self._architecture), extraneous_stream_msg.format(
                        self._input_depth_mode)
        with tf.name_scope("im_stream"):
            # The image stream is the whole network here, so its final fc
            # layer emits the output logits (`only_stream=True`).
            return self._build_im_stream(input_im_node,
                                         input_pose_node,
                                         self._im_height,
                                         self._im_width,
                                         self._num_channels,
                                         input_drop_rate_node,
                                         self._architecture["im_stream"],
                                         only_stream=True)[0]
================================================
FILE: gqcnn/search/__init__.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
"""
from .search import GQCNNSearch
__all__ = ["GQCNNSearch"]
================================================
FILE: gqcnn/search/enums.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Enums for hyper-parameter search.
Author
------
Vishal Satish
"""
class TrialConstants(object):
    """Per-trial resource cost estimates used by the resource manager."""
    TRIAL_CPU_LOAD = 300  # Decrease to get more aggressive CPU utilization.
    TRIAL_GPU_LOAD = 33  # Decrease to get more aggressive GPU utilization.
    # This really depends on model size (`TRIAL_GPU_LOAD` does too, but it's
    # not a hard limit per se). Ideally we would initialize models one-by-one
    # and monitor the space left, but because model initialization comes after
    # some metric calculation, we set this to be some upper bound based on the
    # largest model and do batch initializations from there.
    TRIAL_GPU_MEM = 2000
class SearchConstants(object):
    """Timing constants for the hyper-parameter search scheduling loop."""
    # NOTE(review): presumably both values are in seconds -- confirm
    # against their use in the search loop.
    SEARCH_THREAD_SLEEP = 2
    MIN_TIME_BETWEEN_SCHEDULE_ATTEMPTS = 20
================================================
FILE: gqcnn/search/resource_manager.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Intelligent resource manager for hyper-parameter search. Queries resources
available and appropriately distributes resources over possible trials to run.
Author
------
Vishal Satish
"""
import math
import random
import time
import GPUtil
import numpy as np
import psutil
from autolab_core import Logger
# Interval (seconds) over which `psutil.cpu_percent` samples CPU load.
CPU_LOAD_SAMPLE_INTERVAL = 4.0

# This is a hack because it seems that psutil is returning a lower load than
# htop, which could be because htop takes into account queued tasks.
CPU_LOAD_OFFSET = 50

# Number of GPU load/memory samples to average, and the delay between
# consecutive samples.
GPU_STAT_NUM_SAMPLES = 4
GPU_STAT_SAMPLE_INTERVAL = 1.0
class ResourceManager(object):
    """Monitors CPU and GPU load/memory and decides how many
    hyper-parameter search trials can be scheduled at once, and on which
    GPU devices."""

    def __init__(self,
                 trial_cpu_load,
                 trial_gpu_load,
                 trial_gpu_mem,
                 monitor_cpu=True,
                 monitor_gpu=True,
                 cpu_cores=[],
                 gpu_devices=[]):
        """
        Parameters
        ----------
        trial_cpu_load : float
            Estimated CPU load (percent) consumed by one trial.
        trial_gpu_load : float
            Estimated GPU load (percent) consumed by one trial.
        trial_gpu_mem : float
            Estimated GPU memory (MiB) consumed by one trial.
        monitor_cpu : bool
            Whether to bound scheduling by measured CPU load.
        monitor_gpu : bool
            Whether to bound scheduling by measured GPU load/memory.
        cpu_cores : list
            CPU cores to use; all available cores when empty.
        gpu_devices : list
            GPU device ids to use; all available devices when empty.
        """
        self._monitor_cpu = monitor_cpu
        self._monitor_gpu = monitor_gpu

        # Set up logger.
        self._logger = Logger.get_logger(self.__class__.__name__)

        if not monitor_cpu:
            self._logger.warning(
                "Not monitoring cpu resources is not advised.")
        if not monitor_gpu:
            self._logger.warning(
                "Not monitoring gpu resources is not advised.")

        self._trial_cpu_load = trial_cpu_load
        self._trial_gpu_load = trial_gpu_load
        self._trial_gpu_mem = trial_gpu_mem

        self._cpu_cores = cpu_cores
        if len(self._cpu_cores) == 0:
            self._logger.warning(
                "No CPU cores specified-proceeding to use all available cores."
            )
            self._cpu_cores = range(psutil.cpu_count())
        self._cpu_count = len(self._cpu_cores)

        self._gpu_devices = gpu_devices
        if len(self._gpu_devices) == 0:
            no_gpus_specified_msg = ("No GPU devices specified-proceeding to"
                                     " use all available devices.")
            self._logger.warning(no_gpus_specified_msg)
            self._gpu_devices = range(len(GPUtil.getGPUs()))

    @property
    def cpu_cores(self):
        return self._cpu_cores

    def _get_gpu_count(self):
        """Number of GPU devices under management.

        Previously this method was missing even though
        `num_trials_to_schedule` calls it when GPU monitoring is
        disabled, causing an `AttributeError` in that code path.
        """
        return len(self._gpu_devices)

    def _get_cpu_load(self):
        """Sample per-core CPU load and return the total (percent) over the
        managed cores, plus `CPU_LOAD_OFFSET`."""
        self._logger.info("Sampling cpu load...")
        cpu_core_loads = psutil.cpu_percent(interval=CPU_LOAD_SAMPLE_INTERVAL,
                                            percpu=True)
        total_load = 0
        for core in self._cpu_cores:
            total_load += cpu_core_loads[core]
        return total_load + CPU_LOAD_OFFSET

    def _get_gpu_stats(self):
        """Sample GPU load and memory `GPU_STAT_NUM_SAMPLES` times and
        average.

        Returns
        -------
        list
            Total memory (MiB) per device.
        list
            Mean load (percent) per device.
        list
            Mean used memory (MiB) per device.
        """
        self._logger.info("Sampling gpu memory and load...")
        gpu_samples = []
        for _ in range(GPU_STAT_NUM_SAMPLES):
            gpu_samples.append(GPUtil.getGPUs())
            time.sleep(GPU_STAT_SAMPLE_INTERVAL)
        num_gpus = len(gpu_samples[0])
        sample_loads = np.zeros((num_gpus, GPU_STAT_NUM_SAMPLES))
        sample_mems = np.zeros((num_gpus, GPU_STAT_NUM_SAMPLES))
        total_mems = np.zeros((num_gpus, ))
        for i in range(GPU_STAT_NUM_SAMPLES):
            for gpu in gpu_samples[i]:
                if gpu.id in self._gpu_devices:
                    sample_loads[gpu.id, i] = gpu.load * 100
                    sample_mems[gpu.id, i] = gpu.memoryUsed
                else:
                    # Trick the manager into thinking the GPU is fully
                    # utilized so it will never be chosen.
                    sample_loads[gpu.id, i] = 100
                    sample_mems[gpu.id, i] = gpu.memoryTotal
                total_mems[gpu.id] = gpu.memoryTotal
        return total_mems.tolist(), np.mean(
            sample_loads, axis=1).tolist(), np.mean(sample_mems,
                                                    axis=1).tolist()

    def _build_gpu_list(self, max_possible_trials_per_device):
        """Expand per-device trial capacities into a flat, shuffled list of
        device-id strings, one entry per schedulable trial."""
        gpus_avail = []
        for device_id, max_trials in enumerate(max_possible_trials_per_device):
            for _ in range(max_trials):
                gpus_avail.append(str(device_id))
        # This is because we might truncate this list later because of a more
        # severe resource bottleneck, in which case we want to evenly
        # distribute the load.
        random.shuffle(gpus_avail)
        return gpus_avail

    def num_trials_to_schedule(self, num_pending_trials):
        """Compute how many pending trials can be scheduled right now.

        Parameters
        ----------
        num_pending_trials : int
            Number of trials waiting to run.

        Returns
        -------
        int
            Number of trials to schedule.
        list
            GPU device-id strings, one per schedulable trial.
        """
        num_trials_to_schedule = num_pending_trials
        if self._monitor_cpu:  # Check cpu bandwidth.
            cpu_load = min(self._get_cpu_load(), self._cpu_count * 100)
            max_possible_trials_cpu = int(
                (self._cpu_count * 100 - cpu_load) // self._trial_cpu_load)
            self._logger.info("CPU load: {}%, Max possible trials: {}".format(
                cpu_load, max_possible_trials_cpu))
            num_trials_to_schedule = min(num_trials_to_schedule,
                                         max_possible_trials_cpu)
        if self._monitor_gpu:  # Check gpu bandwidth.
            total_gpu_mems, gpu_loads, gpu_mems = self._get_gpu_stats()
            max_possible_trials_gpu_load_per_device = [
                int((100 - gpu_load) // self._trial_gpu_load)
                for gpu_load in gpu_loads
            ]
            max_possible_trials_gpu_mem_per_device = [
                int((total_gpu_mem - gpu_mem) // self._trial_gpu_mem)
                for total_gpu_mem, gpu_mem in zip(total_gpu_mems, gpu_mems)
            ]
            # NOTE: this must be a list, not a `map` iterator. It is
            # consumed three times below (two `sum()` calls and
            # `_build_gpu_list`); under Python 3 a `map` iterator would be
            # exhausted after the first use, silently reporting a limit
            # of zero trials.
            max_possible_trials_gpu_per_device = [
                min(load_limit, mem_limit)
                for load_limit, mem_limit in zip(
                    max_possible_trials_gpu_load_per_device,
                    max_possible_trials_gpu_mem_per_device)
            ]
            self._logger.info(
                "GPU loads: {}, GPU mems: {}, Max possible trials: {}".format(
                    "% ".join([str(gpu_load) for gpu_load in gpu_loads]) + "%",
                    "MiB ".join([str(gpu_mem)
                                 for gpu_mem in gpu_mems]) + "MiB",
                    sum(max_possible_trials_gpu_per_device)))
            num_trials_to_schedule = min(
                num_trials_to_schedule,
                sum(max_possible_trials_gpu_per_device))

            # Build the device list for scheduling trials on specific gpus.
            gpus_avail = self._build_gpu_list(
                max_possible_trials_gpu_per_device)
        else:
            # Just distribute load among gpus.
            num_gpus = self._get_gpu_count()
            trials_per_gpu = int(math.ceil(num_trials_to_schedule / num_gpus))
            gpus_avail = self._build_gpu_list([trials_per_gpu] * num_gpus)
        gpus_avail = gpus_avail[:num_trials_to_schedule]
        self._logger.info(
            "Max possible trials overall: {}".format(num_trials_to_schedule))
        return num_trials_to_schedule, gpus_avail
================================================
FILE: gqcnn/search/search.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Perform hyper-parameter search over a set of GQ-CNN model/training
parameters. Actively monitor system resources and appropriately schedule
trials.
Author
------
Vishal Satish
"""
import os
import time
from .resource_manager import ResourceManager
from .trial import (GQCNNTrainingAndAnalysisTrial,
GQCNNFineTuningAndAnalysisTrial)
from .utils import gen_trial_params, gen_timestamp, log_trial_status
from .enums import TrialConstants, SearchConstants
from autolab_core import Logger
from ..utils import is_py2, GQCNNTrainingStatus
if is_py2():
from Queue import Queue
else:
from queue import Queue
class GQCNNSearch(object):
    """Hyper-parameter search over GQ-CNN model/training parameters.

    Expands each master training config into individual trials, then
    schedules trials from a pending queue while monitoring system
    resources through a `ResourceManager`.
    """

    def __init__(self,
                 analysis_config,
                 train_configs,
                 datasets,
                 split_names,
                 base_models=None,
                 output_dir=None,
                 search_name=None,
                 monitor_cpu=True,
                 monitor_gpu=True,
                 cpu_cores=None,
                 gpu_devices=None):
        """
        Parameters
        ----------
        analysis_config : dict
            Config for the `GQCNNAnalyzer` run on each trained model.
        train_configs : list of dict
            Master training configs (one per dataset); list-valued fields
            are expanded into individual trials.
        datasets : list of str
            Paths to training datasets (parallel to `train_configs`).
        split_names : list of str
            Dataset split names (parallel to `train_configs`).
        base_models : list of str, optional
            Pre-trained models to fine-tune; when given, must be parallel
            to `train_configs` and trials fine-tune instead of training
            from scratch.
        output_dir : str, optional
            Root output directory (defaults to "models").
        search_name : str, optional
            Name for this search (defaults to a timestamped name).
        monitor_cpu : bool
            Whether to monitor CPU usage when scheduling trials.
        monitor_gpu : bool
            Whether to monitor GPU usage when scheduling trials.
        cpu_cores : list of int, optional
            CPU cores trials may use.
        gpu_devices : list of int, optional
            GPU devices trials may use.
        """
        # Use `None` sentinels instead of mutable default arguments.
        if base_models is None:
            base_models = []
        if cpu_cores is None:
            cpu_cores = []
        if gpu_devices is None:
            gpu_devices = []

        self._analysis_cfg = analysis_config

        # Create trial output dir if not specified.
        if search_name is None:
            search_name = "gqcnn_hyperparam_search_{}".format(gen_timestamp())
        if output_dir is None:
            output_dir = "models"
        self._trial_output_dir = os.path.join(output_dir, search_name)
        if not os.path.exists(self._trial_output_dir):
            os.makedirs(self._trial_output_dir)

        # Set up logger.
        self._logger = Logger.get_logger(self.__class__.__name__,
                                         log_file=os.path.join(
                                             self._trial_output_dir,
                                             "search.log"),
                                         global_log_file=True)

        # Init resource manager.
        self._resource_manager = ResourceManager(TrialConstants.TRIAL_CPU_LOAD,
                                                 TrialConstants.TRIAL_GPU_LOAD,
                                                 TrialConstants.TRIAL_GPU_MEM,
                                                 monitor_cpu=monitor_cpu,
                                                 monitor_gpu=monitor_gpu,
                                                 cpu_cores=cpu_cores,
                                                 gpu_devices=gpu_devices)

        # Parse train configs and generate individual trial parameters.
        if len(base_models) > 0:
            inconsistent_inputs_msg = ("Must have equal number of training"
                                       " configs, datasets, split_names, and"
                                       " base models!")
            assert len(train_configs) == len(datasets) == len(
                split_names) == len(base_models), inconsistent_inputs_msg
        else:
            inconsistent_inputs_msg = ("Must have equal number of training"
                                       " configs, datasets, and split_names!")
            assert len(train_configs) == len(datasets) == len(
                split_names), inconsistent_inputs_msg
        self._logger.info("Generating trial parameters...")
        trial_params = gen_trial_params(train_configs,
                                        datasets,
                                        split_names,
                                        base_models=base_models)

        # Create pending trial queue.
        self._trials_pending_queue = Queue()
        if len(base_models) > 0:
            for trial_name, hyperparam_summary, train_cfg, dataset, \
                    base_model, split_name in trial_params:
                self._trials_pending_queue.put(
                    GQCNNFineTuningAndAnalysisTrial(self._analysis_cfg,
                                                    train_cfg, dataset,
                                                    base_model, split_name,
                                                    self._trial_output_dir,
                                                    trial_name,
                                                    hyperparam_summary))
        else:
            for trial_name, hyperparam_summary, train_cfg, dataset, \
                    split_name in trial_params:
                self._trials_pending_queue.put(
                    GQCNNTrainingAndAnalysisTrial(self._analysis_cfg,
                                                  train_cfg, dataset,
                                                  split_name,
                                                  self._trial_output_dir,
                                                  trial_name,
                                                  hyperparam_summary))

        # Create containers to hold running, finished, and errored-out trials.
        self._trials_running = []
        self._trials_finished = []
        self._trials_errored = []

    def search(self):
        """Run the search, blocking until all trials have finished."""
        self._logger.info("Beginning hyper-parameter search...")
        done = False
        waiting_for_trial_init = False
        last_schedule_attempt_time = -1
        search_start_time = time.time()
        while not done:
            num_trials_pending = self._trials_pending_queue.qsize()
            num_trials_running = len(self._trials_running)
            num_trials_finished = len(self._trials_finished)
            num_trials_errored = len(self._trials_errored)
            self._logger.info(
                "----------------------------------------------------")
            self._logger.info(
                "Num trials pending: {}".format(num_trials_pending))
            self._logger.info(
                "Num trials running: {}".format(num_trials_running))
            self._logger.info(
                "Num trials finished: {}".format(num_trials_finished))
            if num_trials_errored > 0:
                self._logger.info(
                    "Num trials errored: {}".format(num_trials_errored))

            # Attempt scheduling only when there are pending trials, no
            # previously-scheduled trial is still initializing, and enough
            # time has passed since the last attempt.
            if num_trials_pending > 0 and not waiting_for_trial_init and (
                    time.time() - last_schedule_attempt_time
            ) > SearchConstants.MIN_TIME_BETWEEN_SCHEDULE_ATTEMPTS:
                self._logger.info("Attempting to schedule more trials...")
                num_trials_to_schedule, gpus_avail = \
                    self._resource_manager.num_trials_to_schedule(
                        num_trials_pending)
                self._logger.info(
                    "Scheduling {} trials".format(num_trials_to_schedule))
                if num_trials_to_schedule > 0:
                    # Start trials.
                    for _, gpu in zip(range(num_trials_to_schedule),
                                      gpus_avail):
                        trial = self._trials_pending_queue.get()
                        trial.begin(
                            gpu_avail=gpu,
                            cpu_cores_avail=self._resource_manager.cpu_cores)
                        self._trials_running.append(trial)
                    # Block scheduling until trials have started training
                    # (this is when we know what resources are still
                    # available).
                    waiting_for_trial_init = True
                last_schedule_attempt_time = time.time()

            # Check if trials have started training.
            if waiting_for_trial_init:
                training_has_started = [
                    trial.training_status == GQCNNTrainingStatus.TRAINING
                    for trial in self._trials_running
                ]
                if all(training_has_started):
                    waiting_for_trial_init = False

            # Log trial status.
            if len(self._trials_running) > 0:
                self._logger.info(log_trial_status(self._trials_running))

            # Check if any trials have finished running or errored-out.
            finished_trials_to_move = []
            errored_trials_to_move = []
            for trial in self._trials_running:
                if trial.finished:
                    finished_trials_to_move.append(trial)
                elif trial.errored_out:
                    errored_trials_to_move.append(trial)
            self._trials_finished.extend(finished_trials_to_move)
            self._trials_errored.extend(errored_trials_to_move)
            for trial in finished_trials_to_move:
                self._trials_running.remove(trial)
            for trial in errored_trials_to_move:
                self._trials_running.remove(trial)

            # Update stopping criteria (uses the counts sampled at the top
            # of this iteration, so the loop runs one final pass) and sleep.
            done = (num_trials_pending == 0) and (num_trials_running == 0)
            time.sleep(SearchConstants.SEARCH_THREAD_SLEEP)

        self._logger.info(
            "------------------Successful Trials------------------")
        self._logger.info(log_trial_status(self._trials_finished))
        if len(self._trials_errored) > 0:
            self._logger.info(
                "--------------------Failed Trials--------------------")
            self._logger.info(log_trial_status(self._trials_errored))
        self._logger.info(
            "Hyper-parameter search finished in {} seconds.".format(
                time.time() - search_start_time))
================================================
FILE: gqcnn/search/trial.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Trials for hyper-parameter search.
Author
------
Vishal Satish
"""
from abc import ABC, abstractmethod
import json
import multiprocessing
import os
import sys
import numpy as np
from ..model import get_gqcnn_model
from ..training import get_gqcnn_trainer
from ..utils import GeneralConstants, GQCNNTrainingStatus
from ..analysis import GQCNNAnalyzer
class TrialStatus:
    """String states reported by a trial over its lifetime."""

    # Trial created but not yet scheduled.
    PENDING = "pending"
    # Trial child process is executing.
    RUNNING = "running"
    # Trial completed successfully.
    FINISHED = "finished"
    # Trial raised an exception and aborted.
    EXCEPTION = "exception"
class GQCNNTrialWithAnalysis(ABC):
    """Base class for hyper-parameter search trials.

    Runs training (implemented by subclasses via `_run`) followed by model
    analysis in a child process, communicating progress back to the parent
    through manager-backed dicts.
    """

    def __init__(self, analysis_cfg, train_cfg, dataset_dir, split_name,
                 output_dir, model_name, hyperparam_summary):
        """
        Parameters
        ----------
        analysis_cfg : dict
            Config for the `GQCNNAnalyzer` run after training.
        train_cfg : dict
            Training config for this trial.
        dataset_dir : str
            Path to the training dataset.
        split_name : str
            Name of the train/validation split.
        output_dir : str
            Directory in which to save the trained model.
        model_name : str
            Name of the model (and of this trial).
        hyperparam_summary : dict
            Summary of the hyper-parameters used by this trial.
        """
        self._analysis_cfg = analysis_cfg
        self._train_cfg = train_cfg
        self._dataset_dir = dataset_dir
        self._split_name = split_name
        self._output_dir = output_dir
        self._model_name = model_name
        self._hyperparam_summary = hyperparam_summary

        # Manager-backed dicts are shared with the trial child process.
        self._manager = multiprocessing.Manager()
        # To communicate with training.
        self._train_progress_dict = self._build_train_progress_dict()
        # To communicate with trial.
        self._trial_progress_dict = self._build_trial_progress_dict()
        self._process = None

    def _build_train_progress_dict(self):
        """Build the shared dict through which training reports progress."""
        # `epoch` stays NaN until the trainer reports progress.
        progress_dict = self._manager.dict(
            training_status=GQCNNTrainingStatus.NOT_STARTED,
            epoch=np.nan,
            analysis=None)
        return progress_dict

    def _build_trial_progress_dict(self):
        """Build the shared dict through which the trial reports status."""
        progress_dict = self._manager.dict(status=TrialStatus.PENDING)
        return progress_dict

    @abstractmethod
    def _run(self, trainer):
        """Run the given trainer (train from scratch or fine-tune)."""
        pass

    def _run_trial(self,
                   analysis_config,
                   train_config,
                   dataset_dir,
                   split_name,
                   output_dir,
                   model_name,
                   train_progress_dict,
                   trial_progress_dict,
                   hyperparam_summary,
                   gpu_avail="",
                   cpu_cores_avail=None,
                   backend="tf"):
        """Trial entry point, executed in the child process."""
        # `None` sentinel instead of a mutable default argument.
        if cpu_cores_avail is None:
            cpu_cores_avail = []
        trial_progress_dict["status"] = TrialStatus.RUNNING
        try:
            # Pin this process to the allowed CPU cores and expose only the
            # assigned GPU to the training backend.
            os.system("taskset -pc {} {}".format(
                ",".join(str(i) for i in cpu_cores_avail), os.getpid()))
            os.environ["CUDA_VISIBLE_DEVICES"] = gpu_avail
            gqcnn = get_gqcnn_model(backend,
                                    verbose=False)(train_config["gqcnn"],
                                                   verbose=False)
            trainer = get_gqcnn_trainer(backend)(
                gqcnn,
                dataset_dir,
                split_name,
                output_dir,
                train_config,
                name=model_name,
                progress_dict=train_progress_dict,
                verbose=False)
            self._run(trainer)

            # Save the hyper-parameter summary next to the trained model.
            # NOTE: must be opened in text mode ("w") — `json.dump` writes
            # `str`, so binary mode ("wb") raises `TypeError` on Python 3.
            with open(
                    os.path.join(output_dir, model_name,
                                 "hyperparam_summary.json"), "w") as fhandle:
                json.dump(hyperparam_summary,
                          fhandle,
                          indent=GeneralConstants.JSON_INDENT)

            train_progress_dict["training_status"] = "analyzing"
            analyzer = GQCNNAnalyzer(analysis_config, verbose=False)
            _, _, init_train_error, final_train_error, init_train_loss, \
                final_train_loss, init_val_error, final_val_error, \
                norm_final_val_error = analyzer.analyze(
                    os.path.join(output_dir, model_name), output_dir)
            analysis_dict = {}
            analysis_dict["init_train_error"] = init_train_error
            analysis_dict["final_train_error"] = final_train_error
            analysis_dict["init_train_loss"] = init_train_loss
            analysis_dict["final_train_loss"] = final_train_loss
            analysis_dict["init_val_error"] = init_val_error
            analysis_dict["final_val_error"] = final_val_error
            analysis_dict["norm_final_val_error"] = norm_final_val_error
            train_progress_dict["analysis"] = analysis_dict
            train_progress_dict["training_status"] = "finished"
            trial_progress_dict["status"] = TrialStatus.FINISHED
            sys.exit(0)
        except Exception as e:
            # Report the failure to the parent; exit cleanly so the search
            # can keep scheduling other trials.
            trial_progress_dict["status"] = TrialStatus.EXCEPTION
            trial_progress_dict["error_msg"] = str(e)
            sys.exit(0)

    @property
    def finished(self):
        """bool : True if the trial completed successfully."""
        return self._trial_progress_dict["status"] == TrialStatus.FINISHED

    @property
    def errored_out(self):
        """bool : True if the trial raised an exception."""
        return self._trial_progress_dict["status"] == TrialStatus.EXCEPTION

    @property
    def error_msg(self):
        """str : Error message; only set once `errored_out` is True."""
        return self._trial_progress_dict["error_msg"]

    @property
    def training_status(self):
        """str : Current training stage reported by the trainer."""
        return self._train_progress_dict["training_status"]

    def begin(self, gpu_avail="", cpu_cores_avail=None):
        """Start the trial in a child process.

        Parameters
        ----------
        gpu_avail : str
            Value for CUDA_VISIBLE_DEVICES in the child process.
        cpu_cores_avail : list of int
            CPU cores the child process may use.
        """
        # Local bookkeeping; the authoritative status lives in the shared
        # trial progress dict.
        self._status = TrialStatus.RUNNING
        self._process = multiprocessing.Process(
            target=self._run_trial,
            args=(self._analysis_cfg, self._train_cfg, self._dataset_dir,
                  self._split_name, self._output_dir, self._model_name,
                  self._train_progress_dict, self._trial_progress_dict,
                  self._hyperparam_summary, gpu_avail, cpu_cores_avail))
        self._process.start()

    def __str__(self):
        trial_str = "Trial: {}, Training Stage: {}".format(
            self._model_name, self.training_status)
        if self.training_status == GQCNNTrainingStatus.TRAINING and not \
                np.isnan(self._train_progress_dict["epoch"]):
            trial_str += ", Epoch: {}/{}".format(
                self._train_progress_dict["epoch"],
                self._train_cfg["num_epochs"])
        if self.errored_out:
            trial_str += ", Error message: {}".format(self.error_msg)
        if self.training_status == "finished":
            finished_msg = (", Initial train error: {}, Final train error: {},"
                            " Initial train loss: {}, Final train loss: {},"
                            " Initial val error: {}, Final val error: {}, Norm"
                            " final val error: {}")
            trial_str += finished_msg.format(
                self._train_progress_dict["analysis"]["init_train_error"],
                self._train_progress_dict["analysis"]["final_train_error"],
                self._train_progress_dict["analysis"]["init_train_loss"],
                self._train_progress_dict["analysis"]["final_train_loss"],
                self._train_progress_dict["analysis"]["init_val_error"],
                self._train_progress_dict["analysis"]["final_val_error"],
                self._train_progress_dict["analysis"]["norm_final_val_error"])
        return trial_str
class GQCNNTrainingAndAnalysisTrial(GQCNNTrialWithAnalysis):
    """Trial that trains a GQ-CNN from scratch, then analyzes it."""

    def _run(self, trainer):
        # Kick off training from randomly-initialized weights.
        trainer.train()
class GQCNNFineTuningAndAnalysisTrial(GQCNNTrialWithAnalysis):
    """Trial that fine-tunes a pre-trained GQ-CNN, then analyzes it."""

    def __init__(self, analysis_cfg, train_cfg, dataset_dir, base_model_dir,
                 split_name, output_dir, model_name, hyperparam_summary):
        # The base class takes no `base_model_dir`; store it here and
        # consume it in `_run`.
        super(GQCNNFineTuningAndAnalysisTrial,
              self).__init__(analysis_cfg, train_cfg, dataset_dir, split_name,
                             output_dir, model_name, hyperparam_summary)
        self._base_model_dir = base_model_dir

    def _run(self, trainer):
        # Warm-start optimization from the stored base model.
        trainer.finetune(self._base_model_dir)
================================================
FILE: gqcnn/search/utils.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Utility functions for hyper-parameter search.
Author
------
Vishal Satish
"""
from collections import OrderedDict, defaultdict
import copy
from datetime import datetime
import itertools
def get_fields_to_search_over(train_config, prev_keys=None):
    """Recursively find config fields to perform hyper-parameter search over.

    A field whose value is a list is a search field. If the list's first
    element is a string starting with "anchor_", the field is grouped under
    that anchor tag (fields sharing a tag vary in lockstep) and the tag is
    stripped from the value in place.

    Parameters
    ----------
    train_config : :obj:`collections.OrderedDict`
        (Sub-)config to scan; mutated in place when anchor tags are
        stripped. Note only `OrderedDict` values are recursed into.
    prev_keys : list of str
        Key path from the root config to `train_config` (used during
        recursion).

    Returns
    -------
    tuple of (list, :obj:`collections.defaultdict`)
        The list of plain search-field key paths, and a mapping from
        anchor tag to the key paths grouped under it.
    """
    # `None` sentinel instead of a mutable default argument.
    if prev_keys is None:
        prev_keys = []
    fields = []
    anchored_fields = defaultdict(list)
    for key in train_config:
        if isinstance(train_config[key], list):
            prev_keys_copy = copy.deepcopy(prev_keys)
            prev_keys_copy.append(key)
            if isinstance(train_config[key][0],
                          str) and train_config[key][0].startswith("anchor_"):
                anchored_fields[train_config[key][0]].append(prev_keys_copy)
                # Strip the anchor tag, leaving just the parameter values.
                train_config[key] = train_config[key][1:]
            else:
                fields.append(prev_keys_copy)
        elif isinstance(train_config[key], OrderedDict):
            prev_keys_copy = copy.deepcopy(prev_keys)
            prev_keys_copy.append(key)
            sub_fields, sub_anchored_fields = get_fields_to_search_over(
                train_config[key], prev_keys=prev_keys_copy)
            fields.extend(sub_fields)
            update_dict(anchored_fields, sub_anchored_fields)
    return fields, anchored_fields
def update_dict(dict1, dict2):
    """Merge `dict2` into `dict1` in place, extending list values on
    key collisions."""
    for k, vals in dict2.items():
        if k not in dict1:
            dict1[k] = vals
        else:
            dict1[k].extend(vals)
def get_nested_key(cfg, key):
    """Look up `key` (a list of nested dict keys) in `cfg`."""
    node = cfg
    for part in key:
        node = node[part]
    return node
def set_nested_key(cfg, key, val):
    """Set the value at `key` (a list of nested dict keys) in `cfg`."""
    # Walk down to the parent dict of the leaf key, then assign.
    parent = cfg
    for part in key[:-1]:
        parent = parent[part]
    parent[key[-1]] = val
def gen_config_summary_dict(hyperparam_combination):
    """Flatten a hyper-parameter combination into a summary dict mapping
    "path/to/field" -> value."""
    return {"/".join(key): val for key, val in hyperparam_combination}
def parse_master_train_config(train_config):
    """Expand a master training config into per-trial configs.

    List-valued fields in `train_config` are hyper-parameters to search
    over; fields tagged with the same "anchor_*" prefix vary in lockstep.
    Returns a list of ``(hyperparam_summary, config)`` pairs, one per
    combination — or ``[("", train_config)]`` when there is nothing to
    search over (note the summary is then an empty string, not a dict).

    Note
    ----
    `train_config` is mutated in place by `get_fields_to_search_over`
    (anchor tags are stripped from list values) before being deep-copied
    into the individual trial configs.
    """
    configs = []
    hyperparam_search_fields, hyperparam_anchored_search_fields = \
        get_fields_to_search_over(train_config)
    # Ensure a one-to-one mapping between hyperparameters of fields with
    # matching anchor tags.
    for anchor_tag, fields in hyperparam_anchored_search_fields.items():
        num_params = []
        for field in fields:
            num_params.append(len(get_nested_key(train_config, field)))
        invalid_anchor_tag_msg = ("All fields in anchor tag '{}' do not have"
                                  " the same # of parameters to search over!")
        assert max(num_params) == min(
            num_params), invalid_anchor_tag_msg.format(anchor_tag)
    # If there is nothing to search over just return the given config.
    if len(hyperparam_search_fields) == 0 and len(
            hyperparam_anchored_search_fields) == 0:
        return [("", train_config)]
    # Generate a list of all the possible hyper-parameters to search over.
    # Normal fields: one (field, value) candidate list per field.
    hyperparam_search_params = []
    for search_field in hyperparam_search_fields:
        search_field_params = []
        for val in get_nested_key(train_config, search_field):
            search_field_params.append((search_field, val))
        hyperparam_search_params.append(search_field_params)
    # Anchored fields: one candidate list per anchor tag, where each
    # candidate is the list of (field, value) pairs at the same index —
    # so all fields under a tag vary together.
    for anchored_fields in hyperparam_anchored_search_fields.values():
        combinations = [[] for _ in range(
            len(get_nested_key(train_config, anchored_fields[0])))]
        for field in anchored_fields:
            for idx, val in enumerate(get_nested_key(train_config, field)):
                combinations[idx].append((field, val))
        hyperparam_search_params.append(combinations)
    # Get all permutations of the possible hyper-parameters.
    hyperparam_combinations = list(
        itertools.product(*hyperparam_search_params))

    def flatten_combo(combo):
        # Anchored entries are lists of (field, val) pairs; flatten so each
        # combination becomes a flat list of (field, val) tuples.
        flattened = []
        for item in combo:
            if isinstance(item, list):
                for sub_item in item:
                    flattened.append(sub_item)
            else:
                flattened.append(item)
        return flattened

    hyperparam_combinations = [
        flatten_combo(combo) for combo in hyperparam_combinations
    ]
    # Generate possible configs to search over.
    for combo in hyperparam_combinations:
        config = copy.deepcopy(train_config)
        for field, val in combo:
            set_nested_key(config, field, val)
        configs.append((gen_config_summary_dict(combo), config))
    return configs
def gen_timestamp():
    """Return the current local time formatted as "YYYY-MM-DD_HH:MM:SS"."""
    return datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
def gen_trial_params_train(master_train_configs, datasets, split_names):
    """Build trial parameters for training-from-scratch trials.

    Returns a list of
    (trial_name, hyperparam_summary, train_config, dataset, split_name)
    tuples, one per hyper-parameter combination per dataset.
    """
    trial_params = []
    for master_cfg, dataset, split_name in zip(master_train_configs, datasets,
                                               split_names):
        expanded_cfgs = parse_master_train_config(master_cfg)
        for idx, (summary, cfg) in enumerate(expanded_cfgs):
            # Trial name encodes dataset, split, index, and a timestamp.
            # NOTE(review): assumes dataset paths have >= 3 components.
            trial_name = "{}_{}_trial_{}_{}".format(
                dataset.split("/")[-3], split_name, idx, gen_timestamp())
            trial_params.append(
                (trial_name, summary, cfg, dataset, split_name))
    return trial_params
def gen_trial_params_finetune(master_train_configs, datasets, base_models,
                              split_names):
    """Build trial parameters for fine-tuning trials.

    Returns a list of
    (trial_name, hyperparam_summary, train_config, dataset, base_model,
    split_name) tuples, one per hyper-parameter combination per dataset.
    """
    trial_params = []
    for master_cfg, dataset, base_model, split_name in zip(
            master_train_configs, datasets, base_models, split_names):
        expanded_cfgs = parse_master_train_config(master_cfg)
        for idx, (summary, cfg) in enumerate(expanded_cfgs):
            # Trial name encodes dataset, split, index, and a timestamp.
            # NOTE(review): assumes dataset paths have >= 3 components.
            trial_name = "{}_{}_trial_{}_{}".format(
                dataset.split("/")[-3], split_name, idx, gen_timestamp())
            trial_params.append(
                (trial_name, summary, cfg, dataset, base_model, split_name))
    return trial_params
def gen_trial_params(master_train_configs,
                     datasets,
                     split_names,
                     base_models=None):
    """Generate trial parameters for a hyper-parameter search.

    Parameters
    ----------
    master_train_configs : list of dict
        Master training configs to expand.
    datasets : list of str
        Dataset paths (parallel to `master_train_configs`).
    split_names : list of str
        Split names (parallel to `master_train_configs`).
    base_models : list of str, optional
        When given, trials fine-tune these pre-trained models instead of
        training from scratch.

    Returns
    -------
    list of tuple
        Trial parameter tuples (see `gen_trial_params_train` and
        `gen_trial_params_finetune` for the exact shapes).
    """
    # `None` sentinel instead of a mutable default argument.
    if base_models is None:
        base_models = []
    if len(base_models) > 0:
        return gen_trial_params_finetune(master_train_configs, datasets,
                                         base_models, split_names)
    else:
        return gen_trial_params_train(master_train_configs, datasets,
                                      split_names)
def log_trial_status(trials):
    """Build a multi-line status report, one bracketed line per trial."""
    lines = ["--------------------TRIAL STATUS--------------------"]
    for trial in trials:
        lines.append("[{}]".format(trial))
    return "\n".join(lines)
================================================
FILE: gqcnn/training/__init__.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Factory functions to obtain GQCNNTrainer class based on chosen deep learning
backend. Currently only Tensorflow is supported.
Author: Vishal Satish
"""
from .tf import GQCNNTrainerTF
def get_gqcnn_trainer(backend="tf"):
    """Get the GQ-CNN Trainer for the provided backend.

    Note
    ----
    Currently only TensorFlow is supported.

    Parameters
    ----------
    backend : str
        The backend to use, currently only "tf" is supported.

    Returns
    -------
    :obj:`gqcnn.training.tf.GQCNNTrainerTF`
        GQ-CNN Trainer with TensorFlow backend.

    Raises
    ------
    ValueError
        If `backend` is not supported.
    """
    # Guard clause: reject unsupported backends up front.
    if backend != "tf":
        raise ValueError("Invalid backend: {}".format(backend))
    return GQCNNTrainerTF
================================================
FILE: gqcnn/training/tf/__init__.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
"""
from .trainer_tf import GQCNNTrainerTF
__all__ = ["GQCNNTrainerTF"]
================================================
FILE: gqcnn/training/tf/trainer_tf.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Trains a GQ-CNN network using Tensorflow backend.
Author
------
Vishal Satish & Jeff Mahler
"""
import collections
import json
import multiprocessing as mp
import os
import random
import shutil
import signal
import subprocess
import sys
import time
from past.builtins import xrange
import cv2
import numpy as np
import scipy.stats as ss
import tensorflow as tf
from autolab_core import (BinaryClassificationResult, RegressionResult,
TensorDataset, Logger)
from autolab_core.constants import JSON_INDENT
import autolab_core.utils as utils
from ...utils import (TrainingMode, GripperMode, InputDepthMode,
GeneralConstants, TrainStatsLogger, GQCNNTrainingStatus,
GQCNNFilenames, pose_dim, read_pose_data,
weight_name_to_layer_name, is_py2, imresize)
if is_py2():
import Queue
else:
import queue as Queue
class GQCNNTrainerTF(object):
"""Trains a GQ-CNN with Tensorflow backend."""
    def __init__(self,
                 gqcnn,
                 dataset_dir,
                 split_name,
                 output_dir,
                 config,
                 name=None,
                 progress_dict=None,
                 verbose=True):
        """
        Parameters
        ----------
        gqcnn : :obj:`GQCNN`
            Grasp quality neural network to optimize.
        dataset_dir : str
            Path to the training/validation dataset.
        split_name : str
            Name of the split to train on; defaults to "image_wise" when
            `None`.
        output_dir : str
            Path to save the model output.
        config : dict
            Dictionary of configuration parameters.
        name : str
            Name of the model; auto-generated when `None`.
        progress_dict : dict
            Optional shared dict through which training status is reported
            (used by the hyper-parameter search).
        verbose : bool
            Whether to log to the console/global log in addition to the
            model's own log file.
        """
        self.gqcnn = gqcnn
        self.dataset_dir = dataset_dir
        self.split_name = split_name
        self.output_dir = output_dir
        self.cfg = config
        self.tensorboard_has_launched = False
        self.model_name = name
        self.progress_dict = progress_dict
        self.finetuning = False

        # Create a directory for the model.
        if self.model_name is None:
            # Auto-generate a unique model name.
            model_id = utils.gen_experiment_id()
            self.model_name = "model_{}".format(model_id)
        self.model_dir = os.path.join(self.output_dir, self.model_name)
        if not os.path.exists(self.model_dir):
            # NOTE(review): `os.mkdir` assumes `output_dir` itself already
            # exists (it creates only the leaf directory).
            os.mkdir(self.model_dir)

        # Set up logger.
        self.logger = Logger.get_logger(self.__class__.__name__,
                                        log_file=os.path.join(
                                            self.model_dir, "training.log"),
                                        silence=(not verbose),
                                        global_log_file=verbose)

        # Check default split.
        if split_name is None:
            self.logger.warning("Using default image-wise split.")
            self.split_name = "image_wise"

        # Update cfg for saving.
        self.cfg["dataset_dir"] = self.dataset_dir
        self.cfg["split_name"] = self.split_name
def _create_loss(self):
"""Creates a loss based on config file.
Returns
-------
:obj:`tensorflow Tensor`
Loss.
"""
if self.cfg["loss"] == "l2":
return (1.0 / self.train_batch_size) * tf.nn.l2_loss(
tf.subtract(tf.nn.sigmoid(self.train_net_output),
self.train_labels_node))
elif self.cfg["loss"] == "sparse":
if self._angular_bins > 0:
log = tf.reshape(
tf.dynamic_partition(self.train_net_output,
self.train_pred_mask_node, 2)[1],
(-1, 2))
return tf.reduce_mean(
tf.nn.sparse_softmax_cross_entropy_with_logits(
_sentinel=None,
labels=self.train_labels_node,
logits=log))
else:
return tf.reduce_mean(
tf.nn.sparse_softmax_cross_entropy_with_logits(
_sentinel=None,
labels=self.train_labels_node,
logits=self.train_net_output,
name=None))
elif self.cfg["loss"] == "weighted_cross_entropy":
return tf.reduce_mean(
tf.nn.weighted_cross_entropy_with_logits(
targets=tf.reshape(self.train_labels_node, [-1, 1]),
logits=self.train_net_output,
pos_weight=self.pos_weight,
name=None))
def _create_optimizer(self, loss, batch, var_list, learning_rate):
"""Create optimizer based on config file.
Parameters
----------
loss : :obj:`tensorflow Tensor`
Loss to use, generated with `_create_loss`.
batch : :obj:`tf.Variable`
Variable to keep track of the current gradient step number.
var_list : :obj:`lst`
List of tf.Variable objects to update to minimize loss (ex. network
weights).
learning_rate : float
Learning rate for training.
Returns
-------
:obj:`tf.train.Optimizer`
Optimizer.
"""
# Instantiate optimizer.
if self.cfg["optimizer"] == "momentum":
optimizer = tf.train.MomentumOptimizer(learning_rate,
self.momentum_rate)
elif self.cfg["optimizer"] == "adam":
optimizer = tf.train.AdamOptimizer(learning_rate)
elif self.cfg["optimizer"] == "rmsprop":
optimizer = tf.train.RMSPropOptimizer(learning_rate)
else:
raise ValueError("Optimizer '{}' not supported".format(
self.cfg["optimizer"]))
# Compute gradients.
gradients, variables = zip(
*optimizer.compute_gradients(loss, var_list=var_list))
# Clip gradients to prevent exploding gradient problem.
gradients, global_grad_norm = tf.clip_by_global_norm(
gradients, self.max_global_grad_norm)
# Generate op to apply gradients.
apply_grads = optimizer.apply_gradients(zip(gradients, variables),
global_step=batch)
return apply_grads, global_grad_norm
def _launch_tensorboard(self):
"""Launches Tensorboard to visualize training."""
FNULL = open(os.devnull, "w")
tensorboard_launch_msg = ("Launching Tensorboard, please navigate to"
" localhost:{} in your favorite web browser"
" to view summaries.")
self.logger.info(tensorboard_launch_msg.format(self._tensorboard_port))
self._tensorboard_proc = subprocess.Popen([
"tensorboard", "--port",
str(self._tensorboard_port), "--logdir", self.summary_dir
],
stdout=FNULL)
def _close_tensorboard(self):
"""Closes Tensorboard process."""
self.logger.info("Closing Tensorboard...")
self._tensorboard_proc.terminate()
def train(self):
"""Perform optimization."""
with self.gqcnn.tf_graph.as_default():
self._train()
    def _train(self):
        """Perform optimization.

        Assumes the caller (`train`) has already entered the GQ-CNN's TF
        graph context.
        """
        # Run setup; report SETTING_UP so a hyper-parameter search
        # monitoring `progress_dict` can track the trial's stage.
        # NOTE(review): `_setup` is defined outside this view; presumably
        # it creates `input_im_node`/`input_pose_node` used below — confirm.
        if self.progress_dict is not None:
            self.progress_dict[
                "training_status"] = GQCNNTrainingStatus.SETTING_UP
        self._setup()

        # Build network.
        self.gqcnn.initialize_network(self.input_im_node, self.input_pose_node)

        # Optimize weights; report TRAINING before entering the
        # optimization loop.
        if self.progress_dict is not None:
            self.progress_dict[
                "training_status"] = GQCNNTrainingStatus.TRAINING
        self._optimize_weights()
def finetune(self, base_model_dir):
"""Perform fine-tuning.
Parameters
----------
base_model_dir : str
Path to the pre-trained base model to use.
"""
with self.gqcnn.tf_graph.as_default():
self._finetune(base_model_dir)
def _finetune(self, base_model_dir):
"""Perform fine-tuning.
Parameters
----------
base_model_dir : str
Path to the pre-trained base model to use.
"""
# Set flag and base model for fine-tuning.
self.finetuning = True
self.base_model_dir = base_model_dir
# Run setup.
self._setup()
# Build network.
self.gqcnn.set_base_network(base_model_dir)
self.gqcnn.initialize_network(self.input_im_node, self.input_pose_node)
# Optimize weights.
if self.progress_dict is not None:
self.progress_dict[
"training_status"] = GQCNNTrainingStatus.TRAINING
self._optimize_weights(finetune=True)
def _optimize_weights(self, finetune=False):
    """Optimize the network weights.

    Builds the loss, regularization, and optimizer ops on top of the
    GQ-CNN graph, spawns prefetch worker processes, then runs the
    training loop with periodic logging, validation evaluation,
    checkpointing, and Tensorboard summaries. Cleans up workers and
    the TF session on both normal exit and error.

    Parameters
    ----------
    finetune : bool
        If True, restrict the set of optimized variables to layers
        outside the base network, unless `self.optimize_base_layers`
        is set.
    """
    start_time = time.time()
    # Setup output.
    self.train_net_output = self.gqcnn.output
    if self.training_mode == TrainingMode.CLASSIFICATION:
        if self.cfg["loss"] == "weighted_cross_entropy":
            self.gqcnn.add_sigmoid_to_output()
        else:
            self.gqcnn.add_softmax_to_output()
    elif self.training_mode == TrainingMode.REGRESSION:
        self.gqcnn.add_sigmoid_to_output()
    else:
        raise ValueError("Training mode: {} not supported !".format(
            self.training_mode))
    train_predictions = self.gqcnn.output
    drop_rate_in = self.gqcnn.input_drop_rate_node
    self.weights = self.gqcnn.weights
    # Once weights have been initialized, create TF saver for weights.
    self.saver = tf.train.Saver()
    # Form loss.
    with tf.name_scope("loss"):
        # Part 1: error.
        loss = self._create_loss()
        unregularized_loss = loss
        # Part 2: regularization (L2 over every weight tensor).
        layer_weights = list(self.weights.values())
        with tf.name_scope("regularization"):
            regularizers = tf.nn.l2_loss(layer_weights[0])
            for w in layer_weights[1:]:
                regularizers = regularizers + tf.nn.l2_loss(w)
        loss += self.train_l2_regularizer * regularizers
    # Setup learning rate.
    batch = tf.Variable(0)
    learning_rate = tf.train.exponential_decay(
        self.base_lr,  # Base learning rate.
        batch * self.train_batch_size,  # Current index into the dataset.
        self.decay_step,  # Decay step.
        self.decay_rate,  # Decay rate.
        staircase=True)
    # Setup variable list.
    var_list = list(self.weights.values())
    if finetune:
        # Only optimize non-base layers unless explicitly requested.
        var_list = []
        for weights_name, weights_val in self.weights.items():
            layer_name = weight_name_to_layer_name(weights_name)
            if self.optimize_base_layers or \
                    layer_name not in self.gqcnn._base_layer_names:
                var_list.append(weights_val)
    # Create optimizer.
    with tf.name_scope("optimizer"):
        apply_grad_op, global_grad_norm = self._create_optimizer(
            loss, batch, var_list, learning_rate)

    # Add a handler for SIGINT for graceful exit.
    def handler(signum, frame):
        self.logger.info("caught CTRL+C, exiting...")
        self._cleanup()
        exit(0)

    signal.signal(signal.SIGINT, handler)
    # Now that everything in our graph is set up, we write the graph to the
    # summary event so it can be visualized in Tensorboard.
    self.summary_writer.add_graph(self.gqcnn.tf_graph)
    # Begin optimization loop.
    try:
        # Start prefetch queue workers.
        self.prefetch_q_workers = []
        seed = self._seed
        for i in range(self.num_prefetch_q_workers):
            # Give each worker its own seed except in single-worker
            # debug mode, where determinism is preserved.
            if self.num_prefetch_q_workers > 1 or not self._debug:
                seed = np.random.randint(GeneralConstants.SEED_SAMPLE_MAX)
            p = mp.Process(target=self._load_and_enqueue, args=(seed, ))
            p.start()
            self.prefetch_q_workers.append(p)
        # Init TF variables.
        init = tf.global_variables_initializer()
        self.sess.run(init)
        self.logger.info("Beginning Optimization...")
        # Create a `TrainStatsLogger` object to log training statistics at
        # certain intervals.
        self.train_stats_logger = TrainStatsLogger(self.model_dir)
        # Loop through training steps.
        training_range = xrange(
            int(self.num_epochs * self.num_train) // self.train_batch_size)
        for step in training_range:
            # Run optimization.
            step_start = time.time()
            if self._angular_bins > 0:
                # Angular training also feeds a per-bin prediction mask.
                images, poses, labels, masks = self.prefetch_q.get()
                _, l, ur_l, lr, predictions, raw_net_output = \
                    self.sess.run(
                        [
                            apply_grad_op, loss, unregularized_loss,
                            learning_rate, train_predictions,
                            self.train_net_output
                        ],
                        feed_dict={
                            drop_rate_in: self.drop_rate,
                            self.input_im_node: images,
                            self.input_pose_node: poses,
                            self.train_labels_node: labels,
                            self.train_pred_mask_node: masks
                        },
                        options=GeneralConstants.timeout_option)
            else:
                images, poses, labels = self.prefetch_q.get()
                _, l, ur_l, lr, predictions, raw_net_output = \
                    self.sess.run(
                        [
                            apply_grad_op, loss, unregularized_loss,
                            learning_rate, train_predictions,
                            self.train_net_output
                        ],
                        feed_dict={
                            drop_rate_in: self.drop_rate,
                            self.input_im_node: images,
                            self.input_pose_node: poses,
                            self.train_labels_node: labels
                        },
                        options=GeneralConstants.timeout_option)
            step_stop = time.time()
            self.logger.info("Step took {} sec.".format(
                str(round(step_stop - step_start, 3))))
            # Log minibatch prediction statistics.
            if self.training_mode == TrainingMode.REGRESSION:
                self.logger.info("Max " + str(np.max(predictions)))
                self.logger.info("Min " + str(np.min(predictions)))
            elif self.cfg["loss"] != "weighted_cross_entropy":
                if self._angular_bins == 0:
                    # Manually compute a numerically-stable softmax over
                    # the two classes from the raw network output.
                    ex = np.exp(raw_net_output - np.tile(
                        np.max(raw_net_output, axis=1)[:, np.newaxis],
                        [1, 2]))
                    softmax = ex / np.tile(
                        np.sum(ex, axis=1)[:, np.newaxis], [1, 2])
                    self.logger.info("Max " + str(np.max(softmax[:, 1])))
                    self.logger.info("Min " + str(np.min(softmax[:, 1])))
                    self.logger.info("Pred nonzero " +
                                     str(np.sum(softmax[:, 1] > 0.5)))
                    self.logger.info("True nonzero " + str(np.sum(labels)))
                else:
                    sigmoid = 1.0 / (1.0 + np.exp(-raw_net_output))
                    self.logger.info("Max " + str(np.max(sigmoid)))
                    self.logger.info("Min " + str(np.min(sigmoid)))
                    self.logger.info("Pred nonzero " +
                                     str(np.sum(sigmoid > 0.5)))
                    self.logger.info("True nonzero " +
                                     str(np.sum(labels > 0.5)))
            # Abort early on NaNs rather than training on garbage.
            if np.isnan(l) or np.any(np.isnan(poses)):
                self.logger.error(
                    "Encountered NaN in loss or training poses!")
                raise Exception
            # Log output.
            if step % self.log_frequency == 0:
                elapsed_time = time.time() - start_time
                start_time = time.time()
                self.logger.info("Step {} (epoch {}), {} s".format(
                    step,
                    str(
                        round(
                            step * self.train_batch_size / self.num_train,
                            3)),
                    str(round(1000 * elapsed_time / self.eval_frequency,
                              2))))
                self.logger.info(
                    "Minibatch loss: {}, learning rate: {}".format(
                        str(round(l, 3)), str(round(lr, 6))))
                if self.progress_dict is not None:
                    self.progress_dict["epoch"] = str(
                        round(
                            step * self.train_batch_size / self.num_train,
                            2))
                train_error = l
                if self.training_mode == TrainingMode.CLASSIFICATION:
                    if self._angular_bins > 0:
                        # Keep only predictions for the labeled bins.
                        predictions = predictions[masks.astype(
                            bool)].reshape((-1, 2))
                    classification_result = BinaryClassificationResult(
                        predictions[:, 1], labels)
                    train_error = classification_result.error_rate
                    self.logger.info("Minibatch error: {}".format(
                        str(round(train_error, 3))))
                self.summary_writer.add_summary(
                    self.sess.run(
                        self.merged_log_summaries,
                        feed_dict={
                            self.minibatch_error_placeholder: train_error,
                            self.minibatch_loss_placeholder: l,
                            self.learning_rate_placeholder: lr
                        }), step)
                sys.stdout.flush()
                # Update the `TrainStatsLogger`.
                self.train_stats_logger.update(train_eval_iter=step,
                                               train_loss=l,
                                               train_error=train_error,
                                               total_train_error=None,
                                               val_eval_iter=None,
                                               val_error=None,
                                               learning_rate=lr)
            # Evaluate model.
            if step % self.eval_frequency == 0 and step > 0:
                if self.cfg["eval_total_train_error"]:
                    train_result = self._error_rate_in_batches(
                        validation_set=False)
                    self.logger.info("Training error: {}".format(
                        str(round(train_result.error_rate, 3))))
                    # Update the `TrainStatsLogger` and save.
                    self.train_stats_logger.update(
                        train_eval_iter=None,
                        train_loss=None,
                        train_error=None,
                        total_train_error=train_result.error_rate,
                        total_train_loss=train_result.cross_entropy_loss,
                        val_eval_iter=None,
                        val_error=None,
                        learning_rate=None)
                    self.train_stats_logger.log()
                # Validation only exists when part of the data is held out.
                if self.train_pct < 1.0:
                    val_result = self._error_rate_in_batches()
                    self.summary_writer.add_summary(
                        self.sess.run(
                            self.merged_eval_summaries,
                            feed_dict={
                                self.val_error_placeholder: val_result.
                                error_rate
                            }), step)
                    self.logger.info("Validation error: {}".format(
                        str(round(val_result.error_rate, 3))))
                    self.logger.info("Validation loss: {}".format(
                        str(round(val_result.cross_entropy_loss, 3))))
                sys.stdout.flush()
                # Update the `TrainStatsLogger`.
                if self.train_pct < 1.0:
                    self.train_stats_logger.update(
                        train_eval_iter=None,
                        train_loss=None,
                        train_error=None,
                        total_train_error=None,
                        val_eval_iter=step,
                        val_loss=val_result.cross_entropy_loss,
                        val_error=val_result.error_rate,
                        learning_rate=None)
                else:
                    self.train_stats_logger.update(train_eval_iter=None,
                                                   train_loss=None,
                                                   train_error=None,
                                                   total_train_error=None,
                                                   val_eval_iter=step,
                                                   learning_rate=None)
                # Save everything!
                self.train_stats_logger.log()
            # Save the model.
            if step % self.save_frequency == 0 and step > 0:
                self.saver.save(
                    self.sess,
                    os.path.join(self.model_dir,
                                 GQCNNFilenames.INTER_MODEL.format(step)))
                self.saver.save(
                    self.sess,
                    os.path.join(self.model_dir,
                                 GQCNNFilenames.FINAL_MODEL))
            # Launch tensorboard only after the first iteration.
            if not self.tensorboard_has_launched:
                self.tensorboard_has_launched = True
                self._launch_tensorboard()
        # Get final errors and flush the stdout pipeline.
        final_val_result = self._error_rate_in_batches()
        self.logger.info("Final validation error: {}".format(
            str(round(final_val_result.error_rate, 3))))
        self.logger.info("Final validation loss: {}".format(
            str(round(final_val_result.cross_entropy_loss, 3))))
        if self.cfg["eval_total_train_error"]:
            final_train_result = self._error_rate_in_batches(
                validation_set=False)
            self.logger.info("Final training error: {}".format(
                final_train_result.error_rate))
            self.logger.info("Final training loss: {}".format(
                final_train_result.cross_entropy_loss))
        sys.stdout.flush()
        # Update the `TrainStatsLogger`.
        self.train_stats_logger.update(
            train_eval_iter=None,
            train_loss=None,
            train_error=None,
            total_train_error=None,
            val_eval_iter=step,
            val_loss=final_val_result.cross_entropy_loss,
            val_error=final_val_result.error_rate,
            learning_rate=None)
        # Log & save everything!
        self.train_stats_logger.log()
        self.saver.save(
            self.sess,
            os.path.join(self.model_dir, GQCNNFilenames.FINAL_MODEL))
    except Exception as e:
        # Tear down worker processes and the TF session before
        # propagating the error.
        self._cleanup()
        raise e
    self._cleanup()
def _compute_data_metrics(self):
    """Compute dataset statistics used for input normalization.

    Depending on the GQ-CNN input depth mode, computes image and pose
    means/stds (`POSE_STREAM`), (image - depth) stats (`SUB`), or
    image-only stats (`IM_ONLY`). Results are cached on disk in
    `model_dir` and reused when present. Also computes the fraction of
    positive labels for train/val and, for angular training, per-bin
    label counts. Statistics are estimated from a random subsample of
    tensor files for speed.
    """
    # Subsample tensors for faster runtime.
    random_file_indices = np.random.choice(self.num_tensors,
                                           size=self.num_random_files,
                                           replace=False)
    if self.gqcnn.input_depth_mode == InputDepthMode.POSE_STREAM:
        # Compute image stats.
        im_mean_filename = os.path.join(self.model_dir,
                                        GQCNNFilenames.IM_MEAN)
        im_std_filename = os.path.join(self.model_dir,
                                       GQCNNFilenames.IM_STD)
        if os.path.exists(im_mean_filename) and os.path.exists(
                im_std_filename):
            # Reuse cached statistics from a previous run.
            self.im_mean = np.load(im_mean_filename)
            self.im_std = np.load(im_std_filename)
        else:
            self.im_mean = 0
            self.im_std = 0
            # Compute mean.
            self.logger.info("Computing image mean")
            num_summed = 0
            for k, i in enumerate(random_file_indices):
                if k % self.preproc_log_frequency == 0:
                    self.logger.info(
                        "Adding file {} of {} to image mean estimate".
                        format(k + 1, random_file_indices.shape[0]))
                im_data = self.dataset.tensor(self.im_field_name, i).arr
                train_indices = self.train_index_map[i]
                if train_indices.shape[0] > 0:
                    self.im_mean += np.sum(im_data[train_indices, ...])
                    # Per-pixel count: datapoints * height * width.
                    num_summed += self.train_index_map[i].shape[
                        0] * im_data.shape[1] * im_data.shape[2]
            self.im_mean = self.im_mean / num_summed
            # Compute std.
            self.logger.info("Computing image std")
            for k, i in enumerate(random_file_indices):
                if k % self.preproc_log_frequency == 0:
                    self.logger.info(
                        "Adding file {} of {} to image std estimate".
                        format(k + 1, random_file_indices.shape[0]))
                im_data = self.dataset.tensor(self.im_field_name, i).arr
                train_indices = self.train_index_map[i]
                if train_indices.shape[0] > 0:
                    self.im_std += np.sum(
                        (im_data[train_indices, ...] - self.im_mean)**2)
            self.im_std = np.sqrt(self.im_std / num_summed)
            # Save.
            np.save(im_mean_filename, self.im_mean)
            np.save(im_std_filename, self.im_std)
        # Update GQ-CNN instance.
        self.gqcnn.set_im_mean(self.im_mean)
        self.gqcnn.set_im_std(self.im_std)
        # Compute pose stats.
        pose_mean_filename = os.path.join(self.model_dir,
                                          GQCNNFilenames.POSE_MEAN)
        pose_std_filename = os.path.join(self.model_dir,
                                         GQCNNFilenames.POSE_STD)
        if os.path.exists(pose_mean_filename) and os.path.exists(
                pose_std_filename):
            self.pose_mean = np.load(pose_mean_filename)
            self.pose_std = np.load(pose_std_filename)
        else:
            self.pose_mean = np.zeros(self.raw_pose_shape)
            self.pose_std = np.zeros(self.raw_pose_shape)
            # Compute mean.
            num_summed = 0
            self.logger.info("Computing pose mean")
            for k, i in enumerate(random_file_indices):
                if k % self.preproc_log_frequency == 0:
                    self.logger.info(
                        "Adding file {} of {} to pose mean estimate".
                        format(k + 1, random_file_indices.shape[0]))
                pose_data = self.dataset.tensor(self.pose_field_name,
                                                i).arr
                train_indices = self.train_index_map[i]
                # Randomly negate the angle component for half the
                # datapoints so the stats match angle symmetrization.
                if self.gripper_mode == GripperMode.SUCTION:
                    rand_indices = np.random.choice(
                        pose_data.shape[0],
                        size=pose_data.shape[0] // 2,
                        replace=False)
                    pose_data[rand_indices,
                              4] = -pose_data[rand_indices, 4]
                elif self.gripper_mode == GripperMode.LEGACY_SUCTION:
                    rand_indices = np.random.choice(
                        pose_data.shape[0],
                        size=pose_data.shape[0] // 2,
                        replace=False)
                    pose_data[rand_indices,
                              3] = -pose_data[rand_indices, 3]
                if train_indices.shape[0] > 0:
                    pose_data = pose_data[train_indices, :]
                    # Drop rows with non-finite entries in column 3.
                    pose_data = pose_data[np.isfinite(pose_data[:, 3]), :]
                    self.pose_mean += np.sum(pose_data, axis=0)
                    num_summed += pose_data.shape[0]
            self.pose_mean = self.pose_mean / num_summed
            # Compute std.
            self.logger.info("Computing pose std")
            for k, i in enumerate(random_file_indices):
                if k % self.preproc_log_frequency == 0:
                    self.logger.info(
                        "Adding file {} of {} to pose std estimate".format(
                            k + 1, random_file_indices.shape[0]))
                pose_data = self.dataset.tensor(self.pose_field_name,
                                                i).arr
                train_indices = self.train_index_map[i]
                if self.gripper_mode == GripperMode.SUCTION:
                    rand_indices = np.random.choice(
                        pose_data.shape[0],
                        size=pose_data.shape[0] // 2,
                        replace=False)
                    pose_data[rand_indices,
                              4] = -pose_data[rand_indices, 4]
                elif self.gripper_mode == GripperMode.LEGACY_SUCTION:
                    rand_indices = np.random.choice(
                        pose_data.shape[0],
                        size=pose_data.shape[0] // 2,
                        replace=False)
                    pose_data[rand_indices,
                              3] = -pose_data[rand_indices, 3]
                if train_indices.shape[0] > 0:
                    pose_data = pose_data[train_indices, :]
                    pose_data = pose_data[np.isfinite(pose_data[:, 3]), :]
                    self.pose_std += np.sum(
                        (pose_data - self.pose_mean)**2, axis=0)
            self.pose_std = np.sqrt(self.pose_std / num_summed)
            # Guard against division by zero during normalization.
            self.pose_std[self.pose_std == 0] = 1.0
            # Save.
            self.pose_mean = read_pose_data(self.pose_mean,
                                            self.gripper_mode)
            self.pose_std = read_pose_data(self.pose_std,
                                           self.gripper_mode)
            np.save(pose_mean_filename, self.pose_mean)
            np.save(pose_std_filename, self.pose_std)
        # Update GQ-CNN instance.
        self.gqcnn.set_pose_mean(self.pose_mean)
        self.gqcnn.set_pose_std(self.pose_std)
        # Check for invalid values.
        if np.any(np.isnan(self.pose_mean)) or np.any(
                np.isnan(self.pose_std)):
            self.logger.error(
                "Pose mean or pose std is NaN! Check the input dataset")
            exit(0)
    elif self.gqcnn.input_depth_mode == InputDepthMode.SUB:
        # Compute (image - depth) stats.
        im_depth_sub_mean_filename = os.path.join(
            self.model_dir, GQCNNFilenames.IM_DEPTH_SUB_MEAN)
        im_depth_sub_std_filename = os.path.join(
            self.model_dir, GQCNNFilenames.IM_DEPTH_SUB_STD)
        if os.path.exists(im_depth_sub_mean_filename) and os.path.exists(
                im_depth_sub_std_filename):
            self.im_depth_sub_mean = np.load(im_depth_sub_mean_filename)
            self.im_depth_sub_std = np.load(im_depth_sub_std_filename)
        else:
            self.im_depth_sub_mean = 0
            self.im_depth_sub_std = 0
            # Compute mean.
            self.logger.info("Computing (image - depth) mean.")
            num_summed = 0
            for k, i in enumerate(random_file_indices):
                if k % self.preproc_log_frequency == 0:
                    self.logger.info("Adding file {} of {}...".format(
                        k + 1, random_file_indices.shape[0]))
                im_data = self.dataset.tensor(self.im_field_name, i).arr
                depth_data = read_pose_data(
                    self.dataset.tensor(self.pose_field_name, i).arr,
                    self.gripper_mode)
                # Subtract each image's grasp depth from all its pixels.
                sub_data = im_data - np.tile(
                    np.reshape(depth_data, (-1, 1, 1, 1)),
                    (1, im_data.shape[1], im_data.shape[2], 1))
                train_indices = self.train_index_map[i]
                if train_indices.shape[0] > 0:
                    self.im_depth_sub_mean += np.sum(
                        sub_data[train_indices, ...])
                    num_summed += self.train_index_map[i].shape[
                        0] * im_data.shape[1] * im_data.shape[2]
            self.im_depth_sub_mean = self.im_depth_sub_mean / num_summed
            # Compute std.
            self.logger.info("Computing (image - depth) std.")
            for k, i in enumerate(random_file_indices):
                if k % self.preproc_log_frequency == 0:
                    self.logger.info("Adding file {} of {}...".format(
                        k + 1, random_file_indices.shape[0]))
                im_data = self.dataset.tensor(self.im_field_name, i).arr
                depth_data = read_pose_data(
                    self.dataset.tensor(self.pose_field_name, i).arr,
                    self.gripper_mode)
                sub_data = im_data - np.tile(
                    np.reshape(depth_data, (-1, 1, 1, 1)),
                    (1, im_data.shape[1], im_data.shape[2], 1))
                train_indices = self.train_index_map[i]
                if train_indices.shape[0] > 0:
                    self.im_depth_sub_std += np.sum(
                        (sub_data[train_indices, ...] -
                         self.im_depth_sub_mean)**2)
            self.im_depth_sub_std = np.sqrt(self.im_depth_sub_std /
                                            num_summed)
            # Save.
            np.save(im_depth_sub_mean_filename, self.im_depth_sub_mean)
            np.save(im_depth_sub_std_filename, self.im_depth_sub_std)
        # Update GQ-CNN instance.
        self.gqcnn.set_im_depth_sub_mean(self.im_depth_sub_mean)
        self.gqcnn.set_im_depth_sub_std(self.im_depth_sub_std)
    elif self.gqcnn.input_depth_mode == InputDepthMode.IM_ONLY:
        # Compute image stats.
        im_mean_filename = os.path.join(self.model_dir,
                                        GQCNNFilenames.IM_MEAN)
        im_std_filename = os.path.join(self.model_dir,
                                       GQCNNFilenames.IM_STD)
        if os.path.exists(im_mean_filename) and os.path.exists(
                im_std_filename):
            self.im_mean = np.load(im_mean_filename)
            self.im_std = np.load(im_std_filename)
        else:
            self.im_mean = 0
            self.im_std = 0
            # Compute mean.
            self.logger.info("Computing image mean.")
            num_summed = 0
            for k, i in enumerate(random_file_indices):
                if k % self.preproc_log_frequency == 0:
                    self.logger.info("Adding file {} of {}...".format(
                        k + 1, random_file_indices.shape[0]))
                im_data = self.dataset.tensor(self.im_field_name, i).arr
                train_indices = self.train_index_map[i]
                if train_indices.shape[0] > 0:
                    self.im_mean += np.sum(im_data[train_indices, ...])
                    num_summed += self.train_index_map[i].shape[
                        0] * im_data.shape[1] * im_data.shape[2]
            self.im_mean = self.im_mean / num_summed
            # Compute std.
            self.logger.info("Computing image std.")
            for k, i in enumerate(random_file_indices):
                if k % self.preproc_log_frequency == 0:
                    self.logger.info("Adding file {} of {}...".format(
                        k + 1, random_file_indices.shape[0]))
                im_data = self.dataset.tensor(self.im_field_name, i).arr
                train_indices = self.train_index_map[i]
                if train_indices.shape[0] > 0:
                    self.im_std += np.sum(
                        (im_data[train_indices, ...] - self.im_mean)**2)
            self.im_std = np.sqrt(self.im_std / num_summed)
            # Save.
            np.save(im_mean_filename, self.im_mean)
            np.save(im_std_filename, self.im_std)
        # Update GQ-CNN instance.
        self.gqcnn.set_im_mean(self.im_mean)
        self.gqcnn.set_im_std(self.im_std)
    # Compute normalization parameters of the network.
    pct_pos_train_filename = os.path.join(self.model_dir,
                                          GQCNNFilenames.PCT_POS_TRAIN)
    pct_pos_val_filename = os.path.join(self.model_dir,
                                        GQCNNFilenames.PCT_POS_VAL)
    if os.path.exists(pct_pos_train_filename) and os.path.exists(
            pct_pos_val_filename):
        pct_pos_train = np.load(pct_pos_train_filename)
        pct_pos_val = np.load(pct_pos_val_filename)
    else:
        self.logger.info("Computing grasp quality metric stats.")
        all_train_metrics = None
        all_val_metrics = None
        # Read metrics.
        for k, i in enumerate(random_file_indices):
            if k % self.preproc_log_frequency == 0:
                self.logger.info("Adding file {} of {}...".format(
                    k + 1, random_file_indices.shape[0]))
            metric_data = self.dataset.tensor(self.label_field_name, i).arr
            train_indices = self.train_index_map[i]
            val_indices = self.val_index_map[i]
            if train_indices.shape[0] > 0:
                train_metric_data = metric_data[train_indices]
                if all_train_metrics is None:
                    all_train_metrics = train_metric_data
                else:
                    all_train_metrics = np.r_[all_train_metrics,
                                              train_metric_data]
            if val_indices.shape[0] > 0:
                val_metric_data = metric_data[val_indices]
                if all_val_metrics is None:
                    all_val_metrics = val_metric_data
                else:
                    all_val_metrics = np.r_[all_val_metrics,
                                            val_metric_data]
        # Compute train stats.
        self.min_metric = np.min(all_train_metrics)
        self.max_metric = np.max(all_train_metrics)
        self.mean_metric = np.mean(all_train_metrics)
        self.median_metric = np.median(all_train_metrics)
        # Save metrics.
        pct_pos_train = np.sum(all_train_metrics > self.metric_thresh
                               ) / all_train_metrics.shape[0]
        np.save(pct_pos_train_filename, np.array(pct_pos_train))
        if self.train_pct < 1.0:
            pct_pos_val = np.sum(all_val_metrics > self.metric_thresh
                                 ) / all_val_metrics.shape[0]
            np.save(pct_pos_val_filename, np.array(pct_pos_val))
    self.logger.info("Percent positive in train: " + str(pct_pos_train))
    if self.train_pct < 1.0:
        self.logger.info("Percent positive in val: " + str(pct_pos_val))
    if self._angular_bins > 0:
        self.logger.info("Calculating angular bin statistics...")
        bin_counts = np.zeros((self._angular_bins, ))
        for m in range(self.num_tensors):
            pose_arr = self.dataset.tensor(self.pose_field_name, m).arr
            angles = pose_arr[:, 3]
            neg_ind = np.where(angles < 0)
            angles = np.abs(angles) % self._max_angle
            angles[neg_ind] *= -1
            # Wrap angles into [-max_angle/2, max_angle/2].
            g_90 = np.where(angles > (self._max_angle / 2))
            l_neg_90 = np.where(angles < (-1 * (self._max_angle / 2)))
            angles[g_90] -= self._max_angle
            angles[l_neg_90] += self._max_angle
            # TODO(vsatish): Actually fix this.
            angles *= -1  # Hack to fix reverse angle convention.
            angles += (self._max_angle / 2)
            for i in range(angles.shape[0]):
                bin_counts[int(angles[i] // self._bin_width)] += 1
        self.logger.info("Bin counts: {}.".format(bin_counts))
def _compute_split_indices(self):
"""Compute train and validation indices for each tensor to speed data
accesses."""
# Read indices.
train_indices, val_indices, _ = self.dataset.split(self.split_name)
# Loop through tensors, assigning indices to each file.
self.train_index_map = {}
for i in range(self.dataset.num_tensors):
self.train_index_map[i] = []
for i in train_indices:
tensor_index = self.dataset.tensor_index(i)
datapoint_indices = self.dataset.datapoint_indices_for_tensor(
tensor_index)
lowest = np.min(datapoint_indices)
self.train_index_map[tensor_index].append(i - lowest)
for i, indices in self.train_index_map.items():
self.train_index_map[i] = np.array(indices)
self.val_index_map = {}
for i in range(self.dataset.num_tensors):
self.val_index_map[i] = []
for i in val_indices:
tensor_index = self.dataset.tensor_index(i)
if tensor_index not in self.val_index_map:
self.val_index_map[tensor_index] = []
datapoint_indices = self.dataset.datapoint_indices_for_tensor(
tensor_index)
lowest = np.min(datapoint_indices)
self.val_index_map[tensor_index].append(i - lowest)
for i, indices in self.val_index_map.items():
self.val_index_map[i] = np.array(indices)
def _setup_output_dirs(self):
"""Setup output directories."""
self.logger.info("Saving model to: {}".format(self.model_dir))
# Create the summary dir.
self.summary_dir = os.path.join(self.model_dir,
"tensorboard_summaries")
if not os.path.exists(self.summary_dir):
os.mkdir(self.summary_dir)
else:
# If the summary directory already exists, clean it out by deleting
# all files in it. We don't want Tensorboard to get confused with
# old logs while reusing the same directory.
old_files = os.listdir(self.summary_dir)
for f in old_files:
os.remove(os.path.join(self.summary_dir, f))
# Setup filter directory.
self.filter_dir = os.path.join(self.model_dir, "filters")
if not os.path.exists(self.filter_dir):
os.mkdir(self.filter_dir)
def _save_configs(self):
    """Save the training configuration, launch script, and architecture.

    Writes the (possibly fine-tuning-augmented) training config, a
    copy of the script that launched training, and the network
    architecture to `model_dir` so the run can be reproduced later.
    """
    # Update config for fine-tuning so the base model is recorded.
    if self.finetuning:
        self.cfg["base_model_dir"] = self.base_model_dir
    # Save config, preserving key order.
    out_config_filename = os.path.join(self.model_dir,
                                       GQCNNFilenames.SAVED_CFG)
    tempOrderedDict = collections.OrderedDict()
    for key in self.cfg:
        tempOrderedDict[key] = self.cfg[key]
    with open(out_config_filename, "w") as outfile:
        json.dump(tempOrderedDict, outfile, indent=JSON_INDENT)
    # Save training script.
    this_filename = sys.argv[0]
    out_train_filename = os.path.join(self.model_dir, "training_script.py")
    shutil.copyfile(this_filename, out_train_filename)
    # Save architecture.
    # FIX: use a context manager so the file handle is closed, instead
    # of leaking the anonymous `open(...)` previously passed directly
    # to `json.dump`.
    out_architecture_filename = os.path.join(self.model_dir,
                                             GQCNNFilenames.SAVED_ARCH)
    with open(out_architecture_filename, "w") as arch_file:
        json.dump(self.cfg["gqcnn"]["architecture"],
                  arch_file,
                  indent=JSON_INDENT)
def _read_training_params(self):
    """Parse training hyperparameters out of the configuration."""
    cfg = self.cfg
    # Dataset split fractions.
    self.train_pct = cfg["train_pct"]
    self.total_pct = cfg["total_pct"]
    # Batch sizes.
    self.train_batch_size = cfg["train_batch_size"]
    self.val_batch_size = cfg["val_batch_size"]
    self.max_files_eval = (cfg["max_files_eval"]
                           if "max_files_eval" in cfg else None)
    # Epoch count and logging/saving cadence (in epochs; converted to
    # steps later).
    self.num_epochs = cfg["num_epochs"]
    self.eval_frequency = cfg["eval_frequency"]
    self.save_frequency = cfg["save_frequency"]
    self.log_frequency = cfg["log_frequency"]
    # Optimizer hyperparameters.
    self.train_l2_regularizer = cfg["train_l2_regularizer"]
    self.base_lr = cfg["base_lr"]
    self.decay_step_multiplier = cfg["decay_step_multiplier"]
    self.decay_rate = cfg["decay_rate"]
    self.momentum_rate = cfg["momentum_rate"]
    self.max_training_examples_per_load = cfg[
        "max_training_examples_per_load"]
    self.drop_rate = cfg["drop_rate"]
    self.max_global_grad_norm = cfg["max_global_grad_norm"]
    self.optimize_base_layers = False
    if "optimize_base_layers" in cfg:
        self.optimize_base_layers = cfg["optimize_base_layers"]
    # Target grasp-quality metric.
    self.target_metric_name = cfg["target_metric_name"]
    self.metric_thresh = cfg["metric_thresh"]
    self.training_mode = cfg["training_mode"]
    if self.training_mode != TrainingMode.CLASSIFICATION:
        raise ValueError(
            "Training mode '{}' not currently supported!".format(
                self.training_mode))
    # Tensorboard.
    self._tensorboard_port = cfg["tensorboard_port"]
    # Preprocessing.
    self.preproc_log_frequency = cfg["preproc_log_frequency"]
    self.num_random_files = cfg["num_random_files"]
    self.max_prefetch_q_size = GeneralConstants.MAX_PREFETCH_Q_SIZE
    if "max_prefetch_q_size" in cfg:
        self.max_prefetch_q_size = cfg["max_prefetch_q_size"]
    self.num_prefetch_q_workers = GeneralConstants.NUM_PREFETCH_Q_WORKERS
    if "num_prefetch_q_workers" in cfg:
        self.num_prefetch_q_workers = cfg["num_prefetch_q_workers"]
    # Re-weighting positives/negatives via rejection sampling.
    self.pos_weight = cfg["pos_weight"] if "pos_weight" in cfg else 0.0
    self.pos_accept_prob = 1.0
    self.neg_accept_prob = 1.0
    if self.pos_weight > 1:
        self.neg_accept_prob = 1 / self.pos_weight
    else:
        self.pos_accept_prob = self.pos_weight
    # Sanity-check the split fractions.
    if not 0 <= self.train_pct <= 1:
        raise ValueError("Train percentage must be in range [0,1]")
    if not 0 <= self.total_pct <= 1:
        raise ValueError("Total percentage must be in range [0,1]")
    # Input normalization is disabled for depth-subtracted inputs.
    self._norm_inputs = (
        self.gqcnn.input_depth_mode != InputDepthMode.SUB)
    # Angular training.
    self._angular_bins = self.gqcnn.angular_bins
    self._max_angle = self.gqcnn.max_angle
    # During angular training, make sure symmetrization in denoising is
    # turned off and also set the angular bin width.
    if self._angular_bins > 0:
        assert not cfg["symmetrize"], (
            "Symmetrization denoising must be turned off"
            " during angular training.")
        self._bin_width = self._max_angle / self._angular_bins
    # Debugging.
    self._debug = cfg["debug"]
    self._seed = cfg["seed"]
    if self._debug:
        if self.num_prefetch_q_workers > 1:
            self.logger.warning(
                "Deterministic execution is not possible with "
                "more than one prefetch queue worker even in debug mode.")
        # This reduces initialization time for fast debugging.
        self.num_random_files = cfg["debug_num_files"]
        np.random.seed(self._seed)
        random.seed(self._seed)
def _setup_denoising_and_synthetic(self):
"""Setup denoising and synthetic data parameters."""
# Multiplicative denoising.
if self.cfg["multiplicative_denoising"]:
self.gamma_shape = self.cfg["gamma_shape"]
self.gamma_scale = 1 / self.gamma_shape
# Gaussian process noise.
if self.cfg["gaussian_process_denoising"]:
self.gp_rescale_factor = self.cfg[
"gaussian_process_scaling_factor"]
self.gp_sample_height = int(self.im_height /
self.gp_rescale_factor)
self.gp_sample_width = int(self.im_width / self.gp_rescale_factor)
self.gp_num_pix = self.gp_sample_height * self.gp_sample_width
self.gp_sigma = self.cfg["gaussian_process_sigma"]
def _open_dataset(self):
    """Open the training dataset and resolve the requested split."""
    # Open the tensor dataset and cache its size parameters.
    self.dataset = TensorDataset.open(self.dataset_dir)
    self.num_datapoints = self.dataset.num_datapoints
    self.num_tensors = self.dataset.num_tensors
    self.datapoints_per_file = self.dataset.datapoints_per_file
    # Never sample more files for preprocessing than actually exist.
    self.num_random_files = min(self.num_tensors, self.num_random_files)
    # Reuse an existing split when available; otherwise create one.
    if self.dataset.has_split(self.split_name):
        self.logger.info("Training split: {} found in dataset.".format(
            self.split_name))
    else:
        self.logger.info(
            "Dataset split: {} not found. Creating new split...".format(
                self.split_name))
        self.dataset.make_split(self.split_name, train_pct=self.train_pct)
    # Cache per-tensor train/val indices for fast access.
    self._compute_split_indices()
def _compute_data_params(self):
    """Compute parameters of the dataset.

    Reads image/pose field dimensions from the dataset config, counts
    the train and validation examples, and converts the epoch-based
    eval/save frequencies into step counts.
    """
    # Image params.
    self.im_field_name = self.cfg["image_field_name"]
    self.im_height = self.dataset.config["fields"][
        self.im_field_name]["height"]
    self.im_width = self.dataset.config["fields"][
        self.im_field_name]["width"]
    self.im_channels = self.dataset.config["fields"][
        self.im_field_name]["channels"]
    # NOTE: There was originally some weird math going on here...
    self.im_center = np.array([self.im_height // 2, self.im_width // 2])
    # Poses.
    self.pose_field_name = self.cfg["pose_field_name"]
    self.gripper_mode = self.gqcnn.gripper_mode
    self.pose_dim = pose_dim(self.gripper_mode)
    # NOTE(review): the raw pose dimensionality is stored in the
    # "height" field of the pose tensor config.
    self.raw_pose_shape = self.dataset.config["fields"][
        self.pose_field_name]["height"]
    # Outputs.
    self.label_field_name = self.target_metric_name
    self.num_categories = 2
    # Compute the number of train and val examples.
    self.num_train = 0
    self.num_val = 0
    for train_indices in self.train_index_map.values():
        self.num_train += train_indices.shape[0]
    # FIX: previously iterated `self.train_index_map` here, which made
    # `num_val` equal to `num_train`; the validation count must come
    # from the validation index map.
    for val_indices in self.val_index_map.values():
        self.num_val += val_indices.shape[0]
    # Set params based on the number of training examples (convert epochs
    # to steps).
    self.eval_frequency = int(
        np.ceil(self.eval_frequency *
                (self.num_train / self.train_batch_size)))
    self.save_frequency = int(
        np.ceil(self.save_frequency *
                (self.num_train / self.train_batch_size)))
    self.decay_step = self.decay_step_multiplier * self.num_train
def _setup_tensorflow(self):
    """Setup Tensorflow placeholders, session, and prefetch queue.

    NOTE(review): this is TF1 graph construction — keep the creation
    order of placeholders and the session intact.
    """
    # Set up training label and NumPy data types.
    if self.training_mode == TrainingMode.REGRESSION:
        train_label_dtype = tf.float32
        self.numpy_dtype = np.float32
    elif self.training_mode == TrainingMode.CLASSIFICATION:
        train_label_dtype = tf.int64
        self.numpy_dtype = np.int64
        # Weighted cross-entropy uses continuous (float) labels.
        if self.cfg["loss"] == "weighted_cross_entropy":
            train_label_dtype = tf.float32
            self.numpy_dtype = np.float32
    else:
        raise ValueError("Training mode '{}' not supported".format(
            self.training_mode))
    # Set up placeholders.
    self.train_labels_node = tf.placeholder(train_label_dtype,
                                            (self.train_batch_size, ))
    self.input_im_node = tf.placeholder(
        tf.float32, (self.train_batch_size, self.im_height, self.im_width,
                     self.im_channels))
    self.input_pose_node = tf.placeholder(
        tf.float32, (self.train_batch_size, self.pose_dim))
    if self._angular_bins > 0:
        # Mask selecting which angular bins each label applies to.
        self.train_pred_mask_node = tf.placeholder(
            tf.int32, (self.train_batch_size, self._angular_bins * 2))
    # Create data prefetch queue.
    self.prefetch_q = mp.Queue(self.max_prefetch_q_size)
    # Get weights.
    self.weights = self.gqcnn.weights
    # Open a TF session for the GQ-CNN instance and store it also as the
    # optimizer session.
    self.sess = self.gqcnn.open_session()
    # Setup data prefetch queue worker termination event.
    self.term_event = mp.Event()
    self.term_event.clear()
def _setup_summaries(self):
    """Set up placeholders for summary values and create the summary
    writer."""
    # Create placeholders for Python values because `tf.summary.scalar`
    # expects a placeholder.
    self.val_error_placeholder = tf.placeholder(tf.float32, [])
    self.minibatch_error_placeholder = tf.placeholder(tf.float32, [])
    self.minibatch_loss_placeholder = tf.placeholder(tf.float32, [])
    self.learning_rate_placeholder = tf.placeholder(tf.float32, [])
    # Tag the `tf.summary.scalar`s so that we can group them together and
    # write different batches of summaries at different intervals.
    tf.summary.scalar("val_error",
                      self.val_error_placeholder,
                      collections=["eval_frequency"])
    tf.summary.scalar("minibatch_error",
                      self.minibatch_error_placeholder,
                      collections=["log_frequency"])
    tf.summary.scalar("minibatch_loss",
                      self.minibatch_loss_placeholder,
                      collections=["log_frequency"])
    tf.summary.scalar("learning_rate",
                      self.learning_rate_placeholder,
                      collections=["log_frequency"])
    # Merge per-collection so each group can be written at its own
    # cadence during training.
    self.merged_eval_summaries = tf.summary.merge_all("eval_frequency")
    self.merged_log_summaries = tf.summary.merge_all("log_frequency")
    # Create a TF summary writer with the specified summary directory.
    self.summary_writer = tf.summary.FileWriter(self.summary_dir)
    # Initialize the variables again now that we have added some new ones.
    with self.sess.as_default():
        tf.global_variables_initializer().run()
def _cleanup(self):
self.logger.info("Cleaning and preparing to exit optimization...")
# Set termination event for prefetch queue workers.
self.logger.info("Terminating prefetch queue workers...")
self.term_event.set()
# Flush prefetch queue.
# NOTE: This prevents a deadlock with the worker process queue buffers.
self._flush_prefetch_queue()
# Join prefetch queue worker processes.
for p in self.prefetch_q_workers:
p.join()
# Close tensorboard if started.
if self.tensorboard_has_launched:
self._close_tensorboard()
# Close Tensorflow session.
self.gqcnn.close_session()
def _flush_prefetch_queue(self):
"""Flush prefetch queue."""
self.logger.info("Flushing prefetch queue...")
for i in range(self.prefetch_q.qsize()):
self.prefetch_q.get()
def _setup(self):
    """Setup for training.

    NOTE(review): the call order below is load-bearing — training
    params must be read before the dataset is opened, split indices
    and data params are needed by the data-metric computation, and
    the Tensorflow graph must exist before summaries are attached.
    """
    # Initialize data prefetch queue thread exit booleans.
    self.queue_thread_exited = False
    self.forceful_exit = False
    # Setup output directories.
    self._setup_output_dirs()
    # Save training configuration.
    self._save_configs()
    # Read training parameters from config file.
    self._read_training_params()
    # Setup image and pose data files.
    self._open_dataset()
    # Compute data parameters.
    self._compute_data_params()
    # Setup denoising and synthetic data parameters.
    self._setup_denoising_and_synthetic()
    # Compute means, std's, and normalization metrics.
    self._compute_data_metrics()
    # Setup Tensorflow session/placeholders/queue.
    self._setup_tensorflow()
    # Setup summaries for visualizing metrics in Tensorboard.
    self._setup_summaries()
def _load_and_enqueue(self, seed):
    """Worker-process loop: load random training batches and push them
    onto the prefetch queue until termination is signaled.

    Parameters
    ----------
    seed : int
        RNG seed for this worker; set explicitly so workers do not all
        inherit the same seed from the parent process.
    """
    # When the parent process receives a SIGINT, it will itself handle
    # cleaning up child processes.
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    # Set the random seed explicitly to prevent all workers from possible
    # inheriting the same seed on process initialization.
    np.random.seed(seed)
    random.seed(seed)
    # Open dataset.
    dataset = TensorDataset.open(self.dataset_dir)
    while not self.term_event.is_set():
        # Loop through data.
        num_queued = 0
        start_i = 0
        end_i = 0
        file_num = 0
        queue_start = time.time()
        # Init buffers for one full training batch.
        train_images = np.zeros([
            self.train_batch_size, self.im_height, self.im_width,
            self.im_channels
        ]).astype(np.float32)
        train_poses = np.zeros([self.train_batch_size,
                                self.pose_dim]).astype(np.float32)
        train_labels = np.zeros(self.train_batch_size).astype(
            self.numpy_dtype)
        if self._angular_bins > 0:
            train_pred_mask = np.zeros(
                (self.train_batch_size, self._angular_bins * 2),
                dtype=bool)
        # Fill the batch buffers from randomly chosen tensor files until
        # `train_batch_size` examples have been accumulated.
        while start_i < self.train_batch_size:
            # Compute num remaining.
            num_remaining = self.train_batch_size - num_queued
            # Generate tensor index uniformly at random.
            file_num = np.random.choice(self.num_tensors, size=1)[0]
            read_start = time.time()
            train_images_tensor = dataset.tensor(self.im_field_name,
                                                 file_num)
            train_poses_tensor = dataset.tensor(self.pose_field_name,
                                                file_num)
            train_labels_tensor = dataset.tensor(self.label_field_name,
                                                 file_num)
            read_stop = time.time()
            self.logger.debug("Reading data took {} sec".format(
                str(round(read_stop - read_start, 3))))
            self.logger.debug("File num: {}".format(file_num))
            # Get batch indices uniformly at random.
            train_ind = self.train_index_map[file_num]
            np.random.shuffle(train_ind)
            if self.gripper_mode == GripperMode.LEGACY_SUCTION:
                # Drop examples with a non-finite approach-angle column.
                # NOTE(review): uses `.data` here while `.arr` is used
                # everywhere else — confirm both expose the same array.
                tp_tmp = read_pose_data(train_poses_tensor.data,
                                        self.gripper_mode)
                train_ind = train_ind[np.isfinite(tp_tmp[train_ind, 1])]
            # Filter positives and negatives via rejection sampling to
            # rebalance the batch when `pos_weight` is set.
            if self.training_mode == TrainingMode.CLASSIFICATION and \
                    self.pos_weight != 0.0:
                labels = 1 * (train_labels_tensor.arr > self.metric_thresh)
                np.random.shuffle(train_ind)
                filtered_ind = []
                for index in train_ind:
                    if labels[index] == 0 and np.random.rand(
                    ) < self.neg_accept_prob:
                        filtered_ind.append(index)
                    elif labels[index] == 1 and np.random.rand(
                    ) < self.pos_accept_prob:
                        filtered_ind.append(index)
                train_ind = np.array(filtered_ind)
            # Samples train indices.
            upper = min(num_remaining, train_ind.shape[0],
                        self.max_training_examples_per_load)
            ind = train_ind[:upper]
            num_loaded = ind.shape[0]
            if num_loaded == 0:
                # Nothing survived filtering; try another file.
                self.logger.warning("Queueing zero examples!!!!")
                continue
            # Subsample data.
            train_images_arr = train_images_tensor.arr[ind, ...]
            train_poses_arr = train_poses_tensor.arr[ind, ...]
            angles = train_poses_arr[:, 3]
            train_label_arr = train_labels_tensor.arr[ind]
            num_images = train_images_arr.shape[0]
            # Resize images when stored resolution differs from the
            # network input resolution.
            rescale_factor = self.im_height / train_images_arr.shape[1]
            if rescale_factor != 1.0:
                resized_train_images_arr = np.zeros([
                    num_images, self.im_height, self.im_width,
                    self.im_channels
                ]).astype(np.float32)
                for i in range(num_images):
                    for c in range(train_images_arr.shape[3]):
                        resized_train_images_arr[i, :, :, c] = imresize(
                            train_images_arr[i, :, :, c],
                            rescale_factor,
                            interp="bicubic")
                train_images_arr = resized_train_images_arr
            # Add noises to images.
            train_images_arr, train_poses_arr = self._distort(
                train_images_arr, train_poses_arr)
            # Slice poses.
            train_poses_arr = read_pose_data(train_poses_arr,
                                             self.gripper_mode)
            # Standardize inputs and outputs.
            if self._norm_inputs:
                train_images_arr = (train_images_arr -
                                    self.im_mean) / self.im_std
                if self.gqcnn.input_depth_mode == \
                        InputDepthMode.POSE_STREAM:
                    train_poses_arr = (train_poses_arr -
                                       self.pose_mean) / self.pose_std
            # Threshold metric values into binary labels.
            train_label_arr = 1 * (train_label_arr > self.metric_thresh)
            train_label_arr = train_label_arr.astype(self.numpy_dtype)
            if self._angular_bins > 0:
                # NOTE(review): `bins` is assigned below but never read
                # afterwards — it appears to be dead state.
                bins = np.zeros_like(train_label_arr)
                # Form prediction mask to use when calculating loss.
                # Normalize angles into [-max/2, max/2), then shift to
                # [0, max) for binning.
                neg_ind = np.where(angles < 0)
                angles = np.abs(angles) % self._max_angle
                angles[neg_ind] *= -1
                g_90 = np.where(angles > (self._max_angle / 2))
                l_neg_90 = np.where(angles < (-1 * (self._max_angle / 2)))
                angles[g_90] -= self._max_angle
                angles[l_neg_90] += self._max_angle
                # TODO(vsatish): Actually fix this.
                angles *= -1  # Hack to fix reverse angle convention.
                angles += (self._max_angle / 2)
                train_pred_mask_arr = np.zeros(
                    (train_label_arr.shape[0], self._angular_bins * 2))
                for i in range(angles.shape[0]):
                    bins[i] = angles[i] // self._bin_width
                    # Each bin owns two output columns (grasp / no-grasp).
                    train_pred_mask_arr[i,
                                        int((angles[i] //
                                             self._bin_width) * 2)] = 1
                    train_pred_mask_arr[
                        i, int((angles[i] // self._bin_width) * 2 + 1)] = 1
            # Compute the number of examples loaded.
            num_loaded = train_images_arr.shape[0]
            end_i = start_i + num_loaded
            # Enqueue training data batch.
            train_images[start_i:end_i, ...] = train_images_arr.copy()
            train_poses[start_i:end_i, :] = train_poses_arr.copy()
            train_labels[start_i:end_i] = train_label_arr.copy()
            if self._angular_bins > 0:
                train_pred_mask[start_i:end_i] = train_pred_mask_arr.copy()
            # Update start index.
            start_i = end_i
            num_queued += num_loaded
        # Send data to queue.
        if not self.term_event.is_set():
            try:
                if self._angular_bins > 0:
                    self.prefetch_q.put_nowait(
                        (train_images, train_poses, train_labels,
                         train_pred_mask))
                else:
                    self.prefetch_q.put_nowait(
                        (train_images, train_poses, train_labels))
            except Queue.Full:
                # Queue is full; back off briefly and retry next loop.
                time.sleep(GeneralConstants.QUEUE_SLEEP)
        queue_stop = time.time()
        self.logger.debug("Queue batch took {} sec".format(
            str(round(queue_stop - queue_start, 3))))
def _distort(self, image_arr, pose_arr):
    """Apply the configured noise/augmentation models to a batch of
    images, adjusting grasp-angle pose columns to match symmetrization.

    Parameters
    ----------
    image_arr : :obj:`numpy.ndarray`
        Batch of images, indexed as (image, row, col, channel);
        modified in place for some augmentations.
    pose_arr : :obj:`numpy.ndarray`
        Raw pose array for the batch; angle columns may be negated in
        place when images are symmetrized.

    Returns
    -------
    :obj:`tuple` of :obj:`numpy.ndarray`
        The distorted image array and pose array.
    """
    # Read params.
    num_images = image_arr.shape[0]
    # Denoising and synthetic data generation.
    if self.cfg["multiplicative_denoising"]:
        # Scale every image by a gamma-distributed multiplier.
        mult_samples = ss.gamma.rvs(self.gamma_shape,
                                    scale=self.gamma_scale,
                                    size=num_images)
        mult_samples = mult_samples[:, np.newaxis, np.newaxis, np.newaxis]
        image_arr = image_arr * np.tile(
            mult_samples,
            [1, self.im_height, self.im_width, self.im_channels])
    # Add correlated Gaussian noise.
    if self.cfg["gaussian_process_denoising"]:
        for i in range(num_images):
            if np.random.rand() < self.cfg["gaussian_process_rate"]:
                train_image = image_arr[i, :, :, 0]
                # Sample low-resolution noise, then upsample so the
                # noise is spatially correlated.
                gp_noise = ss.norm.rvs(scale=self.gp_sigma,
                                       size=self.gp_num_pix).reshape(
                                           self.gp_sample_height,
                                           self.gp_sample_width)
                gp_noise = imresize(gp_noise,
                                    self.gp_rescale_factor,
                                    interp="bicubic")
                # Only perturb pixels with valid (positive) depth.
                train_image[train_image > 0] += gp_noise[train_image > 0]
                image_arr[i, :, :, 0] = train_image
    # Symmetrize images.
    if self.cfg["symmetrize"]:
        for i in range(num_images):
            train_image = image_arr[i, :, :, 0]
            # Rotate with 50% probability.
            if np.random.rand() < 0.5:
                theta = 180.0
                rot_map = cv2.getRotationMatrix2D(tuple(self.im_center),
                                                  theta, 1)
                train_image = cv2.warpAffine(
                    train_image,
                    rot_map, (self.im_height, self.im_width),
                    flags=cv2.INTER_NEAREST)
                # NOTE(review): the angle negation below is applied to
                # EVERY row of `pose_arr`, not just row `i`, while only
                # image `i` is rotated — confirm this is intended.
                if self.gripper_mode == GripperMode.LEGACY_SUCTION:
                    pose_arr[:, 3] = -pose_arr[:, 3]
                elif self.gripper_mode == GripperMode.SUCTION:
                    pose_arr[:, 4] = -pose_arr[:, 4]
            # Reflect left right with 50% probability.
            if np.random.rand() < 0.5:
                train_image = np.fliplr(train_image)
            # Reflect up down with 50% probability.
            if np.random.rand() < 0.5:
                train_image = np.flipud(train_image)
                # NOTE(review): same batch-wide negation as above.
                if self.gripper_mode == GripperMode.LEGACY_SUCTION:
                    pose_arr[:, 3] = -pose_arr[:, 3]
                elif self.gripper_mode == GripperMode.SUCTION:
                    pose_arr[:, 4] = -pose_arr[:, 4]
            image_arr[i, :, :, 0] = train_image
    return image_arr, pose_arr
def _error_rate_in_batches(self, num_files_eval=None, validation_set=True):
    """Compute error and loss over either training or validation set.

    Parameters
    ----------
    num_files_eval : int, optional
        Number of tensor files to sample for evaluation; defaults to
        `self.max_files_eval`.
    validation_set : bool, optional
        If True evaluate on the validation indices, otherwise on the
        training indices.

    Returns
    -------
    :obj:`autolab_core.BinaryClassificationResult` or
    :obj:`autolab_core.RegressionResult`
        Evaluation result over the sampled files.
    """
    all_predictions = []
    all_labels = []
    # Subsample files uniformly at random.
    file_indices = np.arange(self.num_tensors)
    if num_files_eval is None:
        num_files_eval = self.max_files_eval
    np.random.shuffle(file_indices)
    if self.max_files_eval is not None and num_files_eval > 0:
        file_indices = file_indices[:num_files_eval]
    # BUGFIX: the datapoint loop below previously reused this loop's
    # variable `i`, shadowing the file index; a distinct name is now
    # used for each loop.
    for file_index in file_indices:
        # Load next file.
        images = self.dataset.tensor(self.im_field_name, file_index).arr
        poses = self.dataset.tensor(self.pose_field_name, file_index).arr
        raw_poses = np.array(poses, copy=True)
        labels = self.dataset.tensor(self.label_field_name, file_index).arr
        # If no datapoints from this file are in the split then just
        # continue.
        if validation_set:
            indices = self.val_index_map[file_index]
        else:
            indices = self.train_index_map[file_index]
        if len(indices) == 0:
            continue
        images = images[indices, ...]
        poses = read_pose_data(poses[indices, :], self.gripper_mode)
        raw_poses = raw_poses[indices, :]
        labels = labels[indices]
        if self.training_mode == TrainingMode.CLASSIFICATION:
            # Threshold metric values into binary labels.
            labels = 1 * (labels > self.metric_thresh)
            labels = labels.astype(np.uint8)
        if self._angular_bins > 0:
            # Form mask to extract predictions from ground-truth angular
            # bins.
            angles = raw_poses[:, 3]
            neg_ind = np.where(angles < 0)
            angles = np.abs(angles) % self._max_angle
            angles[neg_ind] *= -1
            g_90 = np.where(angles > (self._max_angle / 2))
            l_neg_90 = np.where(angles < (-1 * (self._max_angle / 2)))
            angles[g_90] -= self._max_angle
            angles[l_neg_90] += self._max_angle
            # TODO(vsatish): Actually fix this.
            angles *= -1  # Hack to fix reverse angle convention.
            angles += (self._max_angle / 2)
            pred_mask = np.zeros((labels.shape[0], self._angular_bins * 2),
                                 dtype=bool)
            for j in range(angles.shape[0]):
                # Each bin owns two adjacent output columns.
                bin_idx = int(angles[j] // self._bin_width)
                pred_mask[j, bin_idx * 2] = True
                pred_mask[j, bin_idx * 2 + 1] = True
        # Get predictions.
        predictions = self.gqcnn.predict(images, poses)
        if self._angular_bins > 0:
            predictions = predictions[pred_mask].reshape((-1, 2))
        # Update running lists.
        all_predictions.extend(predictions[:, 1].tolist())
        all_labels.extend(labels.tolist())
    # Get learning result.
    if self.training_mode == TrainingMode.CLASSIFICATION:
        return BinaryClassificationResult(all_predictions, all_labels)
    return RegressionResult(all_predictions, all_labels)
================================================
FILE: gqcnn/utils/__init__.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
"""
from .enums import (ImageMode, TrainingMode, GripperMode, InputDepthMode,
GeneralConstants, GQCNNTrainingStatus, GQCNNFilenames)
from .policy_exceptions import (NoValidGraspsException,
NoAntipodalPairsFoundException)
from .train_stats_logger import TrainStatsLogger
from .utils import (is_py2, set_cuda_visible_devices, pose_dim, read_pose_data,
reduce_shape, weight_name_to_layer_name, imresize)
__all__ = [
"is_py2", "set_cuda_visible_devices", "pose_dim", "read_pose_data",
"reduce_shape", "weight_name_to_layer_name", "imresize", "ImageMode",
"TrainingMode", "GripperMode", "InputDepthMode", "GeneralConstants",
"GQCNNTrainingStatus", "NoValidGraspsException",
"NoAntipodalPairsFoundException", "TrainStatsLogger", "GQCNNFilenames"
]
================================================
FILE: gqcnn/utils/enums.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Constants/enums.
Author
------
Vishal Satish
"""
import math
import tensorflow as tf
# Other constants.
class GeneralConstants(object):
    """Miscellaneous constants shared across training and inference."""
    # Global RNG seed used for reproducibility.
    SEED = 3472134
    SEED_SAMPLE_MAX = 2**32 - 1  # Max range for `np.random.seed`.
    # Tensorflow run options with a long op timeout (in milliseconds).
    timeout_option = tf.RunOptions(timeout_in_ms=1000000)
    # Prefetch-queue settings — presumably the max queued batches and
    # worker-process count; confirm against the trainer.
    MAX_PREFETCH_Q_SIZE = 250
    NUM_PREFETCH_Q_WORKERS = 3
    # Seconds to sleep after the prefetch queue reports it is full.
    QUEUE_SLEEP = 0.001
    PI = math.pi
    FIGSIZE = 16  # For visualization.
# Enum for image modalities.
class ImageMode(object):
    """String constants naming the supported image modalities."""
    BINARY = "binary"
    DEPTH = "depth"
    # "_tf" variants — presumably transformed/table-frame modalities;
    # confirm against the dataset field naming.
    BINARY_TF = "binary_tf"
    COLOR_TF = "color_tf"
    GRAY_TF = "gray_tf"
    DEPTH_TF = "depth_tf"
    DEPTH_TF_TABLE = "depth_tf_table"
    TF_DEPTH_IMS = "tf_depth_ims"
# Enum for training modes.
class TrainingMode(object):
    """String constants naming the supported training objectives."""
    CLASSIFICATION = "classification"
    REGRESSION = "regression"  # Has not been tested, for experimentation only!
# Enum for input pose data formats.
class GripperMode(object):
    """String constants naming the supported gripper/pose-input modes.

    The mode determines which pose columns are used — see
    `read_pose_data` and `pose_dim` in `gqcnn/utils/utils.py`.
    """
    PARALLEL_JAW = "parallel_jaw"
    SUCTION = "suction"
    MULTI_SUCTION = "multi_suction"
    # "legacy" modes keep compatibility with older dataset pose layouts.
    LEGACY_PARALLEL_JAW = "legacy_parallel_jaw"
    LEGACY_SUCTION = "legacy_suction"
# Enum for input depth mode.
class InputDepthMode(object):
    """String constants naming how depth is fed to the network."""
    POSE_STREAM = "pose_stream"  # Depth supplied via the pose stream.
    SUB = "im_depth_sub"
    IM_ONLY = "im_only"
# Enum for training status.
class GQCNNTrainingStatus(object):
    """String constants naming the phases of a training run."""
    NOT_STARTED = "not_started"
    SETTING_UP = "setting_up"
    TRAINING = "training"
# Enum for filenames.
class GQCNNFilenames(object):
    """Canonical filenames written into a model/experiment directory."""
    # Percent-positive statistics.
    PCT_POS_VAL = "pct_pos_val.npy"
    PCT_POS_TRAIN = "pct_pos_train.npy"
    # Per-iteration training statistics (flushed by `TrainStatsLogger`).
    LEARNING_RATES = "learning_rates.npy"
    TRAIN_ITERS = "train_eval_iters.npy"
    TRAIN_LOSSES = "train_losses.npy"
    TRAIN_ERRORS = "train_errors.npy"
    TOTAL_TRAIN_LOSSES = "total_train_losses.npy"
    TOTAL_TRAIN_ERRORS = "total_train_errors.npy"
    VAL_ITERS = "val_eval_iters.npy"
    VAL_LOSSES = "val_losses.npy"
    VAL_ERRORS = "val_errors.npy"
    # Normalization metrics; "LEG_" names are the legacy filenames.
    LEG_MEAN = "mean.npy"
    LEG_STD = "std.npy"
    IM_MEAN = "im_mean.npy"
    IM_STD = "im_std.npy"
    IM_DEPTH_SUB_MEAN = "im_depth_sub_mean.npy"
    IM_DEPTH_SUB_STD = "im_depth_sub_std.npy"
    POSE_MEAN = "pose_mean.npy"
    POSE_STD = "pose_std.npy"
    # Model checkpoints and saved graph/config metadata.
    FINAL_MODEL = "model.ckpt"
    INTER_MODEL = "model_{}.ckpt"  # Format with the iteration number.
    SAVED_ARCH = "architecture.json"
    SAVED_CFG = "config.json"
================================================
FILE: gqcnn/utils/policy_exceptions.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Exceptions that can be thrown by sub-classes of `GraspingPolicy`.
Author
------
Vishal Satish
"""
class NoValidGraspsException(Exception):
    """Raised when antipodal point pairs exist in the depth image but
    none of them is a valid grasp that can be executed.

    Attributes
    ----------
    in_collision : bool
        Whether the candidate grasps were rejected for collisions.
    not_confident : bool
        Whether the candidate grasps were rejected for low confidence.
    """

    def __init__(self,
                 in_collision=True,
                 not_confident=False,
                 *args,
                 **kwargs):
        super(NoValidGraspsException, self).__init__(*args, **kwargs)
        self.in_collision = in_collision
        self.not_confident = not_confident
class NoAntipodalPairsFoundException(Exception):
    """Exception for when no antipodal point pairs can be found in the
    depth image at all (cf. `NoValidGraspsException`, raised when pairs
    exist but none is executable)."""
    pass
================================================
FILE: gqcnn/utils/train_stats_logger.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Handles logging of various training statistics.
Author
------
Vishal Satish
"""
import os
import numpy as np
from .enums import GQCNNFilenames
class TrainStatsLogger(object):
    """Logger for training statistics.

    Accumulates training/validation metrics in memory and flushes them
    to `.npy` files in the experiment directory via `log()`.
    """

    # Maps `update()` keyword names to the list attribute storing the
    # corresponding statistic.
    _STAT_TO_ATTR = {
        "train_eval_iter": "train_eval_iters",
        "train_loss": "train_losses",
        "train_error": "train_errors",
        "total_train_error": "total_train_errors",
        "total_train_loss": "total_train_losses",
        "val_eval_iter": "val_eval_iters",
        "val_loss": "val_losses",
        "val_error": "val_errors",
        "learning_rate": "learning_rates",
    }

    def __init__(self, experiment_dir):
        """
        Parameters
        ----------
        experiment_dir : str
            The experiment directory to save statistics to.
        """
        self.experiment_dir = experiment_dir
        self.train_eval_iters = []
        self.train_losses = []
        self.train_errors = []
        self.total_train_errors = []
        self.total_train_losses = []
        self.val_eval_iters = []
        # BUGFIX: `val_losses` was previously initialized twice; the
        # redundant second assignment has been removed.
        self.val_losses = []
        self.val_errors = []
        self.learning_rates = []

    def log(self):
        """Flush all of the statistics to the given experiment directory."""
        stat_files = [
            (GQCNNFilenames.TRAIN_ITERS, self.train_eval_iters),
            (GQCNNFilenames.TRAIN_LOSSES, self.train_losses),
            (GQCNNFilenames.TRAIN_ERRORS, self.train_errors),
            (GQCNNFilenames.TOTAL_TRAIN_ERRORS, self.total_train_errors),
            (GQCNNFilenames.TOTAL_TRAIN_LOSSES, self.total_train_losses),
            (GQCNNFilenames.VAL_ITERS, self.val_eval_iters),
            (GQCNNFilenames.VAL_LOSSES, self.val_losses),
            (GQCNNFilenames.VAL_ERRORS, self.val_errors),
            (GQCNNFilenames.LEARNING_RATES, self.learning_rates),
        ]
        for filename, values in stat_files:
            np.save(os.path.join(self.experiment_dir, filename), values)

    def update(self, **stats):
        """Update training statistics.

        Note
        ----
        Any statistic that is `None` in the argument dict will not be
        updated; unknown statistic names are silently ignored (matching
        the original if/elif behavior).

        Parameters
        ----------
        stats : dict
            Dict of statistics to be updated.
        """
        for stat, val in stats.items():
            attr = self._STAT_TO_ATTR.get(stat)
            if attr is not None and val is not None:
                getattr(self, attr).append(val)
================================================
FILE: gqcnn/utils/utils.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Simple utility functions.
Authors
-------
Jeff Mahler, Vishal Satish, Lucas Manuelli
"""
from functools import reduce
import os
import sys
import numpy as np
import skimage.transform as skt
from autolab_core import Logger
from .enums import GripperMode
# Set up logger.
logger = Logger.get_logger("gqcnn/utils/utils.py")
def is_py2():
    """Return True iff the interpreter is Python 2.

    Uses `sys.version_info` rather than parsing the `sys.version`
    string, which the Python docs advise against.
    """
    return sys.version_info[0] == 2
def set_cuda_visible_devices(gpu_list):
    """Sets CUDA_VISIBLE_DEVICES environment variable to only show certain
    gpus.

    Note
    ----
    If gpu_list is empty does nothing.

    Parameters
    ----------
    gpu_list : list
        List of gpus to set as visible.
    """
    if len(gpu_list) == 0:
        return
    # Build the comma-separated device list with `join`. The previous
    # manual loop left a trailing comma in the variable; CUDA appears to
    # tolerate either form, but a clean list is unambiguous.
    cuda_visible_devices = ",".join(str(gpu) for gpu in gpu_list)
    logger.info(
        "Setting CUDA_VISIBLE_DEVICES = {}".format(cuda_visible_devices))
    os.environ["CUDA_VISIBLE_DEVICES"] = cuda_visible_devices
def pose_dim(gripper_mode):
    """Return the dimensionality of the pose vector for the given
    gripper mode.

    Parameters
    ----------
    gripper_mode: :obj:`GripperMode`
        Enum for gripper mode, see optimizer_constants.py for all possible
        gripper modes.

    Returns
    -------
    int
        Number of pose dimensions used by the gripper mode.

    Raises
    ------
    ValueError
        If the gripper mode is not supported.
    """
    # Suction modes carry depth + angle (2 dims); all others depth only.
    dims_by_mode = {
        GripperMode.PARALLEL_JAW: 1,
        GripperMode.SUCTION: 2,
        GripperMode.MULTI_SUCTION: 1,
        GripperMode.LEGACY_PARALLEL_JAW: 1,
        GripperMode.LEGACY_SUCTION: 2,
    }
    try:
        return dims_by_mode[gripper_mode]
    except KeyError:
        raise ValueError(
            "Gripper mode '{}' not supported.".format(gripper_mode))
def read_pose_data(pose_arr, gripper_mode):
    """Read the pose data and slice it according to the specified gripper mode.

    Parameters
    ----------
    pose_arr: :obj:`numpy.ndarray`
        Full pose data array read in from file; either a single pose
        vector (1-D) or a batch of poses (2-D).
    gripper_mode: :obj:`GripperMode`
        Enum for gripper mode, see optimizer_constants.py for all possible
        gripper modes.

    Returns
    -------
    :obj:`numpy.ndarray`
        Sliced pose_data corresponding to input data mode.

    Raises
    ------
    ValueError
        If the gripper mode is not supported.
    """
    single_pose = pose_arr.ndim == 1
    # Parallel-jaw-style modes use only the depth column (2).
    if gripper_mode in (GripperMode.PARALLEL_JAW, GripperMode.MULTI_SUCTION,
                        GripperMode.LEGACY_PARALLEL_JAW):
        return pose_arr[2:3] if single_pose else pose_arr[:, 2:3]
    # Suction uses depth (2) and approach angle (4).
    if gripper_mode == GripperMode.SUCTION:
        if single_pose:
            return np.r_[pose_arr[2], pose_arr[4]]
        return np.c_[pose_arr[:, 2], pose_arr[:, 4]]
    # Legacy suction stores depth and angle contiguously (2:4).
    if gripper_mode == GripperMode.LEGACY_SUCTION:
        return pose_arr[2:4] if single_pose else pose_arr[:, 2:4]
    raise ValueError(
        "Gripper mode '{}' not supported.".format(gripper_mode))
def reduce_shape(shape):
    """Get shape of a layer for flattening.

    Multiplies together the `.value` of every dimension after the batch
    dimension; a `None` dimension resets the running product to 1
    (preserving the original fold semantics).
    """
    size = 1
    for dim in shape[1:]:
        value = dim.value
        size = 1 if value is None else size * value
    return size
def weight_name_to_layer_name(weight_name):
    """Convert the name of weights to the name of the layer owning them.

    Handles both the modern ("<layer>_weights"/"<layer>_bias", possibly
    with an "_input_<k>" infix) and legacy ("...W_im"/"...W_pose")
    naming conventions.
    """
    tokens = weight_name.split("_")
    suffix = tokens[-1]
    # Modern naming convention.
    if suffix in ("weights", "bias"):
        if len(tokens) >= 3 and tokens[-3] == "input":
            return weight_name[:weight_name.rfind("input") - 1]
        return weight_name[:weight_name.rfind(suffix) - 1]
    # Legacy naming convention: strip the fixed-length suffix.
    if suffix == "im":
        return weight_name[:-4]
    if suffix == "pose":
        return weight_name[:-6]
    return weight_name[:-1]
def imresize(image, size, interp="nearest"):
    """Wrapper over `skimage.transform.resize` to mimic `scipy.misc.imresize`.

    Copied from https://github.com/BerkeleyAutomation/autolab_core/blob/master/autolab_core/image.py#L32. # noqa: E501

    Since `scipy.misc.imresize` has been removed in version 1.3.*, instead use
    `skimage.transform.resize`. The "lanczos" and "cubic" interpolation methods
    are not supported by `skimage.transform.resize`, however there is now
    "biquadratic", "biquartic", and "biquintic".

    Parameters
    ----------
    image : :obj:`numpy.ndarray`
        The image to resize.
    size : int, float, or tuple
        * int - Percentage of current size.
        * float - Fraction of current size.
        * tuple - Size of the output image.
    interp : :obj:`str`, optional
        Interpolation to use for re-sizing ("nearest", "bilinear",
        "biquadratic", "bicubic", "biquartic", "biquintic"). Default is
        "nearest".

    Returns
    -------
    :obj:`np.ndarray`
        The resized image.

    Raises
    ------
    ValueError
        If `interp` is "lanczos" or "cubic", or if `size` has an
        unsupported type.
    """
    # Map interpolation names to `skimage.transform.resize` spline orders.
    skt_interp_map = {
        "nearest": 0,
        "bilinear": 1,
        "biquadratic": 2,
        "bicubic": 3,
        "biquartic": 4,
        "biquintic": 5
    }
    if interp in ("lanczos", "cubic"):
        raise ValueError("'lanczos' and 'cubic'"
                         " interpolation are no longer supported.")
    assert interp in skt_interp_map, ("Interpolation '{}' not"
                                      " supported.".format(interp))

    if isinstance(size, (tuple, list)):
        # Explicit output shape.
        output_shape = size
    elif isinstance(size, float):
        # Fraction of the current size; only the spatial dims are scaled.
        np_shape = np.asarray(image.shape).astype(np.float32)
        np_shape[0:2] *= size
        output_shape = tuple(np_shape.astype(int))
    elif isinstance(size, int):
        # Percentage of the current size; only the spatial dims are scaled.
        np_shape = np.asarray(image.shape).astype(np.float32)
        np_shape[0:2] *= size / 100.0
        output_shape = tuple(np_shape.astype(int))
    else:
        raise ValueError("Invalid type for size '{}'.".format(type(size)))

    return skt.resize(image,
                      output_shape,
                      order=skt_interp_map[interp],
                      anti_aliasing=False,
                      mode="constant")
================================================
FILE: gqcnn/version.py
================================================
# Package version string for `gqcnn`.
__version__ = "1.3.0"
================================================
FILE: launch/grasp_planning_service.launch
================================================
================================================
FILE: msg/Action.msg
================================================
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
uint32 width
uint32 height
uint8[] mask_data
string action_type
float32[] action_data
float32 confidence
================================================
FILE: msg/BoundingBox.msg
================================================
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
float64 minX
float64 minY
float64 maxX
float64 maxY
================================================
FILE: msg/GQCNNGrasp.msg
================================================
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
geometry_msgs/Pose pose
float64 q_value
uint8 PARALLEL_JAW=0
uint8 SUCTION=1
uint8 grasp_type
float64[2] center_px
float64 angle
float64 depth
sensor_msgs/Image thumbnail
================================================
FILE: msg/Observation.msg
================================================
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
uint32 width
uint32 height
float32[] image_data
================================================
FILE: package.xml
================================================
gqcnn
1.3.0
ROS package for deploying Grasp Quality Convolutional Neural Networks (GQ-CNNs).
Vishal Satish
Regents
catkin
rospy
rospy
message_generation
message_runtime
================================================
FILE: post-checkout
================================================
#!/bin/sh
#
# Git hook: after a checkout, remove stale compiled Python bytecode so
# imports cannot pick up `.pyc` files for modules that were moved or
# deleted on the newly checked-out branch.
#
# NOTE(review): the original comment (copied from Git's sample hook
# template) said to rename this file to "post-update"; it is shipped as
# "post-checkout" and runs after `git checkout`.
find . -name "*.pyc" -exec rm '{}' ';'
================================================
FILE: requirements/cpu_requirements.txt
================================================
autolab-core
autolab-perception
visualization
numpy
opencv-python
scipy
matplotlib
tensorflow<=1.15.0
scikit-learn
scikit-image
gputil
psutil
================================================
FILE: requirements/docs_requirements.txt
================================================
sphinx
sphinxcontrib-napoleon
sphinx_rtd_theme
================================================
FILE: requirements/gpu_requirements.txt
================================================
autolab-core
autolab-perception
visualization
numpy
opencv-python
scipy
matplotlib
tensorflow-gpu<=1.15.0
scikit-learn
scikit-image
gputil
psutil
================================================
FILE: ros_nodes/grasp_planner_node.py
================================================
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
ROS Server for planning GQ-CNN grasps.
Author
-----
Vishal Satish & Jeff Mahler
"""
import json
import math
import os
import time
from cv_bridge import CvBridge, CvBridgeError
import numpy as np
import rospy
from autolab_core import (YamlConfig, CameraIntrinsics, ColorImage,
DepthImage, BinaryImage, RgbdImage)
from visualization import Visualizer2D as vis
from gqcnn.grasping import (Grasp2D, SuctionPoint2D, RgbdImageState,
RobustGraspingPolicy,
CrossEntropyRobustGraspingPolicy,
FullyConvolutionalGraspingPolicyParallelJaw,
FullyConvolutionalGraspingPolicySuction)
from gqcnn.utils import GripperMode, NoValidGraspsException
from geometry_msgs.msg import PoseStamped
from std_msgs.msg import Header
from gqcnn.srv import (GQCNNGraspPlanner, GQCNNGraspPlannerBoundingBox,
GQCNNGraspPlannerSegmask)
from gqcnn.msg import GQCNNGrasp
class GraspPlanner(object):
    """ROS service backend that plans GQ-CNN grasps from RGB-D images."""

    def __init__(self, cfg, cv_bridge, grasping_policy, grasp_pose_publisher):
        """
        Parameters
        ----------
        cfg : dict
            Dictionary of configuration parameters.
        cv_bridge: :obj:`CvBridge`
            ROS `CvBridge`.
        grasping_policy: :obj:`GraspingPolicy`
            Grasping policy to use.
        grasp_pose_publisher: :obj:`Publisher`
            ROS publisher to publish pose of planned grasp for visualization.
        """
        self.cfg = cfg
        self.cv_bridge = cv_bridge
        self.grasping_policy = grasping_policy
        self.grasp_pose_publisher = grasp_pose_publisher

        # Set minimum input dimensions.
        policy_type = "cem"
        if "type" in self.cfg["policy"]:
            policy_type = self.cfg["policy"]["type"]
        fully_conv_policy_types = {"fully_conv_suction", "fully_conv_pj"}
        if policy_type in fully_conv_policy_types:
            # Fully-convolutional policies only require that the GQ-CNN
            # receptive field fits inside the image.
            self.min_width = self.cfg["policy"]["gqcnn_recep_w"]
            self.min_height = self.cfg["policy"]["gqcnn_recep_h"]
        else:
            # Crop-based policies rotate image crops, so pad by at least
            # half the crop diagonal (sqrt(2) * half-extent) per side.
            pad = max(
                math.ceil(
                    np.sqrt(2) *
                    (float(self.cfg["policy"]["metric"]["crop_width"]) / 2)),
                math.ceil(
                    np.sqrt(2) *
                    (float(self.cfg["policy"]["metric"]["crop_height"]) / 2)))
            self.min_width = 2 * pad + self.cfg["policy"]["metric"][
                "crop_width"]
            self.min_height = 2 * pad + self.cfg["policy"]["metric"][
                "crop_height"]

    def read_images(self, req):
        """Reads images from a ROS service request.

        Parameters
        ---------
        req: :obj:`ROS ServiceRequest`
            ROS ServiceRequest for grasp planner service.

        Returns
        -------
        :obj:`tuple`
            Color image, depth image, and camera intrinsics unpacked from
            the request.

        Raises
        ------
        :obj:`rospy.ServiceException`
            If the images cannot be converted or are too small.
        """
        # Get the raw depth and color images as ROS `Image` objects.
        raw_color = req.color_image
        raw_depth = req.depth_image

        # Get the raw camera info as ROS `CameraInfo`.
        raw_camera_info = req.camera_info

        # Wrap the camera info in a BerkeleyAutomation/autolab_core
        # `CameraIntrinsics` object.
        camera_intr = CameraIntrinsics(
            raw_camera_info.header.frame_id, raw_camera_info.K[0],
            raw_camera_info.K[4], raw_camera_info.K[2], raw_camera_info.K[5],
            raw_camera_info.K[1], raw_camera_info.height,
            raw_camera_info.width)

        # Create wrapped BerkeleyAutomation/autolab_core RGB and depth images
        # by unpacking the ROS images using ROS `CvBridge`.
        try:
            color_im = ColorImage(self.cv_bridge.imgmsg_to_cv2(
                raw_color, "rgb8"),
                                  frame=camera_intr.frame)
            depth_im = DepthImage(self.cv_bridge.imgmsg_to_cv2(
                raw_depth, desired_encoding="passthrough"),
                                  frame=camera_intr.frame)
        except CvBridgeError as cv_bridge_exception:
            # Fail the service call explicitly; previously execution fell
            # through here and crashed with a `NameError` on `color_im`.
            rospy.logerr(cv_bridge_exception)
            raise rospy.ServiceException(str(cv_bridge_exception))

        # Check image sizes.
        if color_im.height != depth_im.height or \
                color_im.width != depth_im.width:
            msg = ("Color image and depth image must be the same shape! Color"
                   " is %d x %d but depth is %d x %d") % (
                       color_im.height, color_im.width, depth_im.height,
                       depth_im.width)
            rospy.logerr(msg)
            raise rospy.ServiceException(msg)

        if (color_im.height < self.min_height
                or color_im.width < self.min_width):
            msg = ("Color image is too small! Must be at least %d x %d"
                   " resolution but the requested image is only %d x %d") % (
                       self.min_height, self.min_width, color_im.height,
                       color_im.width)
            rospy.logerr(msg)
            raise rospy.ServiceException(msg)

        return color_im, depth_im, camera_intr

    def plan_grasp(self, req):
        """Grasp planner request handler.

        Parameters
        ---------
        req: :obj:`ROS ServiceRequest`
            ROS `ServiceRequest` for grasp planner service.
        """
        color_im, depth_im, camera_intr = self.read_images(req)
        return self._plan_grasp(color_im, depth_im, camera_intr)

    def plan_grasp_bb(self, req):
        """Grasp planner request handler for requests with a bounding box.

        Parameters
        ---------
        req: :obj:`ROS ServiceRequest`
            `ROS ServiceRequest` for grasp planner service.
        """
        color_im, depth_im, camera_intr = self.read_images(req)
        return self._plan_grasp(color_im,
                                depth_im,
                                camera_intr,
                                bounding_box=req.bounding_box)

    def plan_grasp_segmask(self, req):
        """Grasp planner request handler for requests with a segmask.

        Parameters
        ---------
        req: :obj:`ROS ServiceRequest`
            ROS `ServiceRequest` for grasp planner service.
        """
        color_im, depth_im, camera_intr = self.read_images(req)
        raw_segmask = req.segmask
        try:
            segmask = BinaryImage(self.cv_bridge.imgmsg_to_cv2(
                raw_segmask, desired_encoding="passthrough"),
                                  frame=camera_intr.frame)
        except CvBridgeError as cv_bridge_exception:
            # Fail the service call explicitly; previously execution fell
            # through here and crashed with a `NameError` on `segmask`.
            rospy.logerr(cv_bridge_exception)
            raise rospy.ServiceException(str(cv_bridge_exception))

        if color_im.height != segmask.height or \
                color_im.width != segmask.width:
            msg = ("Images and segmask must be the same shape! Color image is"
                   " %d x %d but segmask is %d x %d") % (
                       color_im.height, color_im.width, segmask.height,
                       segmask.width)
            rospy.logerr(msg)
            raise rospy.ServiceException(msg)

        return self._plan_grasp(color_im,
                                depth_im,
                                camera_intr,
                                segmask=segmask)

    def _plan_grasp(self,
                    color_im,
                    depth_im,
                    camera_intr,
                    bounding_box=None,
                    segmask=None):
        """Plan a grasp on the given images.

        Parameters
        ---------
        color_im: :obj:`ColorImage`
            Color image to plan on.
        depth_im: :obj:`DepthImage`
            Depth image to plan on.
        camera_intr: :obj:`CameraIntrinsics`
            Intrinsics of the camera that captured the images.
        bounding_box: :obj:`BoundingBox`, optional
            Region of interest; pixels outside it are masked out.
        segmask: :obj:`BinaryImage`, optional
            Segmentation mask of valid grasp regions.
        """
        rospy.loginfo("Planning Grasp")

        # Inpaint images.
        color_im = color_im.inpaint(
            rescale_factor=self.cfg["inpaint_rescale_factor"])
        depth_im = depth_im.inpaint(
            rescale_factor=self.cfg["inpaint_rescale_factor"])

        # Init segmask to all-valid if none was supplied.
        if segmask is None:
            segmask = BinaryImage(255 *
                                  np.ones(depth_im.shape).astype(np.uint8),
                                  frame=color_im.frame)

        # Visualize.
        if self.cfg["vis"]["color_image"]:
            vis.imshow(color_im)
            vis.show()
        if self.cfg["vis"]["depth_image"]:
            vis.imshow(depth_im)
            vis.show()
        if self.cfg["vis"]["segmask"] and segmask is not None:
            vis.imshow(segmask)
            vis.show()

        # Aggregate color and depth images into a single
        # BerkeleyAutomation/autolab_core `RgbdImage`.
        rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)

        # Mask bounding box.
        if bounding_box is not None:
            # Clip the box to the image bounds and cast to int so the values
            # can be used as array indices (the ROS fields are float64).
            min_x = int(max(bounding_box.minX, 0))
            min_y = int(max(bounding_box.minY, 0))
            max_x = int(min(bounding_box.maxX, rgbd_im.width))
            max_y = int(min(bounding_box.maxY, rgbd_im.height))

            # Mask out everything outside the box.
            bb_segmask_arr = np.zeros([rgbd_im.height, rgbd_im.width])
            bb_segmask_arr[min_y:max_y, min_x:max_x] = 255
            bb_segmask = BinaryImage(bb_segmask_arr.astype(np.uint8),
                                     segmask.frame)
            segmask = segmask.mask_binary(bb_segmask)

        # Visualize.
        if self.cfg["vis"]["rgbd_state"]:
            masked_rgbd_im = rgbd_im.mask_binary(segmask)
            vis.figure()
            vis.subplot(1, 2, 1)
            vis.imshow(masked_rgbd_im.color)
            vis.subplot(1, 2, 2)
            vis.imshow(masked_rgbd_im.depth)
            vis.show()

        # Create an `RgbdImageState` with the cropped `RgbdImage` and
        # `CameraIntrinsics`.
        rgbd_state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)

        # Execute policy.
        try:
            return self.execute_policy(rgbd_state, self.grasping_policy,
                                       self.grasp_pose_publisher,
                                       camera_intr.frame)
        except NoValidGraspsException:
            rospy.logerr(
                ("While executing policy found no valid grasps from sampled"
                 " antipodal point pairs. Aborting Policy!"))
            raise rospy.ServiceException(
                ("While executing policy found no valid grasps from sampled"
                 " antipodal point pairs. Aborting Policy!"))

    def execute_policy(self, rgbd_image_state, grasping_policy,
                       grasp_pose_publisher, pose_frame):
        """Executes a grasping policy on an `RgbdImageState`.

        Parameters
        ----------
        rgbd_image_state: :obj:`RgbdImageState`
            `RgbdImageState` to encapsulate
            depth and color image along with camera intrinsics.
        grasping_policy: :obj:`GraspingPolicy`
            Grasping policy to use.
        grasp_pose_publisher: :obj:`Publisher`
            ROS publisher to publish pose of planned grasp for visualization.
        pose_frame: :obj:`str`
            Frame of reference to publish pose in.
        """
        # Execute the policy's action.
        grasp_planning_start_time = time.time()
        grasp = grasping_policy(rgbd_image_state)

        # Create `GQCNNGrasp` return msg and populate it.
        gqcnn_grasp = GQCNNGrasp()
        gqcnn_grasp.q_value = grasp.q_value
        gqcnn_grasp.pose = grasp.grasp.pose().pose_msg
        if isinstance(grasp.grasp, Grasp2D):
            gqcnn_grasp.grasp_type = GQCNNGrasp.PARALLEL_JAW
        elif isinstance(grasp.grasp, SuctionPoint2D):
            gqcnn_grasp.grasp_type = GQCNNGrasp.SUCTION
        else:
            rospy.logerr("Grasp type not supported!")
            raise rospy.ServiceException("Grasp type not supported!")

        # Store grasp representation in image space.
        gqcnn_grasp.center_px[0] = grasp.grasp.center[0]
        gqcnn_grasp.center_px[1] = grasp.grasp.center[1]
        gqcnn_grasp.angle = grasp.grasp.angle
        gqcnn_grasp.depth = grasp.grasp.depth
        gqcnn_grasp.thumbnail = grasp.image.rosmsg

        # Create and publish the pose alone for easy visualization of grasp
        # pose in Rviz.
        pose_stamped = PoseStamped()
        pose_stamped.pose = grasp.grasp.pose().pose_msg
        header = Header()
        header.stamp = rospy.Time.now()
        header.frame_id = pose_frame
        pose_stamped.header = header
        grasp_pose_publisher.publish(pose_stamped)

        # Return `GQCNNGrasp` msg.
        rospy.loginfo("Total grasp planning time: " +
                      str(time.time() - grasp_planning_start_time) + " secs.")
        return gqcnn_grasp
if __name__ == "__main__":
    # Initialize the ROS node.
    rospy.init_node("Grasp_Sampler_Server")

    # Initialize `CvBridge`.
    cv_bridge = CvBridge()

    # Get configs.
    model_name = rospy.get_param("~model_name")
    model_dir = rospy.get_param("~model_dir")
    fully_conv = rospy.get_param("~fully_conv")
    if model_dir.lower() == "default":
        model_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 "../models")
    model_dir = os.path.join(model_dir, model_name)
    # Use a context manager so the config file handle is always closed
    # (previously `json.load(open(...))` leaked the handle).
    with open(os.path.join(model_dir, "config.json"), "r") as config_file:
        model_config = json.load(config_file)
    try:
        gqcnn_config = model_config["gqcnn"]
        gripper_mode = gqcnn_config["gripper_mode"]
    except KeyError:
        # Legacy model configs encode the gripper mode as an input data mode.
        gqcnn_config = model_config["gqcnn_config"]
        input_data_mode = gqcnn_config["input_data_mode"]
        if input_data_mode == "tf_image":
            gripper_mode = GripperMode.LEGACY_PARALLEL_JAW
        elif input_data_mode == "tf_image_suction":
            gripper_mode = GripperMode.LEGACY_SUCTION
        elif input_data_mode == "suction":
            gripper_mode = GripperMode.SUCTION
        elif input_data_mode == "multi_suction":
            gripper_mode = GripperMode.MULTI_SUCTION
        elif input_data_mode == "parallel_jaw":
            gripper_mode = GripperMode.PARALLEL_JAW
        else:
            raise ValueError(
                "Input data mode {} not supported!".format(input_data_mode))

    # Set config: default to the suction config, then switch to the
    # parallel-jaw config when the gripper mode calls for it.
    if fully_conv:
        config_filename = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..",
            "cfg/examples/ros/fc_gqcnn_suction.yaml")
    else:
        config_filename = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..",
            "cfg/examples/ros/gqcnn_suction.yaml")
    if (gripper_mode == GripperMode.LEGACY_PARALLEL_JAW
            or gripper_mode == GripperMode.PARALLEL_JAW):
        if fully_conv:
            config_filename = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "..",
                "cfg/examples/ros/fc_gqcnn_pj.yaml")
        else:
            config_filename = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "..",
                "cfg/examples/ros/gqcnn_pj.yaml")

    # Read config.
    cfg = YamlConfig(config_filename)
    policy_cfg = cfg["policy"]
    policy_cfg["metric"]["gqcnn_model"] = model_dir

    # Create publisher to publish pose of final grasp.
    grasp_pose_publisher = rospy.Publisher("/gqcnn_grasp/pose",
                                           PoseStamped,
                                           queue_size=10)

    # Create a grasping policy.
    rospy.loginfo("Creating Grasping Policy")
    if fully_conv:
        # TODO(vsatish): We should really be doing this in some factory
        # policy.
        if policy_cfg["type"] == "fully_conv_suction":
            grasping_policy = \
                FullyConvolutionalGraspingPolicySuction(policy_cfg)
        elif policy_cfg["type"] == "fully_conv_pj":
            grasping_policy = \
                FullyConvolutionalGraspingPolicyParallelJaw(policy_cfg)
        else:
            raise ValueError(
                "Invalid fully-convolutional policy type: {}".format(
                    policy_cfg["type"]))
    else:
        policy_type = "cem"
        if "type" in policy_cfg:
            policy_type = policy_cfg["type"]
        if policy_type == "ranking":
            grasping_policy = RobustGraspingPolicy(policy_cfg)
        elif policy_type == "cem":
            grasping_policy = CrossEntropyRobustGraspingPolicy(policy_cfg)
        else:
            raise ValueError("Invalid policy type: {}".format(policy_type))

    # Create a grasp planner.
    grasp_planner = GraspPlanner(cfg, cv_bridge, grasping_policy,
                                 grasp_pose_publisher)

    # Initialize the ROS services.
    grasp_planning_service = rospy.Service("grasp_planner", GQCNNGraspPlanner,
                                           grasp_planner.plan_grasp)
    grasp_planning_service_bb = rospy.Service("grasp_planner_bounding_box",
                                              GQCNNGraspPlannerBoundingBox,
                                              grasp_planner.plan_grasp_bb)
    grasp_planning_service_segmask = rospy.Service(
        "grasp_planner_segmask", GQCNNGraspPlannerSegmask,
        grasp_planner.plan_grasp_segmask)
    rospy.loginfo("Grasping Policy Initialized")

    # Spin forever.
    rospy.spin()
================================================
FILE: scripts/docker/build-docker.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
# Build the CPU and GPU docker images.
ARCHIVE=docker/gqcnn.tar

# Snapshot the master branch into a tar archive that serves as the Docker
# build context for both images.
git archive --format=tar -o "${ARCHIVE}" --prefix=gqcnn/ master

# Build both images from scratch (no cached layers).
docker build --no-cache -t gqcnn/gpu -f docker/gpu/Dockerfile .
docker build --no-cache -t gqcnn/cpu -f docker/cpu/Dockerfile .

# Remove the temporary build-context archive.
rm "${ARCHIVE}"
================================================
FILE: scripts/policies/run_all_dex-net_2.0_examples.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
set -e

# Run the Dex-Net 2.0 GQ-CNN policy on each of the ten bundled single-object
# Primesense depth images (depth_0..depth_9 with matching segmasks).
DATA_DIR=data/examples/single_object/primesense
CFG=cfg/examples/replication/dex-net_2.0.yaml

for i in 0 1 2 3 4 5 6 7 8 9; do
    echo "RUNNING EXAMPLE $((i + 1))"
    python examples/policy.py GQCNN-2.0 --depth_image ${DATA_DIR}/depth_${i}.npy --segmask ${DATA_DIR}/segmask_${i}.png --config_filename ${CFG}
done
================================================
FILE: scripts/policies/run_all_dex-net_2.1_examples.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
set -e

# Run the Dex-Net 2.1 GQ-CNN policy on each of the five bundled clutter
# Primesense depth images (depth_0..depth_4 with matching segmasks).
DATA_DIR=data/examples/clutter/primesense
CFG=cfg/examples/replication/dex-net_2.1.yaml

for i in 0 1 2 3 4; do
    echo "RUNNING EXAMPLE $((i + 1))"
    python examples/policy.py GQCNN-2.1 --depth_image ${DATA_DIR}/depth_${i}.npy --segmask ${DATA_DIR}/segmask_${i}.png --config_filename ${CFG}
done
================================================
FILE: scripts/policies/run_all_dex-net_3.0_examples.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
set -e

# Replicate the Dex-Net 3.0 suction policy results on ten sample
# single-object Primesense scenes.
MODEL=GQCNN-3.0
DATA_DIR=data/examples/single_object/primesense
CFG=cfg/examples/replication/dex-net_3.0.yaml

for i in 0 1 2 3 4 5 6 7 8 9; do
    echo "RUNNING EXAMPLE $((i+1))"
    python examples/policy.py $MODEL --depth_image $DATA_DIR/depth_$i.npy --segmask $DATA_DIR/segmask_$i.png --config_filename $CFG
done
================================================
FILE: scripts/policies/run_all_dex-net_4.0_fc_pj_examples.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
set -e

# Run the fully-convolutional Dex-Net 4.0 parallel-jaw policy on five
# sample PhoXi clutter scenes, using the PhoXi camera intrinsics.
MODEL=FC-GQCNN-4.0-PJ
DATA_DIR=data/examples/clutter/phoxi/fcgqcnn
CFG=cfg/examples/replication/dex-net_4.0_fc_pj.yaml
INTR=data/calib/phoxi/phoxi.intr

for i in 0 1 2 3 4; do
    echo "RUNNING EXAMPLE $((i+1))"
    python examples/policy.py $MODEL --fully_conv --depth_image $DATA_DIR/depth_$i.npy --segmask $DATA_DIR/segmask_$i.png --config_filename $CFG --camera_intr $INTR
done
================================================
FILE: scripts/policies/run_all_dex-net_4.0_fc_suction_examples.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
set -e

# Run the fully-convolutional Dex-Net 4.0 suction policy on five sample
# PhoXi clutter scenes, using the PhoXi camera intrinsics.
MODEL=FC-GQCNN-4.0-SUCTION
DATA_DIR=data/examples/clutter/phoxi/fcgqcnn
CFG=cfg/examples/replication/dex-net_4.0_fc_suction.yaml
INTR=data/calib/phoxi/phoxi.intr

for i in 0 1 2 3 4; do
    echo "RUNNING EXAMPLE $((i+1))"
    python examples/policy.py $MODEL --fully_conv --depth_image $DATA_DIR/depth_$i.npy --segmask $DATA_DIR/segmask_$i.png --config_filename $CFG --camera_intr $INTR
done
================================================
FILE: scripts/policies/run_all_dex-net_4.0_pj_examples.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
set -e

# Replicate the Dex-Net 4.0 parallel-jaw policy results on five sample
# PhoXi clutter scenes, using the PhoXi camera intrinsics.
MODEL=GQCNN-4.0-PJ
DATA_DIR=data/examples/clutter/phoxi/dex-net_4.0
CFG=cfg/examples/replication/dex-net_4.0_pj.yaml
INTR=data/calib/phoxi/phoxi.intr

for i in 0 1 2 3 4; do
    echo "RUNNING EXAMPLE $((i+1))"
    python examples/policy.py $MODEL --depth_image $DATA_DIR/depth_$i.npy --segmask $DATA_DIR/segmask_$i.png --config_filename $CFG --camera_intr $INTR
done
================================================
FILE: scripts/policies/run_all_dex-net_4.0_suction_examples.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
set -e

# Replicate the Dex-Net 4.0 suction policy results on five sample PhoXi
# clutter scenes, using the PhoXi camera intrinsics.
MODEL=GQCNN-4.0-SUCTION
DATA_DIR=data/examples/clutter/phoxi/dex-net_4.0
CFG=cfg/examples/replication/dex-net_4.0_suction.yaml
INTR=data/calib/phoxi/phoxi.intr

for i in 0 1 2 3 4; do
    echo "RUNNING EXAMPLE $((i+1))"
    python examples/policy.py $MODEL --depth_image $DATA_DIR/depth_$i.npy --segmask $DATA_DIR/segmask_$i.png --config_filename $CFG --camera_intr $INTR
done
================================================
FILE: scripts/training/train_dex-net_2.0.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
# Train a GQ-CNN on the Dex-Net 2.0 parallel-jaw dataset with the
# replication config; the trained model is saved under the name GQCNN-2.0.
python tools/train.py data/training/dex-net_2.0 --config_filename cfg/train_dex-net_2.0.yaml --name GQCNN-2.0
================================================
FILE: scripts/training/train_dex-net_2.1.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
# Dex-Net 2.1 training is not runnable from this repo; instead of invoking a
# training job, direct users to the authors for instructions.
echo "Please contact Vishal Satish (vsatish@berkeley.edu) or Jeffrey Mahler (jmahler@berkeley.edu) for instructions on training Dex-Net 2.1."
================================================
FILE: scripts/training/train_dex-net_3.0.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
# Train a GQ-CNN on the Dex-Net 3.0 suction dataset with the replication
# config; the trained model is saved under the name GQCNN-3.0.
python tools/train.py data/training/dex-net_3.0 --config_filename cfg/train_dex-net_3.0.yaml --name GQCNN-3.0
================================================
FILE: scripts/training/train_dex-net_4.0_fc_pj.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
# Train a fully-convolutional GQ-CNN on the Dex-Net 4.0 parallel-jaw dataset;
# the trained model is saved under the name GQCNN-4.0-FC-PJ.
python tools/train.py data/training/dex-net_4.0_fc_pj --config_filename cfg/train_dex-net_4.0_fc_pj.yaml --name GQCNN-4.0-FC-PJ
================================================
FILE: scripts/training/train_dex-net_4.0_fc_suction.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
# Train a fully-convolutional GQ-CNN on the Dex-Net 4.0 suction dataset;
# the trained model is saved under the name GQCNN-4.0-FC-Suction.
python tools/train.py data/training/dex-net_4.0_fc_suction --config_filename cfg/train_dex-net_4.0_fc_suction.yaml --name GQCNN-4.0-FC-Suction
================================================
FILE: scripts/training/train_dex-net_4.0_pj.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
# Train a GQ-CNN on the Dex-Net 4.0 parallel-jaw dataset; the trained model
# is saved under the name GQCNN-4.0-PJ.
# NOTE(review): dataset dir "dexnet_4_pj" differs from the "dex-net_*"
# naming used by the other training scripts — confirm this matches the name
# of the released dataset before "fixing" it.
python tools/train.py data/training/dexnet_4_pj --config_filename cfg/train_dex-net_4.0_pj.yaml --name GQCNN-4.0-PJ
================================================
FILE: scripts/training/train_dex-net_4.0_suction.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
# Train a GQ-CNN on the Dex-Net 4.0 suction dataset; the trained model is
# saved under the name GQCNN-4.0-Suction.
# NOTE(review): dataset dir "dexnet_4_suction" differs from the "dex-net_*"
# naming used by the other training scripts — confirm this matches the name
# of the released dataset before "fixing" it.
python tools/train.py data/training/dexnet_4_suction --config_filename cfg/train_dex-net_4.0_suction.yaml --name GQCNN-4.0-Suction
================================================
FILE: setup.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Setup of `gqcnn` Python codebase.
Author
------
Vishal Satish & Jeff Mahler
"""
import logging
import os
from setuptools import setup, find_packages
from setuptools.command.develop import develop
from setuptools.command.install import install
import subprocess
import sys
# Newest Tensorflow release compatible with this codebase; used below to cap
# the version of the `tensorflow`/`tensorflow-gpu` pip dependency.
TF_MAX_VERSION = "1.15.0"
# Set up logger.
logging.basicConfig()  # Configure the root logger.
logger = logging.getLogger("setup.py")
logger.setLevel(logging.INFO)
def get_tf_dep():
    """Choose the Tensorflow pip requirement for this machine.

    Queries ``nvidia-smi`` for attached GPUs and selects
    ``tensorflow-gpu`` when at least one device is listed; otherwise (no
    driver, a failing driver, or a driver with no devices) falls back to
    the CPU-only ``tensorflow`` package.

    Returns
    -------
    str
        A pip requirement string, e.g. ``"tensorflow<=1.15.0"`` or
        ``"tensorflow-gpu<=1.15.0"``.
    """
    # Default to the CPU build; upgraded below if a GPU is detected.
    tf_dep = "tensorflow<={}".format(TF_MAX_VERSION)
    try:
        # The first line of the csv output is the header row, so skip it;
        # each remaining line names one GPU.
        gpus = subprocess.check_output(
            ["nvidia-smi", "--query-gpu=gpu_name",
             "--format=csv"]).decode().strip().split("\n")[1:]
        if gpus:
            tf_dep = "tensorflow-gpu<={}".format(TF_MAX_VERSION)
        else:
            no_device_msg = ("Found Nvidia device driver but no"
                             " devices...installing Tensorflow for CPU.")
            logger.warning(no_device_msg)
    except (OSError, subprocess.CalledProcessError):
        # OSError: `nvidia-smi` is not installed. CalledProcessError:
        # `nvidia-smi` exists but exited non-zero (e.g. driver/library
        # mismatch) — previously uncaught, which crashed setup on such
        # machines. Either way, install the CPU build.
        no_driver_msg = ("Could not find Nvidia device driver...installing"
                         " Tensorflow for CPU.")
        logger.warning(no_driver_msg)
    return tf_dep
# TODO(vsatish): Use inheritance here.
class DevelopCmd(develop):
    """`develop` command that also installs the Tensorflow dependency.

    Tensorflow is installed at install time (rather than declared in
    `install_requires`) so the GPU/CPU variant can be chosen from the
    machine's Nvidia driver state. Pass `--docker` to skip this step.
    """

    user_options_custom = [
        ("docker", None, "installing in Docker"),
    ]
    user_options = getattr(develop, "user_options", []) + user_options_custom

    def initialize_options(self):
        develop.initialize_options(self)
        # Default: not a Docker build, so Tensorflow is installed here.
        self.docker = False

    def finalize_options(self):
        develop.finalize_options(self)

    def run(self):
        if self.docker:
            # Docker images install Tensorflow up front through the matching
            # `{cpu/gpu}_requirements.txt`, since CUDA/GPU availability
            # cannot be probed at image build time (no Nvidia runtime).
            # TODO(vsatish): Figure out why this isn't printed.
            logger.warning("Omitting Tensorflow dependency because of Docker"
                           " installation.")
        else:
            # Pick the CPU or GPU build and install it via pip.
            subprocess.Popen(
                [sys.executable, "-m", "pip", "install",
                 get_tf_dep()]).wait()
        # Proceed with the normal `develop` installation.
        develop.run(self)
class InstallCmd(install, object):
    """`install` command that also installs the Tensorflow dependency.

    Mirrors `DevelopCmd`: Tensorflow is installed at install time so the
    GPU/CPU variant can be chosen from the machine's Nvidia driver state.
    Pass `--docker` to skip this step.
    """

    user_options_custom = [
        ("docker", None, "installing in Docker"),
    ]
    user_options = getattr(install, "user_options", []) + user_options_custom

    def initialize_options(self):
        install.initialize_options(self)
        # Default: not a Docker build, so Tensorflow is installed here.
        self.docker = False

    def finalize_options(self):
        install.finalize_options(self)

    def run(self):
        # Outside Docker, pick the CPU or GPU Tensorflow build and install
        # it via pip before running the normal installation.
        if not self.docker:
            subprocess.Popen(
                [sys.executable, "-m", "pip", "install",
                 get_tf_dep()]).wait()
        else:
            # Docker images install Tensorflow up front through the matching
            # `{cpu/gpu}_requirements.txt`, since CUDA/GPU availability
            # cannot be probed during `docker build`.
            # TODO (vsatish): Figure out why this isn't printed.
            logger.warning("Omitting Tensorflow dependency because of Docker"
                           " installation.")
        install.run(self)
# Runtime dependencies installed alongside the package (Tensorflow is
# handled separately by the custom install/develop commands above).
requirements = [
    "autolab-core", "autolab-perception", "visualization", "numpy", "scipy",
    "matplotlib", "opencv-python", "scikit-learn", "scikit-image", "psutil",
    "gputil"
]

# Load `__version__` from `gqcnn/version.py` without importing the package,
# which would require its dependencies to already be installed.
exec(
    open(
        os.path.join(os.path.dirname(os.path.realpath(__file__)),
                     "gqcnn/version.py")).read())

setup(
    name="gqcnn",
    version=__version__,  # noqa F821
    description=("Project code for running Grasp Quality Convolutional"
                 " Neural Networks"),
    author="Vishal Satish",
    author_email="vsatish@berkeley.edu",
    license="Berkeley Copyright",
    url="https://github.com/BerkeleyAutomation/gqcnn",
    keywords="robotics grasping vision deep learning",
    classifiers=[
        "Development Status :: 4 - Beta",
        "Programming Language :: Python :: 3.5",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Natural Language :: English",  # yapf: disable
        "Topic :: Scientific/Engineering"
    ],
    packages=find_packages(),
    install_requires=requirements,
    extras_require={
        "docs": ["sphinx", "sphinxcontrib-napoleon", "sphinx_rtd_theme"],
    },
    # Hook in the custom commands defined above so `--docker` is honored.
    cmdclass={
        "install": InstallCmd,
        "develop": DevelopCmd
    })
================================================
FILE: srv/GQCNNGraspPlanner.srv
================================================
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
# request params
sensor_msgs/Image color_image
sensor_msgs/Image depth_image
sensor_msgs/CameraInfo camera_info
---
# response params
GQCNNGrasp grasp
================================================
FILE: srv/GQCNNGraspPlannerBoundingBox.srv
================================================
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
# request params
sensor_msgs/Image color_image
sensor_msgs/Image depth_image
sensor_msgs/CameraInfo camera_info
BoundingBox bounding_box
---
# response params
GQCNNGrasp grasp
================================================
FILE: srv/GQCNNGraspPlannerFull.srv
================================================
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
# request params
sensor_msgs/Image color_image
sensor_msgs/Image depth_image
sensor_msgs/CameraInfo camera_info
BoundingBox bounding_box
sensor_msgs/Image segmask
---
# response params
GQCNNGrasp grasp
================================================
FILE: srv/GQCNNGraspPlannerSegmask.srv
================================================
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
# request params
sensor_msgs/Image color_image
sensor_msgs/Image depth_image
sensor_msgs/CameraInfo camera_info
sensor_msgs/Image segmask
---
# response params
GQCNNGrasp grasp
================================================
FILE: tools/analyze_gqcnn_performance.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Analyzes a GQ-CNN model.
Author
------
Vishal Satish & Jeff Mahler
"""
import argparse
import os
from autolab_core import YamlConfig, Logger
from gqcnn import GQCNNAnalyzer
# Setup logger.
logger = Logger.get_logger("tools/analyze_gqcnn_performance.py")

if __name__ == "__main__":
    # Parse args.
    parser = argparse.ArgumentParser(
        description=("Analyze a Grasp Quality Convolutional Neural Network"
                     " with TensorFlow"))
    parser.add_argument("model_name",
                        type=str,
                        default=None,
                        help="name of model to analyze")
    parser.add_argument("--output_dir",
                        type=str,
                        default=None,
                        help="path to save the analysis")
    parser.add_argument(
        "--dataset_config_filename",
        type=str,
        default=None,
        help="path to a configuration file for testing on a custom dataset")
    parser.add_argument("--config_filename",
                        type=str,
                        default=None,
                        help="path to the configuration file to use")
    parser.add_argument("--model_dir",
                        type=str,
                        default=None,
                        help="path to the model")
    args = parser.parse_args()
    model_name = args.model_name
    output_dir = args.output_dir
    dataset_config_filename = args.dataset_config_filename
    config_filename = args.config_filename
    model_dir = args.model_dir

    # Create model dir (default: `../models` relative to this script).
    if model_dir is None:
        model_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 "../models")
    model_dir = os.path.join(model_dir, model_name)

    # If `model_dir` contains many models (no top-level `config.json`),
    # analyze all of them.
    model_dir = [model_dir]
    if "config.json" not in os.listdir(model_dir[0]):
        logger.warning(
            "Found multiple models in model_dir, analyzing all of them...")
        models = os.listdir(model_dir[0])
        model_dir = [os.path.join(model_dir[0], model) for model in models]

    # Set defaults.
    if output_dir is None:
        output_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                  "../analysis")
    if config_filename is None:
        config_filename = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..",
            "cfg/tools/analyze_gqcnn_performance.yaml")

    # Turn relative paths absolute.
    if not os.path.isabs(output_dir):
        output_dir = os.path.join(os.getcwd(), output_dir)
    if not os.path.isabs(config_filename):
        config_filename = os.path.join(os.getcwd(), config_filename)
    if dataset_config_filename is not None and not os.path.isabs(
            dataset_config_filename):
        dataset_config_filename = os.path.join(os.getcwd(),
                                               dataset_config_filename)

    # Make the output dir. `os.makedirs` also creates any missing parent
    # directories (`os.mkdir` would raise if the parent of a user-supplied
    # `--output_dir` did not exist), and `exist_ok=True` avoids a race with
    # a separate existence check.
    os.makedirs(output_dir, exist_ok=True)

    # Read config.
    config = YamlConfig(config_filename)
    dataset_config = None
    if dataset_config_filename is not None:
        dataset_config = YamlConfig(dataset_config_filename)

    # Run the analyzer on every discovered model.
    analyzer = GQCNNAnalyzer(config, plot_backend="pdf")
    for model in model_dir:
        analyzer.analyze(model, output_dir, dataset_config)
================================================
FILE: tools/finetune.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Script for fine-tuning a Grasp Quality Convolutional Neural Network (GQ-CNN).
Author
------
Vishal Satish & Jeff Mahler
"""
import argparse
import os
import time
from autolab_core import YamlConfig, Logger
import autolab_core.utils as utils
from gqcnn import get_gqcnn_model, get_gqcnn_trainer
from gqcnn import utils as gqcnn_utils
# Setup logger.
logger = Logger.get_logger("tools/finetune.py")


def _str2bool(v):
    """Parse a textual boolean command-line value.

    `argparse` with ``type=bool`` treats every non-empty string (including
    ``"False"``) as truthy, so ``--save_datetime False`` would previously
    enable the flag. This converter interprets the common textual spellings
    correctly while remaining backward-compatible with
    ``--save_datetime True``.

    Raises
    ------
    argparse.ArgumentTypeError
        If the value is not a recognized boolean spelling.
    """
    if isinstance(v, bool):
        return v
    if v.lower() in ("yes", "true", "t", "y", "1"):
        return True
    if v.lower() in ("no", "false", "f", "n", "0"):
        return False
    raise argparse.ArgumentTypeError(
        "Boolean value expected, got {!r}.".format(v))


if __name__ == "__main__":
    # Parse args.
    parser = argparse.ArgumentParser(description=(
        "Fine-Tune a pre-trained Grasp Quality Convolutional Neural Network"
        " with TensorFlow"))
    parser.add_argument(
        "dataset_dir",
        type=str,
        default=None,
        help="path to the dataset to use for training and validation")
    parser.add_argument("base_model_name",
                        type=str,
                        default=None,
                        help="name of the pre-trained model to fine-tune")
    parser.add_argument("--split_name",
                        type=str,
                        default="image_wise",
                        help="name of the split to train on")
    parser.add_argument("--output_dir",
                        type=str,
                        default=None,
                        help="path to store the model")
    parser.add_argument("--tensorboard_port",
                        type=int,
                        default=None,
                        help="port to launch tensorboard on")
    parser.add_argument("--seed",
                        type=int,
                        default=None,
                        help="random seed for training")
    parser.add_argument("--config_filename",
                        type=str,
                        default=None,
                        help="path to the configuration file to use")
    parser.add_argument("--model_dir",
                        type=str,
                        default=None,
                        help="path to the pre-trained model to fine-tune")
    parser.add_argument("--name",
                        type=str,
                        default=None,
                        help="name for the trained model")
    parser.add_argument(
        "--save_datetime",
        type=_str2bool,  # `type=bool` would treat "False" as True.
        default=False,
        help=("whether or not to save a model with the date and time of"
              " training"))
    parser.add_argument("--backend",
                        type=str,
                        default="tf",
                        help="the deep learning framework to use")
    args = parser.parse_args()
    dataset_dir = args.dataset_dir
    base_model_name = args.base_model_name
    split_name = args.split_name
    output_dir = args.output_dir
    tensorboard_port = args.tensorboard_port
    seed = args.seed
    config_filename = args.config_filename
    model_dir = args.model_dir
    name = args.name
    save_datetime = args.save_datetime
    backend = args.backend

    # Set default output dir (`../models` relative to this script).
    if output_dir is None:
        output_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                  "../models")

    # Set default config filename.
    if config_filename is None:
        config_filename = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..",
            "cfg/finetune.yaml")

    # Set default model dir.
    if model_dir is None:
        model_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 "../models")

    # Turn relative paths absolute.
    if not os.path.isabs(dataset_dir):
        dataset_dir = os.path.join(os.getcwd(), dataset_dir)
    if not os.path.isabs(model_dir):
        model_dir = os.path.join(os.getcwd(), model_dir)
    if not os.path.isabs(output_dir):
        output_dir = os.path.join(os.getcwd(), output_dir)
    if not os.path.isabs(config_filename):
        config_filename = os.path.join(os.getcwd(), config_filename)

    # Create full path to the pre-trained model.
    model_dir = os.path.join(model_dir, base_model_name)

    # Create output dir if necessary.
    utils.mkdir_safe(output_dir)

    # Open train config.
    train_config = YamlConfig(config_filename)
    if seed is not None:
        # Seed both the trainer and the network for reproducibility.
        train_config["seed"] = seed
        train_config["gqcnn"]["seed"] = seed
    if tensorboard_port is not None:
        train_config["tensorboard_port"] = tensorboard_port
    gqcnn_params = train_config["gqcnn"]

    # Create a unique output folder based on the date and time.
    if save_datetime:
        # Create output dir.
        unique_name = time.strftime("%Y%m%d-%H%M%S")
        output_dir = os.path.join(output_dir, unique_name)
        utils.mkdir_safe(output_dir)

    # Set visible devices.
    if "gpu_list" in train_config:
        gqcnn_utils.set_cuda_visible_devices(train_config["gpu_list"])

    # Fine-tune the network.
    start_time = time.time()
    gqcnn = get_gqcnn_model(backend)(gqcnn_params)
    trainer = get_gqcnn_trainer(backend)(gqcnn,
                                         dataset_dir,
                                         split_name,
                                         output_dir,
                                         train_config,
                                         name=name)
    trainer.finetune(model_dir)
    logger.info("Total Fine-tuning Time: " +
                str(utils.get_elapsed_time(time.time() - start_time)))
================================================
FILE: tools/hyperparam_search.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Script for searching over Grasp Quality Convolutional Neural Network (GQ-CNN)
hyper-parameters.
Author
------
Vishal Satish
"""
import argparse
from autolab_core import YamlConfig, Logger
from gqcnn import GQCNNSearch
# Set up logger.
logger = Logger.get_logger("tools/hyperparam_search.py")

if __name__ == "__main__":
    # Parse args.
    parser = argparse.ArgumentParser(
        description="Hyper-parameter search for GQ-CNN.")
    parser.add_argument("datasets",
                        nargs="+",
                        default=None,
                        help="path to datasets")
    parser.add_argument("--base_model_dirs",
                        nargs="+",
                        default=[],
                        help="path to pre-trained base models for fine-tuning")
    parser.add_argument("--train_configs",
                        nargs="+",
                        default=["cfg/train.yaml"],
                        help="path to training configs")
    parser.add_argument("--analysis_config",
                        type=str,
                        default="cfg/tools/analyze_gqcnn_performance.yaml")
    parser.add_argument("--split_names",
                        nargs="+",
                        default=["image_wise"],
                        help="dataset splits to use")
    parser.add_argument("--output_dir",
                        type=str,
                        default="models",
                        help="path to store search data")
    parser.add_argument("--search_name",
                        type=str,
                        default=None,
                        help="name of search")
    parser.add_argument("--cpu_cores",
                        nargs="+",
                        default=[],
                        help="CPU cores to use")
    parser.add_argument("--gpu_devices",
                        nargs="+",
                        default=[],
                        help="GPU devices to use")
    args = parser.parse_args()
    datasets = args.datasets
    base_model_dirs = args.base_model_dirs
    train_configs = args.train_configs
    analysis_config = args.analysis_config
    split_names = args.split_names
    output_dir = args.output_dir
    search_name = args.search_name
    cpu_cores = [int(core) for core in args.cpu_cores]
    gpu_devices = [int(device) for device in args.gpu_devices]

    # Validate inputs with explicit exceptions instead of `assert`, which is
    # silently stripped when Python runs with the `-O` flag.
    if len(datasets) != len(train_configs):
        raise ValueError(
            "Must have same number of datasets as training configs!")
    if len(base_model_dirs) > 0 and len(base_model_dirs) != len(datasets):
        models_datasets_mismatch_msg = ("Must have same number of base models"
                                        " for fine-tuning as datasets and"
                                        " training configs!")
        raise ValueError(models_datasets_mismatch_msg)
    if len(split_names) < len(datasets):
        if len(split_names) == 1:
            # A single split is broadcast to every dataset/config pair.
            logger.warning(
                "Using split '{}' for all datasets/configs...".format(
                    split_names[0]))
            split_names *= len(datasets)
        else:
            not_enough_splits_msg = ("Can't have fewer splits than"
                                     " datasets/configs provided unless there"
                                     " is only one.")
            raise ValueError(not_enough_splits_msg)

    # Parse configs.
    analysis_config = YamlConfig(analysis_config)
    train_configs = [YamlConfig(cfg) for cfg in train_configs]

    # Search.
    search = GQCNNSearch(analysis_config,
                         train_configs,
                         datasets,
                         split_names,
                         output_dir=output_dir,
                         search_name=search_name,
                         cpu_cores=cpu_cores,
                         gpu_devices=gpu_devices,
                         base_models=base_model_dirs)
    search.search()
================================================
FILE: tools/plot_training_losses.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Script to plot the various errors saved during training.
Author
------
Jeff Mahler
Required Parameters
-------------------
model_dir : str
Command line argument, the path to the model whose errors are to plotted.
All plots and other metrics will be saved to this directory.
"""
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
from autolab_core import Logger
from gqcnn.utils import GeneralConstants, GQCNNFilenames
# Set up logger.
logger = Logger.get_logger("tools/plot_training_losses.py")

if __name__ == "__main__":
    # Directory containing the saved training metrics (positional CLI arg);
    # plots are also written back into this directory.
    result_dir = sys.argv[1]

    # Paths to the raw metric arrays saved during training.
    train_errors_filename = os.path.join(result_dir,
                                         GQCNNFilenames.TRAIN_ERRORS)
    val_errors_filename = os.path.join(result_dir, GQCNNFilenames.VAL_ERRORS)
    train_iters_filename = os.path.join(result_dir, GQCNNFilenames.TRAIN_ITERS)
    val_iters_filename = os.path.join(result_dir, GQCNNFilenames.VAL_ITERS)
    pct_pos_val_filename = os.path.join(result_dir, GQCNNFilenames.PCT_POS_VAL)
    train_losses_filename = os.path.join(result_dir, GQCNNFilenames.TRAIN_LOSS)
    val_losses_filename = os.path.join(result_dir, GQCNNFilenames.VAL_LOSS)
    raw_train_errors = np.load(train_errors_filename)
    val_errors = np.load(val_errors_filename)
    raw_train_iters = np.load(train_iters_filename)
    val_iters = np.load(val_iters_filename)
    # Iteration-0 baseline error: prefer the saved percent-positive
    # statistic (scaled to a percentage) when it exists, otherwise fall
    # back to the first recorded validation error.
    pct_pos_val = float(val_errors[0])
    if os.path.exists(pct_pos_val_filename):
        pct_pos_val = 100.0 * np.load(pct_pos_val_filename)
    raw_train_losses = np.load(train_losses_filename)
    # Validation losses are optional (may not have been saved).
    val_losses = None
    try:
        val_losses = np.load(val_losses_filename)
    except FileNotFoundError:
        pass
    # Prepend the baseline point at iteration 0.
    val_errors = np.r_[pct_pos_val, val_errors]
    val_iters = np.r_[0, val_iters]

    # Window the training error.
    # Smooth the noisy per-minibatch metrics by averaging over consecutive
    # windows of `GeneralConstants.WINDOW` iterations.
    i = 0
    train_errors = []
    train_losses = []
    train_iters = []
    while i < raw_train_errors.shape[0]:
        train_errors.append(
            np.mean(raw_train_errors[i:i + GeneralConstants.WINDOW]))
        train_losses.append(
            np.mean(raw_train_losses[i:i + GeneralConstants.WINDOW]))
        train_iters.append(i)
        i += GeneralConstants.WINDOW
    train_errors = np.array(train_errors)
    train_losses = np.array(train_losses)
    train_iters = np.array(train_iters)
    if val_losses is not None:
        # Use the first training loss as the iteration-0 validation loss.
        val_losses = np.r_[train_losses[0], val_losses]

    # Normalize errors by the initial validation error; the final error is
    # normalized by the percent-positive baseline when it is positive.
    init_val_error = val_errors[0]
    norm_train_errors = train_errors / init_val_error
    norm_val_errors = val_errors / init_val_error
    norm_final_val_error = val_errors[-1] / val_errors[0]
    if pct_pos_val > 0:
        norm_final_val_error = val_errors[-1] / pct_pos_val

    # Log summary statistics.
    logger.info("TRAIN")
    logger.info("Original Error {}".format(train_errors[0]))
    logger.info("Final Error {}".format(train_errors[-1]))
    logger.info("Orig loss {}".format(train_losses[0]))
    logger.info("Final loss {}".format(train_losses[-1]))
    logger.info("VAL")
    logger.info("Original error {}".format(pct_pos_val))
    logger.info("Final error {}".format(val_errors[-1]))
    logger.info("Normalized error {}".format(norm_final_val_error))
    if val_losses is not None:
        logger.info("Orig loss {}".format(val_losses[0]))
        logger.info("Final loss {}".format(val_losses[-1]))

    # Interactive figures: raw error curves...
    plt.figure()
    plt.plot(train_iters, train_errors, linewidth=4, color="b")
    plt.plot(val_iters, val_errors, linewidth=4, color="g")
    plt.ylim(0, 100)
    plt.legend(("Training (Minibatch)", "Validation"), fontsize=15, loc="best")
    plt.xlabel("Iteration", fontsize=15)
    plt.ylabel("Error Rate", fontsize=15)
    # ...normalized error curves...
    plt.figure()
    plt.plot(train_iters, norm_train_errors, linewidth=4, color="b")
    plt.plot(val_iters, norm_val_errors, linewidth=4, color="g")
    plt.ylim(0, 2.0)
    plt.legend(("Training (Minibatch)", "Validation"), fontsize=15, loc="best")
    plt.xlabel("Iteration", fontsize=15)
    plt.ylabel("Normalized Error Rate", fontsize=15)
    # ...and loss curves. Loss outliers are clipped in place so the plot
    # scale stays readable.
    train_losses[train_losses > 100.0] = 3.0
    plt.figure()
    plt.plot(train_iters, train_losses, linewidth=4, color="b")
    plt.ylim(0, 2.0)
    plt.xlabel("Iteration", fontsize=15)
    plt.ylabel("Training Loss", fontsize=15)
    if val_losses is not None:
        val_losses[val_losses > 100.0] = 3.0
        plt.figure()
        plt.plot(val_iters, val_losses, linewidth=4, color="b")
        plt.ylim(0, 2.0)
        plt.xlabel("Iteration", fontsize=15)
        plt.ylabel("Validation Loss", fontsize=15)
    # NOTE(review): `plt.show()` blocks until the interactive windows are
    # closed; the figures below are only built and saved afterwards.
    plt.show()

    # Saved figures: error curves written as JPEGs into `result_dir`.
    plt.figure(figsize=(8, 6))
    plt.plot(train_iters, train_errors, linewidth=4, color="b")
    plt.plot(val_iters, val_errors, linewidth=4, color="g")
    plt.ylim(0, 100)
    plt.legend(("Training (Minibatch)", "Validation"), fontsize=15, loc="best")
    plt.xlabel("Iteration", fontsize=15)
    plt.ylabel("Error Rate", fontsize=15)
    plt.savefig(os.path.join(result_dir, "training_curve.jpg"))
    plt.figure(figsize=(8, 6))
    plt.plot(train_iters, norm_train_errors, linewidth=4, color="b")
    plt.plot(val_iters, norm_val_errors, linewidth=4, color="g")
    plt.ylim(0, 2.0)
    plt.legend(("Training (Minibatch)", "Validation"), fontsize=15, loc="best")
    plt.xlabel("Iteration", fontsize=15)
    plt.ylabel("Normalized Error Rate", fontsize=15)
    plt.savefig(os.path.join(result_dir, "normalized_training_curve.jpg"))
================================================
FILE: tools/run_policy.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Script to run saved policy output from user.
Author
------
Jeff Mahler
"""
import argparse
import os
import random
import time
import numpy as np
from autolab_core import YamlConfig, Logger
from visualization import Visualizer2D as vis2d
from gqcnn import RgbdImageState, ParallelJawGrasp
from gqcnn import CrossEntropyRobustGraspingPolicy
# Set up logger.
logger = Logger.get_logger("tools/run_policy.py")
if __name__ == "__main__":
    # Parse args.
    parser = argparse.ArgumentParser(
        description=("Run a saved test case through a GQ-CNN policy. For"
                     " debugging purposes only."))
    parser.add_argument("test_case_path",
                        type=str,
                        default=None,
                        help="path to test case")
    parser.add_argument("--config_filename",
                        type=str,
                        default="cfg/tools/run_policy.yaml",
                        help="path to configuration file to use")
    parser.add_argument("--output_dir",
                        type=str,
                        default=None,
                        help="directory to store output")
    parser.add_argument("--seed", type=int, default=None, help="random seed")
    args = parser.parse_args()
    test_case_path = args.test_case_path
    config_filename = args.config_filename
    output_dir = args.output_dir
    seed = args.seed

    # Make output dir. Use `makedirs` so a nested output path (e.g.
    # `results/run_0`) is created in full instead of `os.mkdir` failing
    # with a missing intermediate directory.
    if output_dir is not None and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Make relative paths absolute (config path is resolved relative to the
    # repository root, i.e. the parent of this script's directory).
    if not os.path.isabs(config_filename):
        config_filename = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..", config_filename)

    # Set random seed for reproducible policy sampling.
    if seed is not None:
        np.random.seed(seed)
        random.seed(seed)

    # Read config.
    config = YamlConfig(config_filename)
    policy_config = config["policy"]

    # Load the saved test case: an `RgbdImageState` plus the originally
    # planned `ParallelJawGrasp` action for side-by-side comparison.
    state_path = os.path.join(test_case_path, "state")
    action_path = os.path.join(test_case_path, "action")
    state = RgbdImageState.load(state_path)
    original_action = ParallelJawGrasp.load(action_path)

    # Init policy.
    policy = CrossEntropyRobustGraspingPolicy(policy_config)

    # Optionally visualize the input images (2 panels without a segmask,
    # 3 panels with one).
    if policy_config["vis"]["input_images"]:
        vis2d.figure()
        if state.segmask is None:
            vis2d.subplot(1, 2, 1)
            vis2d.imshow(state.rgbd_im.color)
            vis2d.title("COLOR")
            vis2d.subplot(1, 2, 2)
            vis2d.imshow(state.rgbd_im.depth,
                         vmin=policy_config["vis"]["vmin"],
                         vmax=policy_config["vis"]["vmax"])
            vis2d.title("DEPTH")
        else:
            vis2d.subplot(1, 3, 1)
            vis2d.imshow(state.rgbd_im.color)
            vis2d.title("COLOR")
            vis2d.subplot(1, 3, 2)
            vis2d.imshow(state.rgbd_im.depth,
                         vmin=policy_config["vis"]["vmin"],
                         vmax=policy_config["vis"]["vmax"])
            vis2d.title("DEPTH")
            vis2d.subplot(1, 3, 3)
            vis2d.imshow(state.segmask)
            vis2d.title("SEGMASK")
        # `vis2d.show(None)` displays on screen; a filename saves instead.
        filename = None
        if output_dir is not None:
            filename = os.path.join(output_dir, "input_images.png")
        vis2d.show(filename)

    # Query policy.
    policy_start = time.time()
    action = policy(state)
    logger.info("Planning took %.3f sec" % (time.time() - policy_start))

    # Vis final grasp: original (saved) action on the left, newly planned
    # action on the right, both over the depth image.
    if policy_config["vis"]["final_grasp"]:
        vis2d.figure(size=(10, 10))
        vis2d.subplot(1, 2, 1)
        vis2d.imshow(state.rgbd_im.depth,
                     vmin=policy_config["vis"]["vmin"],
                     vmax=policy_config["vis"]["vmax"])
        vis2d.grasp(original_action.grasp,
                    scale=policy_config["vis"]["grasp_scale"],
                    show_center=False,
                    show_axis=True,
                    color="r")
        vis2d.title("Original (Q=%.3f) (Z=%.3f)" %
                    (original_action.q_value, original_action.grasp.depth))
        vis2d.subplot(1, 2, 2)
        vis2d.imshow(state.rgbd_im.depth,
                     vmin=policy_config["vis"]["vmin"],
                     vmax=policy_config["vis"]["vmax"])
        vis2d.grasp(action.grasp,
                    scale=policy_config["vis"]["grasp_scale"],
                    show_center=False,
                    show_axis=True,
                    color="r")
        vis2d.title("New (Q=%.3f) (Z=%.3f)" %
                    (action.q_value, action.grasp.depth))
        filename = None
        if output_dir is not None:
            filename = os.path.join(output_dir, "planned_grasp.png")
        vis2d.show(filename)
================================================
FILE: tools/train.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
Script for training a Grasp Quality Convolutional Neural Network (GQ-CNN).
Author
------
Vishal Satish & Jeff Mahler
"""
import argparse
import os
import time
from autolab_core import YamlConfig, Logger
import autolab_core.utils as utils
from gqcnn import get_gqcnn_model, get_gqcnn_trainer, utils as gqcnn_utils
# Setup logger.
logger = Logger.get_logger("tools/train.py")
def parse_bool_arg(value):
    """Parse a command-line string into a ``bool``.

    ``argparse`` with ``type=bool`` treats ANY non-empty string — including
    ``"False"`` — as ``True``, so ``--save_datetime False`` previously
    enabled the option. This converter interprets the usual spellings
    explicitly while keeping the ``--save_datetime True/False`` CLI syntax.

    Parameters
    ----------
    value : str or bool
        Raw command-line token (or an already-parsed bool, e.g. the default).

    Returns
    -------
    bool
        The parsed boolean value.

    Raises
    ------
    argparse.ArgumentTypeError
        If `value` is not a recognized boolean spelling.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ("true", "t", "yes", "y", "1"):
        return True
    if lowered in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError(
        "invalid boolean value: '%s'" % value)


if __name__ == "__main__":
    # Parse args.
    parser = argparse.ArgumentParser(
        description=("Train a Grasp Quality Convolutional Neural Network with"
                     " TensorFlow"))
    parser.add_argument(
        "dataset_dir",
        type=str,
        default=None,
        help="path to the dataset to use for training and validation")
    parser.add_argument("--split_name",
                        type=str,
                        default="image_wise",
                        help="name of the split to train on")
    parser.add_argument("--output_dir",
                        type=str,
                        default=None,
                        help="path to store the model")
    parser.add_argument("--tensorboard_port",
                        type=int,
                        default=None,
                        help="port to launch tensorboard on")
    parser.add_argument("--seed",
                        type=int,
                        default=None,
                        help="random seed for training")
    parser.add_argument("--config_filename",
                        type=str,
                        default=None,
                        help="path to the configuration file to use")
    parser.add_argument("--name",
                        type=str,
                        default=None,
                        help="name for the trained model")
    # NOTE: `type=bool` would parse "False" as True; use an explicit
    # converter instead (see `parse_bool_arg`).
    parser.add_argument(
        "--save_datetime",
        type=parse_bool_arg,
        default=False,
        help=("whether or not to save a model with the date and time of"
              " training"))
    parser.add_argument("--backend",
                        type=str,
                        default="tf",
                        help="the deep learning framework to use")
    args = parser.parse_args()
    dataset_dir = args.dataset_dir
    split_name = args.split_name
    output_dir = args.output_dir
    tensorboard_port = args.tensorboard_port
    seed = args.seed
    config_filename = args.config_filename
    name = args.name
    save_datetime = args.save_datetime
    backend = args.backend

    # Set default output dir (``<repo_root>/models``).
    if output_dir is None:
        output_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                  "../models")

    # Set default config filename (``<repo_root>/cfg/train.yaml``).
    if config_filename is None:
        config_filename = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..",
            "cfg/train.yaml")

    # Turn relative paths absolute (relative to the caller's CWD).
    if not os.path.isabs(dataset_dir):
        dataset_dir = os.path.join(os.getcwd(), dataset_dir)
    if not os.path.isabs(output_dir):
        output_dir = os.path.join(os.getcwd(), output_dir)
    if not os.path.isabs(config_filename):
        config_filename = os.path.join(os.getcwd(), config_filename)

    # Create output dir if necessary.
    utils.mkdir_safe(output_dir)

    # Open train config; CLI overrides take precedence over the YAML values.
    train_config = YamlConfig(config_filename)
    if seed is not None:
        train_config["seed"] = seed
        train_config["gqcnn"]["seed"] = seed
    if tensorboard_port is not None:
        train_config["tensorboard_port"] = tensorboard_port
    gqcnn_params = train_config["gqcnn"]

    # Create a unique output folder based on the date and time.
    if save_datetime:
        # Create output dir.
        unique_name = time.strftime("%Y%m%d-%H%M%S")
        output_dir = os.path.join(output_dir, unique_name)
        utils.mkdir_safe(output_dir)

    # Set visible devices.
    if "gpu_list" in train_config:
        gqcnn_utils.set_cuda_visible_devices(train_config["gpu_list"])

    # Train the network.
    start_time = time.time()
    gqcnn = get_gqcnn_model(backend)(gqcnn_params)
    trainer = get_gqcnn_trainer(backend)(gqcnn,
                                         dataset_dir,
                                         split_name,
                                         output_dir,
                                         train_config,
                                         name=name)
    trainer.train()
    logger.info("Total Training Time: " +
                str(utils.get_elapsed_time(time.time() - start_time)))