Repository: BerkeleyAutomation/gqcnn Branch: master Commit: 499a609fe9df Files: 170 Total size: 754.8 KB Directory structure: gitextract_i8u5tpyn/ ├── .flake8 ├── .github/ │ └── ISSUE_TEMPLATE/ │ ├── bug-performance-issue--custom-images-.md │ ├── bug-performance-issue--physical-robot-.md │ ├── bug-performance-issue--replication-.md │ ├── installation-issue.md │ └── other-issue.md ├── .gitignore ├── .style.yapf ├── .travis.yml ├── CMakeLists.txt ├── LICENSE ├── README.md ├── cfg/ │ ├── examples/ │ │ ├── antipodal_grasp_sampling.yaml │ │ ├── fc_gqcnn_pj.yaml │ │ ├── fc_gqcnn_suction.yaml │ │ ├── gqcnn_pj.yaml │ │ ├── gqcnn_suction.yaml │ │ ├── replication/ │ │ │ ├── dex-net_2.0.yaml │ │ │ ├── dex-net_2.1.yaml │ │ │ ├── dex-net_3.0.yaml │ │ │ ├── dex-net_4.0_fc_pj.yaml │ │ │ ├── dex-net_4.0_fc_suction.yaml │ │ │ ├── dex-net_4.0_pj.yaml │ │ │ └── dex-net_4.0_suction.yaml │ │ └── ros/ │ │ ├── fc_gqcnn_pj.yaml │ │ ├── fc_gqcnn_suction.yaml │ │ ├── gqcnn_pj.yaml │ │ └── gqcnn_suction.yaml │ ├── finetune.yaml │ ├── finetune_dex-net_4.0_pj.yaml │ ├── finetune_dex-net_4.0_suction.yaml │ ├── finetune_example_pj.yaml │ ├── finetune_example_suction.yaml │ ├── hyperparam_search/ │ │ ├── train_dex-net_4.0_fc_suction_hyperparam_search.yaml │ │ └── train_hyperparam_search.yaml │ ├── tools/ │ │ ├── analyze_gqcnn_performance.yaml │ │ └── run_policy.yaml │ ├── train.yaml │ ├── train_dex-net_2.0.yaml │ ├── train_dex-net_3.0.yaml │ ├── train_dex-net_4.0_fc_pj.yaml │ ├── train_dex-net_4.0_fc_suction.yaml │ ├── train_dex-net_4.0_pj.yaml │ ├── train_dex-net_4.0_suction.yaml │ ├── train_example_pj.yaml │ ├── train_example_suction.yaml │ └── train_fc.yaml ├── ci/ │ └── travis/ │ └── format.sh ├── data/ │ ├── calib/ │ │ ├── phoxi/ │ │ │ ├── phoxi.intr │ │ │ └── phoxi_to_world.tf │ │ └── primesense/ │ │ ├── primesense.intr │ │ └── primesense.tf │ └── examples/ │ ├── clutter/ │ │ ├── phoxi/ │ │ │ ├── dex-net_4.0/ │ │ │ │ ├── depth_0.npy │ │ │ │ ├── depth_1.npy │ │ │ │ ├── depth_2.npy │ │ │ │ 
├── depth_3.npy │ │ │ │ └── depth_4.npy │ │ │ └── fcgqcnn/ │ │ │ ├── depth_0.npy │ │ │ ├── depth_1.npy │ │ │ ├── depth_2.npy │ │ │ ├── depth_3.npy │ │ │ └── depth_4.npy │ │ └── primesense/ │ │ ├── depth_0.npy │ │ ├── depth_1.npy │ │ ├── depth_2.npy │ │ ├── depth_3.npy │ │ └── depth_4.npy │ └── single_object/ │ └── primesense/ │ ├── depth_0.npy │ ├── depth_1.npy │ ├── depth_2.npy │ ├── depth_3.npy │ ├── depth_4.npy │ ├── depth_5.npy │ ├── depth_6.npy │ ├── depth_7.npy │ ├── depth_8.npy │ └── depth_9.npy ├── docker/ │ ├── cpu/ │ │ └── Dockerfile │ └── gpu/ │ └── Dockerfile ├── docs/ │ ├── Makefile │ ├── gh_deploy.sh │ ├── make.bat │ └── source/ │ ├── api/ │ │ ├── analysis.rst │ │ ├── gqcnn.rst │ │ ├── policies.rst │ │ └── training.rst │ ├── benchmarks/ │ │ └── benchmarks.rst │ ├── conf.py │ ├── index.rst │ ├── info/ │ │ └── info.rst │ ├── install/ │ │ └── install.rst │ ├── license/ │ │ └── license.rst │ ├── replication/ │ │ └── replication.rst │ └── tutorials/ │ ├── analysis.rst │ ├── planning.rst │ ├── training.rst │ └── tutorial.rst ├── examples/ │ ├── antipodal_grasp_sampling.py │ ├── policy.py │ ├── policy_ros.py │ └── policy_with_image_proc.py ├── gqcnn/ │ ├── __init__.py │ ├── analysis/ │ │ ├── __init__.py │ │ └── analyzer.py │ ├── grasping/ │ │ ├── __init__.py │ │ ├── actions.py │ │ ├── constraint_fn.py │ │ ├── grasp.py │ │ ├── grasp_quality_function.py │ │ ├── image_grasp_sampler.py │ │ └── policy/ │ │ ├── __init__.py │ │ ├── enums.py │ │ ├── fc_policy.py │ │ └── policy.py │ ├── model/ │ │ ├── __init__.py │ │ └── tf/ │ │ ├── __init__.py │ │ ├── fc_network_tf.py │ │ └── network_tf.py │ ├── search/ │ │ ├── __init__.py │ │ ├── enums.py │ │ ├── resource_manager.py │ │ ├── search.py │ │ ├── trial.py │ │ └── utils.py │ ├── training/ │ │ ├── __init__.py │ │ └── tf/ │ │ ├── __init__.py │ │ └── trainer_tf.py │ ├── utils/ │ │ ├── __init__.py │ │ ├── enums.py │ │ ├── policy_exceptions.py │ │ ├── train_stats_logger.py │ │ └── utils.py │ └── version.py ├── launch/ │ └── 
grasp_planning_service.launch ├── msg/ │ ├── Action.msg │ ├── BoundingBox.msg │ ├── GQCNNGrasp.msg │ └── Observation.msg ├── package.xml ├── post-checkout ├── requirements/ │ ├── cpu_requirements.txt │ ├── docs_requirements.txt │ └── gpu_requirements.txt ├── ros_nodes/ │ └── grasp_planner_node.py ├── scripts/ │ ├── docker/ │ │ └── build-docker.sh │ ├── policies/ │ │ ├── run_all_dex-net_2.0_examples.sh │ │ ├── run_all_dex-net_2.1_examples.sh │ │ ├── run_all_dex-net_3.0_examples.sh │ │ ├── run_all_dex-net_4.0_fc_pj_examples.sh │ │ ├── run_all_dex-net_4.0_fc_suction_examples.sh │ │ ├── run_all_dex-net_4.0_pj_examples.sh │ │ └── run_all_dex-net_4.0_suction_examples.sh │ └── training/ │ ├── train_dex-net_2.0.sh │ ├── train_dex-net_2.1.sh │ ├── train_dex-net_3.0.sh │ ├── train_dex-net_4.0_fc_pj.sh │ ├── train_dex-net_4.0_fc_suction.sh │ ├── train_dex-net_4.0_pj.sh │ └── train_dex-net_4.0_suction.sh ├── setup.py ├── srv/ │ ├── GQCNNGraspPlanner.srv │ ├── GQCNNGraspPlannerBoundingBox.srv │ ├── GQCNNGraspPlannerFull.srv │ └── GQCNNGraspPlannerSegmask.srv └── tools/ ├── analyze_gqcnn_performance.py ├── finetune.py ├── hyperparam_search.py ├── plot_training_losses.py ├── run_policy.py └── train.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .flake8 ================================================ [flake8] ignore = C408, E121, E123, E126, E226, E24, E704, W503, W504, W605 exclude = docs/source/conf.py inline-quotes = " no-avoid-escape = 1 ================================================ FILE: .github/ISSUE_TEMPLATE/bug-performance-issue--custom-images-.md ================================================ --- name: Bug/Performance Issue [Custom Images] about: You are testing your own images in software. 
title: 'Issue: Bug/Performance Issue [Custom Images]' labels: '' assignees: '' --- **System information** - OS Platform and Distribution (e.g., Linux Ubuntu 16.04): - Python version: - Installed using pip or ROS: - Camera: - GPU model (if applicable): **Describe what you are trying to do** **Describe current behavior** **Describe the expected behavior** **Describe the input images** Provide details such as original captured resolution, pre-processing, final resolution, etc. **Describe the physical camera setup** Provide details such as the distance from camera to workspace, orientation, etc. Attach a picture if possible. **Other info / logs** Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached. ================================================ FILE: .github/ISSUE_TEMPLATE/bug-performance-issue--physical-robot-.md ================================================ --- name: Bug/Performance Issue [Physical Robot] about: You are running on a physical robot. title: 'Issue: Bug/Performance Issue [Physical Robot]' labels: '' assignees: '' --- **System information** - OS Platform and Distribution (e.g., Linux Ubuntu 16.04): - Python version: - Installed using pip or ROS: - Camera: - Gripper: - Robot: - GPU model (if applicable): **Describe what you are trying to do** Provide details such as what angle you are attempting grasps at relative to the workspace. **Describe current behavior** **Describe the expected behavior** **Describe the input images** Provide details such as original captured resolution, pre-processing, final resolution, etc. **Describe the physical camera setup** Provide details such as the distance from camera to workspace, orientation, etc. Attach a picture if possible. **Describe the physical robot setup** Attach a picture if possible.
**Other info / logs** Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached. ================================================ FILE: .github/ISSUE_TEMPLATE/bug-performance-issue--replication-.md ================================================ --- name: Bug/Performance Issue [Replication] about: You are replicating training or the policy with the provided images/datasets/models. title: 'Issue: Bug/Performance Issue [Replication]' labels: '' assignees: '' --- **System information** - OS Platform and Distribution (e.g., Linux Ubuntu 16.04): - Python version: - Installed using pip or ROS: - GPU model (if applicable): **Describe the result you are trying to replicate** If you can, provide a link to the section in the [documentation](https://berkeleyautomation.github.io/gqcnn/index.html). **Provide the exact sequence of commands / steps that you executed to replicate this result** **Describe the unexpected behavior** **Other info / logs** Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached. ================================================ FILE: .github/ISSUE_TEMPLATE/installation-issue.md ================================================ --- name: Installation Issue about: You are experiencing installation issues. title: 'Issue: Installation Issue' labels: '' assignees: '' --- **System information** - OS Platform and Distribution (e.g., Linux Ubuntu 16.04): - Python version: - Installed using pip or ROS: **Describe the problem** **Provide the exact sequence of commands / steps that you executed before running into the problem** **Any other info / logs** Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. 
Large logs and files should be attached. ================================================ FILE: .github/ISSUE_TEMPLATE/other-issue.md ================================================ --- name: Other Issue about: Your issue doesn't fall into the above categories. title: 'Issue: Other Issue' labels: '' assignees: '' --- **System information** - OS Platform and Distribution (e.g., Linux Ubuntu 16.04): - Python version: - Installed using pip or ROS: **What is the problem** Provide as much detail as possible and steps to replicate it if applicable. **Other info / logs** Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached. ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ data/training/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *,cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # PyBuilder target/ # IPython Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # dotenv .env # virtualenv venv/ ENV/ # Spyder project settings .spyderproject # Rope project settings .ropeproject # Temp files *~ .#* #* ================================================ FILE: .style.yapf ================================================ [style] based_on_style=pep8 allow_split_before_dict_value=False join_multiple_lines=False ================================================ FILE: .travis.yml ================================================ # Copyright ©2017. The Regents of the University of California (Regents). # All Rights Reserved. Permission to use, copy, modify, and distribute this # software and its documentation for educational, research, and not-for-profit # purposes, without fee and without a signed licensing agreement, is hereby # granted, provided that the above copyright notice, this paragraph and the # following two paragraphs appear in all copies, modifications, and # distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 # Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, # otl@berkeley.edu, # http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. # IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF # THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED # HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE # MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. language: python matrix: - name: "Python 3.5 on Xenial Linux" python: 3.5 dist: xenial - name: "Python 3.6 on Xenial Linux" python: 3.6 dist: xenial - name: "Python 3.7 on Xenial Linux" python: 3.7 dist: xenial - name: "Linter" python: 3.7 dist: xenial before_install: [] install: [] script: - pip install yapf==0.27.0 # Must use specific version. - pip install flake8 flake8-comprehensions flake8-quotes==2.0.0 - ./ci/travis/format.sh --all # Should exit with 0 for no diffs. - flake8 before_install: - sudo apt-get update - sudo apt-get install -y curl g++ make - sudo apt-get install -y python-opengl # For GLU. - pushd ~ - curl -L http://download.osgeo.org/libspatialindex/spatialindex-src-1.8.5.tar.gz | tar xz - cd spatialindex-src-1.8.5 - ./configure - make - sudo make install - sudo ldconfig - popd - pip install -U setuptools # Required for easy_install to find right # skimage version for Python 3.5. install: - pip install . script: - python -c "import gqcnn" notifications: email: false ================================================ FILE: CMakeLists.txt ================================================ # Copyright ©2017. The Regents of the University of California (Regents). # All Rights Reserved. Permission to use, copy, modify, and distribute this # software and its documentation for educational, research, and not-for-profit # purposes, without fee and without a signed licensing agreement, is hereby # granted, provided that the above copyright notice, this paragraph and the # following two paragraphs appear in all copies, modifications, and # distributions. 
Contact The Office of Technology Licensing, UC Berkeley, 2150 # Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, # otl@berkeley.edu, # http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. # IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF # THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED # HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE # MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. cmake_minimum_required(VERSION 2.8.3) project(gqcnn) ## Find catkin macros and libraries ## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) ## is used, also find other catkin packages find_package(catkin REQUIRED COMPONENTS rospy message_generation geometry_msgs sensor_msgs ) ## System dependencies are found with CMake's conventions # find_package(Boost REQUIRED COMPONENTS system) ## Uncomment this if the package has a setup.py. This macro ensures ## modules and global scripts declared therein get installed ## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html catkin_python_setup() ################################################ ## Declare ROS messages, services and actions ## ################################################ ## To declare and build messages, services or actions from within this ## package, follow these steps: ## * Let MSG_DEP_SET be the set of packages whose message types you use in ## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...). 
## * In the file package.xml:
##   * add a build_depend tag for "message_generation"
##   * add a build_depend and a run_depend tag for each package in MSG_DEP_SET
##   * If MSG_DEP_SET isn't empty the following dependency has been pulled in
##     but can be declared for certainty nonetheless:
##     * add a run_depend tag for "message_runtime"
## * In this file (CMakeLists.txt):
##   * add "message_generation" and every package in MSG_DEP_SET to
##     find_package(catkin REQUIRED COMPONENTS ...)
##   * add "message_runtime" and every package in MSG_DEP_SET to
##     catkin_package(CATKIN_DEPENDS ...)
##   * uncomment the add_*_files sections below as needed
##     and list every .msg/.srv/.action file to be processed
##   * uncomment the generate_messages entry below
##   * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...)

## Generate messages in the 'msg' folder
add_message_files(
  FILES
  GQCNNGrasp.msg
  BoundingBox.msg
  Action.msg
  Observation.msg
)

## Generate services in the 'srv' folder
## NOTE(fix): the srv/ directory ships four service definitions, but
## GQCNNGraspPlannerFull.srv was previously not registered here, so catkin
## never generated its service bindings. It is now included in the list.
add_service_files(
  FILES
  GQCNNGraspPlanner.srv
  GQCNNGraspPlannerBoundingBox.srv
  GQCNNGraspPlannerFull.srv
  GQCNNGraspPlannerSegmask.srv
)

## Generate actions in the 'action' folder
# add_action_files(
#   FILES
#   Action1.action
#   Action2.action
# )

## Generate added messages and services with any dependencies listed here
generate_messages(
  DEPENDENCIES
  geometry_msgs
  sensor_msgs
)

################################################
## Declare ROS dynamic reconfigure parameters ##
################################################

## To declare and build dynamic reconfigure parameters within this
## package, follow these steps:
## * In the file package.xml:
##   * add a build_depend and a run_depend tag for "dynamic_reconfigure"
## * In this file (CMakeLists.txt):
##   * add "dynamic_reconfigure" to
##     find_package(catkin REQUIRED COMPONENTS ...)
## * uncomment the "generate_dynamic_reconfigure_options" section below ## and list every .cfg file to be processed ## Generate dynamic reconfigure parameters in the 'cfg' folder # generate_dynamic_reconfigure_options( # cfg/DynReconf1.cfg # cfg/DynReconf2.cfg # ) ################################### ## catkin specific configuration ## ################################### ## The catkin_package macro generates cmake config files for your package ## Declare things to be passed to dependent projects ## INCLUDE_DIRS: uncomment this if you package contains header files ## LIBRARIES: libraries you create in this project that dependent projects also need ## CATKIN_DEPENDS: catkin_packages dependent projects also need ## DEPENDS: system dependencies of this project that dependent projects also need catkin_package( # INCLUDE_DIRS include # LIBRARIES yumipy # CATKIN_DEPENDS rospy # DEPENDS system_lib CATKIN_DEPENDS message_runtime ) ########### ## Build ## ########### ## Specify additional locations of header files ## Your package locations should be listed before other locations # include_directories(include) include_directories( ${catkin_INCLUDE_DIRS} ) ## Declare a C++ library # add_library(yumipy # src/${PROJECT_NAME}/yumipy.cpp # ) ## Add cmake target dependencies of the library ## as an example, code may need to be generated before libraries ## either from message generation or dynamic reconfigure # add_dependencies(yumipy ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) ## Declare a C++ executable # add_executable(yumipy_node src/yumipy_node.cpp) ## Add cmake target dependencies of the executable ## same as for the library above #add_dependencies(gqcnn gqcnn_generate_messages_cpp) ## Specify libraries to link a library or executable target against # target_link_libraries(yumipy_node # ${catkin_LIBRARIES} # ) ############# ## Install ## ############# # all install targets should use catkin DESTINATION variables # See 
http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html ## Mark executable scripts (Python etc.) for installation ## in contrast to setup.py, you can choose the destination # install(PROGRAMS # scripts/my_python_script # DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} # ) ## Mark executables and/or libraries for installation # install(TARGETS yumipy yumipy_node # ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} # LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} # RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} # ) ## Mark cpp header files for installation # install(DIRECTORY include/${PROJECT_NAME}/ # DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} # FILES_MATCHING PATTERN "*.h" # PATTERN ".svn" EXCLUDE # ) ## Mark other files for installation (e.g. launch and bag files, etc.) # install(FILES # # myfile1 # # myfile2 # DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} # ) ############# ## Testing ## ############# ## Add gtest based cpp test target and link libraries # catkin_add_gtest(${PROJECT_NAME}-test test/test_yumipy.cpp) # if(TARGET ${PROJECT_NAME}-test) # target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME}) # endif() ## Add folders to be run by python nosetests # catkin_add_nosetests(test) ================================================ FILE: LICENSE ================================================ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. 
Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. ================================================ FILE: README.md ================================================ ## Note: Python 2.x support has officially been dropped. # Berkeley AUTOLAB's GQCNN Package

Build Status Release Software License Python 3 Versions

## Package Overview The gqcnn Python package is for training and analysis of Grasp Quality Convolutional Neural Networks (GQ-CNNs). It is part of the ongoing [Dexterity-Network (Dex-Net)](https://berkeleyautomation.github.io/dex-net/) project created and maintained by the [AUTOLAB](https://autolab.berkeley.edu) at UC Berkeley. ## Installation and Usage Please see the [docs](https://berkeleyautomation.github.io/gqcnn/) for installation and usage instructions. ## Citation If you use any part of this code in a publication, please cite [the appropriate Dex-Net publication](https://berkeleyautomation.github.io/gqcnn/index.html#academic-use). ================================================ FILE: cfg/examples/antipodal_grasp_sampling.yaml ================================================ # policy params policy: # general params deterministic: 1 gripper_width: 0.05 # sampling params sampling: # type type: antipodal_depth # antipodality friction_coef: 0.5 depth_grad_thresh: 0.0025 depth_grad_gaussian_sigma: 1.0 downsample_rate: 4 max_rejection_samples: 4000 # distance max_dist_from_center: 1000 min_dist_from_boundary: 45 min_grasp_dist: 10.0 angle_dist_weight: 5.0 # depth sampling depth_sampling_mode: uniform depth_samples_per_grasp: 1 depth_sample_win_height: 1 depth_sample_win_width: 1 min_depth_offset: 0.015 max_depth_offset: 0.05 # metrics metric: type: zero # visualization vis: grasp_sampling: 0 final_grasp: 1 vmin: 0.4 vmax: 1.0 ================================================ FILE: cfg/examples/fc_gqcnn_pj.yaml ================================================ policy: type: fully_conv_pj sampling_method: top_k num_depth_bins: 16 gripper_width: 0.05 gqcnn_stride: 4 gqcnn_recep_h: 96 gqcnn_recep_w: 96 # filtering params max_grasps_to_filter: 50 filter_grasps: 0 # metrics metric: type: fcgqcnn gqcnn_model: /path/to/your/FC-GQ-Image-Wise gqcnn_backend: tf fully_conv_gqcnn_config: im_height: 386 im_width: 516 # visualization policy_vis: scale: 0.5 show_axis: 1 num_samples: 
0 actions_2d: 0 actions_3d: 0 affordance_map: 0 vis: final_grasp: 1 vmin: 0.5 vmax: 0.8 # image pre-processing before input to policy inpaint_rescale_factor: 0.5 ================================================ FILE: cfg/examples/fc_gqcnn_suction.yaml ================================================ policy: type: fully_conv_suction sampling_method: top_k gqcnn_stride: 4 gqcnn_recep_h: 96 gqcnn_recep_w: 96 # filtering params max_grasps_to_filter: 50 filter_grasps: 0 # metrics metric: type: fcgqcnn gqcnn_model: /path/to/your/FC-GQ-Image-Wise-Suction gqcnn_backend: tf fully_conv_gqcnn_config: im_height: 386 im_width: 516 # visualization policy_vis: scale: 0.5 show_axis: 1 num_samples: 0 actions_2d: 0 actions_3d: 0 affordance_map: 0 vis: final_grasp: 1 vmin: 0.5 vmax: 0.8 # image pre-processing before input to policy inpaint_rescale_factor: 0.5 ================================================ FILE: cfg/examples/gqcnn_pj.yaml ================================================ policy: # optimization params num_seed_samples: 128 num_gmm_samples: 64 num_iters: 3 gmm_refit_p: 0.25 gmm_component_frac: 0.4 gmm_reg_covar: 0.01 # general params deterministic: 1 gripper_width: 0.05 # sampling params sampling: # type type: antipodal_depth # antipodality friction_coef: 1.0 depth_grad_thresh: 0.0025 depth_grad_gaussian_sigma: 1.0 downsample_rate: 4 max_rejection_samples: 4000 # distance max_dist_from_center: 160 min_dist_from_boundary: 45 min_grasp_dist: 2.5 angle_dist_weight: 5.0 # depth sampling depth_sampling_mode: uniform depth_samples_per_grasp: 3 depth_sample_win_height: 1 depth_sample_win_width: 1 min_depth_offset: 0.015 max_depth_offset: 0.05 # metrics metric: type: gqcnn gqcnn_model: models/GQCNN-4.0-PJ crop_height: 96 crop_width: 96 # visualization vis: grasp_sampling : 0 tf_images: 0 grasp_candidates: 0 elite_grasps: 0 grasp_ranking: 0 grasp_plan: 0 final_grasp: 1 vmin: 0.5 vmax: 0.8 k: 25 # image proc params inpaint_rescale_factor: 0.5 
================================================ FILE: cfg/examples/gqcnn_suction.yaml ================================================ policy: # optimization params num_seed_samples: 200 num_gmm_samples: 50 num_iters: 3 gmm_refit_p: 0.25 gmm_component_frac: 0.4 gmm_reg_covar: 0.01 # general params deterministic: 1 gripper_width: 0.05 crop_height: 96 crop_width: 96 # sampling params sampling: # type type: suction # params max_suction_dir_optical_axis_angle: 30 delta_theta: 5 delta_phi: 5 sigma_depth: 0.0025 min_suction_dist: 1.0 angle_dist_weight: 5.0 depth_gaussian_sigma: 1.0 num_grasp_samples: 1000 max_dist_from_center: 260 max_num_samples: 10000 # metric params metric: type: gqcnn gqcnn_model: models/GQCNN-4.0-SUCTION crop_height: 96 crop_width: 96 # visualization vis: grasp_sampling : 0 tf_images: 0 plane: 0 grasp_candidates: 0 elite_grasps: 0 grasp_ranking: 0 grasp_plan: 0 grasp_affordance_map: 0 final_grasp: 1 vmin: 0.5 vmax: 0.8 k: 25 # image proc params inpaint_rescale_factor: 0.5 # detection params detection: type: point_cloud_box foreground_mask_tolerance: 60 min_pt: - 0.205 - -0.3 - 0.01 max_pt: - 0.65 - 0.3 - 0.15 selection_policy: min focus: 0 min_contour_area: 250.0 max_contour_area: 1000000.0 min_box_area: 250.0 max_box_area: 1000000.0 box_padding_px: 15 rescale_factor: 1.0 interpolation: bilinear depth_grad_thresh: 10.0 contour_dist_thresh: 2.5 point_cloud_mask_only: 1 image_width: 640 image_height: 480 filter_dim: 1 ================================================ FILE: cfg/examples/replication/dex-net_2.0.yaml ================================================ # policy params policy: # optimization params num_seed_samples: 250 num_gmm_samples: 50 num_iters: 3 gmm_refit_p: 0.25 gmm_component_frac: 0.4 gmm_reg_covar: 0.01 # general params deterministic: 1 gripper_width: 0.05 # sampling params sampling: # type type: antipodal_depth # antipodality friction_coef: 0.8 depth_grad_thresh: 0.0025 depth_grad_gaussian_sigma: 1.0 downsample_rate: 2 
max_rejection_samples: 4000 # distance max_dist_from_center: 160 min_dist_from_boundary: 45 min_grasp_dist: 2.5 angle_dist_weight: 5.0 # depth sampling depth_sampling_mode: uniform depth_samples_per_grasp: 1 depth_sample_win_height: 1 depth_sample_win_width: 1 min_depth_offset: 0.005 max_depth_offset: 0.04 # metrics metric: type: gqcnn gqcnn_model: models/GQCNN-2.0 crop_height: 96 crop_width: 96 # visualization vis: grasp_sampling : 0 tf_images: 0 grasp_candidates: 0 elite_grasps: 0 grasp_ranking: 0 grasp_plan: 0 final_grasp: 1 vmin: 0.0 vmax: 1.0 k: 25 # image proc params inpaint_rescale_factor: 0.5 ================================================ FILE: cfg/examples/replication/dex-net_2.1.yaml ================================================ # policy params policy: # optimization params num_seed_samples: 250 num_gmm_samples: 50 num_iters: 3 gmm_refit_p: 0.25 gmm_component_frac: 0.4 gmm_reg_covar: 0.01 # general params deterministic: 1 gripper_width: 0.05 # sampling params sampling: # type type: antipodal_depth # antipodality friction_coef: 0.8 depth_grad_thresh: 0.0025 depth_grad_gaussian_sigma: 1.0 downsample_rate: 2 max_rejection_samples: 4000 # distance max_dist_from_center: 160 min_dist_from_boundary: 45 min_grasp_dist: 2.5 angle_dist_weight: 5.0 # depth sampling depth_sampling_mode: uniform depth_samples_per_grasp: 1 depth_sample_win_height: 1 depth_sample_win_width: 1 min_depth_offset: 0.005 max_depth_offset: 0.04 # metrics metric: type: gqcnn gqcnn_model: models/GQCNN-2.1 crop_height: 96 crop_width: 96 # visualization vis: grasp_sampling : 0 tf_images: 0 grasp_candidates: 0 elite_grasps: 0 grasp_ranking: 0 grasp_plan: 0 final_grasp: 1 vmin: 0.0 vmax: 1.0 k: 25 # image proc params inpaint_rescale_factor: 0.5 ================================================ FILE: cfg/examples/replication/dex-net_3.0.yaml ================================================ # policy params policy: # optimization params num_seed_samples: 250 num_gmm_samples: 50 num_iters: 3 
gmm_refit_p: 0.25 gmm_component_frac: 0.4 gmm_reg_covar: 0.01 # general params deterministic: 1 max_approach_angle: 80 # sampling params sampling: # type type: suction # params max_suction_dir_optical_axis_angle: 30 delta_theta: 1 delta_phi: 1 mean_depth: 0.0025 sigma_depth: 0.000001 min_suction_dist: 1.0 angle_dist_weight: 5.0 depth_gaussian_sigma: 1.0 max_dist_from_center: 10000000000 max_num_samples: 10000 num_grasp_samples: 500 # metric params metric: type: gqcnn gqcnn_model: models/GQCNN-3.0 crop_height: 96 crop_width: 96 # visualization vis: grasp_sampling : 0 tf_images: 0 plane: 0 grasp_candidates: 0 elite_grasps: 0 grasp_ranking: 0 grasp_plan: 0 final_grasp: 1 vmin: 0.0 vmax: 1.0 k: 25 # image proc params inpaint_rescale_factor: 0.5 ================================================ FILE: cfg/examples/replication/dex-net_4.0_fc_pj.yaml ================================================ policy: type: fully_conv_pj sampling_method: top_k num_depth_bins: 16 gripper_width: 0.05 gqcnn_stride: 4 gqcnn_recep_h: 96 gqcnn_recep_w: 96 # filtering params max_grasps_to_filter: 50 filter_grasps: 0 # metrics metric: type: fcgqcnn gqcnn_model: models/FCGQCNN-4.0-PJ gqcnn_backend: tf fully_conv_gqcnn_config: im_height: 480 im_width: 640 # visualization policy_vis: scale: 0.5 show_axis: 1 num_samples: 0 actions_2d: 0 actions_3d: 0 affordance_map: 0 vis: final_grasp: 1 vmin: 0.0 vmax: 1.0 # image pre-processing before input to policy inpaint_rescale_factor: 0.5 ================================================ FILE: cfg/examples/replication/dex-net_4.0_fc_suction.yaml ================================================ policy: type: fully_conv_suction sampling_method: top_k gqcnn_stride: 4 gqcnn_recep_h: 96 gqcnn_recep_w: 96 # filtering params max_grasps_to_filter: 50 filter_grasps: 0 # metrics metric: type: fcgqcnn gqcnn_model: models/FCGQCNN-4.0-SUCTION gqcnn_backend: tf fully_conv_gqcnn_config: im_height: 480 im_width: 640 # visualization policy_vis: scale: 0.5 show_axis: 1 
num_samples: 0 actions_2d: 0 actions_3d: 0 affordance_map: 0 vis: final_grasp: 1 vmin: 0.0 vmax: 1.0 # image pre-processing before input to policy inpaint_rescale_factor: 0.5 ================================================ FILE: cfg/examples/replication/dex-net_4.0_pj.yaml ================================================ policy: # optimization params num_seed_samples: 128 num_gmm_samples: 64 num_iters: 3 gmm_refit_p: 0.25 gmm_component_frac: 0.4 gmm_reg_covar: 0.01 # general params deterministic: 1 gripper_width: 0.05 # sampling params sampling: # type type: antipodal_depth # antipodality friction_coef: 1.0 depth_grad_thresh: 0.0025 depth_grad_gaussian_sigma: 1.0 downsample_rate: 4 max_rejection_samples: 4000 # distance max_dist_from_center: 160 min_dist_from_boundary: 45 min_grasp_dist: 2.5 angle_dist_weight: 5.0 # depth sampling depth_sampling_mode: uniform depth_samples_per_grasp: 3 depth_sample_win_height: 1 depth_sample_win_width: 1 min_depth_offset: 0.015 max_depth_offset: 0.05 # metrics metric: type: gqcnn gqcnn_model: models/GQCNN-4.0-PJ crop_height: 96 crop_width: 96 # visualization vis: grasp_sampling : 0 tf_images: 0 grasp_candidates: 0 elite_grasps: 0 grasp_ranking: 0 grasp_plan: 0 final_grasp: 1 vmin: 0.0 vmax: 1.0 k: 25 # image proc params inpaint_rescale_factor: 0.5 ================================================ FILE: cfg/examples/replication/dex-net_4.0_suction.yaml ================================================ policy: # optimization params num_seed_samples: 200 num_gmm_samples: 50 num_iters: 3 gmm_refit_p: 0.25 gmm_component_frac: 0.4 gmm_reg_covar: 0.01 # general params deterministic: 1 gripper_width: 0.05 crop_height: 96 crop_width: 96 # sampling params sampling: # type type: suction # params max_suction_dir_optical_axis_angle: 30 delta_theta: 5 delta_phi: 5 sigma_depth: 0.0025 min_suction_dist: 1.0 angle_dist_weight: 5.0 depth_gaussian_sigma: 1.0 num_grasp_samples: 1000 max_dist_from_center: 260 max_num_samples: 10000 # metric params 
metric: type: gqcnn gqcnn_model: models/GQCNN-4.0-SUCTION crop_height: 96 crop_width: 96 # visualization vis: grasp_sampling : 0 tf_images: 0 plane: 0 grasp_candidates: 0 elite_grasps: 0 grasp_ranking: 0 grasp_plan: 0 grasp_affordance_map: 0 final_grasp: 1 vmin: 0.0 vmax: 1.0 k: 25 # image proc params inpaint_rescale_factor: 0.5 # detection params detection: type: point_cloud_box foreground_mask_tolerance: 60 min_pt: - 0.205 - -0.3 - 0.01 max_pt: - 0.65 - 0.3 - 0.15 selection_policy: min focus: 0 min_contour_area: 250.0 max_contour_area: 1000000.0 min_box_area: 250.0 max_box_area: 1000000.0 box_padding_px: 15 rescale_factor: 1.0 interpolation: bilinear depth_grad_thresh: 10.0 contour_dist_thresh: 2.5 point_cloud_mask_only: 1 image_width: 640 image_height: 480 filter_dim: 1 ================================================ FILE: cfg/examples/ros/fc_gqcnn_pj.yaml ================================================ !include ../fc_gqcnn_pj.yaml # ROS-specific visualization vis: rgbd_state: 0 cropped_rgbd_image: 0 color_image: 0 depth_image: 0 segmask: 0 ================================================ FILE: cfg/examples/ros/fc_gqcnn_suction.yaml ================================================ !include ../fc_gqcnn_suction.yaml # ROS-specific visualization vis: rgbd_state: 0 cropped_rgbd_image: 0 color_image: 0 depth_image: 0 segmask: 0 ================================================ FILE: cfg/examples/ros/gqcnn_pj.yaml ================================================ !include ../gqcnn_pj.yaml # ROS-specific visualization vis: rgbd_state: 0 cropped_rgbd_image: 0 color_image: 0 depth_image: 0 segmask: 0 ================================================ FILE: cfg/examples/ros/gqcnn_suction.yaml ================================================ !include ../gqcnn_suction.yaml # ROS-specific visualization vis: rgbd_state: 0 cropped_rgbd_image: 0 color_image: 0 depth_image: 0 segmask: 0 ================================================ FILE: cfg/finetune.yaml 
================================================ # general optimization params train_batch_size: 64 val_batch_size: &val_batch_size 64 # logging params num_epochs: 10 # number of epochs to train for eval_frequency: 2.5 # how often to get validation error (in epochs) save_frequency: 2.5 # how often to save output (in epochs) vis_frequency: 10000 # how often to visualize filters (in epochs) log_frequency: 1 # how often to log output (in steps) # train / val split params train_pct: 0.9 # percentage of the data to use for training vs validation total_pct: 1.0 # percentage of all the files to use eval_total_train_error: 0 # whether or not to evaluate the total training error on each validation max_files_eval: 1000 # the number of validation files to use in each eval # optimization params loss: sparse optimizer: momentum train_l2_regularizer: 0.0005 base_lr: 0.01 decay_step_multiplier: 0.5 # number of times to go through training datapoints before stepping down decay rate (in epochs) decay_rate: 0.95 momentum_rate: 0.9 max_training_examples_per_load: 128 drop_rate: 0.0 max_global_grad_norm: 100000000000 optimize_base_layers: 0 # input params training_mode: classification image_field_name: tf_depth_ims pose_field_name: grasps # label params target_metric_name: grasp_metrics # name of the field to use for the labels metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh) # preproc params num_random_files: 1000 # the number of random files to compute dataset statistics in preprocessing (lower speeds initialization) preproc_log_frequency: 100 # how often to log preprocessing (in steps) # denoising / synthetic data params multiplicative_denoising: 0 gamma_shape: 1000.00 symmetrize: 1 gaussian_process_denoising: 0 gaussian_process_rate: 0.5 gaussian_process_scaling_factor: 4.0 gaussian_process_sigma: 0.005 # tensorboard tensorboard_port: 6006 # debugging params debug: &debug 0 debug_num_files: 10 # speeds up initialization seed: &seed
24098 ### GQCNN CONFIG ### gqcnn: # basic data metrics im_height: 96 im_width: 96 im_channels: 1 debug: *debug seed: *seed # needs to match input data mode that was used for training, determines the pose dimensions for the network gripper_mode: parallel_jaw # prediction batch size, in training this will be overriden by the val_batch_size in the optimizer's config file batch_size: *val_batch_size # architecture architecture: base_model: output_layer: fc4 im_stream: conv1_1: type: conv filt_dim: 9 num_filt: 8 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response conv1_2: type: conv filt_dim: 3 num_filt: 8 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response conv2_1: type: conv filt_dim: 3 num_filt: 8 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv2_2: type: conv filt_dim: 3 num_filt: 8 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response fc3: type: fc out_size: 64 pose_stream: pc1: type: pc out_size: 4 pc2: type: pc out_size: 0 merge_stream: fc4: type: fc_merge out_size: 64 fc5: type: fc out_size: 2 # architecture normalization constants radius: 2 alpha: 2.0e-05 beta: 0.75 bias: 1.0 # leaky relu coefficient relu_coeff: 0.0 ================================================ FILE: cfg/finetune_dex-net_4.0_pj.yaml ================================================ # general optimization params train_batch_size: 64 val_batch_size: &val_batch_size 64 # logging params num_epochs: 50 # number of epochs to train for eval_frequency: 10.0 # how often to get validation error (in epochs) save_frequency: 10.0 # how often to save output (in epochs) vis_frequency: 10000 # how often to visualize filters (in epochs) log_frequency: 1 # how often to log output (in steps) # train / val split params train_pct: 0.9 # percentage of the data to use for training vs validation total_pct: 1.0 # percentage of all the files to use eval_total_train_error: 0 # whether or not to evaluate the total training error on 
each validation max_files_eval: 1000 # the number of validation files to use in each eval # optimization params loss: sparse optimizer: momentum train_l2_regularizer: 0.0005 base_lr: 0.01 decay_step_multiplier: 0.5 # number of times to go through training datapoints before stepping down decay rate (in epochs) decay_rate: 0.95 momentum_rate: 0.9 max_training_examples_per_load: 128 drop_rate: 0.0 max_global_grad_norm: 100000000000 optimize_base_layers: 0 # input params training_mode: classification image_field_name: tf_depth_ims pose_field_name: grasps # label params target_metric_name: grasp_metrics # name of the field to use for the labels metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh) # preproc params num_random_files: 1000 # the number of random files to compute dataset statistics in preprocessing (lower speeds initialization) preproc_log_frequency: 100 # how often to log preprocessing (in steps) # denoising / synthetic data params multiplicative_denoising: 0 gamma_shape: 1000.00 symmetrize: 1 gaussian_process_denoising: 0 gaussian_process_rate: 0.5 gaussian_process_scaling_factor: 4.0 gaussian_process_sigma: 0.005 # tensorboard tensorboard_port: 6006 # debugging params debug: &debug 0 debug_num_files: 10 # speeds up initialization seed: &seed 24098 ### GQCNN CONFIG ### gqcnn: # basic data metrics im_height: 96 im_width: 96 im_channels: 1 debug: *debug seed: *seed # needs to match input data mode that was used for training, determines the pose dimensions for the network gripper_mode: parallel_jaw # method by which to integrate depth into the network input_depth_mode: pose_stream # used for training with multiple angular predictions angular_bins: 0 # prediction batch size, in training this will be overridden by the val_batch_size in the optimizer's config file batch_size: *val_batch_size # architecture architecture: base_model: output_layer: fc4 im_stream: conv1_1: type: conv filt_dim: 9 num_filt: 16 pool_size: 1
pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv1_2: type: conv filt_dim: 5 num_filt: 16 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response conv2_1: type: conv filt_dim: 5 num_filt: 16 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv2_2: type: conv filt_dim: 5 num_filt: 16 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response fc3: type: fc out_size: 128 pose_stream: pc1: type: pc out_size: 16 pc2: type: pc out_size: 0 merge_stream: fc4: type: fc_merge out_size: 128 fc5: type: fc out_size: 2 # architecture normalization constants radius: 2 alpha: 2.0e-05 beta: 0.75 bias: 1.0 # leaky relu coefficient relu_coeff: 0.0 ================================================ FILE: cfg/finetune_dex-net_4.0_suction.yaml ================================================ # general optimization params train_batch_size: 64 val_batch_size: &val_batch_size 64 # logging params num_epochs: 50 # number of epochs to train for eval_frequency: 10.0 # how often to get validation error (in epochs) save_frequency: 10.0 # how often to save output (in epochs) vis_frequency: 10000 # how often to visualize filters (in epochs) log_frequency: 1 # how often to log output (in steps) # train / val split params train_pct: 0.9 # percentage of the data to use for training vs validation total_pct: 1.0 # percentage of all the files to use eval_total_train_error: 0 # whether or not to evaluate the total training error on each validataion max_files_eval: 1000 # the number of validation files to use in each eval # optimization params loss: sparse optimizer: momentum train_l2_regularizer: 0.0005 base_lr: 0.01 decay_step_multiplier: 0.5 # number of times to go through training datapoints before stepping down decay rate (in epochs) decay_rate: 0.95 momentum_rate: 0.9 max_training_examples_per_load: 128 drop_rate: 0.0 max_global_grad_norm: 100000000000 optimize_base_layers: 0 # input params training_mode: classification 
image_field_name: tf_depth_ims pose_field_name: grasps # label params target_metric_name: grasp_metrics # name of the field to use for the labels metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh) # preproc params num_random_files: 1000 # the number of random files to compute dataset statistics in preprocessing (lower speeds initialization) preproc_log_frequency: 100 # how often to log preprocessing (in steps) # denoising / synthetic data params multiplicative_denoising: 0 gamma_shape: 1000.00 symmetrize: 1 gaussian_process_denoising: 0 gaussian_process_rate: 0.5 gaussian_process_scaling_factor: 4.0 gaussian_process_sigma: 0.005 # tensorboard tensorboard_port: 6006 # debugging params debug: &debug 0 debug_num_files: 10 # speeds up initialization seed: &seed 24098 ### GQCNN CONFIG ### gqcnn: # basic data metrics im_height: 96 im_width: 96 im_channels: 1 debug: *debug seed: *seed # needs to match input data mode that was used for training, determines the pose dimensions for the network gripper_mode: suction # method by which to integrate depth into the network input_depth_mode: pose_stream # used for training with multiple angular predictions angular_bins: 0 # prediction batch size, in training this will be overriden by the val_batch_size in the optimizer's config file batch_size: *val_batch_size # architecture architecture: base_model: output_layer: fc3 im_stream: conv1_1: type: conv filt_dim: 9 num_filt: 16 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv1_2: type: conv filt_dim: 5 num_filt: 16 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response conv2_1: type: conv filt_dim: 5 num_filt: 16 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv2_2: type: conv filt_dim: 5 num_filt: 16 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response fc3: type: fc out_size: 128 pose_stream: pc1: type: pc out_size: 16 pc2: type: pc out_size: 0 
merge_stream: fc4: type: fc_merge out_size: 128 fc5: type: fc out_size: 2 # architecture normalization constants radius: 2 alpha: 2.0e-05 beta: 0.75 bias: 1.0 # leaky relu coefficient relu_coeff: 0.0 ================================================ FILE: cfg/finetune_example_pj.yaml ================================================ # general optimization params train_batch_size: 64 val_batch_size: &val_batch_size 64 # logging params num_epochs: 20 # number of epochs to train for eval_frequency: 10.0 # how often to get validation error (in epochs) save_frequency: 10.0 # how often to save output (in epochs) vis_frequency: 10000 # how often to visualize filters (in epochs) log_frequency: 1 # how often to log output (in steps) # train / val split params train_pct: 0.9 # percentage of the data to use for training vs validation total_pct: 1.0 # percentage of all the files to use eval_total_train_error: 0 # whether or not to evaluate the total training error on each validataion max_files_eval: 1000 # the number of validation files to use in each eval # optimization params loss: sparse optimizer: momentum train_l2_regularizer: 0.0005 base_lr: 0.01 decay_step_multiplier: 0.33 # number of times to go through training datapoints before stepping down decay rate (in epochs) decay_rate: 0.95 momentum_rate: 0.9 max_training_examples_per_load: 128 drop_rate: 0.0 max_global_grad_norm: 100000000000 optimize_base_layers: 0 # input params training_mode: classification image_field_name: tf_depth_ims pose_field_name: grasps # label params target_metric_name: grasp_metrics # name of the field to use for the labels metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh) # preproc params num_random_files: 1000 # the number of random files to compute dataset statistics in preprocessing (lower speeds initialization) preproc_log_frequency: 100 # how often to log preprocessing (in steps) # denoising / synthetic data params multiplicative_denoising: 0 
gamma_shape: 1000.00 symmetrize: 1 gaussian_process_denoising: 0 gaussian_process_rate: 0.5 gaussian_process_scaling_factor: 4.0 gaussian_process_sigma: 0.005 # tensorboard tensorboard_port: 6006 # debugging params debug: &debug 0 debug_num_files: 10 # speeds up initialization seed: &seed 24098 ### GQCNN CONFIG ### gqcnn: # basic data metrics im_height: 96 im_width: 96 im_channels: 1 debug: *debug seed: *seed # needs to match input data mode that was used for training, determines the pose dimensions for the network gripper_mode: parallel_jaw # method by which to integrate depth into the network input_depth_mode: pose_stream # used for training with multiple angular predictions angular_bins: 0 # prediction batch size, in training this will be overriden by the val_batch_size in the optimizer's config file batch_size: *val_batch_size # architecture architecture: base_model: output_layer: conv2_2 im_stream: conv1_1: type: conv filt_dim: 9 num_filt: 16 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv1_2: type: conv filt_dim: 5 num_filt: 16 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response conv2_1: type: conv filt_dim: 5 num_filt: 16 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv2_2: type: conv filt_dim: 5 num_filt: 16 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response fc3: type: fc out_size: 64 pose_stream: pc1: type: pc out_size: 16 pc2: type: pc out_size: 0 merge_stream: fc4: type: fc_merge out_size: 64 fc5: type: fc out_size: 2 # architecture normalization constants radius: 2 alpha: 2.0e-05 beta: 0.75 bias: 1.0 # leaky relu coefficient relu_coeff: 0.0 ================================================ FILE: cfg/finetune_example_suction.yaml ================================================ # general optimization params train_batch_size: 64 val_batch_size: &val_batch_size 64 # logging params num_epochs: 20 # number of epochs to train for eval_frequency: 10.0 # how 
often to get validation error (in epochs) save_frequency: 10.0 # how often to save output (in epochs) vis_frequency: 10000 # how often to visualize filters (in epochs) log_frequency: 1 # how often to log output (in steps) # train / val split params train_pct: 0.9 # percentage of the data to use for training vs validation total_pct: 1.0 # percentage of all the files to use eval_total_train_error: 0 # whether or not to evaluate the total training error on each validataion max_files_eval: 1000 # the number of validation files to use in each eval # optimization params loss: sparse optimizer: momentum train_l2_regularizer: 0.0005 base_lr: 0.01 decay_step_multiplier: 0.33 # number of times to go through training datapoints before stepping down decay rate (in epochs) decay_rate: 0.95 momentum_rate: 0.9 max_training_examples_per_load: 128 drop_rate: 0.0 max_global_grad_norm: 100000000000 optimize_base_layers: 0 # input params training_mode: classification image_field_name: tf_depth_ims pose_field_name: grasps # label params target_metric_name: grasp_metrics # name of the field to use for the labels metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh) # preproc params num_random_files: 1000 # the number of random files to compute dataset statistics in preprocessing (lower speeds initialization) preproc_log_frequency: 100 # how often to log preprocessing (in steps) # denoising / synthetic data params multiplicative_denoising: 0 gamma_shape: 1000.00 symmetrize: 1 gaussian_process_denoising: 0 gaussian_process_rate: 0.5 gaussian_process_scaling_factor: 4.0 gaussian_process_sigma: 0.005 # tensorboard tensorboard_port: 6006 # debugging params debug: &debug 0 debug_num_files: 10 # speeds up initialization seed: &seed 24098 ### GQCNN CONFIG ### gqcnn: # basic data metrics im_height: 96 im_width: 96 im_channels: 1 debug: *debug seed: *seed # needs to match input data mode that was used for training, determines the pose dimensions for the 
network gripper_mode: suction # method by which to integrate depth into the network input_depth_mode: pose_stream # used for training with multiple angular predictions angular_bins: 0 # prediction batch size, in training this will be overriden by the val_batch_size in the optimizer's config file batch_size: *val_batch_size # architecture architecture: base_model: output_layer: conv2_2 im_stream: conv1_1: type: conv filt_dim: 9 num_filt: 16 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv1_2: type: conv filt_dim: 5 num_filt: 16 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response conv2_1: type: conv filt_dim: 5 num_filt: 16 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv2_2: type: conv filt_dim: 5 num_filt: 16 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response fc3: type: fc out_size: 64 pose_stream: pc1: type: pc out_size: 16 pc2: type: pc out_size: 0 merge_stream: fc4: type: fc_merge out_size: 64 fc5: type: fc out_size: 2 # architecture normalization constants radius: 2 alpha: 2.0e-05 beta: 0.75 bias: 1.0 # leaky relu coefficient relu_coeff: 0.0 ================================================ FILE: cfg/hyperparam_search/train_dex-net_4.0_fc_suction_hyperparam_search.yaml ================================================ # general optimization params train_batch_size: 64 val_batch_size: &val_batch_size 64 # logging params num_epochs: 50 # number of epochs to train for eval_frequency: 10 # how often to get validation error (in epochs) save_frequency: 10 # how often to save output (in epochs) vis_frequency: 10000 # how often to visualize filters (in epochs) log_frequency: 1 # how often to log output (in steps) # train / val split params train_pct: 0.8 # percentage of the data to use for training vs validation total_pct: 1.0 # percentage of all the files to use eval_total_train_error: 0 # whether or not to evaluate the total training error on each validataion 
max_files_eval: 1000 # the number of validation files to use in each eval # optimization params loss: sparse optimizer: momentum train_l2_regularizer: 0.0005 base_lr: 0.01 decay_step_multiplier: 0.5 # number of times to go through training datapoints before stepping down decay rate (in epochs) decay_rate: 0.95 momentum_rate: 0.9 max_training_examples_per_load: 128 drop_rate: 0.0 max_global_grad_norm: 100000000000 # input params training_mode: classification image_field_name: tf_depth_ims pose_field_name: grasps # label params target_metric_name: grasp_metrics # name of the field to use for the labels metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh) # preproc params num_random_files: 10000 # the number of random files to compute dataset statistics in preprocessing (lower speeds initialization) preproc_log_frequency: 100 # how often to log preprocessing (in steps) # denoising / synthetic data params multiplicative_denoising: 0 gamma_shape: 1000.00 symmetrize: 1 gaussian_process_denoising: 0 gaussian_process_rate: 0.5 gaussian_process_scaling_factor: 4.0 gaussian_process_sigma: 0.005 # tensorboard tensorboard_port: 6006 # debugging params debug: 0 debug_num_files: 1000000000 seed: 24098 ### GQCNN CONFIG ### gqcnn: # basic data metrics im_height: 96 im_width: 96 im_channels: 1 debug: 0 seed: 24098 # needs to match input data mode that was used for training, determines the pose dimensions for the network gripper_mode: suction # method by which to integrate depth into the network input_depth_mode: im_only # used for training with multiple angular predictions angular_bins: 0 # prediction batch size, in training this will be overriden by the val_batch_size in the optimizer's config file batch_size: *val_batch_size # architecture architecture: im_stream: conv1_1: type: conv filt_dim: ['anchor_filt_dim', 6, 9, 9] num_filt: ['anchor_num_filts', 8, 12, 16, 20] pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: 
local_response conv1_2: type: conv filt_dim: ['anchor_filt_dim', 3, 5, 5] num_filt: ['anchor_num_filts', 8, 12, 16, 20] pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response conv2_1: type: conv filt_dim: ['anchor_filt_dim', 3, 5, 3] num_filt: ['anchor_num_filts', 8, 12, 16, 20] pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv2_2: type: conv filt_dim: ['anchor_filt_dim', 3, 5, 3] num_filt: ['anchor_num_filts', 8, 12, 16, 20] pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response fc3: type: fc out_size: ['anchor_fc_out_size', 32, 64, 128] fc4: type: fc out_size: ['anchor_fc_out_size', 32, 64, 128] fc5: type: fc out_size: 2 # architecture normalization constants radius: 2 alpha: 2.0e-05 beta: 0.75 bias: 1.0 # leaky relu coefficient relu_coeff: 0.0 ================================================ FILE: cfg/hyperparam_search/train_hyperparam_search.yaml ================================================ # Example hyper-parameter search config corresponding to cfg/train.yaml that # grid-searches through number of training epochs, base learning rate, and # conv1_1 filter sizes. 
# general optimization params train_batch_size: 64 val_batch_size: &val_batch_size 64 # logging params num_epochs: [25, 50, 75] # number of epochs to train for eval_frequency: 10 # how often to get validation error (in epochs) save_frequency: 10 # how often to save output (in epochs) vis_frequency: 10000 # how often to visualize filters (in epochs) log_frequency: 1 # how often to log output (in steps) # train / val split params train_pct: 0.9 # percentage of the data to use for training vs validation total_pct: 1.0 # percentage of all the files to use eval_total_train_error: 0 # whether or not to evaluate the total training error on each validation max_files_eval: 1000 # the number of validation files to use in each eval # optimization params loss: sparse optimizer: momentum train_l2_regularizer: 0.0005 base_lr: [0.001, 0.01, 0.05, 0.1] decay_step_multiplier: 1.0 # number of times to go through training datapoints before stepping down decay rate (in epochs) decay_rate: 0.95 momentum_rate: 0.9 max_training_examples_per_load: 128 drop_rate: 0.0 max_global_grad_norm: 100000000000 # input params training_mode: classification image_field_name: tf_depth_ims pose_field_name: grasps # label params target_metric_name: grasp_metrics # name of the field to use for the labels metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh) # preproc params num_random_files: 1000 # the number of random files to compute dataset statistics in preprocessing (lower speeds initialization) preproc_log_frequency: 100 # how often to log preprocessing (in steps) # denoising / synthetic data params multiplicative_denoising: 0 gamma_shape: 1000.00 symmetrize: 1 gaussian_process_denoising: 0 gaussian_process_rate: 0.5 gaussian_process_scaling_factor: 4.0 gaussian_process_sigma: 0.005 # tensorboard tensorboard_port: 6006 # debugging params debug: 0 debug_num_files: 1000000000 seed: 24098 ### GQCNN CONFIG ### gqcnn: # basic data metrics im_height: 96
im_width: 96 im_channels: 1 debug: 0 seed: 24098 # needs to match input data mode that was used for training, determines the pose dimensions for the network gripper_mode: parallel_jaw # method by which to integrate depth into the network input_depth_mode: pose_stream # used for training with multiple angular predictions angular_bins: 0 # prediction batch size, in training this will be overriden by the val_batch_size in the optimizer's config file batch_size: *val_batch_size # architecture architecture: im_stream: conv1_1: type: conv filt_dim: [3, 5, 9] num_filt: 8 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response conv1_2: type: conv filt_dim: 3 num_filt: 8 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response conv2_1: type: conv filt_dim: 3 num_filt: 8 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv2_2: type: conv filt_dim: 3 num_filt: 8 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response fc3: type: fc out_size: 64 pose_stream: pc1: type: pc out_size: 4 pc2: type: pc out_size: 0 merge_stream: fc4: type: fc_merge out_size: 64 fc5: type: fc out_size: 2 # architecture normalization constants radius: 2 alpha: 2.0e-05 beta: 0.75 bias: 1.0 # leaky relu coefficient relu_coeff: 0.0 ================================================ FILE: cfg/tools/analyze_gqcnn_performance.yaml ================================================ log_rate: 10 font_size: 15 line_width: 4 dpi: 100 num_bins: 100 num_vis: 10 ================================================ FILE: cfg/tools/run_policy.yaml ================================================ # policy params policy: # optimization params num_seed_samples: 250 num_gmm_samples: 100 num_iters: 3 gmm_refit_p: 0.125 gmm_component_frac: 0.8 gmm_reg_covar: 0.01 max_resamples_per_iteration: 10 # general params deterministic: 0 gripper: yumi_metal_spline gripper_width: 0.05 max_approach_angle: 60 logging_dir: logs/debug # sampling params sampling: # type 
type: antipodal_depth # antipodality friction_coef: 0.8 depth_grad_thresh: 0.002 depth_grad_gaussian_sigma: 0.5 min_num_edge_pixels: 25 downsample_rate: 2 max_rejection_samples: 4000 # distance max_dist_from_center: 100000 min_dist_from_boundary: 45 min_grasp_dist: 1.0 angle_dist_weight: 5.0 # depth sampling depth_samples_per_grasp: 1 depth_sample_win_height: 1 depth_sample_win_width: 1 depth_sampling_mode: uniform min_depth_offset: 0.015 max_depth_offset: 0.04 # metric params metric: type: gqcnn gqcnn_model: /mnt/data/diskstation/models/icra2019/gqcnn_mini_dexnet/ crop_height: 96 crop_width: 96 # filter params filter_collisions: 1 filter_ik: 1 filter_unreachable: 0 max_grasps_filter: 10 collision_free_filter: approach_dist: 0.075 rescale_factor: 0.125 ik_filter: approach_dist: 0.075 traj_len: 0 group_name: left_jaws ik_timeout: 0.1 reachability_filter: unreachable_pose_dir: /mnt/data/diskstation/unreachable_poses/left/ min_rot_dist: 0.1 min_trans_dist: 0.01 # visualization vis: input_images: 0 grasp_sampling : 0 tf_images: 0 grasp_candidates: 0 elite_grasps: 0 grasp_ranking: 0 grasp_plan: 0 seg_point_cloud: 0 filtered_point_cloud: 0 final_grasp: 1 grasp_scale: 1 vmin: 0.6 vmax: 0.9 k: 25 ================================================ FILE: cfg/train.yaml ================================================ # general optimization params train_batch_size: 64 val_batch_size: &val_batch_size 64 # logging params num_epochs: 50 # number of epochs to train for eval_frequency: 10 # how often to get validation error (in epochs) save_frequency: 10 # how often to save output (in epochs) vis_frequency: 10000 # how often to visualize filters (in epochs) log_frequency: 1 # how often to log output (in steps) # train / val split params train_pct: 0.9 # percentage of the data to use for training vs validation total_pct: 1.0 # percentage of all the files to use eval_total_train_error: 0 # whether or not to evaluate the total training error on each validataion max_files_eval: 1000 # 
the number of validation files to use in each eval # optimization params loss: sparse optimizer: momentum train_l2_regularizer: 0.0005 base_lr: 0.01 decay_step_multiplier: 1.0 # number of times to go through training datapoints before stepping down decay rate (in epochs) decay_rate: 0.95 momentum_rate: 0.9 max_training_examples_per_load: 128 drop_rate: 0.0 max_global_grad_norm: 100000000000 # input params training_mode: classification image_field_name: tf_depth_ims pose_field_name: grasps # label params target_metric_name: grasp_metrics # name of the field to use for the labels metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh) # preproc params num_random_files: 1000 # the number of random files to compute dataset statistics in preprocessing (lower speeds initialization) preproc_log_frequency: 100 # how often to log preprocessing (in steps) # denoising / synthetic data params multiplicative_denoising: 0 gamma_shape: 1000.00 symmetrize: 1 gaussian_process_denoising: 0 gaussian_process_rate: 0.5 gaussian_process_scaling_factor: 4.0 gaussian_process_sigma: 0.005 # tensorboard tensorboard_port: 6006 # debugging params debug: &debug 0 debug_num_files: 10 # speeds up initialization seed: &seed 24098 ### GQCNN CONFIG ### gqcnn: # basic data metrics im_height: 96 im_width: 96 im_channels: 1 debug: *debug seed: *seed # needs to match input data mode that was used for training, determines the pose dimensions for the network gripper_mode: parallel_jaw # method by which to integrate depth into the network input_depth_mode: pose_stream # used for training with multiple angular predictions angular_bins: 0 # prediction batch size, in training this will be overriden by the val_batch_size in the optimizer's config file batch_size: *val_batch_size # architecture architecture: im_stream: conv1_1: type: conv filt_dim: 9 num_filt: 8 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response conv1_2: type: conv filt_dim: 3 
num_filt: 8 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response conv2_1: type: conv filt_dim: 3 num_filt: 8 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv2_2: type: conv filt_dim: 3 num_filt: 8 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response fc3: type: fc out_size: 64 pose_stream: pc1: type: pc out_size: 4 pc2: type: pc out_size: 0 merge_stream: fc4: type: fc_merge out_size: 64 fc5: type: fc out_size: 2 # architecture normalization constants radius: 2 alpha: 2.0e-05 beta: 0.75 bias: 1.0 # leaky relu coefficient relu_coeff: 0.0 ================================================ FILE: cfg/train_dex-net_2.0.yaml ================================================ # general optimization params train_batch_size: 64 val_batch_size: &val_batch_size 64 # logging params num_epochs: 50 # number of epochs to train for eval_frequency: 5 # how often to get validation error (in epochs) save_frequency: 5 # how often to save output (in epochs) vis_frequency: 10000 # how often to visualize filters (in epochs) log_frequency: 1 # how often to log output (in steps) # train / val split params train_pct: 0.8 # percentage of the data to use for training vs validation total_pct: 1.0 # percentage of all the files to use eval_total_train_error: 0 # whether or not to evaluate the total training error on each validataion max_files_eval: 1000 # the number of validation files to use in each eval # optimization params loss: sparse optimizer: momentum train_l2_regularizer: 0.0005 base_lr: 0.01 decay_step_multiplier: 0.66 # number of times to go through training datapoints before stepping down decay rate (in epochs) decay_rate: 0.95 momentum_rate: 0.9 max_training_examples_per_load: 128 drop_rate: 0.0 max_global_grad_norm: 100000000000 # input params training_mode: classification image_field_name: depth_ims_tf_table pose_field_name: hand_poses # label params target_metric_name: robust_ferrari_canny # name of the field to use 
for the labels metric_thresh: 0.002 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh) # preproc params num_random_files: 10000 # the number of random files to compute dataset statistics in preprocessing (lower speeds initialization) preproc_log_frequency: 100 # how often to log preprocessing (in steps) # denoising / synthetic data params multiplicative_denoising: 1 gamma_shape: 1000.00 symmetrize: 1 gaussian_process_denoising: 1 gaussian_process_rate: 0.5 gaussian_process_scaling_factor: 4.0 gaussian_process_sigma: 0.005 # tensorboard tensorboard_port: 6006 # debugging params debug: &debug 0 debug_num_files: 10 # speeds up initialization seed: &seed 24098 ### GQCNN CONFIG ### gqcnn: # basic data metrics im_height: 32 im_width: 32 im_channels: 1 debug: *debug seed: *seed # needs to match input data mode that was used for training, determines the pose dimensions for the network gripper_mode: legacy_parallel_jaw # prediction batch size, in training this will be overriden by the val_batch_size in the optimizer's config file batch_size: *val_batch_size # architecture architecture: im_stream: conv1_1: type: conv filt_dim: 7 num_filt: 64 pool_size: 1 pool_stride: 1 pad: SAME norm: 0 norm_type: local_response conv1_2: type: conv filt_dim: 5 num_filt: 64 pool_size: 2 pool_stride: 2 pad: SAME norm: 1 norm_type: local_response conv2_1: type: conv filt_dim: 3 num_filt: 64 pool_size: 1 pool_stride: 1 pad: SAME norm: 0 norm_type: local_response conv2_2: type: conv filt_dim: 3 num_filt: 64 pool_size: 2 pool_stride: 2 pad: SAME norm: 1 norm_type: local_response fc3: type: fc out_size: 1024 pose_stream: pc1: type: pc out_size: 16 pc2: type: pc out_size: 0 merge_stream: fc4: type: fc_merge out_size: 1024 fc5: type: fc out_size: 2 # architecture normalization constants radius: 2 alpha: 2.0e-05 beta: 0.75 bias: 1.0 # leaky relu coefficient relu_coeff: 0.0 ================================================ FILE: cfg/train_dex-net_3.0.yaml 
================================================ # general optimization params train_batch_size: 64 val_batch_size: &val_batch_size 64 # logging params num_epochs: 25 # number of epochs to train for eval_frequency: 5 # how often to get validation error (in epochs) save_frequency: 5 # how often to save output (in epochs) vis_frequency: 10000 # how often to visualize filters (in epochs) log_frequency: 1 # how often to log output (in steps) # train / val split params train_pct: 0.8 # percentage of the data to use for training vs validation total_pct: 1.0 # percentage of all the files to use eval_total_train_error: 0 # whether or not to evaluate the total training error on each validataion max_files_eval: 1000 # the number of validation files to use in each eval # optimization params loss: sparse optimizer: momentum train_l2_regularizer: 0.0005 base_lr: 0.01 decay_step_multiplier: 0.66 # number of times to go through training datapoints before stepping down decay rate (in epochs) decay_rate: 0.95 momentum_rate: 0.9 max_training_examples_per_load: 128 drop_rate: 0.0 max_global_grad_norm: 100000000000 # input params training_mode: classification image_field_name: depth_ims_tf_table pose_field_name: hand_poses # label params target_metric_name: robust_suction_wrench_resistance # name of the field to use for the labels metric_thresh: 0.2 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh) # preproc params num_random_files: 10000 # the number of random files to compute dataset statistics in preprocessing (lower speeds initialization) preproc_log_frequency: 100 # how often to log preprocessing (in steps) # denoising / synthetic data params multiplicative_denoising: 1 gamma_shape: 1000.00 symmetrize: 1 gaussian_process_denoising: 1 gaussian_process_rate: 0.5 gaussian_process_scaling_factor: 4.0 gaussian_process_sigma: 0.005 # tensorboard tensorboard_port: 6006 # debugging params debug: &debug 0 debug_num_files: 10 # speeds up initialization seed: 
&seed 24098 ### GQCNN CONFIG ### gqcnn: # basic data metrics im_height: 32 im_width: 32 im_channels: 1 debug: *debug seed: *seed # needs to match input data mode that was used for training, determines the pose dimensions for the network gripper_mode: legacy_suction # prediction batch size, in training this will be overriden by the val_batch_size in the optimizer's config file batch_size: *val_batch_size # architecture architecture: im_stream: conv1_1: type: conv filt_dim: 7 num_filt: 64 pool_size: 1 pool_stride: 1 pad: SAME norm: 0 norm_type: local_response conv1_2: type: conv filt_dim: 5 num_filt: 64 pool_size: 2 pool_stride: 2 pad: SAME norm: 1 norm_type: local_response conv2_1: type: conv filt_dim: 3 num_filt: 64 pool_size: 1 pool_stride: 1 pad: SAME norm: 0 norm_type: local_response conv2_2: type: conv filt_dim: 3 num_filt: 64 pool_size: 2 pool_stride: 2 pad: SAME norm: 1 norm_type: local_response fc3: type: fc out_size: 1024 pose_stream: pc1: type: pc out_size: 128 pc2: type: pc out_size: 0 merge_stream: fc4: type: fc_merge out_size: 1024 fc5: type: fc out_size: 2 # architecture normalization constants radius: 2 alpha: 2.0e-05 beta: 0.75 bias: 1.0 # leaky relu coefficient relu_coeff: 0.0 ================================================ FILE: cfg/train_dex-net_4.0_fc_pj.yaml ================================================ # general optimization params train_batch_size: 64 val_batch_size: &val_batch_size 64 # logging params num_epochs: 50 # number of epochs to train for eval_frequency: 10 # how often to get validation error (in epochs) save_frequency: 10 # how often to save output (in epochs) vis_frequency: 10000 # how often to visualize filters (in epochs) log_frequency: 1 # how often to log output (in steps) # train / val split params train_pct: 0.8 # percentage of the data to use for training vs validation total_pct: 1.0 # percentage of all the files to use eval_total_train_error: 0 # whether or not to evaluate the total training error on each validataion 
max_files_eval: 1000 # the number of validation files to use in each eval # optimization params loss: sparse optimizer: momentum train_l2_regularizer: 0.0005 base_lr: 0.01 decay_step_multiplier: 0.5 # number of times to go through training datapoints before stepping down decay rate (in epochs) decay_rate: 0.95 momentum_rate: 0.9 max_training_examples_per_load: 128 drop_rate: 0.0 max_global_grad_norm: 100000000000 # input params training_mode: classification image_field_name: tf_depth_ims pose_field_name: grasps # label params target_metric_name: grasp_metrics # name of the field to use for the labels metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh) # preproc params num_random_files: 10000 # the number of random files to compute dataset statistics in preprocessing (lower speeds initialization) preproc_log_frequency: 100 # how often to log preprocessing (in steps) # denoising / synthetic data params multiplicative_denoising: 0 gamma_shape: 1000.00 symmetrize: 0 gaussian_process_denoising: 0 gaussian_process_rate: 0.5 gaussian_process_scaling_factor: 4.0 gaussian_process_sigma: 0.005 # tensorboard tensorboard_port: 6006 # debugging params debug: &debug 0 debug_num_files: 10 # speeds up initialization seed: &seed 24098 ### GQCNN CONFIG ### gqcnn: # basic data metrics im_height: 96 im_width: 96 im_channels: 1 debug: *debug seed: *seed # needs to match input data mode that was used for training, determines the pose dimensions for the network gripper_mode: parallel_jaw # method by which to integrate depth into the network input_depth_mode: im_depth_sub # used for training with multiple angular predictions angular_bins: 16 # prediction batch size, in training this will be overriden by the val_batch_size in the optimizer's config file batch_size: *val_batch_size # architecture architecture: im_stream: conv1_1: type: conv filt_dim: 9 num_filt: 16 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv1_2: 
type: conv filt_dim: 5 num_filt: 16 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response conv2_1: type: conv filt_dim: 5 num_filt: 16 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv2_2: type: conv filt_dim: 5 num_filt: 16 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response fc3: type: fc out_size: 128 fc4: type: fc out_size: 128 fc5: type: fc out_size: 32 # architecture normalization constants radius: 2 alpha: 2.0e-05 beta: 0.75 bias: 1.0 # leaky relu coefficient relu_coeff: 0.0 ================================================ FILE: cfg/train_dex-net_4.0_fc_suction.yaml ================================================ # general optimization params train_batch_size: 64 val_batch_size: &val_batch_size 64 # logging params num_epochs: 50 # number of epochs to train for eval_frequency: 10 # how often to get validation error (in epochs) save_frequency: 10 # how often to save output (in epochs) vis_frequency: 10000 # how often to visualize filters (in epochs) log_frequency: 1 # how often to log output (in steps) # train / val split params train_pct: 0.8 # percentage of the data to use for training vs validation total_pct: 1.0 # percentage of all the files to use eval_total_train_error: 0 # whether or not to evaluate the total training error on each validataion max_files_eval: 1000 # the number of validation files to use in each eval # optimization params loss: sparse optimizer: momentum train_l2_regularizer: 0.0005 base_lr: 0.01 decay_step_multiplier: 0.5 # number of times to go through training datapoints before stepping down decay rate (in epochs) decay_rate: 0.95 momentum_rate: 0.9 max_training_examples_per_load: 128 drop_rate: 0.0 max_global_grad_norm: 100000000000 # input params training_mode: classification image_field_name: tf_depth_ims pose_field_name: grasps # label params target_metric_name: grasp_metrics # name of the field to use for the labels metric_thresh: 0.5 # threshold for positive 
examples (label = 1 if grasp_metric > metric_thresh) # preproc params num_random_files: 10000 # the number of random files to compute dataset statistics in preprocessing (lower speeds initialization) preproc_log_frequency: 100 # how often to log preprocessing (in steps) # denoising / synthetic data params multiplicative_denoising: 0 gamma_shape: 1000.00 symmetrize: 1 gaussian_process_denoising: 0 gaussian_process_rate: 0.5 gaussian_process_scaling_factor: 4.0 gaussian_process_sigma: 0.005 # tensorboard tensorboard_port: 6006 # debugging params debug: &debug 0 debug_num_files: 10 # speeds up initialization seed: &seed 24098 ### GQCNN CONFIG ### gqcnn: # basic data metrics im_height: 96 im_width: 96 im_channels: 1 debug: *debug seed: *seed # needs to match input data mode that was used for training, determines the pose dimensions for the network gripper_mode: suction # method by which to integrate depth into the network input_depth_mode: im_only # used for training with multiple angular predictions angular_bins: 0 # prediction batch size, in training this will be overriden by the val_batch_size in the optimizer's config file batch_size: *val_batch_size # architecture architecture: im_stream: conv1_1: type: conv filt_dim: 9 num_filt: 16 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv1_2: type: conv filt_dim: 5 num_filt: 16 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response conv2_1: type: conv filt_dim: 5 num_filt: 16 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv2_2: type: conv filt_dim: 5 num_filt: 16 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response fc3: type: fc out_size: 128 fc4: type: fc out_size: 128 fc5: type: fc out_size: 2 # architecture normalization constants radius: 2 alpha: 2.0e-05 beta: 0.75 bias: 1.0 # leaky relu coefficient relu_coeff: 0.0 ================================================ FILE: cfg/train_dex-net_4.0_pj.yaml 
================================================ # general optimization params train_batch_size: 64 val_batch_size: &val_batch_size 64 # logging params num_epochs: 50 # number of epochs to train for eval_frequency: 10 # how often to get validation error (in epochs) save_frequency: 10 # how often to save output (in epochs) vis_frequency: 10000 # how often to visualize filters (in epochs) log_frequency: 1 # how often to log output (in steps) # train / val split params train_pct: 0.8 # percentage of the data to use for training vs validation total_pct: 1.0 # percentage of all the files to use eval_total_train_error: 0 # whether or not to evaluate the total training error on each validataion max_files_eval: 1000 # the number of validation files to use in each eval # optimization params loss: sparse optimizer: momentum train_l2_regularizer: 0.0005 base_lr: 0.01 decay_step_multiplier: 0.5 # number of times to go through training datapoints before stepping down decay rate (in epochs) decay_rate: 0.95 momentum_rate: 0.9 max_training_examples_per_load: 128 drop_rate: 0.0 max_global_grad_norm: 100000000000 # input params training_mode: classification image_field_name: tf_depth_ims pose_field_name: grasps # label params target_metric_name: grasp_metrics # name of the field to use for the labels metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh) # preproc params num_random_files: 10000 # the number of random files to compute dataset statistics in preprocessing (lower speeds initialization) preproc_log_frequency: 100 # how often to log preprocessing (in steps) # denoising / synthetic data params multiplicative_denoising: 0 gamma_shape: 1000.00 symmetrize: 1 gaussian_process_denoising: 0 gaussian_process_rate: 0.5 gaussian_process_scaling_factor: 4.0 gaussian_process_sigma: 0.005 # tensorboard tensorboard_port: 6006 # debugging params debug: &debug 0 debug_num_files: 10 # speeds up initialization seed: &seed 24098 ### GQCNN CONFIG 
### gqcnn: # basic data metrics im_height: 96 im_width: 96 im_channels: 1 debug: *debug seed: *seed # needs to match input data mode that was used for training, determines the pose dimensions for the network gripper_mode: parallel_jaw # method by which to integrate depth into the network input_depth_mode: pose_stream # used for training with multiple angular predictions angular_bins: 0 # prediction batch size, in training this will be overriden by the val_batch_size in the optimizer's config file batch_size: *val_batch_size # architecture architecture: im_stream: conv1_1: type: conv filt_dim: 9 num_filt: 16 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv1_2: type: conv filt_dim: 5 num_filt: 16 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response conv2_1: type: conv filt_dim: 5 num_filt: 16 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv2_2: type: conv filt_dim: 5 num_filt: 16 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response fc3: type: fc out_size: 128 pose_stream: pc1: type: pc out_size: 16 pc2: type: pc out_size: 0 merge_stream: fc4: type: fc_merge out_size: 128 fc5: type: fc out_size: 2 # architecture normalization constants radius: 2 alpha: 2.0e-05 beta: 0.75 bias: 1.0 # leaky relu coefficient relu_coeff: 0.0 ================================================ FILE: cfg/train_dex-net_4.0_suction.yaml ================================================ # general optimization params train_batch_size: 64 val_batch_size: &val_batch_size 64 # logging params num_epochs: 50 # number of epochs to train for eval_frequency: 10 # how often to get validation error (in epochs) save_frequency: 10 # how often to save output (in epochs) vis_frequency: 10000 # how often to visualize filters (in epochs) log_frequency: 1 # how often to log output (in steps) # train / val split params train_pct: 0.8 # percentage of the data to use for training vs validation total_pct: 1.0 # percentage 
of all the files to use eval_total_train_error: 0 # whether or not to evaluate the total training error on each validataion max_files_eval: 1000 # the number of validation files to use in each eval # optimization params loss: sparse optimizer: momentum train_l2_regularizer: 0.0005 base_lr: 0.01 decay_step_multiplier: 0.5 # number of times to go through training datapoints before stepping down decay rate (in epochs) decay_rate: 0.95 momentum_rate: 0.9 max_training_examples_per_load: 128 drop_rate: 0.0 max_global_grad_norm: 100000000000 # input params training_mode: classification image_field_name: tf_depth_ims pose_field_name: grasps # label params target_metric_name: grasp_metrics # name of the field to use for the labels metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh) # preproc params num_random_files: 10000 # the number of random files to compute dataset statistics in preprocessing (lower speeds initialization) preproc_log_frequency: 100 # how often to log preprocessing (in steps) # denoising / synthetic data params multiplicative_denoising: 0 gamma_shape: 1000.00 symmetrize: 1 gaussian_process_denoising: 0 gaussian_process_rate: 0.5 gaussian_process_scaling_factor: 4.0 gaussian_process_sigma: 0.005 # tensorboard tensorboard_port: 6006 # debugging params debug: &debug 0 debug_num_files: 10 # speeds up initialization seed: &seed 24098 ### GQCNN CONFIG ### gqcnn: # basic data metrics im_height: 96 im_width: 96 im_channels: 1 debug: *debug seed: *seed # needs to match input data mode that was used for training, determines the pose dimensions for the network gripper_mode: suction # method by which to integrate depth into the network input_depth_mode: pose_stream # used for training with multiple angular predictions angular_bins: 0 # prediction batch size, in training this will be overriden by the val_batch_size in the optimizer's config file batch_size: *val_batch_size # architecture architecture: im_stream: conv1_1: 
type: conv filt_dim: 9 num_filt: 8 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv1_2: type: conv filt_dim: 3 num_filt: 8 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response conv2_1: type: conv filt_dim: 3 num_filt: 8 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv2_2: type: conv filt_dim: 3 num_filt: 8 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response fc3: type: fc out_size: 64 pose_stream: pc1: type: pc out_size: 16 pc2: type: pc out_size: 0 merge_stream: fc4: type: fc_merge out_size: 64 fc5: type: fc out_size: 2 # architecture normalization constants radius: 2 alpha: 2.0e-05 beta: 0.75 bias: 1.0 # leaky relu coefficient relu_coeff: 0.0 ================================================ FILE: cfg/train_example_pj.yaml ================================================ # general optimization params train_batch_size: 64 val_batch_size: &val_batch_size 64 # logging params num_epochs: 100 # number of epochs to train for eval_frequency: 10 # how often to get validation error (in epochs) save_frequency: 10 # how often to save output (in epochs) vis_frequency: 10000 # how often to visualize filters (in epochs) log_frequency: 1 # how often to log output (in steps) # train / val split params train_pct: 0.9 # percentage of the data to use for training vs validation total_pct: 1.0 # percentage of all the files to use eval_total_train_error: 0 # whether or not to evaluate the total training error on each validataion max_files_eval: 1000 # the number of validation files to use in each eval # optimization params loss: sparse optimizer: momentum train_l2_regularizer: 0.0005 base_lr: 0.01 decay_step_multiplier: 1.0 # number of times to go through training datapoints before stepping down decay rate (in epochs) decay_rate: 0.98 momentum_rate: 0.9 max_training_examples_per_load: 128 drop_rate: 0.0 max_global_grad_norm: 100000000000 # input params training_mode: classification 
image_field_name: tf_depth_ims pose_field_name: grasps # label params target_metric_name: grasp_metrics # name of the field to use for the labels metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh) # preproc params num_random_files: 1000 # the number of random files to compute dataset statistics in preprocessing (lower speeds initialization) preproc_log_frequency: 100 # how often to log preprocessing (in steps) # denoising / synthetic data params multiplicative_denoising: 0 gamma_shape: 1000.00 symmetrize: 1 gaussian_process_denoising: 0 gaussian_process_rate: 0.5 gaussian_process_scaling_factor: 4.0 gaussian_process_sigma: 0.005 # tensorboard tensorboard_port: 6006 # debugging params debug: &debug 0 debug_num_files: 10 # speeds up initialization seed: &seed 24098 ### GQCNN CONFIG ### gqcnn: # basic data metrics im_height: 96 im_width: 96 im_channels: 1 debug: *debug seed: *seed # needs to match input data mode that was used for training, determines the pose dimensions for the network gripper_mode: parallel_jaw # method by which to integrate depth into the network input_depth_mode: pose_stream # used for training with multiple angular predictions angular_bins: 0 # prediction batch size, in training this will be overriden by the val_batch_size in the optimizer's config file batch_size: *val_batch_size # architecture architecture: im_stream: conv1_1: type: conv filt_dim: 9 num_filt: 8 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response conv1_2: type: conv filt_dim: 3 num_filt: 8 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response conv2_1: type: conv filt_dim: 3 num_filt: 8 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv2_2: type: conv filt_dim: 3 num_filt: 8 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response fc3: type: fc out_size: 64 pose_stream: pc1: type: pc out_size: 4 pc2: type: pc out_size: 0 merge_stream: fc4: type: 
fc_merge out_size: 64 fc5: type: fc out_size: 2 # architecture normalization constants radius: 2 alpha: 2.0e-05 beta: 0.75 bias: 1.0 # leaky relu coefficient relu_coeff: 0.0 ================================================ FILE: cfg/train_example_suction.yaml ================================================ # general optimization params train_batch_size: 64 val_batch_size: &val_batch_size 64 # logging params num_epochs: 50 # number of epochs to train for eval_frequency: 10 # how often to get validation error (in epochs) save_frequency: 10 # how often to save output (in epochs) vis_frequency: 10000 # how often to visualize filters (in epochs) log_frequency: 1 # how often to log output (in steps) # train / val split params train_pct: 0.9 # percentage of the data to use for training vs validation total_pct: 1.0 # percentage of all the files to use eval_total_train_error: 0 # whether or not to evaluate the total training error on each validataion max_files_eval: 1000 # the number of validation files to use in each eval # optimization params loss: sparse optimizer: momentum train_l2_regularizer: 0.0005 base_lr: 0.01 decay_step_multiplier: 1.0 # number of times to go through training datapoints before stepping down decay rate (in epochs) decay_rate: 0.95 momentum_rate: 0.9 max_training_examples_per_load: 128 drop_rate: 0.0 max_global_grad_norm: 100000000000 # input params training_mode: classification image_field_name: tf_depth_ims pose_field_name: grasps # label params target_metric_name: grasp_metrics # name of the field to use for the labels metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh) # preproc params num_random_files: 1000 # the number of random files to compute dataset statistics in preprocessing (lower speeds initialization) preproc_log_frequency: 100 # how often to log preprocessing (in steps) # denoising / synthetic data params multiplicative_denoising: 0 gamma_shape: 1000.00 symmetrize: 1 
gaussian_process_denoising: 0 gaussian_process_rate: 0.5 gaussian_process_scaling_factor: 4.0 gaussian_process_sigma: 0.005 # tensorboard tensorboard_port: 6006 # debugging params debug: &debug 0 debug_num_files: 10 # speeds up initialization seed: &seed 24098 ### GQCNN CONFIG ### gqcnn: # basic data metrics im_height: 96 im_width: 96 im_channels: 1 debug: *debug seed: *seed # needs to match input data mode that was used for training, determines the pose dimensions for the network gripper_mode: suction # method by which to integrate depth into the network input_depth_mode: pose_stream # used for training with multiple angular predictions angular_bins: 0 # prediction batch size, in training this will be overriden by the val_batch_size in the optimizer's config file batch_size: *val_batch_size # architecture architecture: im_stream: conv1_1: type: conv filt_dim: 9 num_filt: 8 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv1_2: type: conv filt_dim: 3 num_filt: 8 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response conv2_1: type: conv filt_dim: 3 num_filt: 8 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv2_2: type: conv filt_dim: 3 num_filt: 8 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response fc3: type: fc out_size: 64 pose_stream: pc1: type: pc out_size: 16 pc2: type: pc out_size: 0 merge_stream: fc4: type: fc_merge out_size: 64 fc5: type: fc out_size: 2 # architecture normalization constants radius: 2 alpha: 2.0e-05 beta: 0.75 bias: 1.0 # leaky relu coefficient relu_coeff: 0.0 ================================================ FILE: cfg/train_fc.yaml ================================================ # general optimization params train_batch_size: 64 val_batch_size: &val_batch_size 64 # logging params num_epochs: 50 # number of epochs to train for eval_frequency: 10 # how often to get validation error (in epochs) save_frequency: 10 # how often to save output (in epochs) 
vis_frequency: 10000 # how often to visualize filters (in epochs) log_frequency: 1 # how often to log output (in steps) # train / val split params train_pct: 0.8 # percentage of the data to use for training vs validation total_pct: 1.0 # percentage of all the files to use eval_total_train_error: 0 # whether or not to evaluate the total training error on each validataion max_files_eval: 1000 # the number of validation files to use in each eval # optimization params loss: sparse optimizer: momentum train_l2_regularizer: 0.0005 base_lr: 0.01 decay_step_multiplier: 0.5 # number of times to go through training datapoints before stepping down decay rate (in epochs) decay_rate: 0.95 momentum_rate: 0.9 max_training_examples_per_load: 128 drop_rate: 0.0 max_global_grad_norm: 100000000000 # input params training_mode: classification image_field_name: tf_depth_ims pose_field_name: grasps # label params target_metric_name: grasp_metrics # name of the field to use for the labels metric_thresh: 0.5 # threshold for positive examples (label = 1 if grasp_metric > metric_thresh) # preproc params num_random_files: 10000 # the number of random files to compute dataset statistics in preprocessing (lower speeds initialization) preproc_log_frequency: 100 # how often to log preprocessing (in steps) # denoising / synthetic data params multiplicative_denoising: 0 gamma_shape: 1000.00 symmetrize: 0 gaussian_process_denoising: 0 gaussian_process_rate: 0.5 gaussian_process_scaling_factor: 4.0 gaussian_process_sigma: 0.005 # tensorboard tensorboard_port: 6006 # debugging params debug: &debug 0 debug_num_files: 10 # speeds up initialization seed: &seed 24098 ### GQCNN CONFIG ### gqcnn: # basic data metrics im_height: 96 im_width: 96 im_channels: 1 debug: *debug seed: *seed # needs to match input data mode that was used for training, determines the pose dimensions for the network gripper_mode: parallel_jaw # method by which to integrate depth into the network input_depth_mode: im_depth_sub # used 
for training with multiple angular predictions angular_bins: 16 # prediction batch size, in training this will be overriden by the val_batch_size in the optimizer's config file batch_size: *val_batch_size # architecture architecture: im_stream: conv1_1: type: conv filt_dim: 9 num_filt: 16 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv1_2: type: conv filt_dim: 5 num_filt: 16 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response conv2_1: type: conv filt_dim: 5 num_filt: 16 pool_size: 1 pool_stride: 1 pad: VALID norm: 0 norm_type: local_response conv2_2: type: conv filt_dim: 5 num_filt: 16 pool_size: 2 pool_stride: 2 pad: VALID norm: 0 norm_type: local_response fc3: type: fc out_size: 128 fc4: type: fc out_size: 128 fc5: type: fc out_size: 32 # architecture normalization constants radius: 2 alpha: 2.0e-05 beta: 0.75 bias: 1.0 # leaky relu coefficient relu_coeff: 0.0 ================================================ FILE: ci/travis/format.sh ================================================ #!/bin/bash # Copyright ©2017. The Regents of the University of California (Regents). # All Rights Reserved. Permission to use, copy, modify, and distribute this # software and its documentation for educational, research, and not-for-profit # purposes, without fee and without a signed licensing agreement, is hereby # granted, provided that the above copyright notice, this paragraph and the # following two paragraphs appear in all copies, modifications, and # distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 # Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, # otl@berkeley.edu, # http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. 
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF # THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED # HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE # MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. # Script for YAPF formatting. Adapted from https://github.com/ray-project/ray/blob/master/ci/travis/format.sh. YAPF_FLAGS=( '--style' ".style.yapf" '--recursive' '--parallel' ) YAPF_EXCLUDES=() # Format specified files format() { yapf --in-place "${YAPF_FLAGS[@]}" -- "$@" } # Format all files, and print the diff to `stdout` for Travis. format_all() { yapf --diff "${YAPF_FLAGS[@]}" "${YAPF_EXCLUDES[@]}" . } # This flag formats individual files. `--files` *must* be the first command line # arg to use this option. if [[ "$1" == '--files' ]]; then format "${@:2}" # If `--all` is passed, then any further arguments are ignored and the # entire Python directory is formatted. 
elif [[ "$1" == '--all' ]]; then format_all fi ================================================ FILE: data/calib/phoxi/phoxi.intr ================================================ {"_cy": 191.75, "_cx": 255.5, "_fy": 552.5, "_height": 386, "_fx": 552.5, "_width": 516, "_skew": 0.0, "_K": 0, "_frame": "phoxi"} ================================================ FILE: data/calib/phoxi/phoxi_to_world.tf ================================================ phoxi world 0.385146 -0.121589 0.808145 0.005659 -0.999983 0.001249 -0.989208 -0.005415 0.146420 -0.146410 -0.002064 -0.989222 ================================================ FILE: data/calib/primesense/primesense.intr ================================================ {"_cy": 239.5, "_cx": 319.5, "_fy": 525.0, "_height": 480, "_fx": 525.0, "_width": 640, "_skew": 0.0, "_K": 0, "_frame": "primesense_overhead"} ================================================ FILE: data/calib/primesense/primesense.tf ================================================ primesense_overhead world 0.214280 0.004186 0.872913 0.021188 -0.975273 0.219984 -0.999765 -0.021698 0.000098 0.004678 -0.219934 -0.975504 ================================================ FILE: docker/cpu/Dockerfile ================================================ FROM ubuntu:xenial MAINTAINER Vishal Satish # Args. # Must be an absolute path. ARG work_dir=/root/Workspace # Install `apt-get` deps. RUN apt-get update && apt-get install -y \ build-essential \ python3 \ python3-dev \ python3-tk \ python-opengl \ curl \ libsm6 \ libxext6 \ libglib2.0-0 \ libxrender1 \ wget \ unzip # Install libspatialindex (required for latest rtree). RUN curl -L http://download.osgeo.org/libspatialindex/spatialindex-src-1.8.5.tar.gz | tar xz && \ cd spatialindex-src-1.8.5 && \ ./configure && \ make && \ make install && \ ldconfig && \ cd .. # Install pip (`apt-get install python-pip` causes trouble w/ networkx). 
RUN curl -O https://bootstrap.pypa.io/get-pip.py && \ python3 get-pip.py && \ rm get-pip.py # Required for easy_install to find right skimage version for Python 3.5. RUN pip3 install -U setuptools # Make working directory. WORKDIR ${work_dir} # Copy the library. ADD docker/gqcnn.tar . # This is because `python setup.py develop` skips `install_requires` (I think). RUN python3 -m pip install -r gqcnn/requirements/cpu_requirements.txt # Install the library in editable mode because it's more versatile (in case we want to develop or if users want to modify things) # Keep the egg outside of the library in site-packages in case we want to mount the library (overwriting it) for development with docker ENV PYTHONPATH ${work_dir}/gqcnn WORKDIR /usr/local/lib/python3.5/site-packages/ RUN python3 ${work_dir}/gqcnn/setup.py develop --docker # Move to the top-level gqcnn package dir. WORKDIR ${work_dir}/gqcnn ================================================ FILE: docker/gpu/Dockerfile ================================================ FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04 MAINTAINER Vishal Satish # Args # `work_dir` must be an absolute path. ARG work_dir=/root/Workspace # Install `apt-get` deps. RUN apt-get update && apt-get install -y \ build-essential \ python3 \ python3-dev \ python3-tk \ python-opengl \ curl \ libsm6 \ libxext6 \ libglib2.0-0 \ libxrender1 \ wget \ unzip # Install libspatialindex (required for latest rtree). RUN curl -L http://download.osgeo.org/libspatialindex/spatialindex-src-1.8.5.tar.gz | tar xz && \ cd spatialindex-src-1.8.5 && \ ./configure && \ make && \ make install && \ ldconfig && \ cd .. # Install pip (`apt-get install python-pip` causes trouble w/ networkx). RUN curl -O https://bootstrap.pypa.io/get-pip.py && \ python3 get-pip.py && \ rm get-pip.py # Required for easy_install to find right skimage version for Python 3.5. RUN pip3 install -U setuptools # Make working directory. WORKDIR ${work_dir} # Copy the library. ADD docker/gqcnn.tar . 
# This is because `python setup.py develop` skips install_requires (I think) and also because we want to explicitly use the GPU requirements. RUN python3 -m pip install -r gqcnn/requirements/gpu_requirements.txt # Install the library in editable mode because it's more versatile (in case we want to develop or if users want to modify things) # Keep the egg outside of the library in site-packages in case we want to mount the library (overwriting it) for development with docker ENV PYTHONPATH ${work_dir}/gqcnn WORKDIR /usr/local/lib/python3.5/site-packages/ RUN python3 ${work_dir}/gqcnn/setup.py develop --docker # Move to the base library dir WORKDIR ${work_dir}/gqcnn ================================================ FILE: docs/Makefile ================================================ # Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SPHINXPROJ = GQCNN SOURCEDIR = source BUILDDIR = build GH_PAGES_SOURCES = docs examples gqcnn tools # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile .PHONY: clean clean: rm -rf $(BUILDDIR)/* .PHONY: gh-pages gh-pages: git checkout gh-pages && \ cd .. && \ git rm -rf . && git clean -fxd && \ git checkout master $(GH_PAGES_SOURCES) && \ git reset HEAD && \ cd docs && \ make html && \ cd .. && \ mv -fv docs/build/html/* ./ && \ touch .nojekyll && \ rm -rf $(GH_PAGES_SOURCES) && \ git add -A && \ git commit -m "Generated gh-pages for `git log master -1 --pretty=short --abbrev-commit`" && \ git push origin --delete gh-pages && \ git push origin gh-pages ; \ git checkout master # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
%: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ================================================ FILE: docs/gh_deploy.sh ================================================ #!/bin/sh make gh-pages cd ../docs ================================================ FILE: docs/make.bat ================================================ @ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=source set BUILDDIR=build set SPHINXPROJ=GQCNN if "%1" == "" goto help %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% :end popd ================================================ FILE: docs/source/api/analysis.rst ================================================ Analysis ======== GQCNNAnalyzer ~~~~~~~~~~~~~ A tool for analyzing trained GQ-CNNs. Calculates statistics such as training/validation errors and losses. Also plots Precision-Recall Curve and ROC, and saves sample TP/TN/FP/FN training/validation examples. .. autoclass:: gqcnn.GQCNNAnalyzer ================================================ FILE: docs/source/api/gqcnn.rst ================================================ GQ-CNN ====== GQ-CNN and FC-GQ-CNN classes are **never accessed directly**, but through a lightweight factory function that returns the corresponding class depending on the specified backend. :: $ from gqcnn import get_gqcnn_model $ $ backend = 'tf' $ my_gqcnn = get_gqcnn_model(backend)() ..
autofunction:: gqcnn.get_gqcnn_model .. autofunction:: gqcnn.get_fc_gqcnn_model GQCNNTF ~~~~~~~ Tensorflow implementation of GQ-CNN model. .. autoclass:: gqcnn.model.tf.GQCNNTF :exclude-members: init_mean_and_std, set_base_network, init_weights_file, initialize_network, set_batch_size, set_im_mean, get_im_mean, set_im_std, get_im_std, set_pose_mean, get_pose_mean, set_pose_std, get_pose_std, set_im_depth_sub_mean, set_im_depth_sub_std, add_softmax_to_output, add_sigmoid_to_output, update_batch_size, FCGQCNNTF ~~~~~~~~~ Tensorflow implementation of FC-GQ-CNN model. .. autoclass:: gqcnn.model.tf.FCGQCNNTF :exclude-members: __init__ ================================================ FILE: docs/source/api/policies.rst ================================================ Policies ======== All GQ-CNN grasping policies are child classes of the base :ref:`GraspingPolicy` class that implements `__call__()`, which operates on :ref:`RgbdImageStates ` and returns a :ref:`GraspAction`. :: $ from gqcnn import RgbdImageState, CrossEntropyRobustGraspingPolicy $ $ im = RgbdImageState.load() $ my_policy = CrossEntropyRobustGraspingPolicy() $ $ my_grasp_action = my_policy(im) Primary Policies ~~~~~~~~~~~~~~~~ CrossEntropyRobustGraspingPolicy -------------------------------- An implementation of the `Cross Entropy Method (CEM)`_ used in `Dex-Net 2.0`_, `Dex-Net 2.1`_, `Dex-Net 3.0`_, and `Dex-Net 4.0`_ to iteratively locate the best grasp. .. autoclass:: gqcnn.CrossEntropyRobustGraspingPolicy FullyConvolutionalGraspingPolicyParallelJaw ------------------------------------------- An implementation of the `FC-GQ-CNN`_ parallel jaw policy that uses dense, parallelized fully convolutional networks. .. autoclass:: gqcnn.FullyConvolutionalGraspingPolicyParallelJaw FullyConvolutionalGraspingPolicySuction --------------------------------------- An implementation of the `FC-GQ-CNN`_ suction policy that uses dense, parallelized fully convolutional networks. .. 
autoclass:: gqcnn.FullyConvolutionalGraspingPolicySuction Grasps and Image Wrappers ~~~~~~~~~~~~~~~~~~~~~~~~~ .. _RgbdImageState: RgbdImageState -------------- A wrapper for states containing an RGBD (RGB + Depth) image, camera intrinisics, and segmentation masks. .. autoclass:: gqcnn.RgbdImageState .. _GraspAction: GraspAction ----------- A wrapper for 2D grasp actions such as :ref:`Grasp2D` or :ref:`SuctionPoint2D`. .. autoclass:: gqcnn.grasping.policy.policy.GraspAction .. _Grasp2D: Grasp2D ------- A wrapper for 2D parallel jaw grasps. .. autoclass:: gqcnn.grasping.grasp.Grasp2D .. _SuctionPoint2D: SuctionPoint2D -------------- A wrapper for 2D suction grasps. .. autoclass:: gqcnn.grasping.grasp.SuctionPoint2D Miscellaneous and Parent Policies ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Policy ------ .. autoclass:: gqcnn.grasping.policy.policy.Policy .. _GraspingPolicy: GraspingPolicy -------------- .. autoclass:: gqcnn.grasping.policy.policy.GraspingPolicy FullyConvolutionalGraspingPolicy -------------------------------- .. autoclass:: gqcnn.grasping.policy.fc_policy.FullyConvolutionalGraspingPolicy RobustGraspingPolicy -------------------- .. autoclass:: gqcnn.RobustGraspingPolicy UniformRandomGraspingPolicy --------------------------- .. autoclass:: gqcnn.UniformRandomGraspingPolicy .. _Cross Entropy Method (CEM): https://en.wikipedia.org/wiki/Cross-entropy_method .. _Dex-Net 2.0: https://berkeleyautomation.github.io/dex-net/#dexnet_2 .. _Dex-Net 2.1: https://berkeleyautomation.github.io/dex-net/#dexnet_21 .. _Dex-Net 3.0: https://berkeleyautomation.github.io/dex-net/#dexnet_3 .. _Dex-Net 4.0: https://berkeleyautomation.github.io/dex-net/#dexnet_4 .. 
_FC-GQ-CNN: https://berkeleyautomation.github.io/fcgqcnn ================================================ FILE: docs/source/api/training.rst ================================================ Training ======== GQ-CNN training classes are **never accessed directly**, but through a lightweight factory function that returns the corresponding class depending on the specified backend. :: $ from gqcnn import get_gqcnn_trainer $ $ backend = 'tf' $ my_trainer = get_gqcnn_trainer(backend)() .. autofunction:: gqcnn.get_gqcnn_trainer GQCNNTrainerTF ~~~~~~~~~~~~~~ .. autoclass:: gqcnn.training.tf.GQCNNTrainerTF ================================================ FILE: docs/source/benchmarks/benchmarks.rst ================================================ Dex-Net 2.0 ~~~~~~~~~~~ Below are the highest classification accuracies achieved on the `Dex-Net 2.0`_ dataset on a randomized 80-20 train-validation split using various splitting rules: .. image:: ../images/gqcnn_leaderboard.png :width: 100% The current leader is a ConvNet submitted by nomagic.ai. `GQ` is our best GQ-CNN for `Dex-Net 2.0`_. We believe grasping performance on the physical robot can be improved if these validation error rates can be further reduced by modifications to the network architecture and optimization. If you achieve superior numbers on a randomized validation set, please email Jeff Mahler (jmahler@berkeley.edu) with the subject "Dex-Net 2.0 Benchmark Submission" and we will consider testing on our ABB YuMi. .. _Dex-Net 2.0: https://berkeleyautomation.github.io/dex-net/#dexnet_2 ================================================ FILE: docs/source/conf.py ================================================ # -*- coding: utf-8 -*- # # core documentation build configuration file, created by # sphinx-quickstart on Sun Oct 16 14:33:48 2016. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. 
#
# All configuration values have a default; values that are commented out
# serve to show the default.

import sys
import os

import sphinx_rtd_theme

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
# NOTE: use `__file__` (the path of this conf.py), not `__name__` (the module
# name, which has no directory component) — `os.path.dirname(__name__)` is
# always "", so the previous code resolved relative to the current working
# directory and only worked when Sphinx happened to run from the source dir.
_conf_dir = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(_conf_dir, "../../"))
sys.path.insert(0, os.path.join(_conf_dir, "../../examples"))
sys.path.insert(0, os.path.join(_conf_dir, "../../tools"))

# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = "1.0"

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named "sphinx.ext.*") or your custom
# ones.
extensions = ["sphinx.ext.autodoc", "sphinxcontrib.napoleon"]
autoclass_content = "class"
autodoc_member_order = "bysource"
autodoc_default_flags = ["members", "show-inheritance"]
napoleon_include_special_with_doc = True
napoleon_include_init_with_doc = True

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
# source_suffix = [".rst", ".md"]
source_suffix = ".rst"

# The encoding of source files.
#source_encoding = "utf-8-sig"

# The master toctree document.
master_doc = "index"

# General information about the project.
project = u"GQCNN"
copyright = u"2019, The Regents of the University of California"
author = u"Vishal Satish, Jeff Mahler"

# The version info for the project you"re documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = u"1.1" # The full version, including alpha/beta/rc tags. release = u"1.1.0" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = "" # Else, today_fmt is used as the format for a strftime call. #today_fmt = "%B %d, %Y" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = [] # The reST default role (used for this markup: `text`) to use for all # documents. #default_role = None # If true, "()" will be appended to :func: etc. cross-reference text. #add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). #add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. #show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. #modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. #keep_warnings = False # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = "sphinx_rtd_theme" html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # Theme options are theme-specific and customize the look and feel of a theme # further. 
For a list of options available for each theme, see the # documentation. #html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". #html_title = None # A shorter title for the navigation bar. Default is the same as html_title. #html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. #html_logo = None # The name of an image file (relative to this directory) to use as a favicon of # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. #html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". # html_static_path = ["_static"] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. #html_extra_path = [] # If not "", a "Last updated on:" timestamp is inserted at every page bottom, # using the given strftime format. #html_last_updated_fmt = "%b %d, %Y" # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. #html_use_smartypants = True # Custom sidebar templates, maps document names to template names. #html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. #html_additional_pages = {} # If false, no module index is generated. #html_domain_indices = True # If false, no index is generated. #html_use_index = True # If true, the index is split into individual pages for each letter. #html_split_index = False # If true, links to the reST sources are added to the pages. 
#html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. #html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. #html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. #html_use_opensearch = "" # This is the file name suffix for HTML files (e.g. ".xhtml"). #html_file_suffix = None # Language to be used for generating the HTML full-text search index. # Sphinx supports the following languages: # "da", "de", "en", "es", "fi", "fr", "hu", "it", "ja" # "nl", "no", "pt", "ro", "ru", "sv", "tr" #html_search_language = "en" # A dictionary with options for the search language support, empty by default. # Now only "ja" uses this config value #html_search_options = {"type": "default"} # The name of a javascript file (relative to the configuration directory) that # implements a search results scorer. If empty, the default will be used. #html_search_scorer = "scorer.js" # Output file base name for HTML help builder. htmlhelp_basename = "GQCNNdoc" # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ("letterpaper" or "a4paper"). #"papersize": "letterpaper", # The font size ("10pt", "11pt" or "12pt"). #"pointsize": "10pt", # Additional stuff for the LaTeX preamble. #"preamble": "", # Latex figure (float) alignment #"figure_align": "htbp", } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, "GQCNN.tex", u"GQCNN Documentation", u"Jeff Mahler, Vishal Satish", "manual"), ] # The name of an image file (relative to this directory) to place at the top of # the title page. 
#latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. #latex_use_parts = False # If true, show page references after internal links. #latex_show_pagerefs = False # If true, show URL addresses after external links. #latex_show_urls = False # Documents to append as an appendix to all manuals. #latex_appendices = [] # If false, no module index is generated. #latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [(master_doc, "GQCNN", u"GQCNN Documentation", [author], 1)] # If true, show URL addresses after external links. #man_show_urls = False # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, "GQCNN", u"GQCNN Documentation", author, "GQCNN", "One line description of project.", "Miscellaneous"), ] # Documents to append as an appendix to all manuals. #texinfo_appendices = [] # If false, no module index is generated. #texinfo_domain_indices = True # How to display URL addresses: "footnote", "no", or "inline". #texinfo_show_urls = "footnote" # If true, do not generate a @detailmenu in the "Top" node"s menu. #texinfo_no_detailmenu = False ================================================ FILE: docs/source/index.rst ================================================ .. GQCNN documentation master file, created by sphinx-quickstart on Thu May 4 16:09:26 2017. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. 
Berkeley AUTOLAB's GQCNN Package ================================ Overview -------- The `gqcnn` package is a Python API for training and deploying `Grasp Quality Convolutional Neural Networks (GQ-CNNs)`_ for grasp planning using training datasets from the `Dexterity Network (Dex-Net)`_, developed by the `Berkeley AUTOLAB`_ and introduced in the `Dex-Net 2.0 paper`_. .. note:: We're excited to announce **version 1.0**, which brings the GQ-CNN package up to date with recent research in `Dex-Net`_. Version 1.0 introduces support for: #. **Dex-Net 4.0:** Composite policies that decide whether to use a suction cup or parallel-jaw gripper. #. **Fully Convolutional GQ-CNNs:** Fully convolutional architectures that efficiently evaluate millions of grasps faster than prior GQ-CNNs. Links ----- * `Source Code`_ * `Datasets`_ * `Pretrained Models`_ * `Dex-Net Website`_ * `UC Berkeley AUTOLAB`_ .. _Source Code: https://github.com/BerkeleyAutomation/gqcnn .. _Datasets: http://bit.ly/2rIM7Jk .. _Pretrained Models: http://bit.ly/2tAFMko .. _Dex-Net Website: https://berkeleyautomation.github.io/dex-net .. _UC Berkeley AUTOLAB: http://autolab.berkeley.edu .. image:: images/gqcnn.png :width: 100 % Project Goals ------------- The goals of this project are to facilitate: #. **Replicability** of GQ-CNN training and deployment from: #. The latest `Dex-Net 4.0`_ results. #. Older results such as `Dex-Net 2.0`_, `Dex-Net 2.1`_ and `Dex-Net 3.0`_. #. Experimental results such as `FC-GQ-CNN`_. #. **Research extensions** on novel GQ-CNN architectures that have higher performance on `Dex-Net 4.0`_ training datasets. .. _Dex-Net 2.0: https://berkeleyautomation.github.io/dex-net/#dexnet_2 .. _Dex-Net 2.1: https://berkeleyautomation.github.io/dex-net/#dexnet_21 .. _Dex-Net 3.0: https://berkeleyautomation.github.io/dex-net/#dexnet_3 .. _Dex-Net 4.0: https://berkeleyautomation.github.io/dex-net/#dexnet_4 .. 
_FC-GQ-CNN: https://berkeleyautomation.github.io/fcgqcnn Our longer-term goal is to encourage development of robust GQ-CNNs that can be used to plan grasps on different hardware setups with different robots and cameras. Disclaimer ---------- GQ-CNN models are sensitive to the following parameters used during dataset generation: #. The robot gripper #. The depth camera #. The distance between the camera and workspace. As a result, we cannot guarantee performance of our pre-trained models on other physical setups. If you have a specific use-case in mind, please reach out to us. It might be possible to generate a custom dataset for your particular setup. We are actively researching how to generate more robust datasets that can generalize across robots, cameras, and viewpoints. Development ----------- The package is currently under active development. Please raise all bugs, feature requests, and other issues under the `Github Issues`_. For other questions or concerns, please contact Jeff Mahler (jmahler@berkeley.edu) or Vishal Satish (vsatish@berkeley.edu) with the subject line starting with "GQ-CNN Development". Academic Use ------------ If you use the code, datasets, or models in a publication, please cite the appropriate paper: #. **Dex-Net 2.0** `(bibtex) `__ #. **Dex-Net 2.1** `(bibtex) `__ #. **Dex-Net 3.0** `(bibtex) `__ #. **Dex-Net 4.0** `(bibtex) `__ #. **FC-GQ-CNN** `(bibtex) `__ .. _Grasp Quality Convolutional Neural Networks (GQ-CNNs): info/info.html .. _Dexterity Network (Dex-Net): https://berkeleyautomation.github.io/dex-net .. _Dex-Net: https://berkeleyautomation.github.io/dex-net .. _Berkeley AUTOLAB: http://autolab.berkeley.edu/ .. _Dex-Net 2.0 paper: https://github.com/BerkeleyAutomation/dex-net/raw/gh-pages/docs/dexnet_rss2017_final.pdf .. _Github Issues: https://github.com/BerkeleyAutomation/gqcnn/issues .. toctree:: :maxdepth: 2 :caption: Background info/info.rst .. toctree:: :maxdepth: 2 :caption: Installation Guide install/install.rst ..
toctree:: :maxdepth: 2 :caption: Getting Started tutorials/tutorial.rst .. toctree:: :maxdepth: 2 :caption: Replication replication/replication.rst .. toctree:: :maxdepth: 2 :caption: Benchmarks benchmarks/benchmarks.rst .. toctree:: :maxdepth: 2 :caption: API Documentation :glob: api/gqcnn.rst api/training.rst api/analysis.rst api/policies.rst .. toctree:: :maxdepth: 2 :caption: License license/license.rst Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` ================================================ FILE: docs/source/info/info.rst ================================================ What are GQ-CNNs? ----------------- GQ-CNNs are neural network architectures that take as input a depth image and grasp, and output the predicted probability that the grasp will successfully hold the object while lifting, transporting, and shaking the object. .. figure:: ../images/gqcnn.png :width: 100% :align: center Original GQ-CNN architecture from `Dex-Net 2.0`_. .. figure:: ../images/fcgqcnn_arch_diagram.png :width: 100% :align: center Alternate faster GQ-CNN architecture from `FC-GQ-CNN`_. The GQ-CNN weights are trained on datasets of synthetic point clouds, parallel jaw grasps, and grasp metrics generated from physics-based models with domain randomization for sim-to-real transfer. See the ongoing `Dexterity Network (Dex-Net)`_ project for more information. .. _Dexterity Network (Dex-Net): https://berkeleyautomation.github.io/dex-net .. _Dex-Net 2.0: https://berkeleyautomation.github.io/dex-net/#dexnet_2 .. _FC-GQ-CNN: https://berkeleyautomation.github.io/fcgqcnn ================================================ FILE: docs/source/install/install.rst ================================================ Prerequisites ~~~~~~~~~~~~~ Python """""" The `gqcnn` package has only been tested with `Python 3.5`, `Python 3.6`, and `Python 3.7`. Ubuntu """""" The `gqcnn` package has only been tested with `Ubuntu 12.04`, `Ubuntu 14.04` and `Ubuntu 16.04`. 
Virtualenv """""""""" We highly recommend using a Python environment management system, in particular `Virtualenv`, with the Pip and ROS installations. **Note: Several users have encountered problems with dependencies when using Conda.** Pip Installation ~~~~~~~~~~~~~~~~ The pip installation is intended for users who are **only interested in 1) Training GQ-CNNs or 2) Grasp planning on saved RGBD images**, not interfacing with a physical robot. If you have intentions of using GQ-CNNs for grasp planning on a physical robot, we suggest you `install as a ROS package`_. .. _install as a ROS package: https://berkeleyautomation.github.io/gqcnn/install/install.html#ros-installation 1. Clone the repository """"""""""""""""""""""" Clone or download the `project`_ from Github. :: $ git clone https://github.com/BerkeleyAutomation/gqcnn.git .. _project: https://github.com/BerkeleyAutomation/gqcnn 2. Run pip installation """"""""""""""""""""""" Change directories into the `gqcnn` repository and run the pip installation. :: $ pip install . This will install `gqcnn` in your current virtual environment. .. _ros-install: ROS Installation ~~~~~~~~~~~~~~~~ Installation as a ROS package is intended for users who wish to use GQ-CNNs to plan grasps on a physical robot. 1. Clone the repository """"""""""""""""""""""" Clone or download the `project`_ from Github. :: $ cd /src $ git clone https://github.com/BerkeleyAutomation/gqcnn.git 2. Build the catkin package """"""""""""""""""""""""""" Build the catkin package. :: $ cd $ catkin_make Then re-source `devel/setup.bash` for the package to be available through Python. Docker Installation ~~~~~~~~~~~~~~~~~~~ We currently do not provide pre-built Docker images, but you can build them yourself. This will require you to have installed `Docker`_ or `Nvidia-Docker`_ if you plan on using GPUs. Note that our provided build for GPUs uses CUDA 10.0 and cuDNN 7.0, so make sure that this is compatible with your GPU hardware. 
If you wish to use a different CUDA/cuDNN version, change the base image in `docker/gpu/Dockerfile` to the desired `CUDA/cuDNN image distribution`_. **Note that other images have not yet been tested.** .. _Docker: https://www.docker.com/ .. _Nvidia-Docker: https://github.com/NVIDIA/nvidia-docker .. _CUDA/cuDNN image distribution: https://hub.docker.com/r/nvidia/cuda/ 1. Clone the repository """"""""""""""""""""""" Clone or download the `project`_ from Github. :: $ git clone https://github.com/BerkeleyAutomation/gqcnn.git .. _project: https://github.com/BerkeleyAutomation/gqcnn 2. Build Docker images """""""""""""""""""""" Change directories into the `gqcnn` repository and run the build script. :: $ ./scripts/docker/build-docker.sh This will build the images `gqcnn/cpu` and `gqcnn/gpu`. 3. Run Docker image """""""""""""""""""" To run `gqcnn/cpu`: :: $ docker run --rm -it gqcnn/cpu To run `gqcnn/gpu`: :: $ nvidia-docker run --rm -it gqcnn/gpu Note the use of `nvidia-docker` in the latter to enable the Nvidia runtime. You will then see an interactive shell like this: :: $ root@a96488604093:~/Workspace/gqcnn# Now you can proceed to run the examples and tutorial! ================================================ FILE: docs/source/license/license.rst ================================================ License ~~~~~~~ The `gqcnn` library, pre-trained models, and raw datasets are licensed under The Regents of the University of California (Regents). Copyright ©2019. All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. 
Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643- 7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. The raw datasets are generated from 3D object models from `3DNet`_ and `the KIT Object Database`_ that may be subject to copyright. .. _3DNet: https://repo.acin.tuwien.ac.at/tmp/permanent/3d-net.org/ .. _the KIT Object Database: https://h2t-projects.webarchiv.kit.edu/Projects/ObjectModelsWebUI/ ================================================ FILE: docs/source/replication/replication.rst ================================================ Replicating Results ~~~~~~~~~~~~~~~~~~~ Numerous publications in the larger `Dex-Net`_ project utilize GQ-CNNs, particularly `Dex-Net 2.0`_, `Dex-Net 2.1`_, `Dex-Net 3.0`_, `Dex-Net 4.0`_, and `FC-GQ-CNN`_. One of the goals of the `gqcnn` library is to provide the necessary code and instructions for replicating the results of these papers. Thus, we have provided a number of replication scripts under the `scripts/` directory. There are two ways to replicate results: #. **Use a pre-trained model:** Download a pre-trained GQ-CNN model and run an example policy. #. **Train from scratch:** Download the raw dataset, train a GQ-CNN model, and run an example policy using the model you just trained. 
**We highly encourage method 1.** Note that method 2 is computationally expensive as training takes roughly 24 hours on a Nvidia Titan Xp GPU. Furthermore, the raw datasets are fairly large in size. Please keep in mind that GQ-CNN models are sensitive to the following parameters used during dataset generation: #. The robot gripper #. The depth camera #. The distance between the camera and workspace. As a result, we cannot guarantee performance of our pre-trained models on other physical setups. For more information about the pre-trained models and sample inputs for the example policy, see :ref:`pre-trained-models` and :ref:`sample-inputs`. Using a Pre-trained Model ========================= First download the pre-trained models. :: $ ./scripts/downloads/models/download_models.sh Dex-Net 2.0 """"""""""" Evaluate the pre-trained GQ-CNN model. :: $ ./scripts/policies/run_all_dex-net_2.0_examples.sh Dex-Net 2.1 """"""""""" Evaluate the pre-trained GQ-CNN model. :: $ ./scripts/policies/run_all_dex-net_2.1_examples.sh Dex-Net 3.0 """"""""""" Evaluate the pre-trained GQ-CNN model. :: $ ./scripts/policies/run_all_dex-net_3.0_examples.sh Dex-Net 4.0 """"""""""" To evaluate the pre-trained **parallel jaw** GQ-CNN model. :: $ ./scripts/policies/run_all_dex-net_4.0_pj_examples.sh To evaluate the pre-trained **suction** GQ-CNN model. :: $ ./scripts/policies/run_all_dex-net_4.0_suction_examples.sh FC-GQ-CNN """"""""""" To evaluate the pre-trained **parallel jaw** FC-GQ-CNN model. :: $ ./scripts/policies/run_all_dex-net_4.0_fc_pj_examples.sh To evaluate the pre-trained **suction** FC-GQ-CNN model. :: $ ./scripts/policies/run_all_dex-net_4.0_fc_suction_examples.sh Training from Scratch ===================== Dex-Net 2.0 """"""""""" First download the appropriate dataset. :: $ ./scripts/downloads/datasets/download_dex-net_2.0.sh Then train a GQ-CNN from scratch. :: $ ./scripts/training/train_dex-net_2.0.sh Finally, evaluate the trained GQ-CNN. 
:: $ ./scripts/policies/run_all_dex-net_2.0_examples.sh Dex-Net 2.1 """"""""""" First download the appropriate dataset. :: $ ./scripts/downloads/datasets/download_dex-net_2.1.sh Then train a GQ-CNN from scratch. :: $ ./scripts/training/train_dex-net_2.1.sh Finally, evaluate the trained GQ-CNN. :: $ ./scripts/policies/run_all_dex-net_2.1_examples.sh Dex-Net 3.0 """"""""""" First download the appropriate dataset. :: $ ./scripts/downloads/datasets/download_dex-net_3.0.sh Then train a GQ-CNN from scratch. :: $ ./scripts/training/train_dex-net_3.0.sh Finally, evaluate the trained GQ-CNN. :: $ ./scripts/policies/run_all_dex-net_3.0_examples.sh Dex-Net 4.0 """"""""""" To replicate the `Dex-Net 4.0`_ **parallel jaw** results, first download the appropriate dataset. :: $ ./scripts/downloads/datasets/download_dex-net_4.0_pj.sh Then train a GQ-CNN from scratch. :: $ ./scripts/training/train_dex-net_4.0_pj.sh Finally, evaluate the trained GQ-CNN. :: $ ./scripts/policies/run_all_dex-net_4.0_pj_examples.sh To replicate the `Dex-Net 4.0`_ **suction** results, first download the appropriate dataset. :: $ ./scripts/downloads/datasets/download_dex-net_4.0_suction.sh Then train a GQ-CNN from scratch. :: $ ./scripts/training/train_dex-net_4.0_suction.sh Finally, evaluate the trained GQ-CNN. :: $ ./scripts/policies/run_all_dex-net_4.0_suction_examples.sh FC-GQ-CNN """"""""""" To replicate the `FC-GQ-CNN`_ **parallel jaw** results, first download the appropriate dataset. :: $ ./scripts/downloads/datasets/download_dex-net_4.0_fc_pj.sh Then train a FC-GQ-CNN from scratch. :: $ ./scripts/training/train_dex-net_4.0_fc_pj.sh Finally, evaluate the trained FC-GQ-CNN. :: $ ./scripts/policies/run_all_dex-net_4.0_fc_pj_examples.sh To replicate the `FC-GQ-CNN`_ **suction** results, first download the appropriate dataset. :: $ ./scripts/downloads/datasets/download_dex-net_4.0_fc_suction.sh Then train a FC-GQ-CNN from scratch. 
:: $ ./scripts/training/train_dex-net_4.0_fc_suction.sh Finally, evaluate the trained FC-GQ-CNN. :: $ ./scripts/policies/run_all_dex-net_4.0_fc_suction_examples.sh .. _Dex-Net: https://berkeleyautomation.github.io/dex-net/ .. _Dex-Net 2.0: https://berkeleyautomation.github.io/dex-net/#dexnet_2 .. _Dex-Net 2.1: https://berkeleyautomation.github.io/dex-net/#dexnet_21 .. _Dex-Net 3.0: https://berkeleyautomation.github.io/dex-net/#dexnet_3 .. _Dex-Net 4.0: https://berkeleyautomation.github.io/dex-net/#dexnet_4 .. _FC-GQ-CNN: https://berkeleyautomation.github.io/fcgqcnn .. _gqcnn: https://github.com/BerkeleyAutomation/gqcnn ================================================ FILE: docs/source/tutorials/analysis.rst ================================================ Analysis ~~~~~~~~ It is helpful to check the training and validation loss and classification errors to ensure that the network has trained successfully. To analyze the performance of a trained GQ-CNN, run: :: $ python tools/analyze_gqcnn_performance.py The args are: #. **model_name**: Name of a trained model. The script will store a detailed analysis in `analysis//`. To analyze the networks we just trained, run: :: $ python tools/analyze_gqcnn_performance.py gqcnn_example_pj $ python tools/analyze_gqcnn_performance.py gqcnn_example_suction Below is the expected output for the **parallel jaw** network. Please keep in mind that the exact performance values may change due to randomization in the training dataset and random weight initialization: :: $ GQCNNAnalyzer INFO TRAIN $ GQCNNAnalyzer INFO Original error: 36.812 $ GQCNNAnalyzer INFO Final error: 6.061 $ GQCNNAnalyzer INFO Orig loss: 0.763 $ GQCNNAnalyzer INFO Final loss: 0.248 $ GQCNNAnalyzer INFO VAL $ GQCNNAnalyzer INFO Original error: 32.212 $ GQCNNAnalyzer INFO Final error: 7.509 $ GQCNNAnalyzer INFO Normalized error: 0.233 A set of plots will be saved to `analysis/gqcnn_example_pj/`. 
The plots `training_error_rates.png` and `precision_recall.png` should look like the following: .. image:: ../images/plots/pj_error_rate.png :width: 49 % .. image:: ../images/plots/pj_roc.png :width: 49 % Here is the expected output for the **suction** network: :: $ GQCNNAnalyzer INFO TRAIN $ GQCNNAnalyzer INFO Original error: 17.844 $ GQCNNAnalyzer INFO Final error: 6.417 $ GQCNNAnalyzer INFO Orig loss: 0.476 $ GQCNNAnalyzer INFO Final loss: 0.189 $ GQCNNAnalyzer INFO VAL $ GQCNNAnalyzer INFO Original error: 18.036 $ GQCNNAnalyzer INFO Final error: 6.907 $ GQCNNAnalyzer INFO Normalized error: 0.383 A set of plots will be saved to `analysis/gqcnn_example_suction/`. The plots `training_error_rates.png` and `precision_recall.png` should look like the following: .. image:: ../images/plots/suction_error_rate.png :width: 49 % .. image:: ../images/plots/suction_roc.png :width: 49 % ================================================ FILE: docs/source/tutorials/planning.rst ================================================ Grasp Planning ~~~~~~~~~~~~~~ Grasp planning involves searching for the grasp with the highest predicted probability of success given a point cloud. In the `gqcnn` package this is implemented as policies that map an RGBD image to a 6-DOF grasping pose by maximizing the output of a GQ-CNN. The maximization can be implemented with iterative methods such as the `Cross Entropy Method (CEM)`_, which is used in `Dex-Net 2.0`_, `Dex-Net 2.1`_, `Dex-Net 3.0`_, `Dex-Net 4.0`_, or much faster `fully convolutional networks`, which are used in the `FC-GQ-CNN`_. .. _Cross Entropy Method (CEM): https://en.wikipedia.org/wiki/Cross-entropy_method .. _Dex-Net 2.0: https://berkeleyautomation.github.io/dex-net/#dexnet_2 .. _Dex-Net 2.1: https://berkeleyautomation.github.io/dex-net/#dexnet_21 .. _Dex-Net 3.0: https://berkeleyautomation.github.io/dex-net/#dexnet_3 .. _Dex-Net 4.0: https://berkeleyautomation.github.io/dex-net/#dexnet_4 .. 
_FC-GQ-CNN: https://berkeleyautomation.github.io/fcgqcnn We provide example policies in `examples/`. In particular, we provide both an example Python policy and an example ROS policy. **Note that the ROS policy requires the ROS gqcnn installation**, which can be found :ref:`here `. We highly recommend using the Python policy unless you need to plan grasps on a physical robot using ROS. .. _sample-inputs: Sample Inputs ------------- Sample inputs from our experimental setup are provided with the repo: #. **data/examples/clutter/phoxi/dex-net_4.0**: Set of example images from a PhotoNeo PhoXi S containing objects used in `Dex-Net 4.0`_ experiments arranged in heaps. #. **data/examples/clutter/phoxi/fcgqcnn**: Set of example images from a PhotoNeo PhoXi S containing objects in `FC-GQ-CNN`_ experiments arranged in heaps. #. **data/examples/single_object/primesense/**: Set of example images from a Primesense Carmine containing objects used in `Dex-Net 2.0`_ experiments in singulation. #. **data/examples/clutter/primesense/**: Set of example images from a Primesense Carmine containing objects used in `Dex-Net 2.1`_ experiments arranged in heaps. **\*\*Note that when trying these sample inputs, you must make sure that the GQ-CNN model you are using was trained for the corresponding camera and input type (singulation/clutter). See the following section for more details.\*\*** .. _pre-trained-models: Pre-trained Models ------------------ Pre-trained parallel jaw and suction models for `Dex-Net 4.0`_ are automatically downloaded with the `gqcnn` package installation. If you do wish to try out models for older results (or for our experimental `FC-GQ-CNN`_), all pre-trained models can be downloaded with: :: $ ./scripts/downloads/models/download_models.sh The models are: #. **GQCNN-2.0**: For `Dex-Net 2.0`_, trained on images of objects in singulation with parameters for a Primesense Carmine. #. 
**GQCNN-2.1**: For `Dex-Net 2.1`_, a `Dex-Net 2.0`_ model fine-tuned on images of objects in clutter with parameters for a Primesense Carmine. #. **GQCNN-3.0**: For `Dex-Net 3.0`_, trained on images of objects in clutter with parameters for a Primesense Carmine. #. **GQCNN-4.0-PJ**: For `Dex-Net 4.0`_, trained on images of objects in clutter with parameters for a PhotoNeo PhoXi S. #. **GQCNN-4.0-SUCTION**: For `Dex-Net 4.0`_, trained on images of objects in clutter with parameters for a PhotoNeo PhoXi S. #. **FC-GQCNN-4.0-PJ**: For `FC-GQ-CNN`_, trained on images of objects in clutter with parameters for a PhotoNeo PhoXi S. #. **FC-GQCNN-4.0-SUCTION**: For `FC-GQ-CNN`_, trained on images of objects in clutter with parameters for a PhotoNeo PhoXi S. **\*\*Note that GQ-CNN models are sensitive to the parameters used during dataset generation, specifically 1) Gripper geometry, an ABB YuMi Parallel Jaw Gripper for all our pre-trained models 2) Camera intrinsics, either a Primesense Carmine or PhotoNeo Phoxi S for all our pre-trained models (see above for which one) 3) Distance between camera and workspace during rendering, 50-70cm for all our pre-trained models. Thus we cannot guarantee performance of our pre-trained models on other physical setups. If you have a specific use-case in mind, please reach out to us.\*\*** We are actively researching how to generate more robust datasets that can generalize across robots, cameras, and viewpoints! Python Policy ------------- The example Python policy can be queried on saved images using: :: $ python examples/policy.py --depth_image --segmask --camera_intr The args are: #. **model_name**: Name of the GQ-CNN model to use. #. **depth_image_filename**: Path to a depth image (float array in .npy format). #. **segmask_filename**: Path to an object segmentation mask (binary image in .png format). #. 
**camera_intr_filename**: Path to a camera intrinsics file (.intr file generated with `BerkeleyAutomation's`_ `autolab_core`_ package). .. _BerkeleyAutomation's: https://github.com/BerkeleyAutomation .. _autolab_core: https://github.com/BerkeleyAutomation/autolab_core To evaluate the pre-trained `Dex-Net 4.0`_ **parallel jaw** network on sample images of objects in heaps run: :: $ python examples/policy.py GQCNN-4.0-PJ --depth_image data/examples/clutter/phoxi/dex-net_4.0/depth_0.npy --segmask data/examples/clutter/phoxi/dex-net_4.0/segmask_0.png --camera_intr data/calib/phoxi/phoxi.intr To evaluate the pre-trained `Dex-Net 4.0`_ **suction** network on sample images of objects in heaps run: :: $ python examples/policy.py GQCNN-4.0-SUCTION --depth_image data/examples/clutter/phoxi/dex-net_4.0/depth_0.npy --segmask data/examples/clutter/phoxi/dex-net_4.0/segmask_0.png --camera_intr data/calib/phoxi/phoxi.intr .. _ros-policy: ROS Policy ---------- First start the grasp planning service: :: $ roslaunch gqcnn grasp_planning_service.launch model_name:= The args are: #. **model_name**: Name of the GQ-CNN model to use. Default is **GQCNN-4.0-PJ**. #. **model_dir**: Path to the directory where the GQ-CNN models are located. Default is **models/**. If you are using the provided **download_models.sh** script, you shouldn't have to modify this. To start the grasp planning service with the pre-trained `Dex-Net 4.0`_ **parallel jaw** network run: :: $ roslaunch gqcnn grasp_planning_service.launch model_name:=GQCNN-4.0-PJ To start the grasp planning service with the pre-trained `Dex-Net 4.0`_ **suction** network run: :: $ roslaunch gqcnn grasp_planning_service.launch model_name:=GQCNN-4.0-SUCTION The example ROS policy can then be queried on saved images using: :: $ python examples/policy_ros.py --depth_image --segmask --camera_intr The args are: #. **depth_image_filename**: Path to a depth image (float array in .npy format). #. 
**segmask_filename**: Path to an object segmentation mask (binary image in .png format). #. **camera_intr_filename**: Path to a camera intrinsics file (.intr file generated with `BerkeleyAutomation's`_ `autolab_core`_ package). To query the policy on sample images of objects in heaps run: :: $ python examples/policy_ros.py --depth_image data/examples/clutter/phoxi/dex-net_4.0/depth_0.npy --segmask data/examples/clutter/phoxi/dex-net_4.0/segmask_0.png --camera_intr data/calib/phoxi/phoxi.intr Usage on a Physical Robot with ROS ---------------------------------- To run the GQ-CNN on a physical robot with ROS, you will want to implement your own ROS node to query the grasp planning service similar to what `examples/policy_ros.py` does. If you are interested in replicating this functionality on your own robot, please contact Jeff Mahler (jmahler@berkeley.edu) with the subject line: "Interested in GQ-CNN ROS Service". FC-GQ-CNN Policy ---------------- Our most recent research result, the `FC-GQ-CNN`_, combines novel fully convolutional network architectures with our prior work on GQ-CNNs to increase policy rate and reliability. Instead of relying on the `Cross Entropy Method (CEM)`_ to iteratively search over the policy action space for the best grasp, the FC-GQ-CNN instead densely and efficiently evaluates the entire action space in parallel. It is thus able to consider 5000x more grasps in 0.625s, resulting in a MPPH (Mean Picks Per Hour) of 296, compared to the prior 250 MPPH of `Dex-Net 4.0`_. .. figure:: ../images/fcgqcnn_arch_diagram.png :width: 100 % :align: center FC-GQ-CNN architecture. You can download the pre-trained `FC-GQ-CNN`_ parallel jaw and suction models along with the other pre-trained models: :: $ ./scripts/downloads/models/download_models.sh Then run the Python policy with the `\\--fully_conv` flag. 
To evaluate the pre-trained `FC-GQ-CNN`_ **parallel jaw** network on sample images of objects in heaps run: :: $ python examples/policy.py FC-GQCNN-4.0-PJ --fully_conv --depth_image data/examples/clutter/phoxi/fcgqcnn/depth_0.npy --segmask data/examples/clutter/phoxi/fcgqcnn/segmask_0.png --camera_intr data/calib/phoxi/phoxi.intr To evaluate the pre-trained `FC-GQ-CNN`_ **suction** network on sample images of objects in heaps run: :: $ python examples/policy.py FC-GQCNN-4.0-SUCTION --fully_conv --depth_image data/examples/clutter/phoxi/fcgqcnn/depth_0.npy --segmask data/examples/clutter/phoxi/fcgqcnn/segmask_0.png --camera_intr data/calib/phoxi/phoxi.intr With ROS ^^^^^^^^ Review the section on using the normal ROS policy first, which can be found :ref:`here `. Add the additional arg **fully_conv:=True** when launching the grasp planning service and provide the corresponding network (**FC-GQCNN-4.0-PJ** for **parallel jaw** and **FC-GQCNN-4.0-SUCTION** for **suction**). If you wish to test on inputs other than those provided in `data/examples/clutter/phoxi/fcgqcnn/`, you will need to edit the input height and width configuration in the appropriate `cfg/examples/` under `["policy"]["metric"]["fully_conv_gqcnn_config"]`. ================================================ FILE: docs/source/tutorials/training.rst ================================================ Training ~~~~~~~~ The `gqcnn` package can be used to train a `Dex-Net 4.0`_ GQ-CNN model on a custom offline `Dex-Net`_ dataset. Because training from scratch can be time-consuming, the most efficient way to train a new network is to fine-tune the weights of a pre-trained `Dex-Net 4.0`_ GQ-CNN model, which has already been trained on millions of images. .. _Dex-Net 4.0: https://berkeleyautomation.github.io/dex-net/#dexnet_4 .. _Dex-Net: https://berkeleyautomation.github.io/dex-net/ To fine-tune a GQ-CNN run: :: $ python tools/finetune.py --config_filename --name The args are: #. 
**training_dataset_path**: Path to the training dataset. #. **pretrained_network_name**: Name of pre-trained GQ-CNN. #. **config_filename**: Name of the config file to use. #. **model_name**: Name for the model. To train a GQ-CNN for a **parallel jaw** gripper on a sample dataset, run the fine-tuning script: :: $ python tools/finetune.py data/training/example_pj/ GQCNN-4.0-PJ --config_filename cfg/finetune_example_pj.yaml --name gqcnn_example_pj To train a GQ-CNN for a **suction** gripper run: :: $ python tools/finetune.py data/training/example_suction/ GQCNN-4.0-SUCTION --config_filename cfg/finetune_example_suction.yaml --name gqcnn_example_suction Visualizing Training -------------------- The `gqcnn` model contains support for visualizing training progress through Tensorboard. Tensorboard is automatically launched when the training script is run and can be accessed by navigating to **localhost:6006** in a web browser. There you will find something like the following: .. image:: ../images/tensorboard.png :width: 100 % Which displays useful training statistics such as validation error, minibatch loss, and learning rate. The Tensorflow summaries are stored in `models//tensorboard_summaries/`. ================================================ FILE: docs/source/tutorials/tutorial.rst ================================================ Overview ~~~~~~~~ There are two main use cases of the `gqcnn` package: #. :ref:`training` a `Dex-Net 4.0`_ GQ-CNN model on an offline `Dex-Net`_ dataset of point clouds, grasps, and grasp success metrics, and then grasp planning on RGBD images. #. :ref:`grasp planning` on RGBD images using a pre-trained `Dex-Net 4.0`_ GQ-CNN model. .. _Dex-Net 4.0: https://berkeleyautomation.github.io/dex-net/#dexnet_4 .. _Dex-Net: https://berkeleyautomation.github.io/dex-net/ Click on the links or scroll down to get started! 
Prerequisites ------------- Before running the tutorials please download the example models and datasets: :: $ cd /path/to/your/gqcnn $ ./scripts/downloads/download_example_data.sh $ ./scripts/downloads/models/download_models.sh Running Python Scripts ---------------------- All `gqcnn` Python scripts are designed to be run from the top-level directory of your `gqcnn` repo by default. This is because every script takes in a YAML file specifying parameters for the script, and this YAML file is stored relative to the repository root directory. We recommend that you run all scripts using this paradigm: :: cd /path/to/your/gqcnn python /path/to/script.py .. _training: .. include:: training.rst .. _analysis: .. include:: analysis.rst .. _grasp planning: .. include:: planning.rst ================================================ FILE: examples/antipodal_grasp_sampling.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Demonstrates image-based antipodal grasp candidate sampling, which is used in the Cross Entropy Method (CEM)-based GQ-CNN grasping policy. Samples images from a BerkeleyAutomation/perception `RgbdSensor`. Author ------ Jeff Mahler """ import argparse import os from autolab_core import RigidTransform, YamlConfig, Logger, RgbdImage from perception import RgbdSensorFactory from visualization import Visualizer2D as vis from gqcnn.grasping import AntipodalDepthImageGraspSampler # Set up logger. logger = Logger.get_logger("examples/antipodal_grasp_sampling.py") if __name__ == "__main__": # Parse args. parser = argparse.ArgumentParser(description=( "Sample antipodal grasps on a depth image from an RgbdSensor")) parser.add_argument("--config_filename", type=str, default="cfg/examples/antipodal_grasp_sampling.yaml", help="path to configuration file to use") args = parser.parse_args() config_filename = args.config_filename # Read config. config = YamlConfig(config_filename) sensor_type = config["sensor"]["type"] sensor_frame = config["sensor"]["frame"] num_grasp_samples = config["num_grasp_samples"] gripper_width = config["gripper_width"] inpaint_rescale_factor = config["inpaint_rescale_factor"] visualize_sampling = config["visualize_sampling"] sample_config = config["sampling"] # Read camera calib. tf_filename = "%s_to_world.tf" % (sensor_frame) T_camera_world = RigidTransform.load( os.path.join(config["calib_dir"], sensor_frame, tf_filename)) # Setup sensor. sensor = RgbdSensorFactory.sensor(sensor_type, config["sensor"]) sensor.start() camera_intr = sensor.ir_intrinsics # Read images. 
color_im, depth_im, _ = sensor.frames() color_im = color_im.inpaint(rescale_factor=inpaint_rescale_factor) depth_im = depth_im.inpaint(rescale_factor=inpaint_rescale_factor) rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im) # Sample grasps. grasp_sampler = AntipodalDepthImageGraspSampler(sample_config, gripper_width) grasps = grasp_sampler.sample(rgbd_im, camera_intr, num_grasp_samples, segmask=None, seed=100, visualize=visualize_sampling) # Visualize. vis.figure() vis.imshow(depth_im) for grasp in grasps: vis.grasp(grasp, scale=1.5, show_center=False, show_axis=True) vis.title("Sampled grasps") vis.show() ================================================ FILE: examples/policy.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". 
REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Displays robust grasps planned using a GQ-CNN grapsing policy on a set of saved RGB-D images. The default configuration for the standard GQ-CNN policy is `cfg/examples/cfg/examples/gqcnn_pj.yaml`. The default configuration for the Fully-Convolutional GQ-CNN policy is `cfg/examples/fc_gqcnn_pj.yaml`. Author ------ Jeff Mahler & Vishal Satish """ import argparse import json import os import time import numpy as np from autolab_core import (YamlConfig, Logger, BinaryImage, CameraIntrinsics, ColorImage, DepthImage, RgbdImage) from visualization import Visualizer2D as vis from gqcnn.grasping import (RobustGraspingPolicy, CrossEntropyRobustGraspingPolicy, RgbdImageState, FullyConvolutionalGraspingPolicyParallelJaw, FullyConvolutionalGraspingPolicySuction) from gqcnn.utils import GripperMode # Set up logger. logger = Logger.get_logger("examples/policy.py") if __name__ == "__main__": # Parse args. 
    # Command-line interface: only `model_name` is positional; every other
    # input falls back to a repo-relative default resolved below.
    parser = argparse.ArgumentParser(
        description="Run a grasping policy on an example image")
    parser.add_argument("model_name",
                        type=str,
                        default=None,
                        help="name of a trained model to run")
    parser.add_argument(
        "--depth_image",
        type=str,
        default=None,
        help="path to a test depth image stored as a .npy file")
    parser.add_argument("--segmask",
                        type=str,
                        default=None,
                        help="path to an optional segmask to use")
    parser.add_argument("--camera_intr",
                        type=str,
                        default=None,
                        help="path to the camera intrinsics")
    parser.add_argument("--model_dir",
                        type=str,
                        default=None,
                        help="path to the folder in which the model is stored")
    parser.add_argument("--config_filename",
                        type=str,
                        default=None,
                        help="path to configuration file to use")
    parser.add_argument(
        "--fully_conv",
        action="store_true",
        help=("run Fully-Convolutional GQ-CNN policy instead of standard"
              " GQ-CNN policy"))
    args = parser.parse_args()
    model_name = args.model_name
    depth_im_filename = args.depth_image
    segmask_filename = args.segmask
    camera_intr_filename = args.camera_intr
    model_dir = args.model_dir
    config_filename = args.config_filename
    fully_conv = args.fully_conv

    # A custom depth image supplied with --fully_conv must come with a
    # segmask; the defaults below pair a matching segmask automatically.
    assert not (fully_conv and depth_im_filename is not None
                and segmask_filename is None
                ), "Fully-Convolutional policy expects a segmask."
    # Default inputs are resolved relative to this script's location so the
    # example can be launched from any working directory.
    if depth_im_filename is None:
        if fully_conv:
            depth_im_filename = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "..",
                "data/examples/clutter/primesense/depth_0.npy")
        else:
            depth_im_filename = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "..",
                "data/examples/single_object/primesense/depth_0.npy")
    if fully_conv and segmask_filename is None:
        segmask_filename = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..",
            "data/examples/clutter/primesense/segmask_0.png")
    if camera_intr_filename is None:
        camera_intr_filename = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..",
            "data/calib/primesense/primesense.intr")

    # Set model if provided.
    # Default to the repo-local `models/` directory when --model_dir is not
    # given; the model itself lives in a subdirectory named after the model.
    if model_dir is None:
        model_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 "../models")
    model_path = os.path.join(model_dir, model_name)

    # Get configs.
    model_config = json.load(open(os.path.join(model_path, "config.json"),
                                  "r"))
    try:
        # Newer models store the gripper mode directly under "gqcnn".
        gqcnn_config = model_config["gqcnn"]
        gripper_mode = gqcnn_config["gripper_mode"]
    except KeyError:
        # Legacy models only record an input data mode under "gqcnn_config";
        # map it to the corresponding gripper mode.
        gqcnn_config = model_config["gqcnn_config"]
        input_data_mode = gqcnn_config["input_data_mode"]
        if input_data_mode == "tf_image":
            gripper_mode = GripperMode.LEGACY_PARALLEL_JAW
        elif input_data_mode == "tf_image_suction":
            gripper_mode = GripperMode.LEGACY_SUCTION
        elif input_data_mode == "suction":
            gripper_mode = GripperMode.SUCTION
        elif input_data_mode == "multi_suction":
            gripper_mode = GripperMode.MULTI_SUCTION
        elif input_data_mode == "parallel_jaw":
            gripper_mode = GripperMode.PARALLEL_JAW
        else:
            raise ValueError(
                "Input data mode {} not supported!".format(input_data_mode))

    # Set config.
    # Pick a default policy configuration matching the gripper type and
    # whether the fully-convolutional variant was requested.
    if config_filename is None:
        if (gripper_mode == GripperMode.LEGACY_PARALLEL_JAW
                or gripper_mode == GripperMode.PARALLEL_JAW):
            if fully_conv:
                config_filename = os.path.join(
                    os.path.dirname(os.path.realpath(__file__)), "..",
                    "cfg/examples/fc_gqcnn_pj.yaml")
            else:
                config_filename = os.path.join(
                    os.path.dirname(os.path.realpath(__file__)), "..",
                    "cfg/examples/gqcnn_pj.yaml")
        elif (gripper_mode == GripperMode.LEGACY_SUCTION
              or gripper_mode == GripperMode.SUCTION):
            if fully_conv:
                config_filename = os.path.join(
                    os.path.dirname(os.path.realpath(__file__)), "..",
                    "cfg/examples/fc_gqcnn_suction.yaml")
            else:
                config_filename = os.path.join(
                    os.path.dirname(os.path.realpath(__file__)), "..",
                    "cfg/examples/gqcnn_suction.yaml")

    # Read config.
    config = YamlConfig(config_filename)
    inpaint_rescale_factor = config["inpaint_rescale_factor"]
    policy_config = config["policy"]

    # Make relative paths absolute.
if "gqcnn_model" in policy_config["metric"]: policy_config["metric"]["gqcnn_model"] = model_path if not os.path.isabs(policy_config["metric"]["gqcnn_model"]): policy_config["metric"]["gqcnn_model"] = os.path.join( os.path.dirname(os.path.realpath(__file__)), "..", policy_config["metric"]["gqcnn_model"]) # Setup sensor. camera_intr = CameraIntrinsics.load(camera_intr_filename) # Read images. depth_data = np.load(depth_im_filename) depth_im = DepthImage(depth_data, frame=camera_intr.frame) color_im = ColorImage(np.zeros([depth_im.height, depth_im.width, 3]).astype(np.uint8), frame=camera_intr.frame) # Optionally read a segmask. segmask = None if segmask_filename is not None: segmask = BinaryImage.open(segmask_filename) valid_px_mask = depth_im.invalid_pixel_mask().inverse() if segmask is None: segmask = valid_px_mask else: segmask = segmask.mask_binary(valid_px_mask) # Inpaint. depth_im = depth_im.inpaint(rescale_factor=inpaint_rescale_factor) if "input_images" in policy_config["vis"] and policy_config["vis"][ "input_images"]: vis.figure(size=(10, 10)) num_plot = 1 if segmask is not None: num_plot = 2 vis.subplot(1, num_plot, 1) vis.imshow(depth_im) if segmask is not None: vis.subplot(1, num_plot, 2) vis.imshow(segmask) vis.show() # Create state. rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im) state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask) # Set input sizes for fully-convolutional policy. if fully_conv: policy_config["metric"]["fully_conv_gqcnn_config"][ "im_height"] = depth_im.shape[0] policy_config["metric"]["fully_conv_gqcnn_config"][ "im_width"] = depth_im.shape[1] # Init policy. if fully_conv: # TODO(vsatish): We should really be doing this in some factory policy. 
if policy_config["type"] == "fully_conv_suction": policy = FullyConvolutionalGraspingPolicySuction(policy_config) elif policy_config["type"] == "fully_conv_pj": policy = FullyConvolutionalGraspingPolicyParallelJaw(policy_config) else: raise ValueError( "Invalid fully-convolutional policy type: {}".format( policy_config["type"])) else: policy_type = "cem" if "type" in policy_config: policy_type = policy_config["type"] if policy_type == "ranking": policy = RobustGraspingPolicy(policy_config) elif policy_type == "cem": policy = CrossEntropyRobustGraspingPolicy(policy_config) else: raise ValueError("Invalid policy type: {}".format(policy_type)) # Query policy. policy_start = time.time() action = policy(state) logger.info("Planning took %.3f sec" % (time.time() - policy_start)) # Vis final grasp. if policy_config["vis"]["final_grasp"]: vis.figure(size=(10, 10)) vis.imshow(rgbd_im.depth, vmin=policy_config["vis"]["vmin"], vmax=policy_config["vis"]["vmax"]) vis.grasp(action.grasp, scale=2.5, show_center=False, show_axis=True) vis.title("Planned grasp at depth {0:.3f}m with Q={1:.3f}".format( action.grasp.depth, action.q_value)) vis.show() ================================================ FILE: examples/policy_ros.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. 
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Displays robust grasps planned using a GQ-CNN-based policy on a set of saved RGB-D images. The default configuration is cfg/examples/policy.yaml. Author ------ Jeff Mahler """ import argparse import logging import numpy as np import os import rosgraph.roslogging as rl import rospy import sys from cv_bridge import CvBridge, CvBridgeError from autolab_core import (Point, Logger, BinaryImage, CameraIntrinsics, ColorImage, DepthImage) from visualization import Visualizer2D as vis from gqcnn.grasping import Grasp2D, SuctionPoint2D, GraspAction from gqcnn.msg import GQCNNGrasp from gqcnn.srv import GQCNNGraspPlanner, GQCNNGraspPlannerSegmask # Set up logger. logger = Logger.get_logger("examples/policy_ros.py") if __name__ == "__main__": # Parse args. 
parser = argparse.ArgumentParser( description="Run a grasping policy on an example image") parser.add_argument( "--depth_image", type=str, default=None, help="path to a test depth image stored as a .npy file") parser.add_argument("--segmask", type=str, default=None, help="path to an optional segmask to use") parser.add_argument("--camera_intr", type=str, default=None, help="path to the camera intrinsics") parser.add_argument("--gripper_width", type=float, default=0.05, help="width of the gripper to plan for") parser.add_argument("--namespace", type=str, default="gqcnn", help="namespace of the ROS grasp planning service") parser.add_argument("--vis_grasp", type=bool, default=True, help="whether or not to visualize the grasp") args = parser.parse_args() depth_im_filename = args.depth_image segmask_filename = args.segmask camera_intr_filename = args.camera_intr gripper_width = args.gripper_width namespace = args.namespace vis_grasp = args.vis_grasp # Initialize the ROS node. rospy.init_node("grasp_planning_example") logging.getLogger().addHandler(rl.RosStreamHandler()) # Setup filenames. if depth_im_filename is None: depth_im_filename = os.path.join( os.path.dirname(os.path.realpath(__file__)), "..", "data/examples/single_object/primesense/depth_0.npy") if camera_intr_filename is None: camera_intr_filename = os.path.join( os.path.dirname(os.path.realpath(__file__)), "..", "data/calib/primesense/primesense.intr") # Wait for grasp planning service and create service proxy. rospy.wait_for_service("%s/grasp_planner" % (namespace)) rospy.wait_for_service("%s/grasp_planner_segmask" % (namespace)) plan_grasp = rospy.ServiceProxy("%s/grasp_planner" % (namespace), GQCNNGraspPlanner) plan_grasp_segmask = rospy.ServiceProxy( "%s/grasp_planner_segmask" % (namespace), GQCNNGraspPlannerSegmask) cv_bridge = CvBridge() # Set up sensor. camera_intr = CameraIntrinsics.load(camera_intr_filename) # Read images. 
    # Load the depth image; substitute a black color image since the planner
    # request requires one but no color data is available.
    depth_im = DepthImage.open(depth_im_filename, frame=camera_intr.frame)
    color_im = ColorImage(np.zeros([depth_im.height, depth_im.width,
                                    3]).astype(np.uint8),
                          frame=camera_intr.frame)

    # Read segmask and call the matching planning service (with or without a
    # segmask).
    if segmask_filename is not None:
        segmask = BinaryImage.open(segmask_filename, frame=camera_intr.frame)
        grasp_resp = plan_grasp_segmask(color_im.rosmsg, depth_im.rosmsg,
                                        camera_intr.rosmsg, segmask.rosmsg)
    else:
        grasp_resp = plan_grasp(color_im.rosmsg, depth_im.rosmsg,
                                camera_intr.rosmsg)
    grasp = grasp_resp.grasp

    # Convert the ROS grasp message to a grasp action.
    grasp_type = grasp.grasp_type
    if grasp_type == GQCNNGrasp.PARALLEL_JAW:
        center = Point(np.array([grasp.center_px[0], grasp.center_px[1]]),
                       frame=camera_intr.frame)
        grasp_2d = Grasp2D(center,
                           grasp.angle,
                           grasp.depth,
                           width=gripper_width,
                           camera_intr=camera_intr)
    elif grasp_type == GQCNNGrasp.SUCTION:
        center = Point(np.array([grasp.center_px[0], grasp.center_px[1]]),
                       frame=camera_intr.frame)
        # Suction axis is fixed to the camera optical (+z) axis here.
        grasp_2d = SuctionPoint2D(center,
                                  np.array([0, 0, 1]),
                                  grasp.depth,
                                  camera_intr=camera_intr)
    else:
        raise ValueError("Grasp type %d not recognized!" % (grasp_type))
    # Decode the grasp thumbnail image; exit on conversion failure.
    try:
        thumbnail = DepthImage(cv_bridge.imgmsg_to_cv2(
            grasp.thumbnail, desired_encoding="passthrough"),
                               frame=camera_intr.frame)
    except CvBridgeError as e:
        logger.error(e)
        logger.error("Failed to convert image")
        sys.exit(1)
    action = GraspAction(grasp_2d, grasp.q_value, thumbnail)

    # Vis final grasp. The vmin/vmax depth window is hard-coded for the
    # example Primesense data.
    if vis_grasp:
        vis.figure(size=(10, 10))
        vis.imshow(depth_im, vmin=0.6, vmax=0.9)
        vis.grasp(action.grasp, scale=2.5, show_center=False, show_axis=True)
        vis.title("Planned grasp on depth (Q=%.3f)" % (action.q_value))
        vis.show()



================================================
FILE: examples/policy_with_image_proc.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved.
Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Displays robust grasps planned using a GQ-CNN-based policy on a set of saved RGB-D images. The default configuration is cfg/examples/policy.yaml. Author ------ Jeff Mahler """ import argparse import os import time import numpy as np import pcl import skimage from autolab_core import (PointCloud, RigidTransform, YamlConfig, Logger, BinaryImage, CameraIntrinsics, ColorImage, DepthImage, RgbdImage, SegmentationImage) from visualization import Visualizer2D as vis from gqcnn import (RobustGraspingPolicy, CrossEntropyRobustGraspingPolicy, RgbdImageState) CLUSTER_TOL = 0.0015 MIN_CLUSTER_SIZE = 100 MAX_CLUSTER_SIZE = 1000000 # Set up logger. logger = Logger.get_logger("tools/policy_with_image_proc.py") if __name__ == "__main__": # Parse args. 
parser = argparse.ArgumentParser( description="Run a grasping policy on an example image") parser.add_argument( "--depth_image", type=str, default=None, help="path to a test depth image stored as a .npy file") parser.add_argument("--segmask", type=str, default=None, help="path to an optional segmask to use") parser.add_argument("--camera_intrinsics", type=str, default=None, help="path to the camera intrinsics") parser.add_argument("--camera_pose", type=str, default=None, help="path to the camera pose") parser.add_argument("--model_dir", type=str, default=None, help="path to a trained model to run") parser.add_argument("--config_filename", type=str, default=None, help="path to configuration file to use") args = parser.parse_args() depth_im_filename = args.depth_image segmask_filename = args.segmask camera_intr_filename = args.camera_intrinsics camera_pose_filename = args.camera_pose model_dir = args.model_dir config_filename = args.config_filename if depth_im_filename is None: depth_im_filename = os.path.join( os.path.dirname(os.path.realpath(__file__)), "..", "data/examples/single_object/depth_0.npy") if camera_intr_filename is None: camera_intr_filename = os.path.join( os.path.dirname(os.path.realpath(__file__)), "..", "data/calib/primesense.intr") if camera_pose_filename is None: camera_pose_filename = os.path.join( os.path.dirname(os.path.realpath(__file__)), "..", "data/calib/primesense.tf") if config_filename is None: config_filename = os.path.join( os.path.dirname(os.path.realpath(__file__)), "..", "cfg/examples/replication/dex-net_2.0.yaml") # Read config. config = YamlConfig(config_filename) inpaint_rescale_factor = config["inpaint_rescale_factor"] policy_config = config["policy"] # Make relative paths absolute. 
    # Override the model path from the CLI if given, then make it absolute
    # (relative paths are interpreted w.r.t. the repo root).
    if model_dir is not None:
        policy_config["metric"]["gqcnn_model"] = model_dir
    if "gqcnn_model" in policy_config["metric"] and not os.path.isabs(
            policy_config["metric"]["gqcnn_model"]):
        policy_config["metric"]["gqcnn_model"] = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..",
            policy_config["metric"]["gqcnn_model"])

    # Setup sensor.
    camera_intr = CameraIntrinsics.load(camera_intr_filename)
    T_camera_world = RigidTransform.load(camera_pose_filename)

    # Read images. The raw .npy is assumed to be depth in millimeters and is
    # converted to meters here — TODO confirm against the data source.
    depth_data = np.load(depth_im_filename)
    depth_data = depth_data.astype(np.float32) / 1000.0
    depth_im = DepthImage(depth_data, frame=camera_intr.frame)
    color_im = ColorImage(np.zeros([depth_im.height, depth_im.width,
                                    3]).astype(np.uint8),
                          frame=camera_intr.frame)

    # Optionally read a segmask. The default is a hard-coded polygonal
    # workspace region (pixel coordinates tuned for the example setup).
    mask = np.zeros((camera_intr.height, camera_intr.width, 1),
                    dtype=np.uint8)
    c = np.array([165, 460, 500, 135])  # Polygon column (x) coordinates.
    r = np.array([165, 165, 370, 370])  # Polygon row (y) coordinates.
    rr, cc = skimage.draw.polygon(r, c, shape=mask.shape)
    mask[rr, cc, 0] = 255
    segmask = BinaryImage(mask)
    if segmask_filename is not None:
        segmask = BinaryImage.open(segmask_filename)
    # Always mask out invalid (zero/missing) depth pixels.
    valid_px_mask = depth_im.invalid_pixel_mask().inverse()
    if segmask is None:
        # NOTE(review): segmask is always set above, so this branch appears
        # unreachable; kept for safety.
        segmask = valid_px_mask
    else:
        segmask = segmask.mask_binary(valid_px_mask)

    # Create new cloud by deprojecting the depth image.
    point_cloud = camera_intr.deproject(depth_im)
    point_cloud.remove_zero_points()
    pcl_cloud = pcl.PointCloud(point_cloud.data.T.astype(np.float32))
    tree = pcl_cloud.make_kdtree()

    # Find large clusters (likely to be real objects instead of noise) via
    # Euclidean cluster extraction; tolerances are module-level constants.
    ec = pcl_cloud.make_EuclideanClusterExtraction()
    ec.set_ClusterTolerance(CLUSTER_TOL)
    ec.set_MinClusterSize(MIN_CLUSTER_SIZE)
    ec.set_MaxClusterSize(MAX_CLUSTER_SIZE)
    ec.set_SearchMethod(tree)
    cluster_indices = ec.Extract()
    num_clusters = len(cluster_indices)
    obj_segmask_data = np.zeros(depth_im.shape)

    # Read out all points in large clusters.
cur_i = 0 for j, indices in enumerate(cluster_indices): num_points = len(indices) points = np.zeros([3, num_points]) for i, index in enumerate(indices): points[0, i] = pcl_cloud[index][0] points[1, i] = pcl_cloud[index][1] points[2, i] = pcl_cloud[index][2] segment = PointCloud(points, frame=camera_intr.frame) depth_segment = camera_intr.project_to_image(segment) obj_segmask_data[depth_segment.data > 0] = j + 1 obj_segmask = SegmentationImage(obj_segmask_data.astype(np.uint8)) obj_segmask = obj_segmask.mask_binary(segmask) # Inpaint. depth_im = depth_im.inpaint(rescale_factor=inpaint_rescale_factor) if "input_images" in policy_config["vis"] and policy_config["vis"][ "input_images"]: vis.figure(size=(10, 10)) num_plot = 3 vis.subplot(1, num_plot, 1) vis.imshow(depth_im) vis.subplot(1, num_plot, 2) vis.imshow(segmask) vis.subplot(1, num_plot, 3) vis.imshow(obj_segmask) vis.show() from visualization import Visualizer3D as vis3d point_cloud = camera_intr.deproject(depth_im) vis3d.figure() vis3d.points(point_cloud, subsample=3, random=True, color=(0, 0, 1), scale=0.001) vis3d.pose(RigidTransform()) vis3d.pose(T_camera_world.inverse()) vis3d.show() # Create state. rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im) state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask) # Init policy. policy_type = "cem" if "type" in policy_config: policy_type = policy_config["type"] if policy_type == "ranking": policy = RobustGraspingPolicy(policy_config) else: policy = CrossEntropyRobustGraspingPolicy(policy_config) policy_start = time.time() action = policy(state) logger.info("Planning took %.3f sec" % (time.time() - policy_start)) # Vis final grasp. if policy_config["vis"]["final_grasp"]: vis.figure(size=(10, 10)) vis.imshow(rgbd_im.depth, vmin=policy_config["vis"]["vmin"], vmax=policy_config["vis"]["vmax"]) vis.grasp(action.grasp, scale=2.5, show_center=False, show_axis=True) vis.title("Planned grasp on depth (Q=%.3f)" % (action.q_value)) vis.show() # Get grasp pose. 
    # Compute the 3D grasp pose in the camera frame. The approach direction
    # is presumably the world -z axis expressed in the camera frame (i.e.
    # approach along gravity) — TODO confirm against `Grasp2D.pose`.
    T_grasp_camera = action.grasp.pose(
        grasp_approach_dir=-T_camera_world.inverse().z_axis)
    grasp_pose_msg = T_grasp_camera.pose_msg

    # TODO: Control to reach the grasp pose.



================================================
FILE: gqcnn/__init__.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved.

Permission to use, copy, modify, and distribute this software and its
documentation for educational, research, and not-for-profit purposes, without
fee and without a signed licensing agreement, is hereby granted, provided that
the above copyright notice, this paragraph and the following two paragraphs
appear in all copies, modifications, and distributions. Contact The Office of
Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley,
CA 94720-1620, (510) 643-7201, otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing
opportunities.

IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS
SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS
PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
""" from .model import get_gqcnn_model, get_fc_gqcnn_model from .training import get_gqcnn_trainer from .grasping import (RobustGraspingPolicy, UniformRandomGraspingPolicy, CrossEntropyRobustGraspingPolicy, RgbdImageState, FullyConvolutionalGraspingPolicyParallelJaw, FullyConvolutionalGraspingPolicySuction) from .analysis import GQCNNAnalyzer from .search import GQCNNSearch from .utils import NoValidGraspsException, NoAntipodalPairsFoundException __all__ = [ "get_gqcnn_model", "get_fc_gqcnn_model", "get_gqcnn_trainer", "RobustGraspingPolicy", "UniformRandomGraspingPolicy", "CrossEntropyRobustGraspingPolicy", "RgbdImageState", "FullyConvolutionalGraspingPolicyParallelJaw", "FullyConvolutionalGraspingPolicySuction", "GQCNNAnalyzer", "GQCNNSearch", "NoValidGraspsException", "NoAntipodalPairsFoundException" ] ================================================ FILE: gqcnn/analysis/__init__.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. """ from .analyzer import GQCNNAnalyzer __all__ = ["GQCNNAnalyzer"] ================================================ FILE: gqcnn/analysis/analyzer.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Class for analyzing a GQ-CNN model for grasp quality prediction. 
Author
------
Jeff Mahler
"""
import json
import logging
import os

import matplotlib.pyplot as plt
import numpy as np

from autolab_core import (BinaryClassificationResult, TensorDataset, Logger,
                          DepthImage)
from autolab_core.constants import JSON_INDENT
import autolab_core.utils as utils
from visualization import Visualizer2D as vis2d

from ..model import get_gqcnn_model
from ..grasping import Grasp2D, SuctionPoint2D
from ..utils import (GripperMode, GeneralConstants, read_pose_data,
                     GQCNNFilenames)

WINDOW = 100  # For loss.
MAX_LOSS = 5.0


class GQCNNAnalyzer(object):
    """Analyzes a trained GQ-CNN model."""

    def __init__(self, config, verbose=True, plot_backend="pdf"):
        """
        Parameters
        ----------
        config : dict
            Dictionary of analysis configuration parameters.
        verbose : bool
            Whether or not to log analysis output to stdout.
        plot_backend : str
            Matplotlib plotting backend to use, default is non-interactive
            "pdf" backend.
        """
        self.cfg = config
        self.verbose = verbose

        # By default we want to use a non-interactive backend (ex. "pdf)"
        # because the `GQCNNAnalyzer` anyways only saves plots to disk, and
        # interactive backends sometimes cause issues with `localhost` when
        # run remotely through the Linux `screen` program.
        plt.switch_backend(plot_backend)
        self._parse_config()

    def _parse_config(self):
        """Read params from the config file.

        Copies plotting-related config values onto the instance; raises
        `KeyError` if any required key is missing.
        """
        # Plotting params.
        self.log_rate = self.cfg["log_rate"]  # Log every N tensors.
        self.font_size = self.cfg["font_size"]
        self.line_width = self.cfg["line_width"]
        self.dpi = self.cfg["dpi"]
        self.num_bins = self.cfg["num_bins"]
        self.num_vis = self.cfg["num_vis"]  # Max example images to save.

    def analyze(self, model_dir, output_dir, dataset_config=None):
        """Run analysis.

        Parameters
        ----------
        model_dir : str
            Path to the GQ-CNN model to analyze.
        output_dir : str
            Path to save the analysis.
        dataset_config : dict
            Dictionary to configure dataset used for training evaluation if
            different from one used during training.
Returns ------- :obj:`autolab_core.BinaryClassificationResult` Result of analysis on training data. :obj:`autolab_core.BinaryClassificationResult` Result of analysis on validation data. """ # Determine model output dir. model_name = "" model_root = model_dir while model_name == "" and model_root != "": model_root, model_name = os.path.split(model_root) model_output_dir = os.path.join(output_dir, model_name) if not os.path.exists(model_output_dir): os.mkdir(model_output_dir) # Set up logger. self.logger = Logger.get_logger(self.__class__.__name__, log_file=os.path.join( model_output_dir, "analysis.log"), silence=(not self.verbose), global_log_file=self.verbose) self.logger.info("Analyzing model %s" % (model_name)) self.logger.info("Saving output to %s" % (output_dir)) # Run predictions. train_result, val_result = self._run_prediction_single_model( model_dir, model_output_dir, dataset_config) # Finally plot curves. (init_train_error, final_train_error, init_train_loss, final_train_loss, init_val_error, final_val_error, norm_final_val_error) = self._plot(model_dir, model_output_dir, train_result, val_result) return (train_result, val_result, init_train_error, final_train_error, init_train_loss, final_train_loss, init_val_error, final_val_error, norm_final_val_error) def _plot_grasp(self, datapoint, image_field_name, pose_field_name, gripper_mode, angular_preds=None): """Plots a single grasp represented as a datapoint.""" image = DepthImage(datapoint[image_field_name][:, :, 0]) depth = datapoint[pose_field_name][2] width = 0 grasps = [] if gripper_mode == GripperMode.PARALLEL_JAW or \ gripper_mode == GripperMode.LEGACY_PARALLEL_JAW: if angular_preds is not None: num_bins = angular_preds.shape[0] / 2 bin_width = GeneralConstants.PI / num_bins for i in range(num_bins): bin_cent_ang = i * bin_width + bin_width / 2 grasps.append( Grasp2D(center=image.center, angle=GeneralConstants.PI / 2 - bin_cent_ang, depth=depth, width=0.0)) grasps.append( Grasp2D(center=image.center, 
angle=datapoint[pose_field_name][3], depth=depth, width=0.0)) else: grasps.append( Grasp2D(center=image.center, angle=0, depth=depth, width=0.0)) width = datapoint[pose_field_name][-1] else: grasps.append( SuctionPoint2D(center=image.center, axis=[1, 0, 0], depth=depth)) vis2d.imshow(image) for i, grasp in enumerate(grasps[:-1]): vis2d.grasp(grasp, width=width, color=plt.cm.RdYlGn(angular_preds[i * 2 + 1])) vis2d.grasp(grasps[-1], width=width, color="b") def _run_prediction_single_model(self, model_dir, model_output_dir, dataset_config): """Analyze the performance of a single model.""" # Read in model config. model_config_filename = os.path.join(model_dir, GQCNNFilenames.SAVED_CFG) with open(model_config_filename) as data_file: model_config = json.load(data_file) # Load model. self.logger.info("Loading model %s" % (model_dir)) log_file = None for handler in self.logger.handlers: if isinstance(handler, logging.FileHandler): log_file = handler.baseFilename gqcnn = get_gqcnn_model(verbose=self.verbose).load( model_dir, verbose=self.verbose, log_file=log_file) gqcnn.open_session() gripper_mode = gqcnn.gripper_mode angular_bins = gqcnn.angular_bins # Read params from the config. 
if dataset_config is None: dataset_dir = model_config["dataset_dir"] split_name = model_config["split_name"] image_field_name = model_config["image_field_name"] pose_field_name = model_config["pose_field_name"] metric_name = model_config["target_metric_name"] metric_thresh = model_config["metric_thresh"] else: dataset_dir = dataset_config["dataset_dir"] split_name = dataset_config["split_name"] image_field_name = dataset_config["image_field_name"] pose_field_name = dataset_config["pose_field_name"] metric_name = dataset_config["target_metric_name"] metric_thresh = dataset_config["metric_thresh"] gripper_mode = dataset_config["gripper_mode"] self.logger.info("Loading dataset %s" % (dataset_dir)) dataset = TensorDataset.open(dataset_dir) train_indices, val_indices, _ = dataset.split(split_name) # Visualize conv filters. conv1_filters = gqcnn.filters num_filt = conv1_filters.shape[3] d = utils.sqrt_ceil(num_filt) vis2d.clf() for k in range(num_filt): filt = conv1_filters[:, :, 0, k] vis2d.subplot(d, d, k + 1) vis2d.imshow(DepthImage(filt)) figname = os.path.join(model_output_dir, "conv1_filters.pdf") vis2d.savefig(figname, dpi=self.dpi) # Aggregate training and validation true labels and predicted # probabilities. all_predictions = [] if angular_bins > 0: all_predictions_raw = [] all_labels = [] for i in range(dataset.num_tensors): # Log progress. if i % self.log_rate == 0: self.logger.info("Predicting tensor %d of %d" % (i + 1, dataset.num_tensors)) # Read in data. image_arr = dataset.tensor(image_field_name, i).arr pose_arr = read_pose_data( dataset.tensor(pose_field_name, i).arr, gripper_mode) metric_arr = dataset.tensor(metric_name, i).arr label_arr = 1 * (metric_arr > metric_thresh) label_arr = label_arr.astype(np.uint8) if angular_bins > 0: # Form mask to extract predictions from ground-truth angular # bins. 
raw_poses = dataset.tensor(pose_field_name, i).arr angles = raw_poses[:, 3] neg_ind = np.where(angles < 0) # TODO(vsatish): These should use the max angle instead. angles = np.abs(angles) % GeneralConstants.PI angles[neg_ind] *= -1 g_90 = np.where(angles > (GeneralConstants.PI / 2)) l_neg_90 = np.where(angles < (-1 * (GeneralConstants.PI / 2))) angles[g_90] -= GeneralConstants.PI angles[l_neg_90] += GeneralConstants.PI # TODO(vsatish): Fix this along with the others. angles *= -1 # Hack to fix reverse angle convention. angles += (GeneralConstants.PI / 2) pred_mask = np.zeros((raw_poses.shape[0], angular_bins * 2), dtype=bool) bin_width = GeneralConstants.PI / angular_bins for i in range(angles.shape[0]): pred_mask[i, int((angles[i] // bin_width) * 2)] = True pred_mask[i, int((angles[i] // bin_width) * 2 + 1)] = True # Predict with GQ-CNN. predictions = gqcnn.predict(image_arr, pose_arr) if angular_bins > 0: raw_predictions = np.array(predictions) predictions = predictions[pred_mask].reshape((-1, 2)) # Aggregate. all_predictions.extend(predictions[:, 1].tolist()) if angular_bins > 0: all_predictions_raw.extend(raw_predictions.tolist()) all_labels.extend(label_arr.tolist()) # Close session. gqcnn.close_session() # Create arrays. all_predictions = np.array(all_predictions) all_labels = np.array(all_labels) train_predictions = all_predictions[train_indices] val_predictions = all_predictions[val_indices] train_labels = all_labels[train_indices] val_labels = all_labels[val_indices] if angular_bins > 0: all_predictions_raw = np.array(all_predictions_raw) train_predictions_raw = all_predictions_raw[train_indices] val_predictions_raw = all_predictions_raw[val_indices] # Aggregate results. 
train_result = BinaryClassificationResult(train_predictions, train_labels) val_result = BinaryClassificationResult(val_predictions, val_labels) train_result.save(os.path.join(model_output_dir, "train_result.cres")) val_result.save(os.path.join(model_output_dir, "val_result.cres")) # Get stats, plot curves. self.logger.info("Model %s training error rate: %.3f" % (model_dir, train_result.error_rate)) self.logger.info("Model %s validation error rate: %.3f" % (model_dir, val_result.error_rate)) self.logger.info("Model %s training loss: %.3f" % (model_dir, train_result.cross_entropy_loss)) self.logger.info("Model %s validation loss: %.3f" % (model_dir, val_result.cross_entropy_loss)) # Save images. vis2d.figure() example_dir = os.path.join(model_output_dir, "examples") if not os.path.exists(example_dir): os.mkdir(example_dir) # Train. self.logger.info("Saving training examples") train_example_dir = os.path.join(example_dir, "train") if not os.path.exists(train_example_dir): os.mkdir(train_example_dir) # Train TP. true_positive_indices = train_result.true_positive_indices np.random.shuffle(true_positive_indices) true_positive_indices = true_positive_indices[:self.num_vis] for i, j in enumerate(true_positive_indices): k = train_indices[j] datapoint = dataset.datapoint( k, field_names=[image_field_name, pose_field_name]) vis2d.clf() if angular_bins > 0: self._plot_grasp(datapoint, image_field_name, pose_field_name, gripper_mode, angular_preds=train_predictions_raw[j]) else: self._plot_grasp(datapoint, image_field_name, pose_field_name, gripper_mode) vis2d.title( "Datapoint %d: Pred: %.3f Label: %.3f" % (k, train_result.pred_probs[j], train_result.labels[j]), fontsize=self.font_size) vis2d.savefig( os.path.join(train_example_dir, "true_positive_%03d.png" % (i))) # Train FP. 
false_positive_indices = train_result.false_positive_indices np.random.shuffle(false_positive_indices) false_positive_indices = false_positive_indices[:self.num_vis] for i, j in enumerate(false_positive_indices): k = train_indices[j] datapoint = dataset.datapoint( k, field_names=[image_field_name, pose_field_name]) vis2d.clf() if angular_bins > 0: self._plot_grasp(datapoint, image_field_name, pose_field_name, gripper_mode, angular_preds=train_predictions_raw[j]) else: self._plot_grasp(datapoint, image_field_name, pose_field_name, gripper_mode) vis2d.title( "Datapoint %d: Pred: %.3f Label: %.3f" % (k, train_result.pred_probs[j], train_result.labels[j]), fontsize=self.font_size) vis2d.savefig( os.path.join(train_example_dir, "false_positive_%03d.png" % (i))) # Train TN. true_negative_indices = train_result.true_negative_indices np.random.shuffle(true_negative_indices) true_negative_indices = true_negative_indices[:self.num_vis] for i, j in enumerate(true_negative_indices): k = train_indices[j] datapoint = dataset.datapoint( k, field_names=[image_field_name, pose_field_name]) vis2d.clf() if angular_bins > 0: self._plot_grasp(datapoint, image_field_name, pose_field_name, gripper_mode, angular_preds=train_predictions_raw[j]) else: self._plot_grasp(datapoint, image_field_name, pose_field_name, gripper_mode) vis2d.title( "Datapoint %d: Pred: %.3f Label: %.3f" % (k, train_result.pred_probs[j], train_result.labels[j]), fontsize=self.font_size) vis2d.savefig( os.path.join(train_example_dir, "true_negative_%03d.png" % (i))) # Train TP. 
false_negative_indices = train_result.false_negative_indices np.random.shuffle(false_negative_indices) false_negative_indices = false_negative_indices[:self.num_vis] for i, j in enumerate(false_negative_indices): k = train_indices[j] datapoint = dataset.datapoint( k, field_names=[image_field_name, pose_field_name]) vis2d.clf() if angular_bins > 0: self._plot_grasp(datapoint, image_field_name, pose_field_name, gripper_mode, angular_preds=train_predictions_raw[j]) else: self._plot_grasp(datapoint, image_field_name, pose_field_name, gripper_mode) vis2d.title( "Datapoint %d: Pred: %.3f Label: %.3f" % (k, train_result.pred_probs[j], train_result.labels[j]), fontsize=self.font_size) vis2d.savefig( os.path.join(train_example_dir, "false_negative_%03d.png" % (i))) # Val. self.logger.info("Saving validation examples") val_example_dir = os.path.join(example_dir, "val") if not os.path.exists(val_example_dir): os.mkdir(val_example_dir) # Val TP. true_positive_indices = val_result.true_positive_indices np.random.shuffle(true_positive_indices) true_positive_indices = true_positive_indices[:self.num_vis] for i, j in enumerate(true_positive_indices): k = val_indices[j] datapoint = dataset.datapoint( k, field_names=[image_field_name, pose_field_name]) vis2d.clf() if angular_bins > 0: self._plot_grasp(datapoint, image_field_name, pose_field_name, gripper_mode, angular_preds=val_predictions_raw[j]) else: self._plot_grasp(datapoint, image_field_name, pose_field_name, gripper_mode) vis2d.title("Datapoint %d: Pred: %.3f Label: %.3f" % (k, val_result.pred_probs[j], val_result.labels[j]), fontsize=self.font_size) vis2d.savefig( os.path.join(val_example_dir, "true_positive_%03d.png" % (i))) # Val FP. 
        # False positives from the validation split: visualize up to
        # `self.num_vis` randomly-sampled examples.
        false_positive_indices = val_result.false_positive_indices
        np.random.shuffle(false_positive_indices)
        false_positive_indices = false_positive_indices[:self.num_vis]
        for i, j in enumerate(false_positive_indices):
            # `j` indexes into the prediction results; `k` is the
            # corresponding datapoint index in the dataset.
            k = val_indices[j]
            datapoint = dataset.datapoint(
                k, field_names=[image_field_name, pose_field_name])
            vis2d.clf()
            if angular_bins > 0:
                self._plot_grasp(datapoint,
                                 image_field_name,
                                 pose_field_name,
                                 gripper_mode,
                                 angular_preds=val_predictions_raw[j])
            else:
                self._plot_grasp(datapoint, image_field_name,
                                 pose_field_name, gripper_mode)
            vis2d.title("Datapoint %d: Pred: %.3f Label: %.3f" %
                        (k, val_result.pred_probs[j], val_result.labels[j]),
                        fontsize=self.font_size)
            vis2d.savefig(
                os.path.join(val_example_dir,
                             "false_positive_%03d.png" % (i)))

        # Val TN.
        true_negative_indices = val_result.true_negative_indices
        np.random.shuffle(true_negative_indices)
        true_negative_indices = true_negative_indices[:self.num_vis]
        for i, j in enumerate(true_negative_indices):
            k = val_indices[j]
            datapoint = dataset.datapoint(
                k, field_names=[image_field_name, pose_field_name])
            vis2d.clf()
            if angular_bins > 0:
                self._plot_grasp(datapoint,
                                 image_field_name,
                                 pose_field_name,
                                 gripper_mode,
                                 angular_preds=val_predictions_raw[j])
            else:
                self._plot_grasp(datapoint, image_field_name,
                                 pose_field_name, gripper_mode)
            vis2d.title("Datapoint %d: Pred: %.3f Label: %.3f" %
                        (k, val_result.pred_probs[j], val_result.labels[j]),
                        fontsize=self.font_size)
            vis2d.savefig(
                os.path.join(val_example_dir,
                             "true_negative_%03d.png" % (i)))

        # Val FN.  (Was mislabeled "Val TP." -- the block that follows
        # visualizes the validation false negatives.)
false_negative_indices = val_result.false_negative_indices np.random.shuffle(false_negative_indices) false_negative_indices = false_negative_indices[:self.num_vis] for i, j in enumerate(false_negative_indices): k = val_indices[j] datapoint = dataset.datapoint( k, field_names=[image_field_name, pose_field_name]) vis2d.clf() if angular_bins > 0: self._plot_grasp(datapoint, image_field_name, pose_field_name, gripper_mode, angular_preds=val_predictions_raw[j]) else: self._plot_grasp(datapoint, image_field_name, pose_field_name, gripper_mode) vis2d.title("Datapoint %d: Pred: %.3f Label: %.3f" % (k, val_result.pred_probs[j], val_result.labels[j]), fontsize=self.font_size) vis2d.savefig( os.path.join(val_example_dir, "false_negative_%03d.png" % (i))) # Save summary stats. train_summary_stats = { "error_rate": train_result.error_rate, "ap_score": train_result.ap_score, "auc_score": train_result.auc_score, "loss": train_result.cross_entropy_loss } train_stats_filename = os.path.join(model_output_dir, "train_stats.json") json.dump(train_summary_stats, open(train_stats_filename, "w"), indent=JSON_INDENT, sort_keys=True) val_summary_stats = { "error_rate": val_result.error_rate, "ap_score": val_result.ap_score, "auc_score": val_result.auc_score, "loss": val_result.cross_entropy_loss } val_stats_filename = os.path.join(model_output_dir, "val_stats.json") json.dump(val_summary_stats, open(val_stats_filename, "w"), indent=JSON_INDENT, sort_keys=True) return train_result, val_result def _plot(self, model_dir, model_output_dir, train_result, val_result): """Plot analysis curves.""" self.logger.info("Plotting") _, model_name = os.path.split(model_output_dir) # Set params. colors = ["g", "b", "c", "y", "m", "r"] styles = ["-", "--", "-.", ":", "-"] # PR, ROC. 
vis2d.clf() train_result.precision_recall_curve(plot=True, line_width=self.line_width, color=colors[0], style=styles[0], label="TRAIN") val_result.precision_recall_curve(plot=True, line_width=self.line_width, color=colors[1], style=styles[1], label="VAL") vis2d.title("Precision Recall Curves", fontsize=self.font_size) handles, labels = vis2d.gca().get_legend_handles_labels() vis2d.legend(handles, labels, loc="best") figname = os.path.join(model_output_dir, "precision_recall.png") vis2d.savefig(figname, dpi=self.dpi) vis2d.clf() train_result.roc_curve(plot=True, line_width=self.line_width, color=colors[0], style=styles[0], label="TRAIN") val_result.roc_curve(plot=True, line_width=self.line_width, color=colors[1], style=styles[1], label="VAL") vis2d.title("Reciever Operating Characteristic", fontsize=self.font_size) handles, labels = vis2d.gca().get_legend_handles_labels() vis2d.legend(handles, labels, loc="best") figname = os.path.join(model_output_dir, "roc.png") vis2d.savefig(figname, dpi=self.dpi) # Plot histogram of prediction errors. num_bins = min(self.num_bins, train_result.num_datapoints) # Train positives. pos_ind = np.where(train_result.labels == 1)[0] diffs = np.abs(train_result.labels[pos_ind] - train_result.pred_probs[pos_ind]) vis2d.figure() utils.histogram(diffs, num_bins, bounds=(0, 1), normalized=False, plot=True) vis2d.title("Error on Positive Training Examples", fontsize=self.font_size) vis2d.xlabel("Abs Prediction Error", fontsize=self.font_size) vis2d.ylabel("Count", fontsize=self.font_size) figname = os.path.join(model_output_dir, "pos_train_errors_histogram.png") vis2d.savefig(figname, dpi=self.dpi) # Train negatives. 
neg_ind = np.where(train_result.labels == 0)[0] diffs = np.abs(train_result.labels[neg_ind] - train_result.pred_probs[neg_ind]) vis2d.figure() utils.histogram(diffs, num_bins, bounds=(0, 1), normalized=False, plot=True) vis2d.title("Error on Negative Training Examples", fontsize=self.font_size) vis2d.xlabel("Abs Prediction Error", fontsize=self.font_size) vis2d.ylabel("Count", fontsize=self.font_size) figname = os.path.join(model_output_dir, "neg_train_errors_histogram.png") vis2d.savefig(figname, dpi=self.dpi) # Histogram of validation errors. num_bins = min(self.num_bins, val_result.num_datapoints) # Val positives. pos_ind = np.where(val_result.labels == 1)[0] diffs = np.abs(val_result.labels[pos_ind] - val_result.pred_probs[pos_ind]) vis2d.figure() utils.histogram(diffs, num_bins, bounds=(0, 1), normalized=False, plot=True) vis2d.title("Error on Positive Validation Examples", fontsize=self.font_size) vis2d.xlabel("Abs Prediction Error", fontsize=self.font_size) vis2d.ylabel("Count", fontsize=self.font_size) figname = os.path.join(model_output_dir, "pos_val_errors_histogram.png") vis2d.savefig(figname, dpi=self.dpi) # Val negatives. neg_ind = np.where(val_result.labels == 0)[0] diffs = np.abs(val_result.labels[neg_ind] - val_result.pred_probs[neg_ind]) vis2d.figure() utils.histogram(diffs, num_bins, bounds=(0, 1), normalized=False, plot=True) vis2d.title("Error on Negative Validation Examples", fontsize=self.font_size) vis2d.xlabel("Abs Prediction Error", fontsize=self.font_size) vis2d.ylabel("Count", fontsize=self.font_size) figname = os.path.join(model_output_dir, "neg_val_errors_histogram.png") vis2d.savefig(figname, dpi=self.dpi) # Losses. 
try: train_errors_filename = os.path.join(model_dir, GQCNNFilenames.TRAIN_ERRORS) val_errors_filename = os.path.join(model_dir, GQCNNFilenames.VAL_ERRORS) val_iters_filename = os.path.join(model_dir, GQCNNFilenames.VAL_ITERS) pct_pos_val_filename = os.path.join(model_dir, GQCNNFilenames.PCT_POS_VAL) train_losses_filename = os.path.join(model_dir, GQCNNFilenames.TRAIN_LOSSES) raw_train_errors = np.load(train_errors_filename) val_errors = np.load(val_errors_filename) val_iters = np.load(val_iters_filename) pct_pos_val = float(val_errors[0]) if os.path.exists(pct_pos_val_filename): pct_pos_val = 100.0 * np.load(pct_pos_val_filename) raw_train_losses = np.load(train_losses_filename) val_errors = np.r_[pct_pos_val, val_errors] val_iters = np.r_[0, val_iters] # Window the training error. i = 0 train_errors = [] train_losses = [] train_iters = [] while i < raw_train_errors.shape[0]: train_errors.append(np.mean(raw_train_errors[i:i + WINDOW])) train_losses.append(np.mean(raw_train_losses[i:i + WINDOW])) train_iters.append(i) i += WINDOW train_errors = np.array(train_errors) train_losses = np.array(train_losses) train_iters = np.array(train_iters) init_val_error = val_errors[0] norm_train_errors = train_errors / init_val_error norm_val_errors = val_errors / init_val_error norm_final_val_error = val_result.error_rate / val_errors[0] if pct_pos_val > 0: norm_final_val_error = val_result.error_rate / pct_pos_val vis2d.clf() vis2d.plot(train_iters, train_errors, linewidth=self.line_width, color="b") vis2d.plot(val_iters, val_errors, linewidth=self.line_width, color="g") vis2d.ylim(0, 100) vis2d.legend(("TRAIN (Minibatch)", "VAL"), fontsize=self.font_size, loc="best") vis2d.xlabel("Iteration", fontsize=self.font_size) vis2d.ylabel("Error Rate", fontsize=self.font_size) vis2d.title("Error Rate vs Training Iteration", fontsize=self.font_size) figname = os.path.join(model_output_dir, "training_error_rates.png") vis2d.savefig(figname, dpi=self.dpi) vis2d.clf() 
vis2d.plot(train_iters, norm_train_errors, linewidth=4, color="b") vis2d.plot(val_iters, norm_val_errors, linewidth=4, color="g") vis2d.ylim(0, 2.0) vis2d.legend(("TRAIN (Minibatch)", "VAL"), fontsize=self.font_size, loc="best") vis2d.xlabel("Iteration", fontsize=self.font_size) vis2d.ylabel("Normalized Error Rate", fontsize=self.font_size) vis2d.title("Normalized Error Rate vs Training Iteration", fontsize=self.font_size) figname = os.path.join(model_output_dir, "training_norm_error_rates.png") vis2d.savefig(figname, dpi=self.dpi) train_losses[train_losses > MAX_LOSS] = MAX_LOSS # CAP LOSSES. vis2d.clf() vis2d.plot(train_iters, train_losses, linewidth=self.line_width, color="b") vis2d.ylim(0, 2.0) vis2d.xlabel("Iteration", fontsize=self.font_size) vis2d.ylabel("Loss", fontsize=self.font_size) vis2d.title("Training Loss vs Iteration", fontsize=self.font_size) figname = os.path.join(model_output_dir, "training_losses.png") vis2d.savefig(figname, dpi=self.dpi) # Log. self.logger.info("TRAIN") self.logger.info("Original error: %.3f" % (train_errors[0])) self.logger.info("Final error: %.3f" % (train_result.error_rate)) self.logger.info("Orig loss: %.3f" % (train_losses[0])) self.logger.info("Final loss: %.3f" % (train_losses[-1])) self.logger.info("VAL") self.logger.info("Original error: %.3f" % (pct_pos_val)) self.logger.info("Final error: %.3f" % (val_result.error_rate)) self.logger.info("Normalized error: %.3f" % (norm_final_val_error)) return (train_errors[0], train_result.error_rate, train_losses[0], train_losses[-1], pct_pos_val, val_result.error_rate, norm_final_val_error) except Exception as e: self.logger.error("Failed to plot training curves!\n" + str(e)) ================================================ FILE: gqcnn/grasping/__init__.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. 
Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
""" from .grasp import Grasp2D, SuctionPoint2D, MultiSuctionPoint2D from .grasp_quality_function import (GraspQualityFunctionFactory, GQCnnQualityFunction) from .image_grasp_sampler import (ImageGraspSamplerFactory, AntipodalDepthImageGraspSampler) from .constraint_fn import GraspConstraintFnFactory from .policy import (RobustGraspingPolicy, CrossEntropyRobustGraspingPolicy, FullyConvolutionalGraspingPolicyParallelJaw, FullyConvolutionalGraspingPolicySuction, UniformRandomGraspingPolicy, RgbdImageState, GraspAction) from .actions import (NoAction, ParallelJawGrasp3D, SuctionGrasp3D, MultiSuctionGrasp3D) __all__ = [ "Grasp2D", "SuctionPoint2D", "MultiSuctionPoint2D", "GraspQualityFunctionFactory", "GQCnnQualityFunction", "ImageGraspSamplerFactory", "AntipodalDepthImageGraspSampler", "RobustGraspingPolicy", "CrossEntropyRobustGraspingPolicy", "FullyConvolutionalGraspingPolicyParallelJaw", "FullyConvolutionalGraspingPolicySuction", "UniformRandomGraspingPolicy", "RgbdImageState", "GraspAction", "GraspConstraintFnFactory", "NoAction", "ParallelJawGrasp3D", "SuctionGrasp3D", "MultiSuctionGrasp3D" ] ================================================ FILE: gqcnn/grasping/actions.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. 
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Action classes for representing 3D grasp actions. Author ------ Jeff Mahler """ from abc import ABC, abstractmethod import numpy as np from autolab_core import Point from .grasp import Grasp2D, SuctionPoint2D, MultiSuctionPoint2D class Action(object): """Base action class. Attributes ---------- q_value : float Grasp quality. id : int Integer identifier for the action. metadata : dict Key-value dict of extra data about the action. """ def __init__(self, q_value=0.0, id=-1, metadata={}): self._q_value = q_value self._id = id self._metadata = metadata @property def q_value(self): return self._q_value @property def id(self): return self._id @property def metadata(self): return self._metadata class NoAction(Action): """Proxy for taking no action when none can be found.""" pass class GraspAction3D(ABC, Action): """Base grasp class with grasp specified as an end-effector pose. Attributes ---------- T_grasp_world : :obj:`RigidTransform` Pose of the grasp w.r.t. world coordinate frame. """ def __init__(self, T_grasp_world, q_value=0.0, id=-1, metadata={}): self.T_grasp_world = T_grasp_world Action.__init__(self, q_value, id, metadata) @abstractmethod def project(self, camera_intr, T_camera_world): pass class ParallelJawGrasp3D(GraspAction3D): """Grasping with a parallel-jaw gripper. 
Attributes ---------- T_grasp_world : :obj:`RigidTransform` Pose of the grasp wrt world coordinate frame. """ def project(self, camera_intr, T_camera_world, gripper_width=0.05): # Compute pose of grasp in camera frame. T_grasp_camera = T_camera_world.inverse() * self.T_grasp_world y_axis_camera = T_grasp_camera.y_axis[:2] if np.linalg.norm(y_axis_camera) > 0: y_axis_camera = y_axis_camera / np.linalg.norm(y_axis_camera) # Compute grasp axis rotation in image space. rot_grasp_camera = np.arccos(y_axis_camera[0]) if y_axis_camera[1] < 0: rot_grasp_camera = -rot_grasp_camera while rot_grasp_camera < 0: rot_grasp_camera += 2 * np.pi while rot_grasp_camera > 2 * np.pi: rot_grasp_camera -= 2 * np.pi # Compute grasp center in image space. t_grasp_camera = T_grasp_camera.translation p_grasp_camera = Point(t_grasp_camera, frame=camera_intr.frame) u_grasp_camera = camera_intr.project(p_grasp_camera) d_grasp_camera = t_grasp_camera[2] return Grasp2D(u_grasp_camera, rot_grasp_camera, d_grasp_camera, width=gripper_width, camera_intr=camera_intr) class SuctionGrasp3D(GraspAction3D): """Grasping with a suction-based gripper. Attributes ---------- T_grasp_world : :obj:`RigidTransform` Pose of the grasp wrt world coordinate frame. """ def project(self, camera_intr, T_camera_world): # Compute pose of grasp in camera frame. T_grasp_camera = T_camera_world.inverse() * self.T_grasp_world x_axis_camera = T_grasp_camera.x_axis # Compute grasp center in image space. t_grasp_camera = T_grasp_camera.translation p_grasp_camera = Point(t_grasp_camera, frame=camera_intr.frame) u_grasp_camera = camera_intr.project(p_grasp_camera) d_grasp_camera = t_grasp_camera[2] return SuctionPoint2D(u_grasp_camera, x_axis_camera, d_grasp_camera, camera_intr=camera_intr) class MultiSuctionGrasp3D(GraspAction3D): """Grasping with a multi-cup suction-based gripper. Attributes ---------- T_grasp_world : :obj:`RigidTransform` Pose of the grasp wrt world coordinate frame. 
""" def project(self, camera_intr, T_camera_world): # Compute pose of grasp in camera frame. T_grasp_camera = T_camera_world.inverse() * self.T_grasp_world return MultiSuctionPoint2D(T_grasp_camera, camera_intr=camera_intr) ================================================ FILE: gqcnn/grasping/constraint_fn.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Constraint functions for grasp sampling. Author ------ Jeff Mahler """ from abc import ABC, abstractmethod import numpy as np class GraspConstraintFn(ABC): """Abstract constraint functions for grasp sampling.""" def __init__(self, config): # Set params. 
self._config = config def __call__(self, grasp): """Evaluates whether or not a grasp is valid. Parameters ---------- grasp : :obj:`Grasp2D` Grasp to evaluate. Returns ------- bool True if the grasp satisfies constraints, False otherwise. """ return self.satisfies_constraints(grasp) @abstractmethod def satisfies_constraints(self, grasp): """Evaluates whether or not a grasp is valid. Parameters ---------- grasp : :obj:`Grasp2D` Grasp to evaluate. Returns ------- bool True if the grasp satisfies constraints, False otherwise. """ pass class DiscreteApproachGraspConstraintFn(GraspConstraintFn): """Constrains the grasp approach direction into a discrete set of angles from the world z direction.""" def __init__(self, config): # Init superclass. GraspConstraintFn.__init__(self, config) self._max_approach_angle = self._config["max_approach_angle"] self._angular_tolerance = self._config["angular_tolerance"] self._angular_step = self._config["angular_step"] self._T_camera_world = self._config["camera_pose"] def satisfies_constraints(self, grasp): """Evaluates whether or not a grasp is valid by evaluating the angle between the approach axis and the world z direction. Parameters ---------- grasp : :obj:`Grasp2D` Grasp to evaluate. Returns ------- bool True if the grasp satisfies constraints, False otherwise. """ # Find grasp angle in world coordinates. axis_world = self._T_camera_world.rotation.dot(grasp.approach_axis) angle = np.arccos(-axis_world[2]) # Check closest available angle. 
available_angles = np.array([0.0]) if self._angular_step > 0: available_angles = np.arange(start=0.0, stop=self._max_approach_angle, step=self._angular_step) diff = np.abs(available_angles - angle) angle_index = np.argmin(diff) if diff[angle_index] < self._angular_tolerance: return True return False class GraspConstraintFnFactory(object): @staticmethod def constraint_fn(fn_type, config): if fn_type == "none": return None elif fn_type == "discrete_approach_angle": return DiscreteApproachGraspConstraintFn(config) else: raise ValueError( "Grasp constraint function type {} not supported!".format( fn_type)) ================================================ FILE: gqcnn/grasping/grasp.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". 
REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Classes to encapsulate parallel-jaw grasps in image space. Author ------ Jeff Mahler """ import numpy as np from autolab_core import Point, RigidTransform, CameraIntrinsics class Grasp2D(object): """Parallel-jaw grasp in image space. Attributes ---------- center : :obj:`autolab_core.Point` Point in image space. angle : float Grasp axis angle with the camera x-axis. depth : float Depth of the grasp center in 3D space. width : float Distance between the jaws in meters. camera_intr : :obj:`autolab_core.CameraIntrinsics` Frame of reference for camera that the grasp corresponds to. contact_points : list of :obj:`numpy.ndarray` Pair of contact points in image space. contact_normals : list of :obj:`numpy.ndarray` Pair of contact normals in image space. """ def __init__(self, center, angle=0.0, depth=1.0, width=0.0, camera_intr=None, contact_points=None, contact_normals=None): self.center = center self.angle = angle self.depth = depth self.width = width # If `camera_intr` is none use default primesense camera intrinsics. if not camera_intr: self.camera_intr = CameraIntrinsics("primesense_overhead", fx=525, fy=525, cx=319.5, cy=239.5, width=640, height=480) else: self.camera_intr = camera_intr self.contact_points = contact_points self.contact_normals = contact_normals frame = "image" if camera_intr is not None: frame = camera_intr.frame if isinstance(center, np.ndarray): self.center = Point(center, frame=frame) @property def axis(self): """Returns the grasp axis.""" return np.array([np.cos(self.angle), np.sin(self.angle)]) @property def approach_axis(self): return np.array([0, 0, 1]) @property def approach_angle(self): """The angle between the grasp approach axis and camera optical axis. 
""" return 0.0 @property def frame(self): """The name of the frame of reference for the grasp.""" if self.camera_intr is None: raise ValueError("Must specify camera intrinsics") return self.camera_intr.frame @property def width_px(self): """Returns the width in pixels.""" if self.camera_intr is None: missing_camera_intr_msg = ("Must specify camera intrinsics to" " compute gripper width in 3D space.") raise ValueError(missing_camera_intr_msg) # Form the jaw locations in 3D space at the given depth. p1 = Point(np.array([0, 0, self.depth]), frame=self.frame) p2 = Point(np.array([self.width, 0, self.depth]), frame=self.frame) # Project into pixel space. u1 = self.camera_intr.project(p1) u2 = self.camera_intr.project(p2) return np.linalg.norm(u1.data - u2.data) @property def endpoints(self): """Returns the grasp endpoints.""" p1 = self.center.data - (self.width_px / 2) * self.axis p2 = self.center.data + (self.width_px / 2) * self.axis return p1, p2 @property def feature_vec(self): """Returns the feature vector for the grasp. `v = [p1, p2, depth]` where `p1` and `p2` are the jaw locations in image space. """ p1, p2 = self.endpoints return np.r_[p1, p2, self.depth] @staticmethod def from_feature_vec(v, width=0.0, camera_intr=None): """Creates a `Grasp2D` instance from a feature vector and additional parameters. Parameters ---------- v : :obj:`numpy.ndarray` Feature vector, see `Grasp2D.feature_vec`. width : float Grasp opening width, in meters. camera_intr : :obj:`autolab_core.CameraIntrinsics` Frame of reference for camera that the grasp corresponds to. """ # Read feature vec. p1 = v[:2] p2 = v[2:4] depth = v[4] # Compute center and angle. 
center_px = (p1 + p2) // 2 center = Point(center_px, camera_intr.frame) axis = p2 - p1 if np.linalg.norm(axis) > 0: axis = axis / np.linalg.norm(axis) if axis[1] > 0: angle = np.arccos(axis[0]) else: angle = -np.arccos(axis[0]) return Grasp2D(center, angle, depth, width=width, camera_intr=camera_intr) def pose(self, grasp_approach_dir=None): """Computes the 3D pose of the grasp relative to the camera. If an approach direction is not specified then the camera optical axis is used. Parameters ---------- grasp_approach_dir : :obj:`numpy.ndarray` Approach direction for the grasp in camera basis (e.g. opposite to table normal). Returns ------- :obj:`autolab_core.RigidTransform` The transformation from the grasp to the camera frame of reference. """ # Check intrinsics. if self.camera_intr is None: raise ValueError( "Must specify camera intrinsics to compute 3D grasp pose") # Compute 3D grasp center in camera basis. grasp_center_im = self.center.data center_px_im = Point(grasp_center_im, frame=self.camera_intr.frame) grasp_center_camera = self.camera_intr.deproject_pixel( self.depth, center_px_im) grasp_center_camera = grasp_center_camera.data # Compute 3D grasp axis in camera basis. grasp_axis_im = self.axis grasp_axis_im = grasp_axis_im / np.linalg.norm(grasp_axis_im) grasp_axis_camera = np.array([grasp_axis_im[0], grasp_axis_im[1], 0]) grasp_axis_camera = grasp_axis_camera / np.linalg.norm( grasp_axis_camera) # Convert to 3D pose. grasp_rot_camera, _, _ = np.linalg.svd(grasp_axis_camera.reshape(3, 1)) grasp_x_camera = grasp_approach_dir if grasp_approach_dir is None: grasp_x_camera = np.array([0, 0, 1]) # Align with camera Z axis. 
        # Build a right-handed rotation: x = approach direction,
        # y = grasp axis, z = x cross y (re-orthogonalize y afterwards).
        grasp_y_camera = grasp_axis_camera
        grasp_z_camera = np.cross(grasp_x_camera, grasp_y_camera)
        grasp_z_camera = grasp_z_camera / np.linalg.norm(grasp_z_camera)
        grasp_y_camera = np.cross(grasp_z_camera, grasp_x_camera)
        grasp_rot_camera = np.array(
            [grasp_x_camera, grasp_y_camera, grasp_z_camera]).T
        if np.linalg.det(grasp_rot_camera) < 0:  # Fix reflections due to SVD.
            grasp_rot_camera[:, 0] = -grasp_rot_camera[:, 0]
        T_grasp_camera = RigidTransform(rotation=grasp_rot_camera,
                                        translation=grasp_center_camera,
                                        from_frame="grasp",
                                        to_frame=self.camera_intr.frame)
        return T_grasp_camera

    @staticmethod
    def image_dist(g1, g2, alpha=1.0):
        """Computes the distance between grasps in image space.

        Uses Euclidean distance with alpha weighting of angles.

        Parameters
        ----------
        g1 : :obj:`Grasp2D`
            First grasp.
        g2 : :obj:`Grasp2D`
            Second grasp.
        alpha : float
            Weight of angle distance (rad to meters).

        Returns
        -------
        float
            Distance between grasps.
        """
        # Point to point distances.
        point_dist = np.linalg.norm(g1.center.data - g2.center.data)

        # Axis distances (dot clamped to [-1, 1] so arccos is safe against
        # floating-point drift).
        dot = max(min(np.abs(g1.axis.dot(g2.axis)), 1.0), -1.0)
        axis_dist = np.arccos(dot)

        return point_dist + alpha * axis_dist


class SuctionPoint2D(object):
    """Suction grasp in image space.

    Attributes
    ----------
    center : :obj:`autolab_core.Point`
        Point in image space.
    axis : :obj:`numpy.ndarray`
        Normalized 3-vector representing the direction of the suction tip.
    depth : float
        Depth of the suction point in 3D space.
    camera_intr : :obj:`autolab_core.CameraIntrinsics`
        Frame of reference for camera that the suction point corresponds to.
""" def __init__(self, center, axis=None, depth=1.0, camera_intr=None): if axis is None: axis = np.array([0, 0, 1]) self.center = center self.axis = axis frame = "image" if camera_intr is not None: frame = camera_intr.frame if isinstance(center, np.ndarray): self.center = Point(center, frame=frame) if isinstance(axis, list): self.axis = np.array(axis) if np.abs(np.linalg.norm(self.axis) - 1.0) > 1e-3: raise ValueError("Illegal axis. Must be norm 1.") self.depth = depth # If `camera_intr` is `None` use default primesense camera intrinsics. if not camera_intr: self.camera_intr = CameraIntrinsics("primesense_overhead", fx=525, fy=525, cx=319.5, cy=239.5, width=640, height=480) else: self.camera_intr = camera_intr @property def frame(self): """The name of the frame of reference for the grasp.""" if self.camera_intr is None: raise ValueError("Must specify camera intrinsics") return self.camera_intr.frame @property def angle(self): """The angle that the grasp pivot axis makes in image space.""" rotation_axis = np.cross(self.axis, np.array([0, 0, 1])) rotation_axis_image = np.array([rotation_axis[0], rotation_axis[1]]) angle = 0 if np.linalg.norm(rotation_axis) > 0: rotation_axis_image = rotation_axis_image / np.linalg.norm( rotation_axis_image) angle = np.arccos(rotation_axis_image[0]) if rotation_axis[1] < 0: angle = -angle return angle @property def approach_angle(self): """The angle between the grasp approach axis and camera optical axis. """ dot = max(min(self.axis.dot(np.array([0, 0, 1])), 1.0), -1.0) return np.arccos(dot) @property def approach_axis(self): return self.axis @property def feature_vec(self): """Returns the feature vector for the suction point. Note ---- `v = [center, axis, depth]` """ return self.center.data @staticmethod def from_feature_vec(v, camera_intr=None, depth=None, axis=None): """Creates a `SuctionPoint2D` instance from a feature vector and additional parameters. 
Parameters ---------- v : :obj:`numpy.ndarray` Feature vector, see `Grasp2D.feature_vec`. camera_intr : :obj:`autolab_core.CameraIntrinsics` Frame of reference for camera that the grasp corresponds to. depth : float Hard-set the depth for the suction grasp. axis : :obj:`numpy.ndarray` Normalized 3-vector specifying the approach direction. """ # Read feature vec. center_px = v[:2] grasp_axis = np.array([0, 0, -1]) if v.shape[0] > 2 and axis is None: grasp_axis = v[2:5] grasp_axis = grasp_axis / np.linalg.norm(grasp_axis) elif axis is not None: grasp_axis = axis grasp_depth = 0.5 if v.shape[0] > 5 and depth is None: grasp_depth = v[5] elif depth is not None: grasp_depth = depth # Compute center and angle. center = Point(center_px, camera_intr.frame) return SuctionPoint2D(center, grasp_axis, grasp_depth, camera_intr=camera_intr) def pose(self): """Computes the 3D pose of the grasp relative to the camera. Returns ------- :obj:`autolab_core.RigidTransform` The transformation from the grasp to the camera frame of reference. """ # Check intrinsics. if self.camera_intr is None: raise ValueError( "Must specify camera intrinsics to compute 3D grasp pose") # Compute 3D grasp center in camera basis. suction_center_im = self.center.data center_px_im = Point(suction_center_im, frame=self.camera_intr.frame) suction_center_camera = self.camera_intr.deproject_pixel( self.depth, center_px_im) suction_center_camera = suction_center_camera.data # Compute 3D grasp axis in camera basis. suction_axis_camera = self.axis # Convert to 3D pose. 
        # Build a right-handed frame whose x axis is the approach direction.
        suction_x_camera = suction_axis_camera
        suction_z_camera = np.array(
            [-suction_x_camera[1], suction_x_camera[0], 0])
        # Degenerate case: approach axis parallel to the optical axis.
        if np.linalg.norm(suction_z_camera) < 1e-12:
            suction_z_camera = np.array([1.0, 0.0, 0.0])
        suction_z_camera = suction_z_camera / np.linalg.norm(suction_z_camera)
        suction_y_camera = np.cross(suction_z_camera, suction_x_camera)
        suction_rot_camera = np.c_[suction_x_camera, suction_y_camera,
                                   suction_z_camera]

        T_suction_camera = RigidTransform(rotation=suction_rot_camera,
                                          translation=suction_center_camera,
                                          from_frame="grasp",
                                          to_frame=self.camera_intr.frame)
        return T_suction_camera

    @staticmethod
    def image_dist(g1, g2, alpha=1.0):
        """Computes the distance between grasps in image space.

        Uses Euclidean distance with alpha weighting of angles.

        Parameters
        ----------
        g1 : :obj:`SuctionPoint2D`
            First suction point.
        g2 : :obj:`SuctionPoint2D`
            Second suction point.
        alpha : float
            Weight of angle distance (rad to meters).

        Returns
        -------
        float
            Distance between grasps.
        """
        # Point to point distances.
        point_dist = np.linalg.norm(g1.center.data - g2.center.data)

        # Axis distances.
        # Clamp to [-1, 1] to guard arccos against floating-point overshoot.
        dot = max(min(np.abs(g1.axis.dot(g2.axis)), 1.0), -1.0)
        axis_dist = np.arccos(dot)

        return point_dist + alpha * axis_dist


class MultiSuctionPoint2D(object):
    """Multi-Cup Suction grasp in image space.

    Equivalent to projecting a 6D pose to image space.

    Attributes
    ----------
    pose : :obj:`autolab_core.RigidTransform`
        Pose in 3D camera space.
    camera_intr : :obj:`autolab_core.CameraIntrinsics`
        Frame of reference for camera that the suction point corresponds to.
    """

    def __init__(self, pose, camera_intr=None):
        self._pose = pose

        # TODO(vsatish): Confirm that this is really not needed.
        # frame = "image"
        # if camera_intr is not None:
        #     frame = camera_intr.frame

        # If `camera_intr` is `None` use default primesense camera intrinsics.
        if not camera_intr:
            self.camera_intr = CameraIntrinsics("primesense_overhead",
                                                fx=525,
                                                fy=525,
                                                cx=319.5,
                                                cy=239.5,
                                                width=640,
                                                height=480)
        else:
            self.camera_intr = camera_intr

    def pose(self):
        # NOTE(review): unlike `SuctionPoint2D.pose()`, this simply returns
        # the stored 6D pose.
        return self._pose

    @property
    def frame(self):
        """The name of the frame of reference for the grasp."""
        if self.camera_intr is None:
            raise ValueError("Must specify camera intrinsics")
        return self.camera_intr.frame

    @property
    def center(self):
        # Project the 3D pose translation back into the image.
        center_camera = Point(self._pose.translation,
                              frame=self.camera_intr.frame)
        center_px = self.camera_intr.project(center_camera)
        return center_px

    @property
    def axis(self):
        # Approach direction is the x axis of the stored pose.
        return self._pose.x_axis

    @property
    def approach_axis(self):
        return self.axis

    @property
    def approach_angle(self):
        """The angle between the grasp approach axis and camera optical axis.
        """
        # Clamp to [-1, 1] to guard arccos against floating-point overshoot.
        dot = max(min(self.axis.dot(np.array([0, 0, 1])), 1.0), -1.0)
        return np.arccos(dot)

    @property
    def angle(self):
        # In-image angle of the pose's y axis; 0 if it projects to a point.
        g_axis = self._pose.y_axis
        g_axis_im = np.array([g_axis[0], g_axis[1], 0])
        if np.linalg.norm(g_axis_im) == 0:
            return 0
        theta = np.arctan2(g_axis[1], g_axis[0])
        return theta

    @property
    def depth(self):
        return self._pose.translation[2]

    @property
    def orientation(self):
        """Rotation about the approach axis relative to a canonical frame."""
        # Build the canonical frame for this approach axis (same construction
        # as `from_feature_vec`).
        x_axis = self.axis
        y_axis = np.array([x_axis[1], -x_axis[0], 0])
        if np.linalg.norm(y_axis) == 0:
            y_axis = np.array([1, 0, 0])
        y_axis = y_axis / np.linalg.norm(y_axis)
        z_axis = np.cross(x_axis, y_axis)
        R = np.array([x_axis, y_axis, z_axis]).T
        # Residual rotation between the canonical frame and the stored pose.
        delta_R = R.T.dot(self._pose.rotation)
        orientation = np.arccos(delta_R[1, 1])
        # Disambiguate the arccos sign into [0, 2*pi).
        if delta_R[1, 2] > 0:
            orientation = 2 * np.pi - orientation
        return orientation

    @property
    def feature_vec(self):
        """Returns the feature vector for the suction point.

        Note
        ----
        `v = [center, cos(orientation), sin(orientation)]`
        """
        return np.r_[self.center.data,
                     np.cos(self.orientation),
                     np.sin(self.orientation)]

    @staticmethod
    def from_feature_vec(v,
                         camera_intr=None,
                         angle=None,
                         depth=None,
                         axis=None):
        """Creates a `MultiSuctionPoint2D` instance from a feature vector and
        additional parameters.

        Parameters
        ----------
        v : :obj:`numpy.ndarray`
            Feature vector, see `Grasp2D.feature_vec`.
        camera_intr : :obj:`autolab_core.CameraIntrinsics`
            Frame of reference for camera that the grasp corresponds to.
        depth : float
            Hard-set the depth for the suction grasp.
        axis : :obj:`numpy.ndarray`
            Normalized 3-vector specifying the approach direction.
        """
        # Read feature vec.
        center_px = v[:2]

        # Angle: explicit argument wins; otherwise recover it from the
        # (cos, sin) pair stored in v[2:].
        grasp_angle = 0
        if v.shape[0] > 2 and angle is None:
            # grasp_angle = v[2]
            grasp_vec = v[2:]
            grasp_vec = grasp_vec / np.linalg.norm(grasp_vec)
            grasp_angle = np.arctan2(grasp_vec[1], grasp_vec[0])
        elif angle is not None:
            grasp_angle = angle

        grasp_axis = np.array([1, 0, 0])
        if axis is not None:
            grasp_axis = axis

        grasp_depth = 0.5
        if depth is not None:
            grasp_depth = depth

        # Canonical frame for the approach axis.
        x_axis = grasp_axis
        y_axis = np.array([grasp_axis[1], -grasp_axis[0], 0])
        if np.linalg.norm(y_axis) == 0:
            y_axis = np.array([1, 0, 0])
        y_axis = y_axis / np.linalg.norm(y_axis)
        z_axis = np.cross(x_axis, y_axis)

        # Apply the recovered roll about the approach (x) axis.
        R = np.array([x_axis, y_axis, z_axis]).T
        R = R.dot(RigidTransform.x_axis_rotation(grasp_angle))
        t = camera_intr.deproject_pixel(
            grasp_depth, Point(center_px, frame=camera_intr.frame)).data
        T = RigidTransform(rotation=R,
                           translation=t,
                           from_frame="grasp",
                           to_frame=camera_intr.frame)

        # Compute center and angle.
        return MultiSuctionPoint2D(T, camera_intr=camera_intr)

    @staticmethod
    def image_dist(g1, g2, alpha=1.0):
        """Computes the distance between grasps in image space.

        Uses Euclidean distance with alpha weighting of angles.

        Parameters
        ----------
        g1 : :obj:`SuctionPoint2D`
            First suction point.
        g2 : :obj:`SuctionPoint2D`
            Second suction point.
        alpha : float
            Weight of angle distance (rad to meters).

        Returns
        -------
        float
            Distance between grasps.
        """
        # Point to point distances.
        point_dist = np.linalg.norm(g1.center.data - g2.center.data)

        # Axis distances.
dot = max(min(np.abs(g1.axis.dot(g2.axis)), 1.0), -1.0) axis_dist = np.arccos(dot) return point_dist + alpha * axis_dist ================================================ FILE: gqcnn/grasping/grasp_quality_function.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Grasp quality functions: suction quality function and parallel jaw grasping quality fuction. 
Authors
-------
Jason Liu & Jeff Mahler
"""
from abc import ABC, abstractmethod
from time import time

import cv2
import matplotlib.pyplot as plt
import numpy as np

import autolab_core.utils as utils
from autolab_core import Point, PointCloud, RigidTransform, Logger, DepthImage

from ..model import get_gqcnn_model, get_fc_gqcnn_model

from .grasp import SuctionPoint2D
from ..utils import GeneralConstants, GripperMode


class GraspQualityFunction(ABC):
    """Abstract grasp quality class."""

    def __init__(self):
        # Set up logger.
        self._logger = Logger.get_logger(self.__class__.__name__)

    def __call__(self, state, actions, params=None):
        """Evaluates grasp quality for a set of actions given a state."""
        return self.quality(state, actions, params)

    @abstractmethod
    def quality(self, state, actions, params=None):
        """Evaluates grasp quality for a set of actions given a state.

        Parameters
        ----------
        state : :obj:`object`
            State of the world e.g. image.
        actions : :obj:`list`
            List of actions to evaluate e.g. parallel-jaw or suction grasps.
        params : :obj:`dict`
            Optional parameters for the evaluation.

        Returns
        -------
        :obj:`numpy.ndarray`
            Vector containing the real-valued grasp quality for each
            candidate.
        """
        pass


class ZeroGraspQualityFunction(object):
    """Null function."""

    def quality(self, state, actions, params=None):
        """Returns zero for all grasps.

        Parameters
        ----------
        state : :obj:`object`
            State of the world e.g. image.
        actions : :obj:`list`
            List of actions to evaluate e.g. parallel-jaw or suction grasps.
        params : :obj:`dict`
            Optional parameters for the evaluation.

        Returns
        -------
        float
            The scalar 0.0 (not a per-candidate vector, unlike the other
            quality functions).
        """
        return 0.0


class ParallelJawQualityFunction(GraspQualityFunction):
    """Abstract wrapper class for parallel jaw quality functions (only image
    based metrics for now)."""

    def __init__(self, config):
        GraspQualityFunction.__init__(self)

        # Read parameters.
        self._friction_coef = config["friction_coef"]
        # Half-angle of the friction cone implied by the friction coefficient.
        self._max_friction_cone_angle = np.arctan(self._friction_coef)

    def friction_cone_angle(self, action):
        """Compute the angle between the axis and the boundaries of the
        friction cone."""
        if action.contact_points is None or action.contact_normals is None:
            invalid_friction_ang_msg = ("Cannot compute friction cone angle"
                                        " without precomputed contact points"
                                        " and normals.")
            raise ValueError(invalid_friction_ang_msg)
        # Angle between each contact normal and the (inward) grasp axis;
        # clamp to [-1, 1] to guard arccos against floating-point overshoot.
        dot_prod1 = min(max(action.contact_normals[0].dot(-action.axis), -1.0),
                        1.0)
        angle1 = np.arccos(dot_prod1)
        dot_prod2 = min(max(action.contact_normals[1].dot(action.axis), -1.0),
                        1.0)
        angle2 = np.arccos(dot_prod2)
        # The worse (larger) of the two contact angles governs slip.
        return max(angle1, angle2)

    def force_closure(self, action):
        """Determine if the grasp is in force closure."""
        return (self.friction_cone_angle(action) <
                self._max_friction_cone_angle)


class ComForceClosureParallelJawQualityFunction(ParallelJawQualityFunction):
    """Measures the distance to the estimated center of mass for antipodal
    parallel-jaw grasps."""

    def __init__(self, config):
        """Create a best-fit planarity suction metric."""
        self._antipodality_pctile = config["antipodality_pctile"]
        ParallelJawQualityFunction.__init__(self, config)

    def quality(self, state, actions, params=None):
        """Given a parallel-jaw grasp, compute the distance to the center of
        mass of the grasped object.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            An RgbdImageState instance that encapsulates rgbd_im, camera_intr,
            segmask, full_observed.
        action: :obj:`Grasp2D`
            A suction grasp in image space that encapsulates center, approach
            direction, depth, camera_intr.
        params: dict
            Stores params used in computing quality.

        Returns
        -------
        :obj:`numpy.ndarray`
            Array of the quality for each grasp.
        """
        # Compute antipodality.
        antipodality_q = [
            ParallelJawQualityFunction.friction_cone_angle(self, action)
            for action in actions
        ]

        # Compute object centroid.
object_com = state.rgbd_im.center if state.segmask is not None: nonzero_px = state.segmask.nonzero_pixels() object_com = np.mean(nonzero_px, axis=0) # Compute negative SSE from the best fit plane for each grasp. antipodality_thresh = abs( np.percentile(antipodality_q, 100 - self._antipodality_pctile)) qualities = [] max_q = max(state.rgbd_im.height, state.rgbd_im.width) for i, action in enumerate(actions): q = max_q friction_cone_angle = antipodality_q[i] force_closure = ParallelJawQualityFunction.force_closure( self, action) if force_closure or friction_cone_angle < antipodality_thresh: grasp_center = np.array([action.center.y, action.center.x]) if state.obj_segmask is not None: grasp_obj_id = state.obj_segmask[grasp_center[0], grasp_center[1]] obj_mask = state.obj_segmask.segment_mask(grasp_obj_id) nonzero_px = obj_mask.nonzero_pixels() object_com = np.mean(nonzero_px, axis=0) q = np.linalg.norm(grasp_center - object_com) if state.obj_segmask is not None and grasp_obj_id == 0: q = max_q q = (np.exp(-q / max_q) - np.exp(-1)) / (1 - np.exp(-1)) qualities.append(q) return np.array(qualities) class SuctionQualityFunction(GraspQualityFunction): """Abstract wrapper class for suction quality functions (only image based metrics for now).""" def __init__(self, config): GraspQualityFunction.__init(self) # Read parameters. self._window_size = config["window_size"] self._sample_rate = config["sample_rate"] def _points_in_window(self, point_cloud_image, action, segmask=None): """Retrieve all points on the object in a box of size `self._window_size`.""" # Read indices. im_shape = point_cloud_image.shape i_start = int(max(action.center.y - self._window_size // 2, 0)) # TODO: Confirm div. j_start = int(max(action.center.x - self._window_size // 2, 0)) i_end = int(min(i_start + self._window_size, im_shape[0])) j_end = int(min(j_start + self._window_size, im_shape[1])) step = int(1 / self._sample_rate) # Read 3D points in the window. 
        points = point_cloud_image[i_start:i_end:step, j_start:j_end:step]
        stacked_points = points.reshape(points.shape[0] * points.shape[1], -1)

        # Form the matrices for plane-fitting.
        return stacked_points

    def _points_to_matrices(self, points):
        """Convert a set of 3D points to an A and b matrix for regression."""
        # A = [x, y, 1] rows; b = z values, for the plane z = w0*x + w1*y + w2.
        A = points[:, [0, 1]]
        ones = np.ones((A.shape[0], 1))
        A = np.hstack((A, ones))
        b = points[:, 2]
        return A, b

    def _best_fit_plane(self, A, b):
        """Find a best-fit plane of points."""
        try:
            # NOTE(review): `rcond` is not pinned, so NumPy may warn and the
            # default changed across versions — confirm intended behavior.
            w, _, _, _ = np.linalg.lstsq(A, b)
        except np.linalg.LinAlgError:
            self._logger.warning("Could not find a best-fit plane!")
            raise
        return w

    def _sum_of_squared_residuals(self, w, A, z):
        """Returns the sum of squared residuals from the plane."""
        # Mean squared residual of the plane fit.
        return (1.0 / A.shape[0]) * np.square(np.linalg.norm(np.dot(A, w) - z))


class BestFitPlanaritySuctionQualityFunction(SuctionQualityFunction):
    """A best-fit planarity suction metric."""

    def __init__(self, config):
        SuctionQualityFunction.__init__(self, config)

    def quality(self, state, actions, params=None):
        """Given a suction point, compute a score based on a best-fit 3D plane
        of the neighboring points.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            An RgbdImageState instance that encapsulates rgbd_im, camera_intr,
            segmask, full_observed.
        action: :obj:`SuctionPoint2D`
            A suction grasp in image space that encapsulates center, approach
            direction, depth, camera_intr.
        params: dict
            Stores params used in computing suction quality.

        Returns
        -------
        :obj:`numpy.ndarray`
            Array of the quality for each grasp.
        """
        qualities = []

        # Deproject points.
        point_cloud_image = state.camera_intr.deproject_to_image(
            state.rgbd_im.depth)

        # Compute negative SSE from the best fit plane for each grasp.
        for i, action in enumerate(actions):
            if not isinstance(action, SuctionPoint2D):
                not_suction_msg = ("This function can only be used to evaluate"
                                   " suction quality.")
                raise ValueError(not_suction_msg)

            # x,y in matrix A and z is vector z.
            points = self._points_in_window(point_cloud_image,
                                            action,
                                            segmask=state.segmask)
            A, b = self._points_to_matrices(points)
            # vector w w/ a bias term represents a best-fit plane.
            w = self._best_fit_plane(A, b)

            if params is not None and params["vis"]["plane"]:
                # Debug visualization of the fitted plane and window points.
                from visualization import Visualizer2D as vis2d
                from visualization import Visualizer3D as vis3d
                mid_i = A.shape[0] // 2
                pred_z = A.dot(w)
                p0 = np.array([A[mid_i, 0], A[mid_i, 1], pred_z[mid_i]])
                n = np.array([w[0], w[1], -1])
                n = n / np.linalg.norm(n)
                tx = np.array([n[1], -n[0], 0])
                tx = tx / np.linalg.norm(tx)
                ty = np.cross(n, tx)
                R = np.array([tx, ty, n]).T
                T_table_world = RigidTransform(rotation=R,
                                               translation=p0,
                                               from_frame="patch",
                                               to_frame="world")

                vis3d.figure()
                vis3d.points(point_cloud_image.to_point_cloud(),
                             scale=0.0025,
                             subsample=10,
                             random=True,
                             color=(0, 0, 1))
                vis3d.points(PointCloud(points.T),
                             scale=0.0025,
                             color=(1, 0, 0))
                vis3d.table(T_table_world, dim=0.01)
                vis3d.show()

                vis2d.figure()
                vis2d.imshow(state.rgbd_im.depth)
                vis2d.scatter(action.center.x, action.center.y, s=50, c="b")
                vis2d.show()

            # Evaluate how well best-fit plane describes all points in window.
            quality = np.exp(-self._sum_of_squared_residuals(w, A, b))
            qualities.append(quality)

        return np.array(qualities)


class ApproachPlanaritySuctionQualityFunction(SuctionQualityFunction):
    """An approach planarity suction metric."""

    def __init__(self, config):
        """Create approach planarity suction metric."""
        SuctionQualityFunction.__init__(self, config)

    def _action_to_plane(self, point_cloud_image, action):
        """Convert a plane from point-normal form to general form."""
        # Plane through the contact point with normal equal to the (negated)
        # approach axis, expressed as z = w0*x + w1*y + w2.
        x = int(action.center.x)
        y = int(action.center.y)
        p_0 = point_cloud_image[y, x]
        n = -action.axis
        # TODO: Confirm divs.
        w = np.array([-n[0] / n[2], -n[1] / n[2], np.dot(n, p_0) / n[2]])
        return w

    def quality(self, state, actions, params=None):
        """Given a suction point, compute a score based on a best-fit 3D plane
        of the neighboring points.
        Parameters
        ----------
        state : :obj:`RgbdImageState`
            An RgbdImageState instance that encapsulates rgbd_im, camera_intr,
            segmask, full_observed.
        action: :obj:`SuctionPoint2D`
            A suction grasp in image space that encapsulates center, approach
            direction, depth, camera_intr.
        params: dict
            Stores params used in computing suction quality.

        Returns
        -------
        :obj:`numpy.ndarray`
            Array of the quality for each grasp.
        """
        qualities = []

        # Deproject points.
        point_cloud_image = state.camera_intr.deproject_to_image(
            state.rgbd_im.depth)

        # Compute negative SSE from the best fit plane for each grasp.
        for i, action in enumerate(actions):
            if not isinstance(action, SuctionPoint2D):
                not_suction_msg = ("This function can only be used to evaluate"
                                   " suction quality.")
                raise ValueError(not_suction_msg)

            # x,y in matrix A and z is vector z.
            points = self._points_in_window(point_cloud_image,
                                            action,
                                            segmask=state.segmask)
            A, b = self._points_to_matrices(points)
            # Unlike the best-fit variant, the plane comes from the approach
            # axis itself rather than a regression.
            w = self._action_to_plane(point_cloud_image, action)

            if params is not None and params["vis"]["plane"]:
                # Debug visualization of the approach plane and window points.
                from visualization import Visualizer2D as vis2d
                from visualization import Visualizer3D as vis3d
                mid_i = A.shape[0] // 2
                pred_z = A.dot(w)
                p0 = np.array([A[mid_i, 0], A[mid_i, 1], pred_z[mid_i]])
                n = np.array([w[0], w[1], -1])
                n = n / np.linalg.norm(n)
                tx = np.array([n[1], -n[0], 0])
                tx = tx / np.linalg.norm(tx)
                ty = np.cross(n, tx)
                R = np.array([tx, ty, n]).T

                c = state.camera_intr.deproject_pixel(action.depth,
                                                      action.center)
                d = Point(c.data - 0.01 * action.axis, frame=c.frame)

                T_table_world = RigidTransform(rotation=R,
                                               translation=p0,
                                               from_frame="patch",
                                               to_frame="world")

                vis3d.figure()
                vis3d.points(point_cloud_image.to_point_cloud(),
                             scale=0.0025,
                             subsample=10,
                             random=True,
                             color=(0, 0, 1))
                vis3d.points(PointCloud(points.T),
                             scale=0.0025,
                             color=(1, 0, 0))
                vis3d.points(c, scale=0.005, color=(1, 1, 0))
                vis3d.points(d, scale=0.005, color=(1, 1, 0))
                vis3d.table(T_table_world, dim=0.01)
                vis3d.show()

                vis2d.figure()
                vis2d.imshow(state.rgbd_im.depth)
                vis2d.scatter(action.center.x, action.center.y, s=50, c="b")
                vis2d.show()

            # Evaluate how well best-fit plane describes all points in window.
            quality = np.exp(-self._sum_of_squared_residuals(w, A, b))
            qualities.append(quality)

        return np.array(qualities)


class DiscApproachPlanaritySuctionQualityFunction(SuctionQualityFunction):
    """An approach planarity suction metric using a disc-shaped window."""

    def __init__(self, config):
        """Create approach planarity suction metric."""
        self._radius = config["radius"]
        SuctionQualityFunction.__init__(self, config)

    def _action_to_plane(self, point_cloud_image, action):
        """Convert a plane from point-normal form to general form."""
        x = int(action.center.x)
        y = int(action.center.y)
        p_0 = point_cloud_image[y, x]
        n = -action.axis
        # TODO: Confirm divs.
        w = np.array([-n[0] / n[2], -n[1] / n[2], np.dot(n, p_0) / n[2]])
        return w

    def _points_in_window(self, point_cloud_image, action, segmask=None):
        """Retrieve all points on the object in a disc of size
        `self._window_size`."""
        # Compute plane.
        # SVD of the normal yields two vectors spanning the plane orthogonal
        # to the approach axis (the tangent plane).
        n = -action.axis
        U, _, _ = np.linalg.svd(n.reshape((3, 1)))
        tangents = U[:, 1:]

        # Read indices.
        im_shape = point_cloud_image.shape
        i_start = int(max(action.center.y - self._window_size // 2, 0))
        j_start = int(max(action.center.x - self._window_size // 2, 0))
        i_end = int(min(i_start + self._window_size, im_shape[0]))
        j_end = int(min(j_start + self._window_size, im_shape[1]))
        step = int(1 / self._sample_rate)

        # Read 3D points in the window.
        points = point_cloud_image[i_start:i_end:step, j_start:j_end:step]
        stacked_points = points.reshape(points.shape[0] * points.shape[1], -1)

        # Compute the center point.
        contact_point = point_cloud_image[int(action.center.y),
                                          int(action.center.x)]

        # Project onto approach plane.
        residuals = stacked_points - contact_point
        coords = residuals.dot(tangents)
        proj_residuals = coords.dot(tangents.T)

        # Check distance from the center point along the approach plane.
        dists = np.linalg.norm(proj_residuals, axis=1)
        # Keep only points whose in-plane distance lies within the disc.
        stacked_points = stacked_points[dists <= self._radius]

        # Form the matrices for plane-fitting.
        return stacked_points

    def quality(self, state, actions, params=None):
        """Given a suction point, compute a score based on a best-fit 3D plane
        of the neighboring points.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            An RgbdImageState instance that encapsulates rgbd_im, camera_intr,
            segmask, full_observed.
        action: :obj:`SuctionPoint2D`
            A suction grasp in image space that encapsulates center, approach
            direction, depth, camera_intr.
        params: dict
            Stores params used in computing suction quality.

        Returns
        -------
        :obj:`numpy.ndarray`
            Array of the quality for each grasp.
        """
        qualities = []

        # Deproject points.
        point_cloud_image = state.camera_intr.deproject_to_image(
            state.rgbd_im.depth)

        # Compute negative SSE from the best fit plane for each grasp.
        for i, action in enumerate(actions):
            if not isinstance(action, SuctionPoint2D):
                not_suction_msg = ("This function can only be used to evaluate"
                                   " suction quality.")
                raise ValueError(not_suction_msg)

            # x,y in matrix A and z is vector z.
            points = self._points_in_window(point_cloud_image,
                                            action,
                                            segmask=state.segmask)
            A, b = self._points_to_matrices(points)
            # vector w w/ a bias term represents a best-fit plane.
            w = self._action_to_plane(point_cloud_image, action)
            sse = self._sum_of_squared_residuals(w, A, b)

            if params is not None and params["vis"]["plane"]:
                # Debug visualization of the approach plane and disc points.
                from visualization import Visualizer2D as vis2d
                from visualization import Visualizer3D as vis3d
                mid_i = A.shape[0] // 2
                pred_z = A.dot(w)
                p0 = np.array([A[mid_i, 0], A[mid_i, 1], pred_z[mid_i]])
                n = np.array([w[0], w[1], -1])
                n = n / np.linalg.norm(n)
                tx = np.array([n[1], -n[0], 0])
                tx = tx / np.linalg.norm(tx)
                ty = np.cross(n, tx)
                R = np.array([tx, ty, n]).T

                c = state.camera_intr.deproject_pixel(action.depth,
                                                      action.center)
                d = Point(c.data - 0.01 * action.axis, frame=c.frame)

                T_table_world = RigidTransform(rotation=R,
                                               translation=p0,
                                               from_frame="patch",
                                               to_frame="world")

                vis3d.figure()
                vis3d.points(point_cloud_image.to_point_cloud(),
                             scale=0.0025,
                             subsample=10,
                             random=True,
                             color=(0, 0, 1))
                vis3d.points(PointCloud(points.T),
                             scale=0.0025,
                             color=(1, 0, 0))
                vis3d.points(c, scale=0.005, color=(1, 1, 0))
                vis3d.points(d, scale=0.005, color=(1, 1, 0))
                vis3d.table(T_table_world, dim=0.01)
                vis3d.show()

                vis2d.figure()
                vis2d.imshow(state.rgbd_im.depth)
                vis2d.scatter(action.center.x, action.center.y, s=50, c="b")
                vis2d.show()

            # Evaluate how well best-fit plane describes all points in window.
            quality = np.exp(-sse)
            qualities.append(quality)

        return np.array(qualities)


class ComApproachPlanaritySuctionQualityFunction(
        ApproachPlanaritySuctionQualityFunction):
    """An approach planarity suction metric that ranks sufficiently planar
    points by their distance to the object COM."""

    def __init__(self, config):
        """Create approach planarity suction metric."""
        self._planarity_thresh = config["planarity_thresh"]
        ApproachPlanaritySuctionQualityFunction.__init__(self, config)

    def quality(self, state, actions, params=None):
        """Given a suction point, compute a score based on a best-fit 3D plane
        of the neighboring points.
        Parameters
        ----------
        state : :obj:`RgbdImageState`
            An RgbdImageState instance that encapsulates rgbd_im, camera_intr,
            segmask, full_observed.
        action: :obj:`SuctionPoint2D`
            A suction grasp in image space that encapsulates center, approach
            direction, depth, camera_intr.
        params: dict
            Stores params used in computing suction quality.

        Returns
        -------
        :obj:`numpy.ndarray`
            Array of the quality for each grasp.
        """
        # Compute planarity.
        # NOTE(review): the parent returns exp(-SSE) qualities, so `sse` here
        # actually holds planarity *scores*, not raw residuals — confirm the
        # threshold below is calibrated accordingly.
        sse = ApproachPlanaritySuctionQualityFunction.quality(self,
                                                              state,
                                                              actions,
                                                              params=params)

        if params["vis"]["hist"]:
            plt.figure()
            utils.histogram(sse,
                            100, (np.min(sse), np.max(sse)),
                            normalized=False,
                            plot=True)
            plt.show()

        # Compute object centroid.
        object_com = state.rgbd_im.center
        if state.segmask is not None:
            nonzero_px = state.segmask.nonzero_pixels()
            object_com = np.mean(nonzero_px, axis=0)

        # Threshold.
        qualities = []
        for k, action in enumerate(actions):
            # Default (worst) score is the largest image dimension.
            q = max(state.rgbd_im.height, state.rgbd_im.width)
            if np.abs(sse[k]) < self._planarity_thresh:
                grasp_center = np.array([action.center.y, action.center.x])
                q = np.linalg.norm(grasp_center - object_com)
            qualities.append(np.exp(-q))

        return np.array(qualities)


class ComDiscApproachPlanaritySuctionQualityFunction(
        DiscApproachPlanaritySuctionQualityFunction):
    """An approach planarity suction metric that ranks sufficiently planar
    points by their distance to the object COM."""
    # NOTE: THERE WAS A SLIGHTLY DIFFERENT DUPLICATE ABOVE.

    def __init__(self, config):
        """Create approach planarity suction metric."""
        self._planarity_pctile = config["planarity_pctile"]
        self._planarity_abs_thresh = 0
        if "planarity_abs_thresh" in config:
            # Stored as exp(-thresh) to compare against exp(-SSE) scores.
            self._planarity_abs_thresh = np.exp(
                -config["planarity_abs_thresh"])
        DiscApproachPlanaritySuctionQualityFunction.__init__(self, config)

    def quality(self, state, actions, params=None):
        """Given a suction point, compute a score based on a best-fit 3D plane
        of the neighboring points.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            An RgbdImageState instance that encapsulates rgbd_im, camera_intr,
            segmask, full_observed.
        action: :obj:`SuctionPoint2D`
            A suction grasp in image space that encapsulates center, approach
            direction, depth, camera_intr.
        params: dict
            Stores params used in computing suction quality.

        Returns
        -------
        :obj:`numpy.ndarray`
            Array of the quality for each grasp.
        """
        # Compute planarity.
        sse_q = DiscApproachPlanaritySuctionQualityFunction.quality(
            self, state, actions, params=params)

        if params["vis"]["hist"]:
            plt.figure()
            utils.histogram(sse_q,
                            100, (np.min(sse_q), np.max(sse_q)),
                            normalized=False,
                            plot=True)
            plt.show()

        # Compute object centroid.
        object_com = state.rgbd_im.center
        if state.segmask is not None:
            nonzero_px = state.segmask.nonzero_pixels()
            object_com = np.mean(nonzero_px, axis=0)

        # Threshold.
        # Keep only grasps above the planarity percentile or absolute cutoff.
        planarity_thresh = abs(
            np.percentile(sse_q, 100 - self._planarity_pctile))
        qualities = []
        max_q = max(state.rgbd_im.height, state.rgbd_im.width)
        for k, action in enumerate(actions):
            q = max_q
            if sse_q[k] > planarity_thresh or sse_q[
                    k] > self._planarity_abs_thresh:
                grasp_center = np.array([action.center.y, action.center.x])

                # Prefer the centroid of the grasped object's own segment.
                if state.obj_segmask is not None:
                    grasp_obj_id = state.obj_segmask[grasp_center[0],
                                                     grasp_center[1]]
                    obj_mask = state.obj_segmask.segment_mask(grasp_obj_id)
                    nonzero_px = obj_mask.nonzero_pixels()
                    object_com = np.mean(nonzero_px, axis=0)

                q = np.linalg.norm(grasp_center - object_com)
                # Map distance to a quality in [0, 1] (closer -> higher).
                q = (np.exp(-q / max_q) - np.exp(-1)) / (1 - np.exp(-1))
            qualities.append(q)

        return np.array(qualities)


class GaussianCurvatureSuctionQualityFunction(SuctionQualityFunction):
    """A Gaussian-curvature suction metric."""

    def __init__(self, config):
        """Create approach planarity suction metric."""
        SuctionQualityFunction.__init__(self, config)

    def _points_to_matrices(self, points):
        """Convert a set of 3D points to an A and b matrix for regression."""
        # Quadratic surface model: z = f(x, y) with terms [x, y, x^2, xy, y^2, 1].
        x = points[:, 0]
        y = points[:, 1]
        A = np.c_[x, y, x * x, x * y, y * y]
        ones = np.ones([A.shape[0], 1])
        A = np.c_[A, ones]
        b = points[:, 2]
        return A, b

    def quality(self, state, actions, params=None):
        """Given a suction point, compute a score based on a best-fit 3D plane
        of the neighboring points.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            An RgbdImageState instance that encapsulates rgbd_im, camera_intr,
            segmask, full_observed.
        action: :obj:`SuctionPoint2D`
            A suction grasp in image space that encapsulates center, approach
            direction, depth, camera_intr.
        params: dict
            Stores params used in computing suction quality.

        Returns
        -------
        :obj:`numpy.ndarray`
            Array of the quality for each grasp.
        """
        qualities = []

        # Deproject points.
        point_cloud_image = state.camera_intr.deproject_to_image(
            state.rgbd_im.depth)

        # Compute negative SSE from the best fit plane for each grasp.
        for i, action in enumerate(actions):
            if not isinstance(action, SuctionPoint2D):
                not_suction_msg = ("This function can only be used to evaluate"
                                   " suction quality.")
                raise ValueError(not_suction_msg)

            # x,y in matrix A and z is vector z.
            points = self._points_in_window(point_cloud_image,
                                            action,
                                            segmask=state.segmask)
            A, b = self._points_to_matrices(points)
            # vector w w/ a bias term represents a best-fit plane.
            w = self._best_fit_plane(A, b)

            # Compute curvature.
            # Partial derivatives of the fitted quadratic at the origin.
            fx = w[0]
            fy = w[1]
            fxx = 2 * w[2]
            fxy = w[3]
            fyy = 2 * w[4]
            # Gaussian curvature of the graph z = f(x, y).
            curvature = (fxx * fyy - fxy**2) / ((1 + fx**2 + fy**2)**2)

            # Store quality.
            # Flatter surfaces (|curvature| near 0) score closer to 1.
            quality = np.exp(-np.abs(curvature))
            qualities.append(quality)

        return np.array(qualities)


class DiscCurvatureSuctionQualityFunction(
        GaussianCurvatureSuctionQualityFunction):

    def __init__(self, config):
        """Create approach planarity suction metric."""
        self._radius = config["radius"]
        SuctionQualityFunction.__init__(self, config)

    def _points_in_window(self, point_cloud_image, action, segmask=None):
        """Retrieve all points on the object in a disc of size
        `self._window_size`."""
        # Read indices.
        im_shape = point_cloud_image.shape
        i_start = int(max(action.center.y - self._window_size // 2, 0))
        j_start = int(max(action.center.x - self._window_size // 2, 0))
        i_end = int(min(i_start + self._window_size, im_shape[0]))
        j_end = int(min(j_start + self._window_size, im_shape[1]))
        step = int(1 / self._sample_rate)

        # Read 3D points in the window.
        points = point_cloud_image[i_start:i_end:step, j_start:j_end:step]
        stacked_points = points.reshape(points.shape[0] * points.shape[1], -1)

        # Check the distance from the center point.
        # Unlike the approach-plane disc, this uses full 3D distance.
        contact_point = point_cloud_image[int(action.center.y),
                                          int(action.center.x)]
        dists = np.linalg.norm(stacked_points - contact_point, axis=1)
        stacked_points = stacked_points[dists <= self._radius]

        # Form the matrices for plane-fitting.
        return stacked_points


class ComDiscCurvatureSuctionQualityFunction(
        DiscCurvatureSuctionQualityFunction):

    def __init__(self, config):
        """Create approach planarity suction metric."""
        self._curvature_pctile = config["curvature_pctile"]
        DiscCurvatureSuctionQualityFunction.__init__(self, config)

    def quality(self, state, actions, params=None):
        """Given a suction point, compute a score based on the Gaussian
        curvature.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            An RgbdImageState instance that encapsulates rgbd_im, camera_intr,
            segmask, full_observed.
        action: :obj:`SuctionPoint2D`
            A suction grasp in image space that encapsulates center, approach
            direction, depth, camera_intr.
        params: dict
            Stores params used in computing suction quality.

        Returns
        -------
        :obj:`numpy.ndarray`
            Array of the quality for each grasp.
        """
        # Compute planarity.
        curvature_q = DiscCurvatureSuctionQualityFunction.quality(
            self, state, actions, params=params)

        if params["vis"]["hist"]:
            plt.figure()
            # NOTE: This used to be an undefined `curvature`.
            utils.histogram(curvature_q,
                            100, (np.min(curvature_q), np.max(curvature_q)),
                            normalized=False,
                            plot=True)
            plt.show()

        # Compute object centroid.
object_com = state.rgbd_im.center if state.segmask is not None: nonzero_px = state.segmask.nonzero_pixels() object_com = np.mean(nonzero_px, axis=0) # Threshold. curvature_q_thresh = abs( np.percentile(curvature_q, 100 - self._curvature_pctile)) qualities = [] max_q = max(state.rgbd_im.height, state.rgbd_im.width) for k, action in enumerate(actions): q = max_q if curvature_q[k] > curvature_q_thresh: grasp_center = np.array([action.center.y, action.center.x]) q = np.linalg.norm(grasp_center - object_com) q = (np.exp(-q / max_q) - np.exp(-1)) / (1 - np.exp(-1)) qualities.append(q) return np.array(qualities) class GQCnnQualityFunction(GraspQualityFunction): def __init__(self, config): """Create a GQCNN suction quality function.""" GraspQualityFunction.__init__(self) # Store parameters. self._config = config self._gqcnn_model_dir = config["gqcnn_model"] self._crop_height = config["crop_height"] self._crop_width = config["crop_width"] # Init GQ-CNN self._gqcnn = get_gqcnn_model().load(self._gqcnn_model_dir) # Open Tensorflow session for gqcnn. self._gqcnn.open_session() def __del__(self): try: self._gqcnn.close_session() except Exception: # TODO(vsatish): Except specific exception. pass @property def gqcnn(self): """Returns the GQ-CNN.""" return self._gqcnn @property def gqcnn_recep_height(self): return self._gqcnn.im_height @property def gqcnn_recep_width(self): return self._gqcnn.im_width @property def gqcnn_stride(self): return self._gqcnn.stride @property def config(self): """Returns the GQCNN quality function parameters.""" return self._config def grasps_to_tensors(self, grasps, state): """Converts a list of grasps to an image and pose tensor for fast grasp quality evaluation. Attributes ---------- grasps : :obj:`list` of :obj:`object` List of image grasps to convert. state : :obj:`RgbdImageState` RGB-D image to plan grasps on. Returns ------- image_arr : :obj:`numpy.ndarray` 4D numpy tensor of image to be predicted. 
pose_arr : :obj:`numpy.ndarray` 2D numpy tensor of depth values. """ # Parse params. gqcnn_im_height = self.gqcnn.im_height gqcnn_im_width = self.gqcnn.im_width gqcnn_num_channels = self.gqcnn.num_channels gqcnn_pose_dim = self.gqcnn.pose_dim gripper_mode = self.gqcnn.gripper_mode num_grasps = len(grasps) depth_im = state.rgbd_im.depth # Allocate tensors. tensor_start = time() image_tensor = np.zeros( [num_grasps, gqcnn_im_height, gqcnn_im_width, gqcnn_num_channels]) pose_tensor = np.zeros([num_grasps, gqcnn_pose_dim]) scale = gqcnn_im_height / self._crop_height depth_im_scaled = depth_im.resize(scale) for i, grasp in enumerate(grasps): translation = scale * np.array([ depth_im.center[0] - grasp.center.data[1], depth_im.center[1] - grasp.center.data[0] ]) im_tf = depth_im_scaled im_tf = depth_im_scaled.transform(translation, grasp.angle) im_tf = im_tf.crop(gqcnn_im_height, gqcnn_im_width) image_tensor[i, ...] = im_tf.raw_data if gripper_mode == GripperMode.PARALLEL_JAW: pose_tensor[i] = grasp.depth elif gripper_mode == GripperMode.SUCTION: pose_tensor[i, ...] = np.array( [grasp.depth, grasp.approach_angle]) elif gripper_mode == GripperMode.MULTI_SUCTION: pose_tensor[i] = grasp.depth elif gripper_mode == GripperMode.LEGACY_PARALLEL_JAW: pose_tensor[i] = grasp.depth elif gripper_mode == GripperMode.LEGACY_SUCTION: pose_tensor[i, ...] = np.array( [grasp.depth, grasp.approach_angle]) else: raise ValueError("Gripper mode %s not supported" % (gripper_mode)) self._logger.debug("Tensor conversion took %.3f sec" % (time() - tensor_start)) return image_tensor, pose_tensor def quality(self, state, actions, params): """Evaluate the quality of a set of actions according to a GQ-CNN. Parameters ---------- state : :obj:`RgbdImageState` State of the world described by an RGB-D image. actions: :obj:`object` Set of grasping actions to evaluate. params: dict Optional parameters for quality evaluation. 
Returns ------- :obj:`list` of float Real-valued grasp quality predictions for each action, between 0 and 1. """ # Form tensors. tensor_start = time() image_tensor, pose_tensor = self.grasps_to_tensors(actions, state) self._logger.info("Image transformation took %.3f sec" % (time() - tensor_start)) if params is not None and params["vis"]["tf_images"]: # Read vis params. k = params["vis"]["k"] d = utils.sqrt_ceil(k) # Display grasp transformed images. from visualization import Visualizer2D as vis2d vis2d.figure(size=(GeneralConstants.FIGSIZE, GeneralConstants.FIGSIZE)) for i, image_tf in enumerate(image_tensor[:k, ...]): depth = pose_tensor[i][0] vis2d.subplot(d, d, i + 1) vis2d.imshow(DepthImage(image_tf)) vis2d.title("Image %d: d=%.3f" % (i, depth)) vis2d.show() # Predict grasps. predict_start = time() output_arr = self.gqcnn.predict(image_tensor, pose_tensor) q_values = output_arr[:, -1] self._logger.info("Inference took %.3f sec" % (time() - predict_start)) return q_values.tolist() class NoMagicQualityFunction(GraspQualityFunction): def __init__(self, config): """Create a quality that uses `nomagic_net` as a quality function.""" from nomagic_submission import ConvNetModel from tensorpack import SaverRestore from tensorpack.predict import OfflinePredictor from tensorpack.predict.config import PredictConfig GraspQualityFunction.__init(self) # Store parameters. self._model_path = config["gqcnn_model"] self._batch_size = config["batch_size"] self._crop_height = config["crop_height"] self._crop_width = config["crop_width"] self._im_height = config["im_height"] self._im_width = config["im_width"] self._num_channels = config["num_channels"] self._pose_dim = config["pose_dim"] self._gripper_mode = config["gripper_mode"] self._data_mean = config["data_mean"] self._data_std = config["data_std"] # Init config. 
model = ConvNetModel() self._config = PredictConfig(model=model, session_init=SaverRestore( self._model_path), output_names=["prob"]) self._predictor = OfflinePredictor(self._config) @property def gqcnn(self): """Returns the GQ-CNN.""" return self._predictor @property def config(self): """Returns the GQCNN suction quality function parameters.""" return self._config def grasps_to_tensors(self, grasps, state): """Converts a list of grasps to an image and pose tensor for fast grasp quality evaluation. Attributes ---------- grasps : :obj:`list` of :obj:`object` List of image grasps to convert. state : :obj:`RgbdImageState` RGB-D image to plan grasps on. Returns ------- :obj:`numpy.ndarray` 4D numpy tensor of image to be predicted. :obj:`numpy.ndarray` 2D numpy tensor of depth values. """ # Parse params. gqcnn_im_height = self._im_height gqcnn_im_width = self._im_width gqcnn_num_channels = self._num_channels gqcnn_pose_dim = self._pose_dim gripper_mode = self._gripper_mode num_grasps = len(grasps) depth_im = state.rgbd_im.depth # Allocate tensors. tensor_start = time() image_tensor = np.zeros( [num_grasps, gqcnn_im_height, gqcnn_im_width, gqcnn_num_channels]) pose_tensor = np.zeros([num_grasps, gqcnn_pose_dim]) scale = gqcnn_im_height / self._crop_height depth_im_scaled = depth_im.resize(scale) for i, grasp in enumerate(grasps): translation = scale * np.array([ depth_im.center[0] - grasp.center.data[1], depth_im.center[1] - grasp.center.data[0] ]) im_tf = depth_im_scaled im_tf = depth_im_scaled.transform(translation, grasp.angle) im_tf = im_tf.crop(gqcnn_im_height, gqcnn_im_width) im_encoded = cv2.imencode(".png", np.uint8(im_tf.raw_data * 255))[1].tostring() im_decoded = cv2.imdecode(np.frombuffer(im_encoded, np.uint8), 0) / 255.0 image_tensor[i, :, :, 0] = ((im_decoded - self._data_mean) / self._data_std) if gripper_mode == GripperMode.PARALLEL_JAW: pose_tensor[i] = grasp.depth elif gripper_mode == GripperMode.SUCTION: pose_tensor[i, ...] 
= np.array( [grasp.depth, grasp.approach_angle]) elif gripper_mode == GripperMode.LEGACY_PARALLEL_JAW: pose_tensor[i] = grasp.depth elif gripper_mode == GripperMode.LEGACY_SUCTION: pose_tensor[i, ...] = np.array( [grasp.depth, grasp.approach_angle]) else: raise ValueError("Gripper mode %s not supported" % (gripper_mode)) self._logger.debug("Tensor conversion took %.3f sec" % (time() - tensor_start)) return image_tensor, pose_tensor def quality(self, state, actions, params): """Evaluate the quality of a set of actions according to a GQ-CNN. Parameters ---------- state : :obj:`RgbdImageState` State of the world described by an RGB-D image. actions: :obj:`object` Set of grasping actions to evaluate. params: dict Optional parameters for quality evaluation. Returns ------- :obj:`list` of float Real-valued grasp quality predictions for each action, between 0 and 1. """ # Form tensors. image_tensor, pose_tensor = self.grasps_to_tensors(actions, state) if params is not None and params["vis"]["tf_images"]: # Read vis params. k = params["vis"]["k"] d = utils.sqrt_ceil(k) # Display grasp transformed images. from visualization import Visualizer2D as vis2d vis2d.figure(size=(GeneralConstants.FIGSIZE, GeneralConstants.FIGSIZE)) for i, image_tf in enumerate(image_tensor[:k, ...]): depth = pose_tensor[i][0] vis2d.subplot(d, d, i + 1) vis2d.imshow(DepthImage(image_tf)) vis2d.title("Image %d: d=%.3f" % (i, depth)) vis2d.show() # Predict grasps. 
num_actions = len(actions) null_arr = -1 * np.ones(self._batch_size) predict_start = time() output_arr = np.zeros([num_actions, 2]) cur_i = 0 end_i = cur_i + min(self._batch_size, num_actions - cur_i) while cur_i < num_actions: output_arr[cur_i:end_i, :] = self.gqcnn( image_tensor[cur_i:end_i, :, :, 0], pose_tensor[cur_i:end_i, 0], null_arr)[0] cur_i = end_i end_i = cur_i + min(self._batch_size, num_actions - cur_i) q_values = output_arr[:, -1] self._logger.debug("Prediction took %.3f sec" % (time() - predict_start)) return q_values.tolist() class FCGQCnnQualityFunction(GraspQualityFunction): def __init__(self, config): """Grasp quality function using the fully-convolutional gqcnn.""" GraspQualityFunction.__init__(self) # Store parameters. self._config = config self._model_dir = config["gqcnn_model"] self._backend = config["gqcnn_backend"] self._fully_conv_config = config["fully_conv_gqcnn_config"] # Init fcgqcnn. self._fcgqcnn = get_fc_gqcnn_model(backend=self._backend).load( self._model_dir, self._fully_conv_config) # Open Tensorflow session for fcgqcnn. self._fcgqcnn.open_session() def __del__(self): try: self._fcgqcnn.close_session() except Exception: # TODO(vsatish): Except specific exception. 
pass @property def gqcnn(self): """Returns the FC-GQCNN.""" return self._fcgqcnn @property def config(self): """Returns the FC-GQCNN quality function parameters.""" return self._config def quality(self, images, depths, params=None): return self._fcgqcnn.predict(images, depths) class GraspQualityFunctionFactory(object): """Factory for grasp quality functions.""" @staticmethod def quality_function(metric_type, config): if metric_type == "zero": return ZeroGraspQualityFunction() elif metric_type == "parallel_jaw_com_force_closure": return ComForceClosureParallelJawQualityFunction(config) elif metric_type == "suction_best_fit_planarity": return BestFitPlanaritySuctionQualityFunction(config) elif metric_type == "suction_approach_planarity": return ApproachPlanaritySuctionQualityFunction(config) elif metric_type == "suction_com_approach_planarity": return ComApproachPlanaritySuctionQualityFunction(config) elif metric_type == "suction_disc_approach_planarity": return DiscApproachPlanaritySuctionQualityFunction(config) elif metric_type == "suction_com_disc_approach_planarity": return ComDiscApproachPlanaritySuctionQualityFunction(config) elif metric_type == "suction_gaussian_curvature": return GaussianCurvatureSuctionQualityFunction(config) elif metric_type == "suction_disc_curvature": return DiscCurvatureSuctionQualityFunction(config) elif metric_type == "suction_com_disc_curvature": return ComDiscCurvatureSuctionQualityFunction(config) elif metric_type == "gqcnn": return GQCnnQualityFunction(config) elif metric_type == "nomagic": return NoMagicQualityFunction(config) elif metric_type == "fcgqcnn": return FCGQCnnQualityFunction(config) else: raise ValueError("Grasp function type %s not supported!" % (metric_type)) ================================================ FILE: gqcnn/grasping/image_grasp_sampler.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. 
Permission to use, copy, modify, and distribute this software and its
documentation for educational, research, and not-for-profit purposes, without
fee and without a signed licensing agreement, is hereby granted, provided that
the above copyright notice, this paragraph and the following two paragraphs
appear in all copies, modifications, and distributions. Contact The Office of
Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley,
CA 94720-1620, (510) 643-7201, otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing
opportunities.

IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING
OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS
BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

Classes for sampling a set of grasps directly from images to generate data for
a neural network.

Author
------
Jeff Mahler & Sherdil Niyaz
"""
from abc import ABC, abstractmethod
import random
from time import time

import matplotlib.pyplot as plt
import numpy as np
import scipy.ndimage.filters as snf
import scipy.spatial.distance as ssd
import scipy.stats as ss

from autolab_core import (Point, RigidTransform, Logger, DepthImage,
                          RgbdImage, GdImage)
from visualization import Visualizer2D as vis

from .grasp import Grasp2D, SuctionPoint2D, MultiSuctionPoint2D


def force_closure(p1, p2, n1, n2, mu):
    """Computes whether or not the point and normal pairs are in force
    closure."""
    # Line between the contacts.
    v = p2 - p1
    v = v / np.linalg.norm(v)

    # Compute cone membership: each contact normal must lie within the
    # friction cone (half-angle arctan(mu)) of the contact line.
    alpha = np.arctan(mu)
    # Clamp dot products to [-1, 1] so arccos never sees a rounding artifact.
    dot_1 = max(min(n1.dot(-v), 1.0), -1.0)
    dot_2 = max(min(n2.dot(v), 1.0), -1.0)
    in_cone_1 = (np.arccos(dot_1) < alpha)
    in_cone_2 = (np.arccos(dot_2) < alpha)
    return (in_cone_1 and in_cone_2)


class DepthSamplingMode(object):
    """Modes for sampling grasp depth."""
    UNIFORM = "uniform"
    MIN = "min"
    MAX = "max"


class ImageGraspSampler(ABC):
    """Wraps image to crane grasp candidate generation for easy deployment of
    GQ-CNN.

    Attributes
    ----------
    config : :obj:`autolab_core.YamlConfig`
        A dictionary-like object containing the parameters of the sampler.
    """

    def __init__(self, config):
        # Set params.
        self._config = config

        # Setup logger.
        self._logger = Logger.get_logger(self.__class__.__name__)

    def sample(self,
               rgbd_im,
               camera_intr,
               num_samples,
               segmask=None,
               seed=None,
               visualize=False,
               constraint_fn=None):
        """Samples a set of 2D grasps from a given RGB-D image.

        Parameters
        ----------
        rgbd_im : :obj:`autolab_core.RgbdImage`
            RGB-D image to sample from.
        camera_intr : :obj:`autolab_core.CameraIntrinsics`
            Intrinsics of the camera that captured the images.
        num_samples : int
            Number of grasps to sample.
        segmask : :obj:`autolab_core.BinaryImage`
            Binary image segmenting out the object of interest.
        seed : int
            Number to use in random seed (`None` if no seed).
        visualize : bool
            Whether or not to show intermediate samples (for debugging).
        constraint_fn : :obj:`GraspConstraintFn`
            Constraint function to apply to grasps.

        Returns
        -------
        :obj:`list` of :obj:`Grasp2D`
            The list of grasps in image space.
        """
        # Set random seed for determinism.
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)

        # Sample an initial set of grasps (without depth).
        self._logger.debug("Sampling 2d candidates")
        sampling_start = time()
        grasps = self._sample(rgbd_im,
                              camera_intr,
                              num_samples,
                              segmask=segmask,
                              visualize=visualize,
                              constraint_fn=constraint_fn)
        sampling_stop = time()
        self._logger.debug("Sampled %d grasps from image" % (len(grasps)))
        self._logger.debug("Sampling grasps took %.3f sec" %
                           (sampling_stop - sampling_start))
        return grasps

    @abstractmethod
    def _sample(self,
                rgbd_im,
                camera_intr,
                num_samples,
                segmask=None,
                visualize=False,
                constraint_fn=None):
        """Sample a set of 2D grasp candidates from a depth image.

        Subclasses must override.

        Parameters
        ----------
        rgbd_im : :obj:`autolab_core.RgbdImage`
            RGB-D image to sample from.
        camera_intr : :obj:`autolab_core.CameraIntrinsics`
            Intrinsics of the camera that captured the images.
        num_samples : int
            Number of grasps to sample.
        segmask : :obj:`autolab_core.BinaryImage`
            Binary image segmenting out the object of interest.
        visualize : bool
            Whether or not to show intermediate samples (for debugging).
        constraint_fn : :obj:`GraspConstraintFn`
            Constraint function to apply to grasps.

        Returns
        -------
        :obj:`list` of :obj:`Grasp2D`
            List of 2D grasp candidates.
        """
        pass


class AntipodalDepthImageGraspSampler(ImageGraspSampler):
    """Grasp sampler for antipodal point pairs from depth image gradients.

    Notes
    -----
    Required configuration parameters are specified in Other Parameters.

    Other Parameters
    ----------------
    gripper_width : float
        Width of the gripper, in meters.
    friction_coef : float
        Friction coefficient for 2D force closure.
    depth_grad_thresh : float
        Threshold for depth image gradients to determine edge points for
        sampling.
    depth_grad_gaussian_sigma : float
        Sigma used for pre-smoothing the depth image for better gradients.
    downsample_rate : float
        Factor to downsample the depth image by before sampling grasps.
    max_rejection_samples : int
        Ceiling on the number of grasps to check in antipodal grasp rejection
        sampling.
    max_dist_from_center : int
        Maximum allowable distance of a grasp from the image center.
    min_grasp_dist : float
        Threshold on the grasp distance.
    angle_dist_weight : float
        Amount to weight the angle difference in grasp distance computation.
    depth_samples_per_grasp : int
        Number of depth samples to take per grasp.
    min_depth_offset : float
        Offset from the minimum depth at the grasp center pixel to use in
        depth sampling.
    max_depth_offset : float
        Offset from the maximum depth across all edges.
    depth_sample_win_height : float
        Height of a window around the grasp center pixel used to determine
        min depth.
    depth_sample_win_width : float
        Width of a window around the grasp center pixel used to determine min
        depth.
    depth_sampling_mode : str
        Name of depth sampling mode (uniform, min, max).
    """

    def __init__(self, config, gripper_width=np.inf):
        # Init superclass.
        ImageGraspSampler.__init__(self, config)

        # Antipodality params.
        self._gripper_width = self._config["gripper_width"]
        self._friction_coef = self._config["friction_coef"]
        self._depth_grad_thresh = self._config["depth_grad_thresh"]
        self._depth_grad_gaussian_sigma = self._config[
            "depth_grad_gaussian_sigma"]
        self._downsample_rate = self._config["downsample_rate"]
        self._rescale_factor = 1.0 / self._downsample_rate
        self._max_rejection_samples = self._config["max_rejection_samples"]

        # Optional: minimum edge-pixel count before re-thresholding at full
        # resolution (defaults to 0, i.e. never re-threshold).
        self._min_num_edge_pixels = 0
        if "min_num_edge_pixels" in self._config:
            self._min_num_edge_pixels = self._config["min_num_edge_pixels"]

        # Distance thresholds for rejection sampling.
        self._max_dist_from_center = self._config["max_dist_from_center"]
        self._min_dist_from_boundary = self._config["min_dist_from_boundary"]
        self._min_grasp_dist = self._config["min_grasp_dist"]
        self._angle_dist_weight = self._config["angle_dist_weight"]

        # Depth sampling params.
        self._depth_samples_per_grasp = max(
            self._config["depth_samples_per_grasp"], 1)
        self._min_depth_offset = self._config["min_depth_offset"]
        self._max_depth_offset = self._config["max_depth_offset"]
        self._h = self._config["depth_sample_win_height"]
        self._w = self._config["depth_sample_win_width"]
        self._depth_sampling_mode = self._config["depth_sampling_mode"]

        # Perturbation (optional; defaults disable perturbation).
        self._grasp_center_sigma = 0.0
        if "grasp_center_sigma" in self._config:
            self._grasp_center_sigma = self._config["grasp_center_sigma"]
        self._grasp_angle_sigma = 0.0
        if "grasp_angle_sigma" in self._config:
            self._grasp_angle_sigma = np.deg2rad(
                self._config["grasp_angle_sigma"])

    def _surface_normals(self, depth_im, edge_pixels):
        """Return an array of the surface normals at the edge pixels."""
        # Compute the gradients.
        grad = np.gradient(depth_im.data.astype(np.float32))

        # Compute surface normals.
        normals = np.zeros([edge_pixels.shape[0], 2])
        for i, pix in enumerate(edge_pixels):
            dx = grad[1][pix[0], pix[1]]
            dy = grad[0][pix[0], pix[1]]
            normal_vec = np.array([dy, dx])
            # Degenerate gradient: fall back to a fixed unit normal.
            if np.linalg.norm(normal_vec) == 0:
                normal_vec = np.array([1, 0])
            normal_vec = normal_vec / np.linalg.norm(normal_vec)
            normals[i, :] = normal_vec

        return normals

    def _sample_depth(self, min_depth, max_depth):
        """Samples a depth value between the min and max."""
        # Default is MAX mode; UNIFORM and MIN override below.
        depth_sample = max_depth
        if self._depth_sampling_mode == DepthSamplingMode.UNIFORM:
            depth_sample = min_depth + (max_depth -
                                        min_depth) * np.random.rand()
        elif self._depth_sampling_mode == DepthSamplingMode.MIN:
            depth_sample = min_depth
        return depth_sample

    def _sample(self,
                image,
                camera_intr,
                num_samples,
                segmask=None,
                visualize=False,
                constraint_fn=None):
        """Sample a set of 2D grasp candidates from a depth image.

        Parameters
        ----------
        image : :obj:`autolab_core.RgbdImage` or :obj:`autolab_core.DepthImage` or :obj:`autolab_core.GdImage`  # noqa: E501
            RGB-D or Depth image to sample from.
        camera_intr : :obj:`autolab_core.CameraIntrinsics`
            Intrinsics of the camera that captured the images.
        num_samples : int
            Number of grasps to sample
        segmask : :obj:`autolab_core.BinaryImage`
            Binary image segmenting out the object of interest.
        visualize : bool
            Whether or not to show intermediate samples (for debugging).
        constraint_fn : :obj:`GraspConstraintFn`
            Constraint function to apply to grasps.

        Returns
        -------
        :obj:`list` of :obj:`Grasp2D`
            List of 2D grasp candidates.
        """
        if isinstance(image, RgbdImage) or isinstance(image, GdImage):
            depth_im = image.depth
        elif isinstance(image, DepthImage):
            depth_im = image
        else:
            raise ValueError(
                "image type must be one of [RgbdImage, DepthImage, GdImage]")

        # Sample antipodal pairs in image space.
        grasps = self._sample_antipodal_grasps(depth_im,
                                               camera_intr,
                                               num_samples,
                                               segmask=segmask,
                                               visualize=visualize,
                                               constraint_fn=constraint_fn)
        return grasps

    def _sample_antipodal_grasps(self,
                                 depth_im,
                                 camera_intr,
                                 num_samples,
                                 segmask=None,
                                 visualize=False,
                                 constraint_fn=None):
        """Sample a set of 2D grasp candidates from a depth image by finding
        depth edges, then uniformly sampling point pairs and keeping only
        antipodal grasps with width less than the maximum allowable.

        Parameters
        ----------
        depth_im : :obj:"autolab_core.DepthImage"
            Depth image to sample from.
        camera_intr : :obj:`autolab_core.CameraIntrinsics`
            Intrinsics of the camera that captured the images.
        num_samples : int
            Number of grasps to sample.
        segmask : :obj:`autolab_core.BinaryImage`
            Binary image segmenting out the object of interest.
        visualize : bool
            Whether or not to show intermediate samples (for debugging).
        constraint_fn : :obj:`GraspConstraintFn`
            Constraint function to apply to grasps.

        Returns
        -------
        :obj:`list` of :obj:`Grasp2D`
            List of 2D grasp candidates.
        """
        # Compute edge pixels.
        edge_start = time()
        # Smooth, downsample, and threshold gradients to find depth edges.
        depth_im = depth_im.apply(snf.gaussian_filter,
                                  sigma=self._depth_grad_gaussian_sigma)
        scale_factor = self._rescale_factor
        depth_im_downsampled = depth_im.resize(scale_factor)
        depth_im_threshed = depth_im_downsampled.threshold_gradients(
            self._depth_grad_thresh)
        # Map edge pixel coordinates back to full resolution.
        edge_pixels = (1.0 / scale_factor) * depth_im_threshed.zero_pixels()
        edge_pixels = edge_pixels.astype(np.int16)

        depth_im_mask = depth_im.copy()
        if segmask is not None:
            edge_pixels = np.array(
                [p for p in edge_pixels if np.any(segmask[p[0], p[1]] > 0)])
            depth_im_mask = depth_im.mask_binary(segmask)

        # Re-threshold edges if there are too few (retry at full resolution).
        if edge_pixels.shape[0] < self._min_num_edge_pixels:
            self._logger.info("Too few edge pixels!")
            depth_im_threshed = depth_im.threshold_gradients(
                self._depth_grad_thresh)
            edge_pixels = depth_im_threshed.zero_pixels()
            edge_pixels = edge_pixels.astype(np.int16)
            depth_im_mask = depth_im.copy()
            if segmask is not None:
                edge_pixels = np.array([
                    p for p in edge_pixels if np.any(segmask[p[0], p[1]] > 0)
                ])
                depth_im_mask = depth_im.mask_binary(segmask)

        num_pixels = edge_pixels.shape[0]
        self._logger.debug("Depth edge detection took %.3f sec" %
                           (time() - edge_start))
        self._logger.debug("Found %d edge pixels" % (num_pixels))

        # Compute point cloud.
        point_cloud_im = camera_intr.deproject_to_image(depth_im_mask)

        # Compute_max_depth.
        depth_data = depth_im_mask.data[depth_im_mask.data > 0]
        if depth_data.shape[0] == 0:
            return []

        min_depth = np.min(depth_data) + self._min_depth_offset
        max_depth = np.max(depth_data) + self._max_depth_offset

        # Compute surface normals.
        normal_start = time()
        edge_normals = self._surface_normals(depth_im, edge_pixels)
        self._logger.debug("Normal computation took %.3f sec" %
                           (time() - normal_start))

        if visualize:
            # Subsample pixels/normals for a readable quiver plot.
            edge_pixels = edge_pixels[::2, :]
            edge_normals = edge_normals[::2, :]

            vis.figure()
            vis.subplot(1, 3, 1)
            vis.imshow(depth_im)
            if num_pixels > 0:
                vis.scatter(edge_pixels[:, 1], edge_pixels[:, 0], s=2, c="b")

                X = [pix[1] for pix in edge_pixels]
                Y = [pix[0] for pix in edge_pixels]
                U = [3 * pix[1] for pix in edge_normals]
                V = [-3 * pix[0] for pix in edge_normals]
                plt.quiver(X,
                           Y,
                           U,
                           V,
                           units="x",
                           scale=0.25,
                           width=0.5,
                           zorder=2,
                           color="r")
            vis.title("Edge pixels and normals")

            vis.subplot(1, 3, 2)
            vis.imshow(depth_im_threshed)
            vis.title("Edge map")

            vis.subplot(1, 3, 3)
            vis.imshow(segmask)
            vis.title("Segmask")
            vis.show()

        # Exit if no edge pixels.
        if num_pixels == 0:
            return []

        # Form set of valid candidate point pairs: antipodal normals within
        # the friction cone and pixel distance below the gripper width.
        pruning_start = time()
        max_grasp_width_px = Grasp2D(Point(np.zeros(2)),
                                     0.0,
                                     min_depth,
                                     width=self._gripper_width,
                                     camera_intr=camera_intr).width_px
        normal_ip = edge_normals.dot(edge_normals.T)
        dists = ssd.squareform(ssd.pdist(edge_pixels))
        valid_indices = np.where(
            (normal_ip < -np.cos(np.arctan(self._friction_coef)))
            & (dists < max_grasp_width_px) & (dists > 0.0))
        valid_indices = np.c_[valid_indices[0], valid_indices[1]]
        self._logger.debug("Normal pruning %.3f sec" %
                           (time() - pruning_start))

        # Raise exception if no antipodal pairs.
        num_pairs = valid_indices.shape[0]
        if num_pairs == 0:
            return []

        # Prune out grasps: exact antipodality check via friction-cone angles.
        contact_points1 = edge_pixels[valid_indices[:, 0], :]
        contact_points2 = edge_pixels[valid_indices[:, 1], :]
        contact_normals1 = edge_normals[valid_indices[:, 0], :]
        contact_normals2 = edge_normals[valid_indices[:, 1], :]
        v = contact_points1 - contact_points2
        v_norm = np.linalg.norm(v, axis=1)
        v = v / np.tile(v_norm[:, np.newaxis], [1, 2])
        ip1 = np.sum(contact_normals1 * v, axis=1)
        ip2 = np.sum(contact_normals2 * (-v), axis=1)
        # Clamp inner products to [-1, 1] before arccos.
        ip1[ip1 > 1.0] = 1.0
        ip1[ip1 < -1.0] = -1.0
        ip2[ip2 > 1.0] = 1.0
        ip2[ip2 < -1.0] = -1.0
        beta1 = np.arccos(ip1)
        beta2 = np.arccos(ip2)
        alpha = np.arctan(self._friction_coef)
        antipodal_indices = np.where((beta1 < alpha) & (beta2 < alpha))[0]

        # Raise exception if no antipodal pairs.
        num_pairs = antipodal_indices.shape[0]
        if num_pairs == 0:
            return []
        sample_size = min(self._max_rejection_samples, num_pairs)
        grasp_indices = np.random.choice(antipodal_indices,
                                         size=sample_size,
                                         replace=False)
        self._logger.debug("Grasp comp took %.3f sec" %
                           (time() - pruning_start))

        # Compute grasps.
        sample_start = time()
        k = 0
        grasps = []
        while k < sample_size and len(grasps) < num_samples:
            grasp_ind = grasp_indices[k]
            p1 = contact_points1[grasp_ind, :]
            p2 = contact_points2[grasp_ind, :]
            n1 = contact_normals1[grasp_ind, :]
            n2 = contact_normals2[grasp_ind, :]
            # width = np.linalg.norm(p1 - p2)
            k += 1

            # Compute center and axis.
            grasp_center = (p1 + p2) // 2
            grasp_axis = p2 - p1
            grasp_axis = grasp_axis / np.linalg.norm(grasp_axis)
            grasp_theta = np.pi / 2
            if grasp_axis[1] != 0:
                grasp_theta = np.arctan2(grasp_axis[0], grasp_axis[1])
            grasp_center_pt = Point(np.array(
                [grasp_center[1], grasp_center[0]]),
                                    frame=camera_intr.frame)

            # Compute grasp points in 3D; reject pairs wider than the gripper.
            x1 = point_cloud_im[p1[0], p1[1]]
            x2 = point_cloud_im[p2[0], p2[1]]
            if np.linalg.norm(x2 - x1) > self._gripper_width:
                continue

            # Perturb (optional, for sample diversity).
            if self._grasp_center_sigma > 0.0:
                grasp_center_pt = grasp_center_pt + ss.multivariate_normal.rvs(
                    cov=self._grasp_center_sigma * np.diag(np.ones(2)))
            if self._grasp_angle_sigma > 0.0:
                grasp_theta = grasp_theta + ss.norm.rvs(
                    scale=self._grasp_angle_sigma)

            # Check center px dist from boundary.
            if (grasp_center[0] < self._min_dist_from_boundary
                    or grasp_center[1] < self._min_dist_from_boundary
                    or grasp_center[0] >
                    depth_im.height - self._min_dist_from_boundary
                    or grasp_center[1] >
                    depth_im.width - self._min_dist_from_boundary):
                continue

            # Sample depths.
            for i in range(self._depth_samples_per_grasp):
                # Get depth in the neighborhood of the center pixel.
                depth_win = depth_im.data[grasp_center[0] -
                                          self._h:grasp_center[0] +
                                          self._h, grasp_center[1] -
                                          self._w:grasp_center[1] + self._w]
                center_depth = np.min(depth_win)
                if center_depth == 0 or np.isnan(center_depth):
                    continue

                # Sample depth between the min and max.
                min_depth = center_depth + self._min_depth_offset
                max_depth = center_depth + self._max_depth_offset
                sample_depth = min_depth + (max_depth -
                                            min_depth) * np.random.rand()
                candidate_grasp = Grasp2D(grasp_center_pt,
                                          grasp_theta,
                                          sample_depth,
                                          width=self._gripper_width,
                                          camera_intr=camera_intr,
                                          contact_points=[p1, p2],
                                          contact_normals=[n1, n2])

                if visualize:
                    vis.figure()
                    vis.imshow(depth_im)
                    vis.grasp(candidate_grasp)
                    vis.scatter(p1[1], p1[0], c="b", s=25)
                    vis.scatter(p2[1], p2[0], c="b", s=25)
                    vis.show()

                grasps.append(candidate_grasp)

        # Return sampled grasps.
        self._logger.debug("Loop took %.3f sec" % (time() - sample_start))
        return grasps


class DepthImageSuctionPointSampler(ImageGraspSampler):
    """Grasp sampler for suction points from depth images.

    Notes
    -----
    Required configuration parameters are specified in Other Parameters.

    Other Parameters
    ----------------
    max_suction_dir_optical_axis_angle : float
        Maximum angle, in degrees, between the suction approach axis and the
        camera optical axis.
    delta_theta : float
        Maximum deviation from zero for the azimuth angle of a rotational
        perturbation to the surface normal (for sample diversity).
    delta_phi : float
        Maximum deviation from zero for the elevation angle of a rotational
        perturbation to the surface normal (for sample diversity).
    sigma_depth : float
        Standard deviation for a normal distribution over depth values (for
        sample diversity).
    min_suction_dist : float
        Minimum admissible distance between suction points (for sample
        diversity).
    angle_dist_weight : float
        Amount to weight the angle difference in suction point distance
        computation.
    depth_gaussian_sigma : float
        Sigma used for pre-smoothing the depth image for better gradients.
    """

    def __init__(self, config):
        # Init superclass.
        ImageGraspSampler.__init__(self, config)

        # Read params.
        self._max_suction_dir_optical_axis_angle = np.deg2rad(
            self._config["max_suction_dir_optical_axis_angle"])
        self._max_dist_from_center = self._config["max_dist_from_center"]
        self._min_dist_from_boundary = self._config["min_dist_from_boundary"]
        self._max_num_samples = self._config["max_num_samples"]

        # Uniform perturbations of the approach axis (azimuth and elevation).
        self._min_theta = -np.deg2rad(self._config["delta_theta"])
        self._max_theta = np.deg2rad(self._config["delta_theta"])
        self._theta_rv = ss.uniform(loc=self._min_theta,
                                    scale=self._max_theta - self._min_theta)

        self._min_phi = -np.deg2rad(self._config["delta_phi"])
        self._max_phi = np.deg2rad(self._config["delta_phi"])
        self._phi_rv = ss.uniform(loc=self._min_phi,
                                  scale=self._max_phi - self._min_phi)

        self._mean_depth = 0.0
        if "mean_depth" in self._config:
            self._mean_depth = self._config["mean_depth"]
        self._sigma_depth = self._config["sigma_depth"]
        # NOTE(review): `ss.norm` expects a standard deviation as its scale
        # argument, but a squared value (a variance) is passed here —
        # confirm whether this is intentional.
        self._depth_rv = ss.norm(self._mean_depth, self._sigma_depth**2)

        self._min_suction_dist = self._config["min_suction_dist"]
        self._angle_dist_weight = self._config["angle_dist_weight"]
        self._depth_gaussian_sigma = self._config["depth_gaussian_sigma"]

    def _sample(self,
                image,
                camera_intr,
                num_samples,
                segmask=None,
                visualize=False,
                constraint_fn=None):
        """Sample a set of 2D grasp candidates from a depth image.

        Parameters
        ----------
        image : :obj:`autolab_core.RgbdImage` or "autolab_core.DepthImage"
            RGB-D or D image to sample from.
        camera_intr : :obj:`autolab_core.CameraIntrinsics`
            Intrinsics of the camera that captured the images.
        num_samples : int
            Number of grasps to sample.
        segmask : :obj:`autolab_core.BinaryImage`
            Binary image segmenting out the object of interest.
        visualize : bool
            Whether or not to show intermediate samples (for debugging).

        Returns
        -------
        :obj:`list` of :obj:`Grasp2D`
            List of 2D grasp candidates.
        """
        if isinstance(image, RgbdImage) or isinstance(image, GdImage):
            depth_im = image.depth
        elif isinstance(image, DepthImage):
            depth_im = image
        else:
            raise ValueError(
                "image type must be one of [RgbdImage, DepthImage, GdImage]")

        # Sample antipodal pairs in image space.
        grasps = self._sample_suction_points(depth_im,
                                             camera_intr,
                                             num_samples,
                                             segmask=segmask,
                                             visualize=visualize,
                                             constraint_fn=constraint_fn)
        return grasps

    def _sample_suction_points(self,
                               depth_im,
                               camera_intr,
                               num_samples,
                               segmask=None,
                               visualize=False,
                               constraint_fn=None):
        """Sample a set of 2D suction point candidates from a depth image by
        choosing points on an object surface uniformly at random and then
        sampling around the surface normal.

        Parameters
        ----------
        depth_im : :obj:"autolab_core.DepthImage"
            Depth image to sample from.
        camera_intr : :obj:`autolab_core.CameraIntrinsics`
            Intrinsics of the camera that captured the images.
        num_samples : int
            Number of grasps to sample.
        segmask : :obj:`autolab_core.BinaryImage`
            Binary image segmenting out the object of interest.
        visualize : bool
            Whether or not to show intermediate samples (for debugging).

        Returns
        -------
        :obj:`list` of :obj:`SuctionPoint2D`
            List of 2D suction point candidates.
        """
        # Compute edge pixels.
        filter_start = time()
        if self._depth_gaussian_sigma > 0:
            depth_im_mask = depth_im.apply(snf.gaussian_filter,
                                           sigma=self._depth_gaussian_sigma)
        else:
            depth_im_mask = depth_im.copy()
        if segmask is not None:
            depth_im_mask = depth_im.mask_binary(segmask)
        self._logger.debug("Filtering took %.3f sec" %
                           (time() - filter_start))

        if visualize:
            vis.figure()
            vis.subplot(1, 2, 1)
            vis.imshow(depth_im)
            vis.subplot(1, 2, 2)
            vis.imshow(depth_im_mask)
            vis.show()

        # Project to get the point cloud.
        cloud_start = time()
        point_cloud_im = camera_intr.deproject_to_image(depth_im_mask)
        normal_cloud_im = point_cloud_im.normal_cloud_im()
        nonzero_px = depth_im_mask.nonzero_pixels()
        num_nonzero_px = nonzero_px.shape[0]
        if num_nonzero_px == 0:
            return []
        self._logger.debug("Normal cloud took %.3f sec" %
                           (time() - cloud_start))

        # Randomly sample points and add to image.
        sample_start = time()
        suction_points = []
        k = 0
        sample_size = min(self._max_num_samples, num_nonzero_px)
        indices = np.random.choice(num_nonzero_px,
                                   size=sample_size,
                                   replace=False)
        while k < sample_size and len(suction_points) < num_samples:
            # Sample a point uniformly at random.
            ind = indices[k]
            # `center_px` is (x, y): nonzero_px rows are (row, col).
            center_px = np.array([nonzero_px[ind, 1], nonzero_px[ind, 0]])
            center = Point(center_px, frame=camera_intr.frame)
            # Approach axis is the inward surface normal at the point.
            axis = -normal_cloud_im[center.y, center.x]
            depth = point_cloud_im[center.y, center.x][2]

            # Update number of tries.
            k += 1

            # Check center px dist from boundary.
            if (center_px[0] < self._min_dist_from_boundary
                    or center_px[1] < self._min_dist_from_boundary
                    or center_px[1] >
                    depth_im.height - self._min_dist_from_boundary
                    or center_px[0] >
                    depth_im.width - self._min_dist_from_boundary):
                continue

            # Perturb depth.
            delta_depth = self._depth_rv.rvs(size=1)[0]
            depth = depth + delta_depth

            # Keep if the angle between the camera optical axis and the
            # suction direction is less than a threshold.
dot = max(min(axis.dot(np.array([0, 0, 1])), 1.0), -1.0) psi = np.arccos(dot) if psi < self._max_suction_dir_optical_axis_angle: # Create candidate grasp. candidate = SuctionPoint2D(center, axis, depth, camera_intr=camera_intr) # Check constraint satisfaction. if constraint_fn is None or constraint_fn(candidate): if visualize: vis.figure() vis.imshow(depth_im) vis.scatter(center.x, center.y) vis.show() suction_points.append(candidate) self._logger.debug("Loop took %.3f sec" % (time() - sample_start)) return suction_points class DepthImageMultiSuctionPointSampler(ImageGraspSampler): """Grasp sampler for suction points from depth images. Notes ----- Required configuration parameters are specified in Other Parameters. Other Parameters ---------------- max_suction_dir_optical_axis_angle : float Maximum angle, in degrees, between the suction approach axis and the camera optical axis. delta_theta : float Maximum deviation from zero for the aziumth angle of a rotational perturbation to the surface normal (for sample diversity). delta_phi : float Maximum deviation from zero for the elevation angle of a rotational perturbation to the surface normal (for sample diversity). sigma_depth : float Standard deviation for a normal distribution over depth values (for sample diversity). min_suction_dist : float Minimum admissible distance between suction points (for sample diversity). angle_dist_weight : float Amount to weight the angle difference in suction point distance computation. depth_gaussian_sigma : float Sigma used for pre-smoothing the depth image for better gradients. """ def __init__(self, config): # Init superclass. ImageGraspSampler.__init__(self, config) # Read params. 
self._max_suction_dir_optical_axis_angle = np.deg2rad( self._config["max_suction_dir_optical_axis_angle"]) self._max_dist_from_center = self._config["max_dist_from_center"] self._min_dist_from_boundary = self._config["min_dist_from_boundary"] self._max_num_samples = self._config["max_num_samples"] self._min_theta = -np.deg2rad(self._config["delta_theta"]) self._max_theta = np.deg2rad(self._config["delta_theta"]) self._theta_rv = ss.uniform(loc=self._min_theta, scale=self._max_theta - self._min_theta) self._min_phi = -np.deg2rad(self._config["delta_phi"]) self._max_phi = np.deg2rad(self._config["delta_phi"]) self._phi_rv = ss.uniform(loc=self._min_phi, scale=self._max_phi - self._min_phi) self._mean_depth = 0.0 if "mean_depth" in self._config: self._mean_depth = self._config["mean_depth"] self._sigma_depth = self._config["sigma_depth"] self._depth_rv = ss.norm(self._mean_depth, self._sigma_depth**2) self._min_suction_dist = self._config["min_suction_dist"] self._angle_dist_weight = self._config["angle_dist_weight"] self._depth_gaussian_sigma = self._config["depth_gaussian_sigma"] def _sample(self, image, camera_intr, num_samples, segmask=None, visualize=False, constraint_fn=None): """Sample a set of 2D grasp candidates from a depth image. Parameters ---------- image : :obj:`autolab_core.RgbdImage` or `autolab_core.DepthImage` RGB-D or D image to sample from. camera_intr : :obj:`autolab_core.CameraIntrinsics` Intrinsics of the camera that captured the images. num_samples : int Number of grasps to sample. segmask : :obj:`autolab_core.BinaryImage` Binary image segmenting out the object of interest. visualize : bool Whether or not to show intermediate samples (for debugging). constraint_fn : :obj:`GraspConstraintFn` Constraint function to apply to grasps. Returns ------- :obj:`list` of :obj:`Grasp2D` List of 2D grasp candidates. 
""" if isinstance(image, RgbdImage) or isinstance(image, GdImage): depth_im = image.depth elif isinstance(image, DepthImage): depth_im = image else: raise ValueError( "image type must be one of [RgbdImage, DepthImage, GdImage]") # Sample antipodal pairs in image space. grasps = self._sample_suction_points(depth_im, camera_intr, num_samples, segmask=segmask, visualize=visualize, constraint_fn=constraint_fn) return grasps def _sample_suction_points(self, depth_im, camera_intr, num_samples, segmask=None, visualize=False, constraint_fn=None): """Sample a set of 2D suction point candidates from a depth image by choosing points on an object surface uniformly at random and then sampling around the surface normal. Parameters ---------- depth_im : :obj:"autolab_core.DepthImage" Depth image to sample from. camera_intr : :obj:`autolab_core.CameraIntrinsics` Intrinsics of the camera that captured the images. num_samples : int Number of grasps to sample. segmask : :obj:`autolab_core.BinaryImage` Binary image segmenting out the object of interest. visualize : bool Whether or not to show intermediate samples (for debugging). constraint_fn : :obj:`GraspConstraintFn` Constraint function to apply to grasps. Returns ------- :obj:`list` of :obj:`SuctionPoint2D` List of 2D suction point candidates. """ # Compute edge pixels. filter_start = time() if self._depth_gaussian_sigma > 0: depth_im_mask = depth_im.apply(snf.gaussian_filter, sigma=self._depth_gaussian_sigma) else: depth_im_mask = depth_im.copy() if segmask is not None: depth_im_mask = depth_im.mask_binary(segmask) self._logger.debug("Filtering took %.3f sec" % (time() - filter_start)) if visualize: vis.figure() vis.subplot(1, 2, 1) vis.imshow(depth_im) vis.subplot(1, 2, 2) vis.imshow(depth_im_mask) vis.show() # Project to get the point cloud. 
cloud_start = time() point_cloud_im = camera_intr.deproject_to_image(depth_im_mask) normal_cloud_im = point_cloud_im.normal_cloud_im() nonzero_px = depth_im_mask.nonzero_pixels() num_nonzero_px = nonzero_px.shape[0] if num_nonzero_px == 0: return [] self._logger.debug("Normal cloud took %.3f sec" % (time() - cloud_start)) # Randomly sample points and add to image. sample_start = time() suction_points = [] k = 0 sample_size = min(self._max_num_samples, num_nonzero_px) indices = np.random.choice(num_nonzero_px, size=sample_size, replace=False) while k < sample_size and len(suction_points) < num_samples: # Sample a point uniformly at random. ind = indices[k] center_px = np.array([nonzero_px[ind, 1], nonzero_px[ind, 0]]) center = Point(center_px, frame=camera_intr.frame) axis = -normal_cloud_im[center.y, center.x] # depth = point_cloud_im[center.y, center.x][2] orientation = 2 * np.pi * np.random.rand() # Update number of tries. k += 1 # Skip bad axes. if np.linalg.norm(axis) == 0: continue # Rotation matrix. x_axis = axis y_axis = np.array([axis[1], -axis[0], 0]) if np.linalg.norm(y_axis) == 0: y_axis = np.array([1, 0, 0]) y_axis = y_axis / np.linalg.norm(y_axis) z_axis = np.cross(x_axis, y_axis) R = np.array([x_axis, y_axis, z_axis]).T # R_orig = np.copy(R) R = R.dot(RigidTransform.x_axis_rotation(orientation)) t = point_cloud_im[center.y, center.x] pose = RigidTransform(rotation=R, translation=t, from_frame="grasp", to_frame=camera_intr.frame) # Check center px dist from boundary. if (center_px[0] < self._min_dist_from_boundary or center_px[1] < self._min_dist_from_boundary or center_px[1] > depth_im.height - self._min_dist_from_boundary or center_px[0] > depth_im.width - self._min_dist_from_boundary): continue # Keep if the angle between the camera optical axis and the suction # direction is less than a threshold. 
dot = max(min(axis.dot(np.array([0, 0, 1])), 1.0), -1.0) psi = np.arccos(dot) if psi < self._max_suction_dir_optical_axis_angle: # Check distance to ensure sample diversity. candidate = MultiSuctionPoint2D(pose, camera_intr=camera_intr) # Check constraint satisfaction. if constraint_fn is None or constraint_fn(candidate): if visualize: vis.figure() vis.imshow(depth_im) vis.scatter(center.x, center.y) vis.show() suction_points.append(candidate) self._logger.debug("Loop took %.3f sec" % (time() - sample_start)) return suction_points class ImageGraspSamplerFactory(object): """Factory for image grasp samplers.""" @staticmethod def sampler(sampler_type, config): if sampler_type == "antipodal_depth": return AntipodalDepthImageGraspSampler(config) elif sampler_type == "suction": return DepthImageSuctionPointSampler(config) elif sampler_type == "multi_suction": return DepthImageMultiSuctionPointSampler(config) else: raise ValueError("Image grasp sampler type %s not supported!" % (sampler_type)) ================================================ FILE: gqcnn/grasping/policy/__init__.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. 
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. """ from .fc_policy import (FullyConvolutionalGraspingPolicyParallelJaw, FullyConvolutionalGraspingPolicySuction) from .policy import (RobustGraspingPolicy, CrossEntropyRobustGraspingPolicy, RgbdImageState, GraspAction, UniformRandomGraspingPolicy) __all__ = [ "FullyConvolutionalGraspingPolicyParallelJaw", "FullyConvolutionalGraspingPolicySuction", "RobustGraspingPolicy", "CrossEntropyRobustGraspingPolicy", "UniformRandomGraspingPolicy", "RgbdImageState", "GraspAction" ] ================================================ FILE: gqcnn/grasping/policy/enums.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. 
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Enums for GQ-CNN policies. Author ------ Vishal Satish """ class SamplingMethod(object): TOP_K = "top_k" UNIFORM = "uniform" ================================================ FILE: gqcnn/grasping/policy/fc_policy.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Fully-Convolutional GQ-CNN grasping policies. Author ------ Vishal Satish """ from abc import abstractmethod import os import matplotlib.pyplot as plt import numpy as np from autolab_core import Point, Logger, DepthImage from visualization import Visualizer2D as vis from ...grasping import Grasp2D, SuctionPoint2D from ...utils import GeneralConstants, NoValidGraspsException from .enums import SamplingMethod from .policy import GraspingPolicy, GraspAction class FullyConvolutionalGraspingPolicy(GraspingPolicy): """Abstract grasp sampling policy class using Fully-Convolutional GQ-CNN network.""" def __init__(self, cfg, filters=None): """ Parameters ---------- cfg : dict Python dictionary of policy configuration parameters. filters : dict Python dictionary of kinematic filters to apply. """ GraspingPolicy.__init__(self, cfg, init_sampler=False) # Init logger. self._logger = Logger.get_logger(self.__class__.__name__) self._cfg = cfg self._sampling_method = self._cfg["sampling_method"] # GQ-CNN parameters. self._gqcnn_stride = self._cfg["gqcnn_stride"] self._gqcnn_recep_h = self._cfg["gqcnn_recep_h"] self._gqcnn_recep_w = self._cfg["gqcnn_recep_w"] # Grasp filtering. self._filters = filters self._max_grasps_to_filter = self._cfg["max_grasps_to_filter"] self._filter_grasps = self._cfg["filter_grasps"] # Visualization parameters. 
        self._vis_config = self._cfg["policy_vis"]
        self._vis_scale = self._vis_config["scale"]
        self._vis_show_axis = self._vis_config["show_axis"]

        self._num_vis_samples = self._vis_config["num_samples"]
        self._vis_actions_2d = self._vis_config["actions_2d"]
        self._vis_actions_3d = self._vis_config["actions_3d"]

        self._vis_affordance_map = self._vis_config["affordance_map"]

        self._vis_output_dir = None
        # If this exists in the config then all visualizations will be logged
        # here instead of displayed.
        if "output_dir" in self._vis_config:
            self._vis_output_dir = self._vis_config["output_dir"]
            self._state_counter = 0

    def _unpack_state(self, state):
        """Unpack information from the provided `RgbdImageState`."""
        # TODO(vsatish): Don't access raw depth data like this.
        return (state.rgbd_im.depth, state.rgbd_im.depth._data,
                state.segmask.raw_data, state.camera_intr)

    def _mask_predictions(self, preds, raw_segmask):
        """Mask the given predictions with the given segmask, setting the
        rest to 0.0."""
        preds_masked = np.zeros_like(preds)
        # Crop the segmask by half the receptive field on each side so it
        # lines up with the valid region of the prediction map; keep only
        # channel 0 of the mask.
        raw_segmask_cropped = raw_segmask[self._gqcnn_recep_h //
                                          2:raw_segmask.shape[0] -
                                          self._gqcnn_recep_h // 2,
                                          self._gqcnn_recep_w //
                                          2:raw_segmask.shape[1] -
                                          self._gqcnn_recep_w // 2, 0]
        # Downsample by the network stride to match prediction resolution.
        raw_segmask_downsampled = raw_segmask_cropped[::self._gqcnn_stride, ::
                                                      self._gqcnn_stride]
        if raw_segmask_downsampled.shape[0] != preds.shape[1]:
            # Rounding can leave the downsampled mask smaller than the
            # prediction map; zero-pad it up to the prediction map's shape.
            raw_segmask_downsampled_new = np.zeros(preds.shape[1:3])
            raw_segmask_downsampled_new[:raw_segmask_downsampled.
                                        shape[0], :raw_segmask_downsampled.
                                        shape[1]] = raw_segmask_downsampled
            raw_segmask_downsampled = raw_segmask_downsampled_new
        # Copy through only the predictions whose pixel lies in the mask.
        nonzero_mask_ind = np.where(raw_segmask_downsampled > 0)
        preds_masked[:, nonzero_mask_ind[0],
                     nonzero_mask_ind[1]] = preds[:, nonzero_mask_ind[0],
                                                  nonzero_mask_ind[1]]
        return preds_masked

    def _sample_predictions(self, preds, num_actions):
        """Sample predictions.

        Flattens the 4D prediction map, samples `num_actions` flat indices
        via `_sample_predictions_flat`, then manually unravels each flat
        index back into (image, row, col, angle) indices (equivalent to
        `np.unravel_index` over `preds.shape`).
        """
        dim2 = preds.shape[2]
        dim1 = preds.shape[1]
        dim3 = preds.shape[3]
        preds_flat = np.ravel(preds)
        pred_ind_flat = self._sample_predictions_flat(preds_flat, num_actions)
        pred_ind = np.zeros((num_actions, len(preds.shape)), dtype=np.int32)
        for idx in range(num_actions):
            pred_ind[idx, 0] = pred_ind_flat[idx] // (dim2 * dim1 * dim3)
            pred_ind[idx,
                     1] = (pred_ind_flat[idx] -
                           (pred_ind[idx, 0] *
                            (dim2 * dim1 * dim3))) // (dim2 * dim3)
            pred_ind[idx,
                     2] = (pred_ind_flat[idx] -
                           (pred_ind[idx, 0] * (dim2 * dim1 * dim3)) -
                           (pred_ind[idx, 1] * (dim2 * dim3))) // dim3
            pred_ind[idx,
                     3] = (pred_ind_flat[idx] -
                           (pred_ind[idx, 0] * (dim2 * dim1 * dim3)) -
                           (pred_ind[idx, 1] * (dim2 * dim3))) % dim3
        return pred_ind

    def _sample_predictions_flat(self, preds_flat, num_samples):
        """Helper function to do the actual sampling."""
        if num_samples == 1:
            # `argmax` is faster than `argpartition` for special case of
            # single sample.
if self._sampling_method == SamplingMethod.TOP_K: return [np.argmax(preds_flat)] elif self._sampling_method == SamplingMethod.UNIFORM: nonzero_ind = np.where(preds_flat > 0)[0] return np.random.choice(nonzero_ind) else: raise ValueError("Invalid sampling method: {}".format( self._sampling_method)) else: if self._sampling_method == "top_k": return np.argpartition(preds_flat, -1 * num_samples)[-1 * num_samples:] elif self._sampling_method == "uniform": nonzero_ind = np.where(preds_flat > 0)[0] if nonzero_ind.shape[0] == 0: raise NoValidGraspsException( "No grasps with nonzero quality") return np.random.choice(nonzero_ind, size=num_samples) else: raise ValueError("Invalid sampling method: {}".format( self._sampling_method)) @abstractmethod def _get_actions(self, preds, ind, images, depths, camera_intr, num_actions): """Generate the actions to be returned.""" pass @abstractmethod def _visualize_3d(self, actions, wrapped_depth_im, camera_intr, num_actions): """Visualize the actions in 3D.""" pass @abstractmethod def _visualize_affordance_map(self, preds, depth_im, scale, plot_max=True, output_dir=None): """Visualize an affordance map of the network predictions overlayed on the depth image.""" pass def _visualize_2d(self, actions, preds, wrapped_depth_im, num_actions, scale, show_axis, output_dir=None): """Visualize the actions in 2D.""" self._logger.info("Visualizing actions in 2d...") # Plot actions in 2D. 
vis.figure() vis.imshow(wrapped_depth_im) for i in range(num_actions): vis.grasp(actions[i].grasp, scale=scale, show_axis=show_axis, color=plt.cm.RdYlGn(actions[i].q_value)) vis.title("Top {} Grasps".format(num_actions)) if output_dir is not None: vis.savefig(os.path.join(output_dir, "top_grasps.png")) else: vis.show() def _filter(self, actions): """Filter actions.""" for action in actions: valid = True for filter_name, is_valid in self._filters.items(): if not is_valid(action.grasp): self._logger.info( "Grasp {} is not valid with filter {}".format( action.grasp, filter_name)) valid = False break if valid: return action raise NoValidGraspsException("No grasps found after filtering!") @abstractmethod def _gen_images_and_depths(self, depth, segmask): """Generate inputs for the grasp quality function.""" pass def _action(self, state, num_actions=1): """Plan action(s).""" if self._filter_grasps: assert self._filters is not None, ("Trying to filter grasps but no" " filters were provided!") assert num_actions == 1, ("Filtering support is only implemented" " for single actions!") num_actions = self._max_grasps_to_filter # Set up log dir for state visualizations. state_output_dir = None if self._vis_output_dir is not None: state_output_dir = os.path.join( self._vis_output_dir, "state_{}".format(str(self._state_counter).zfill(5))) if not os.path.exists(state_output_dir): os.makedirs(state_output_dir) self._state_counter += 1 # Unpack the `RgbdImageState`. wrapped_depth, raw_depth, raw_seg, camera_intr = self._unpack_state( state) # Predict. images, depths = self._gen_images_and_depths(raw_depth, raw_seg) preds = self._grasp_quality_fn.quality(images, depths) # Get success probablility predictions only (this is needed because the # output of the network is pairs of (p_failure, p_success)). preds_success_only = preds[:, :, :, 1::2] # Mask predicted success probabilities with the cropped and downsampled # object segmask so we only sample grasps on the objects. 
preds_success_only = self._mask_predictions(preds_success_only, raw_seg) # If we want to visualize more than one action, we have to sample more. # TODO(vsatish): If this is used with the "top_k" sampling method, the # final returned action is not the best because the argpartition does # not sort the partitioned indices. Fix this. num_actions_to_sample = self._num_vis_samples if ( self._vis_actions_2d or self._vis_actions_3d) else num_actions if (self._sampling_method == SamplingMethod.TOP_K and self._num_vis_samples): self._logger.warning("FINAL GRASP RETURNED IS NOT THE BEST!") # Sample num_actions_to_sample indices from the success predictions. sampled_ind = self._sample_predictions(preds_success_only, num_actions_to_sample) # Wrap actions to be returned. actions = self._get_actions(preds_success_only, sampled_ind, images, depths, camera_intr, num_actions_to_sample) # Filter grasps. if self._filter_grasps: actions = sorted(actions, reverse=True, key=lambda action: action.q_value) actions = [self._filter(actions)] # Visualize. if self._vis_actions_3d: self._logger.info("Generating 3D Visualization...") self._visualize_3d(actions, wrapped_depth, camera_intr, num_actions_to_sample) if self._vis_actions_2d: self._logger.info("Generating 2D visualization...") self._visualize_2d(actions, preds_success_only, wrapped_depth, num_actions_to_sample, self._vis_scale, self._vis_show_axis, output_dir=state_output_dir) if self._vis_affordance_map: self._visualize_affordance_map(preds_success_only, wrapped_depth, self._vis_scale, output_dir=state_output_dir) return actions[-1] if (self._filter_grasps or num_actions == 1) else actions[-(num_actions + 1):] def action_set(self, state, num_actions): """Plan a set of actions. Parameters ---------- state : :obj:`gqcnn.RgbdImageState` The RGBD image state. num_actions : int The number of actions to plan. Returns ------ list of :obj:`gqcnn.GraspAction` The planned grasps. 
""" return [ action.grasp for action in self._action(state, num_actions=num_actions) ] class FullyConvolutionalGraspingPolicyParallelJaw( FullyConvolutionalGraspingPolicy): """Parallel jaw grasp sampling policy using the FC-GQ-CNN.""" def __init__(self, cfg, filters=None): """ Parameters ---------- cfg : dict Python dictionary of policy configuration parameters. filters : dict Python dictionary of functions to apply to filter invalid grasps. """ FullyConvolutionalGraspingPolicy.__init__(self, cfg, filters=filters) self._gripper_width = self._cfg["gripper_width"] # Depth sampling parameters. self._num_depth_bins = self._cfg["num_depth_bins"] self._depth_offset = 0.0 # Hard offset from the workspace. if "depth_offset" in self._cfg: self._depth_offset = self._cfg["depth_offset"] def _sample_depths(self, raw_depth_im, raw_seg): """Sample depths from the raw depth image.""" max_depth = np.max(raw_depth_im) + self._depth_offset # For sampling the min depth, we only sample from the portion of the # depth image in the object segmask because sometimes the rim of the # bin is not properly subtracted out of the depth image. raw_depth_im_segmented = np.ones_like(raw_depth_im) raw_depth_im_segmented[np.where(raw_seg > 0)] = raw_depth_im[np.where( raw_seg > 0)] min_depth = np.min(raw_depth_im_segmented) + self._depth_offset depth_bin_width = (max_depth - min_depth) / self._num_depth_bins depths = np.zeros((self._num_depth_bins, 1)) for i in range(self._num_depth_bins): depths[i][0] = min_depth + (i * depth_bin_width + depth_bin_width / 2) return depths def _get_actions(self, preds, ind, images, depths, camera_intr, num_actions): """Generate the actions to be returned.""" actions = [] # TODO(vsatish): These should use the max angle instead. 
ang_bin_width = GeneralConstants.PI / preds.shape[-1] for i in range(num_actions): im_idx = ind[i, 0] h_idx = ind[i, 1] w_idx = ind[i, 2] ang_idx = ind[i, 3] center = Point( np.asarray([ w_idx * self._gqcnn_stride + self._gqcnn_recep_w // 2, h_idx * self._gqcnn_stride + self._gqcnn_recep_h // 2 ])) ang = GeneralConstants.PI / 2 - (ang_idx * ang_bin_width + ang_bin_width / 2) depth = depths[im_idx, 0] grasp = Grasp2D(center, ang, depth, width=self._gripper_width, camera_intr=camera_intr) grasp_action = GraspAction(grasp, preds[im_idx, h_idx, w_idx, ang_idx], DepthImage(images[im_idx])) actions.append(grasp_action) return actions def _gen_images_and_depths(self, depth, segmask): """Replicate the depth image and sample corresponding depths.""" depths = self._sample_depths(depth, segmask) images = np.tile(np.asarray([depth]), (self._num_depth_bins, 1, 1, 1)) return images, depths def _visualize_3d(self, actions, wrapped_depth_im, camera_intr, num_actions): """Visualize the actions in 3D.""" raise NotImplementedError def _visualize_affordance_map(self, preds, depth_im): """Visualize an affordance map of the network predictions overlayed on the depth image.""" raise NotImplementedError class FullyConvolutionalGraspingPolicySuction(FullyConvolutionalGraspingPolicy ): """Suction grasp sampling policy using the FC-GQ-CNN.""" def _get_actions(self, preds, ind, images, depths, camera_intr, num_actions): """Generate the actions to be returned.""" depth_im = DepthImage(images[0], frame=camera_intr.frame) point_cloud_im = camera_intr.deproject_to_image(depth_im) normal_cloud_im = point_cloud_im.normal_cloud_im() actions = [] for i in range(num_actions): im_idx = ind[i, 0] h_idx = ind[i, 1] w_idx = ind[i, 2] center = Point( np.asarray([ w_idx * self._gqcnn_stride + self._gqcnn_recep_w // 2, h_idx * self._gqcnn_stride + self._gqcnn_recep_h // 2 ])) axis = -normal_cloud_im[center.y, center.x] if np.linalg.norm(axis) == 0: continue depth = depth_im[center.y, center.x, 0] if depth == 
0.0: continue grasp = SuctionPoint2D(center, axis=axis, depth=depth, camera_intr=camera_intr) grasp_action = GraspAction(grasp, preds[im_idx, h_idx, w_idx, 0], DepthImage(images[im_idx])) actions.append(grasp_action) return actions def _visualize_affordance_map(self, preds, depth_im, scale, plot_max=True, output_dir=None): """Visualize an affordance map of the network predictions overlayed on the depth image.""" self._logger.info("Visualizing affordance map...") affordance_map = preds[0, ..., 0] tf_depth_im = depth_im.crop( depth_im.shape[0] - self._gqcnn_recep_h, depth_im.shape[1] - self._gqcnn_recep_w).resize(1.0 / self._gqcnn_stride) # Plot. vis.figure() vis.imshow(tf_depth_im) plt.imshow(affordance_map, cmap=plt.cm.RdYlGn, alpha=0.3, vmin=0.0, vmax=1.0) if plot_max: affordance_argmax = np.unravel_index(np.argmax(affordance_map), affordance_map.shape) plt.scatter(affordance_argmax[1], affordance_argmax[0], c="black", marker=".", s=scale * 25) vis.title("Grasp Affordance Map") if output_dir is not None: vis.savefig(os.path.join(output_dir, "grasp_affordance_map.png")) else: vis.show() def _gen_images_and_depths(self, depth, segmask): """Extend the image to a 4D tensor.""" # TODO(vsatish): Depth should be made an optional input to the network. return np.expand_dims(depth, 0), np.array([-1]) def _visualize_3d(self, actions, wrapped_depth_im, camera_intr, num_actions): """Visualize the actions in 3D.""" raise NotImplementedError ================================================ FILE: gqcnn/grasping/policy/policy.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. 
Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Grasping policies. 
Author ------ Jeff Mahler """ from abc import ABC, abstractmethod import copy import pickle as pkl import math import os from time import time import matplotlib.pyplot as plt import numpy as np import scipy.ndimage.filters as snf import scipy.stats as ss from sklearn.mixture import GaussianMixture import autolab_core.utils as utils from autolab_core import (Point, Logger, BinaryImage, ColorImage, DepthImage, RgbdImage, SegmentationImage, CameraIntrinsics) from visualization import Visualizer2D as vis from ..constraint_fn import GraspConstraintFnFactory from ..grasp import Grasp2D, SuctionPoint2D, MultiSuctionPoint2D from ..grasp_quality_function import (GraspQualityFunctionFactory, GQCnnQualityFunction) from ..image_grasp_sampler import ImageGraspSamplerFactory from ...utils import GeneralConstants, NoValidGraspsException class RgbdImageState(object): """State to encapsulate RGB-D images.""" def __init__(self, rgbd_im, camera_intr, segmask=None, obj_segmask=None, fully_observed=None): """ Parameters ---------- rgbd_im : :obj:`autolab_core.RgbdImage` An RGB-D image to plan grasps on. camera_intr : :obj:`autolab_core.CameraIntrinsics` Intrinsics of the RGB-D camera. segmask : :obj:`autolab_core.BinaryImage` Segmentation mask for the image. obj_segmask : :obj:`autolab_core.SegmentationImage` Segmentation mask for the different objects in the image. full_observed : :obj:`object` Representation of the fully observed state. """ self.rgbd_im = rgbd_im self.camera_intr = camera_intr self.segmask = segmask self.obj_segmask = obj_segmask self.fully_observed = fully_observed def save(self, save_dir): """Save to a directory. Parameters ---------- save_dir : str The directory to save to. 
""" if not os.path.exists(save_dir): os.mkdir(save_dir) color_image_filename = os.path.join(save_dir, "color.png") depth_image_filename = os.path.join(save_dir, "depth.npy") camera_intr_filename = os.path.join(save_dir, "camera.intr") segmask_filename = os.path.join(save_dir, "segmask.npy") obj_segmask_filename = os.path.join(save_dir, "obj_segmask.npy") state_filename = os.path.join(save_dir, "state.pkl") self.rgbd_im.color.save(color_image_filename) self.rgbd_im.depth.save(depth_image_filename) self.camera_intr.save(camera_intr_filename) if self.segmask is not None: self.segmask.save(segmask_filename) if self.obj_segmask is not None: self.obj_segmask.save(obj_segmask_filename) if self.fully_observed is not None: pkl.dump(self.fully_observed, open(state_filename, "wb")) @staticmethod def load(save_dir): """Load an :obj:`RGBDImageState`. Parameters ---------- save_dir : str The directory to load from. """ if not os.path.exists(save_dir): raise ValueError("Directory %s does not exist!" % (save_dir)) color_image_filename = os.path.join(save_dir, "color.png") depth_image_filename = os.path.join(save_dir, "depth.npy") camera_intr_filename = os.path.join(save_dir, "camera.intr") segmask_filename = os.path.join(save_dir, "segmask.npy") obj_segmask_filename = os.path.join(save_dir, "obj_segmask.npy") state_filename = os.path.join(save_dir, "state.pkl") camera_intr = CameraIntrinsics.load(camera_intr_filename) color = ColorImage.open(color_image_filename, frame=camera_intr.frame) depth = DepthImage.open(depth_image_filename, frame=camera_intr.frame) segmask = None if os.path.exists(segmask_filename): segmask = BinaryImage.open(segmask_filename, frame=camera_intr.frame) obj_segmask = None if os.path.exists(obj_segmask_filename): obj_segmask = SegmentationImage.open(obj_segmask_filename, frame=camera_intr.frame) fully_observed = None if os.path.exists(state_filename): fully_observed = pkl.load(open(state_filename, "rb")) return 
RgbdImageState(RgbdImage.from_color_and_depth(color, depth), camera_intr, segmask=segmask, obj_segmask=obj_segmask, fully_observed=fully_observed) class GraspAction(object): """Action to encapsulate grasps. """ def __init__(self, grasp, q_value, image=None, policy_name=None): """ Parameters ---------- grasp : :obj`Grasp2D` or :obj:`SuctionPoint2D` 2D grasp to wrap. q_value : float Grasp quality. image : :obj:`autolab_core.DepthImage` Depth image corresponding to grasp. policy_name : str Policy name. """ self.grasp = grasp self.q_value = q_value self.image = image self.policy_name = policy_name def save(self, save_dir): """Save grasp action. Parameters ---------- save_dir : str Directory to save the grasp action to. """ if not os.path.exists(save_dir): os.mkdir(save_dir) grasp_filename = os.path.join(save_dir, "grasp.pkl") q_value_filename = os.path.join(save_dir, "pred_robustness.pkl") image_filename = os.path.join(save_dir, "tf_image.npy") pkl.dump(self.grasp, open(grasp_filename, "wb")) pkl.dump(self.q_value, open(q_value_filename, "wb")) if self.image is not None: self.image.save(image_filename) @staticmethod def load(save_dir): """Load a saved grasp action. Parameters ---------- save_dir : str Directory of the saved grasp action. Returns ------- :obj:`GraspAction` Loaded grasp action. """ if not os.path.exists(save_dir): raise ValueError("Directory %s does not exist!" 
% (save_dir)) grasp_filename = os.path.join(save_dir, "grasp.pkl") q_value_filename = os.path.join(save_dir, "pred_robustness.pkl") image_filename = os.path.join(save_dir, "tf_image.npy") grasp = pkl.load(open(grasp_filename, "rb")) q_value = pkl.load(open(q_value_filename, "rb")) image = None if os.path.exists(image_filename): image = DepthImage.open(image_filename) return GraspAction(grasp, q_value, image) class Policy(ABC): """Abstract policy class.""" def __call__(self, state): """Execute the policy on a state.""" return self.action(state) @abstractmethod def action(self, state): """Returns an action for a given state.""" pass class GraspingPolicy(Policy): """Policy for robust grasping with Grasp Quality Convolutional Neural Networks (GQ-CNN).""" def __init__(self, config, init_sampler=True): """ Parameters ---------- config : dict Python dictionary of parameters for the policy. init_sampler : bool Whether or not to initialize the grasp sampler. Notes ----- Required configuration parameters are specified in Other Parameters. Other Parameters ---------------- sampling : dict Dictionary of parameters for grasp sampling, see `gqcnn/grasping/image_grasp_sampler.py`. gqcnn_model : str String path to a trained GQ-CNN model. """ # Store parameters. self._config = config self._gripper_width = 0.05 if "gripper_width" in config: self._gripper_width = config["gripper_width"] # Set the logging dir and possibly log file. self._logging_dir = None log_file = None if "logging_dir" in self.config: self._logging_dir = self.config["logging_dir"] if not os.path.exists(self._logging_dir): os.makedirs(self._logging_dir) log_file = os.path.join(self._logging_dir, "policy.log") # Setup logger. self._logger = Logger.get_logger(self.__class__.__name__, log_file=log_file, global_log_file=True) # Init grasp sampler. 
if init_sampler: self._sampling_config = config["sampling"] self._sampling_config["gripper_width"] = self._gripper_width if "crop_width" in config["metric"] and "crop_height" in config[ "metric"]: pad = max( math.ceil( np.sqrt(2) * (config["metric"]["crop_width"] / 2)), math.ceil( np.sqrt(2) * (config["metric"]["crop_height"] / 2))) self._sampling_config["min_dist_from_boundary"] = pad self._sampling_config["gripper_width"] = self._gripper_width sampler_type = self._sampling_config["type"] self._grasp_sampler = ImageGraspSamplerFactory.sampler( sampler_type, self._sampling_config) # Init constraint function. self._grasp_constraint_fn = None if "constraints" in self._config: self._constraint_config = self._config["constraints"] constraint_type = self._constraint_config["type"] self._grasp_constraint_fn = GraspConstraintFnFactory.constraint_fn( constraint_type, self._constraint_config) # Init grasp quality function. self._metric_config = config["metric"] metric_type = self._metric_config["type"] self._grasp_quality_fn = GraspQualityFunctionFactory.quality_function( metric_type, self._metric_config) @property def config(self): """Returns the policy configuration parameters. Returns ------- dict Python dictionary of the policy configuration parameters. """ return self._config @property def grasp_sampler(self): """Returns the grasp sampler. Returns ------- :obj:`gqcnn.grasping.image_grasp_sampler.ImageGraspSampler` The grasp sampler. """ return self._grasp_sampler @property def grasp_quality_fn(self): """Returns the grasp quality function. Returns ------- :obj:`gqcnn.grasping.grasp_quality_function.GraspQualityFunction` The grasp quality function. """ return self._grasp_quality_fn @property def grasp_constraint_fn(self): """Returns the grasp constraint function. Returns ------- :obj:`gqcnn.grasping.constraint_fn.GraspConstraintFn` The grasp contraint function. """ return self._grasp_constraint_fn @property def gqcnn(self): """Returns the GQ-CNN. 
Returns ------- :obj:`gqcnn.model.tf.GQCNNTF` The GQ-CNN model. """ return self._gqcnn def set_constraint_fn(self, constraint_fn): """Sets the grasp constraint function. Parameters ---------- constraint_fn : :obj`gqcnn.grasping.constraint_fn.GraspConstraintFn` The grasp contraint function. """ self._grasp_constraint_fn = constraint_fn def action(self, state): """Returns an action for a given state. Parameters ---------- state : :obj:`RgbdImageState` The RGB-D image state to plan grasps on. Returns ------- :obj:`GraspAction` The planned grasp action. """ # Save state. if self._logging_dir is not None: policy_id = utils.gen_experiment_id() policy_dir = os.path.join(self._logging_dir, "policy_output_%s" % (policy_id)) while os.path.exists(policy_dir): policy_id = utils.gen_experiment_id() policy_dir = os.path.join(self._logging_dir, "policy_output_%s" % (policy_id)) self._policy_dir = policy_dir os.mkdir(self._policy_dir) state_dir = os.path.join(self._policy_dir, "state") state.save(state_dir) # Plan action. action = self._action(state) # Save action. if self._logging_dir is not None: action_dir = os.path.join(self._policy_dir, "action") action.save(action_dir) return action @abstractmethod def _action(self, state): """Returns an action for a given state. """ pass def show(self, filename=None, dpi=100): """Show a figure. Parameters ---------- filename : str File to save figure to. dpi : int Dpi of figure. """ if self._logging_dir is None: vis.show() else: filename = os.path.join(self._policy_dir, filename) vis.savefig(filename, dpi=dpi) class UniformRandomGraspingPolicy(GraspingPolicy): """Returns a grasp uniformly at random.""" def __init__(self, config): """ Parameters ---------- config : dict Python dictionary of policy configuration parameters. filters : dict Python dictionary of functions to apply to filter invalid grasps. 
""" GraspingPolicy.__init__(self, config) self._num_grasp_samples = 1 self._grasp_center_std = 0.0 if "grasp_center_std" in config: self._grasp_center_std = config["grasp_center_std"] def _action(self, state): """Plans the grasp with the highest probability of success on the given RGB-D image. Attributes ---------- state : :obj:`RgbdImageState` Image to plan grasps on. Returns ------- :obj:`GraspAction` Grasp to execute. """ # Check valid input. if not isinstance(state, RgbdImageState): raise ValueError("Must provide an RGB-D image state.") # Parse state. rgbd_im = state.rgbd_im camera_intr = state.camera_intr segmask = state.segmask # Sample grasps. grasps = self._grasp_sampler.sample( rgbd_im, camera_intr, self._num_grasp_samples, segmask=segmask, visualize=self.config["vis"]["grasp_sampling"], constraint_fn=self._grasp_constraint_fn, seed=None) num_grasps = len(grasps) if num_grasps == 0: self._logger.warning("No valid grasps could be found") raise NoValidGraspsException() # Set grasp. grasp = grasps[0] # Perturb grasp. if self._grasp_center_std > 0.0: grasp_center_rv = ss.multivariate_normal( grasp.center.data, cov=self._grasp_center_std**2) grasp.center.data = grasp_center_rv.rvs(size=1)[0] # Form tensors. return GraspAction(grasp, 0.0, state.rgbd_im.depth) class RobustGraspingPolicy(GraspingPolicy): """Samples a set of grasp candidates in image space, ranks the grasps by the predicted probability of success from a GQ-CNN and returns the grasp with the highest probability of success.""" def __init__(self, config, filters=None): """ Parameters ---------- config : dict Python dictionary of policy configuration parameters. filters : dict Python dictionary of functions to apply to filter invalid grasps. Notes ----- Required configuration dictionary parameters are specified in Other Parameters. Other Parameters ---------------- num_grasp_samples : int Number of grasps to sample. gripper_width : float, optional Width of the gripper in meters. 
logging_dir : str, optional Directory in which to save the sampled grasps and input images. """ GraspingPolicy.__init__(self, config) self._parse_config() self._filters = filters def _parse_config(self): """Parses the parameters of the policy.""" self._num_grasp_samples = self.config["sampling"]["num_grasp_samples"] self._max_grasps_filter = 1 if "max_grasps_filter" in self.config: self._max_grasps_filter = self.config["max_grasps_filter"] self._gripper_width = np.inf if "gripper_width" in self.config: self._gripper_width = self.config["gripper_width"] def select(self, grasps, q_value): """Selects the grasp with the highest probability of success. Can override for alternate policies (e.g. epsilon greedy). Parameters ---------- grasps : list Python list of :obj:`gqcnn.grasping.Grasp2D` or :obj:`gqcnn.grasping.SuctionPoint2D` grasps to select from. q_values : list Python list of associated q-values. Returns ------- :obj:`gqcnn.grasping.Grasp2D` or :obj:`gqcnn.grasping.SuctionPoint2D` Grasp with highest probability of success . """ # Sort grasps. num_grasps = len(grasps) grasps_and_predictions = zip(np.arange(num_grasps), q_value) grasps_and_predictions = sorted(grasps_and_predictions, key=lambda x: x[1], reverse=True) # Return top grasps. if self._filters is None: return grasps_and_predictions[0][0] # Filter grasps. self._logger.info("Filtering grasps") i = 0 while i < self._max_grasps_filter and i < len(grasps_and_predictions): index = grasps_and_predictions[i][0] grasp = grasps[index] valid = True for filter_name, is_valid in self._filters.items(): valid = is_valid(grasp) self._logger.debug("Grasp {} filter {} valid: {}".format( i, filter_name, valid)) if not valid: valid = False break if valid: return index i += 1 raise NoValidGraspsException("No grasps satisfied filters") def _action(self, state): """Plans the grasp with the highest probability of success on the given RGB-D image. Attributes ---------- state : :obj:`RgbdImageState` Image to plan grasps on. 
Returns ------- :obj:`GraspAction` Grasp to execute. """ # Check valid input. if not isinstance(state, RgbdImageState): raise ValueError("Must provide an RGB-D image state.") # Parse state. rgbd_im = state.rgbd_im camera_intr = state.camera_intr segmask = state.segmask # Sample grasps. grasps = self._grasp_sampler.sample( rgbd_im, camera_intr, self._num_grasp_samples, segmask=segmask, visualize=self.config["vis"]["grasp_sampling"], constraint_fn=self._grasp_constraint_fn, seed=None) num_grasps = len(grasps) if num_grasps == 0: self._logger.warning("No valid grasps could be found") raise NoValidGraspsException() # Compute grasp quality. compute_start = time() q_values = self._grasp_quality_fn(state, grasps, params=self._config) self._logger.debug("Grasp evaluation took %.3f sec" % (time() - compute_start)) if self.config["vis"]["grasp_candidates"]: # Display each grasp on the original image, colored by predicted # success. norm_q_values = (q_values - np.min(q_values)) / (np.max(q_values) - np.min(q_values)) vis.figure(size=(GeneralConstants.FIGSIZE, GeneralConstants.FIGSIZE)) vis.imshow(rgbd_im.depth, vmin=self.config["vis"]["vmin"], vmax=self.config["vis"]["vmax"]) for grasp, q in zip(grasps, norm_q_values): vis.grasp(grasp, scale=1.0, grasp_center_size=10, show_center=False, show_axis=True, color=plt.cm.RdYlBu(q)) vis.title("Sampled grasps") filename = None if self._logging_dir is not None: filename = os.path.join(self._logging_dir, "grasp_candidates.png") vis.show(filename) # Select grasp. 
index = self.select(grasps, q_values) grasp = grasps[index] q_value = q_values[index] if self.config["vis"]["grasp_plan"]: vis.figure() vis.imshow(rgbd_im.depth, vmin=self.config["vis"]["vmin"], vmax=self.config["vis"]["vmax"]) vis.grasp(grasp, scale=2.0, show_axis=True) vis.title("Best Grasp: d=%.3f, q=%.3f" % (grasp.depth, q_value)) vis.show() return GraspAction(grasp, q_value, state.rgbd_im.depth) class CrossEntropyRobustGraspingPolicy(GraspingPolicy): """Optimizes a set of grasp candidates in image space using the cross entropy method. Cross entropy method (CEM): (1) sample an initial set of candidates (2) sort the candidates (3) fit a GMM to the top P% (4) re-sample grasps from the distribution (5) repeat steps 2-4 for K iters (6) return the best candidate from the final sample set """ def __init__(self, config, filters=None): """ Parameters ---------- config : dict Python dictionary of policy configuration parameters. filters : dict Python dictionary of functions to apply to filter invalid grasps. Notes ----- Required configuration dictionary parameters are specified in Other Parameters. Other Parameters ---------------- num_seed_samples : int Number of candidate to sample in the initial set. num_gmm_samples : int Number of candidates to sample on each resampling from the GMMs. num_iters : int Number of sample-and-refit iterations of CEM. gmm_refit_p : float Top p-% of grasps used for refitting. gmm_component_frac : float Percentage of the elite set size used to determine number of GMM components. gmm_reg_covar : float Regularization parameters for GMM covariance matrix, enforces diversity of fitted distributions. deterministic : bool, optional Whether to set the random seed to enforce deterministic behavior. gripper_width : float, optional Width of the gripper in meters. 
""" GraspingPolicy.__init__(self, config) self._parse_config() self._filters = filters self._case_counter = 0 def _parse_config(self): """Parses the parameters of the policy.""" # Cross entropy method parameters. self._num_seed_samples = self.config["num_seed_samples"] self._num_gmm_samples = self.config["num_gmm_samples"] self._num_iters = self.config["num_iters"] self._gmm_refit_p = self.config["gmm_refit_p"] self._gmm_component_frac = self.config["gmm_component_frac"] self._gmm_reg_covar = self.config["gmm_reg_covar"] self._depth_gaussian_sigma = 0.0 if "depth_gaussian_sigma" in self.config: self._depth_gaussian_sigma = self.config["depth_gaussian_sigma"] self._max_grasps_filter = 1 if "max_grasps_filter" in self.config: self._max_grasps_filter = self.config["max_grasps_filter"] self._max_resamples_per_iteration = 100 if "max_resamples_per_iteration" in self.config: self._max_resamples_per_iteration = self.config[ "max_resamples_per_iteration"] self._max_approach_angle = np.inf if "max_approach_angle" in self.config: self._max_approach_angle = np.deg2rad( self.config["max_approach_angle"]) # Gripper parameters. self._seed = None if self.config["deterministic"]: self._seed = GeneralConstants.SEED self._gripper_width = np.inf if "gripper_width" in self.config: self._gripper_width = self.config["gripper_width"] # Affordance map visualization. self._vis_grasp_affordance_map = False if "grasp_affordance_map" in self.config["vis"]: self._vis_grasp_affordance_map = self.config["vis"][ "grasp_affordance_map"] self._state_counter = 0 # Used for logging state data. def select(self, grasps, q_values): """Selects the grasp with the highest probability of success. Can override for alternate policies (e.g. epsilon greedy). Parameters ---------- grasps : list Python list of :obj:`gqcnn.grasping.Grasp2D` or :obj:`gqcnn.grasping.SuctionPoint2D` grasps to select from. q_values : list Python list of associated q-values. 
Returns ------- :obj:`gqcnn.grasping.Grasp2D` or :obj:`gqcnn.grasping.SuctionPoint2D` Grasp with highest probability of success. """ # Sort. self._logger.info("Sorting grasps") num_grasps = len(grasps) if num_grasps == 0: raise NoValidGraspsException("Zero grasps") grasps_and_predictions = zip(np.arange(num_grasps), q_values) grasps_and_predictions = sorted(grasps_and_predictions, key=lambda x: x[1], reverse=True) # Return top grasps. if self._filters is None: return grasps_and_predictions[0][0] # Filter grasps. self._logger.info("Filtering grasps") i = 0 while i < self._max_grasps_filter and i < len(grasps_and_predictions): index = grasps_and_predictions[i][0] grasp = grasps[index] valid = True for filter_name, is_valid in self._filters.items(): valid = is_valid(grasp) self._logger.debug("Grasp {} filter {} valid: {}".format( i, filter_name, valid)) if not valid: valid = False break if valid: return index i += 1 raise NoValidGraspsException("No grasps satisfied filters") def _mask_predictions(self, pred_map, segmask): self._logger.info("Masking predictions...") seg_pred_mismatch_msg = ("Prediction map shape {} does not match shape" " of segmask {}.") assert pred_map.shape == segmask.shape, seg_pred_mismatch_msg.format( pred_map.shape, segmask.shape) preds_masked = np.zeros_like(pred_map) nonzero_ind = np.where(segmask > 0) preds_masked[nonzero_ind] = pred_map[nonzero_ind] return preds_masked def _gen_grasp_affordance_map(self, state, stride=1): self._logger.info("Generating grasp affordance map...") # Generate grasps at points to evaluate (this is just the interface to # `GraspQualityFunction`). 
        # Time how long the dense candidate-grid evaluation takes.
        crop_candidate_start_time = time()
        point_cloud_im = state.camera_intr.deproject_to_image(
            state.rgbd_im.depth)
        normal_cloud_im = point_cloud_im.normal_cloud_im()
        q_vals = []
        # Half the GQ-CNN receptive field: candidates closer than this to the
        # image border cannot be evaluated because their crop would fall
        # outside the image.
        gqcnn_recep_h_half = self._grasp_quality_fn.gqcnn_recep_height // 2
        gqcnn_recep_w_half = self._grasp_quality_fn.gqcnn_recep_width // 2
        im_h = state.rgbd_im.height
        im_w = state.rgbd_im.width
        # Evaluate one image row of candidates at a time to bound the size of
        # the batch passed to the quality function.
        for i in range(gqcnn_recep_h_half - 1, im_h - gqcnn_recep_h_half,
                       stride):
            grasps = []
            for j in range(gqcnn_recep_w_half - 1, im_w - gqcnn_recep_w_half,
                           stride):
                # TODO(vsatish): Find a better way to find policy type.
                if self.config["sampling"]["type"] == "suction":
                    # Suction approach axis is the negated surface normal at
                    # pixel (i, j); depth is read from the depth image there.
                    grasps.append(
                        SuctionPoint2D(Point(np.array([j, i])),
                                       axis=-normal_cloud_im[i, j],
                                       depth=state.rgbd_im.depth[i, j],
                                       camera_intr=state.camera_intr))
                else:
                    raise NotImplementedError(
                        "Parallel Jaw Grasp Affordance Maps Not Supported!")
            q_vals.extend(self._grasp_quality_fn(state, grasps))
        self._logger.info(
            "Generating crop grasp candidates took {} sec.".format(
                time() - crop_candidate_start_time))

        # Mask out predictions not in the segmask (we don't really care about
        # them).
        # Reshape the flat per-row q-values into a 2D map; the dimensions
        # mirror the loop ranges above (full image minus one receptive field,
        # downsampled by `stride`).
        pred_map = np.array(q_vals).reshape(
            (im_h - gqcnn_recep_h_half * 2) // stride + 1,
            (im_w - gqcnn_recep_w_half * 2) // stride + 1)
        # TODO(vsatish): Don't access the raw data like this!
        tf_segmask = state.segmask.crop(im_h - gqcnn_recep_h_half * 2,
                                        im_w - gqcnn_recep_w_half * 2).resize(
                                            1.0 / stride,
                                            interp="nearest")._data.squeeze()
        # The crop/resize of the segmask can differ from the prediction map by
        # a pixel due to rounding; pad or truncate it to match exactly.
        if tf_segmask.shape != pred_map.shape:
            new_tf_segmask = np.zeros_like(pred_map)
            smaller_i = min(pred_map.shape[0], tf_segmask.shape[0])
            smaller_j = min(pred_map.shape[1], tf_segmask.shape[1])
            new_tf_segmask[:smaller_i, :smaller_j] = tf_segmask[:smaller_i, :
                                                                smaller_j]
            tf_segmask = new_tf_segmask
        # Zero out predictions outside the object segmask.
        pred_map_masked = self._mask_predictions(pred_map, tf_segmask)
        return pred_map_masked

    def _plot_grasp_affordance_map(self,
                                   state,
                                   affordance_map,
                                   stride=1,
                                   grasps=None,
                                   q_values=None,
                                   plot_max=True,
                                   title=None,
                                   scale=1.0,
                                   save_fname=None,
                                   save_path=None):
        """Plot an affordance map overlayed on the cropped/downsampled depth
        image, optionally with grasps (colored by q-value) and a marker at the
        map's argmax; shows the figure, or saves it when `save_path` is given.
        """
        gqcnn_recep_h_half = self._grasp_quality_fn.gqcnn_recep_height // 2
        gqcnn_recep_w_half = self._grasp_quality_fn.gqcnn_recep_width // 2
        im_h = state.rgbd_im.height
        im_w = state.rgbd_im.width

        # Plot.
        vis.figure()
        # Crop/downsample the depth image the same way the affordance map was
        # generated so the overlay aligns.
        tf_depth_im = state.rgbd_im.depth.crop(
            im_h - gqcnn_recep_h_half * 2,
            im_w - gqcnn_recep_w_half * 2).resize(1.0 / stride,
                                                  interp="nearest")
        vis.imshow(tf_depth_im)
        plt.imshow(affordance_map, cmap=plt.cm.RdYlGn, alpha=0.3)
        if grasps is not None:
            # Deep-copy so shifting the centers into the cropped frame does
            # not mutate the caller's grasps.
            grasps = copy.deepcopy(grasps)
            for grasp, q in zip(grasps, q_values):
                grasp.center.data[0] -= gqcnn_recep_w_half
                grasp.center.data[1] -= gqcnn_recep_h_half
                vis.grasp(grasp,
                          scale=scale,
                          show_center=False,
                          show_axis=True,
                          color=plt.cm.RdYlGn(q))
        if plot_max:
            # Mark the highest-affordance pixel.
            affordance_argmax = np.unravel_index(np.argmax(affordance_map),
                                                 affordance_map.shape)
            plt.scatter(affordance_argmax[1],
                        affordance_argmax[0],
                        c="black",
                        marker=".",
                        s=scale * 25)
        if title is not None:
            vis.title(title)
        if save_path is not None:
            save_path = os.path.join(save_path, save_fname)
        vis.show(save_path)

    def action_set(self, state):
        """Plan a set of grasps with the highest probability of success on
        the given RGB-D image.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            Image to plan grasps on.

        Returns
        -------
        python list of :obj:`gqcnn.grasping.Grasp2D` or :obj:`gqcnn.grasping.SuctionPoint2D` # noqa E501
            Grasps to execute.
        """
        # Check valid input.
        if not isinstance(state, RgbdImageState):
            raise ValueError("Must provide an RGB-D image state.")

        # Per-state output directory for logged figures.
        state_output_dir = None
        if self._logging_dir is not None:
            state_output_dir = os.path.join(
                self._logging_dir,
                "state_{}".format(str(self._state_counter).zfill(5)))
            if not os.path.exists(state_output_dir):
                os.makedirs(state_output_dir)
            self._state_counter += 1

        # Parse state.
        seed_set_start = time()
        rgbd_im = state.rgbd_im
        depth_im = rgbd_im.depth
        camera_intr = state.camera_intr
        segmask = state.segmask
        # Optionally smooth the depth image before computing normals.
        if self._depth_gaussian_sigma > 0:
            depth_im_filtered = depth_im.apply(
                snf.gaussian_filter, sigma=self._depth_gaussian_sigma)
        else:
            depth_im_filtered = depth_im
        point_cloud_im = camera_intr.deproject_to_image(depth_im_filtered)
        normal_cloud_im = point_cloud_im.normal_cloud_im()

        # Visualize grasp affordance map.
        if self._vis_grasp_affordance_map:
            grasp_affordance_map = self._gen_grasp_affordance_map(state)
            self._plot_grasp_affordance_map(state,
                                            grasp_affordance_map,
                                            title="Grasp Affordance Map",
                                            save_fname="affordance_map.png",
                                            save_path=state_output_dir)

        if "input_images" in self.config["vis"] and self.config["vis"][
                "input_images"]:
            vis.figure()
            vis.subplot(1, 2, 1)
            vis.imshow(depth_im)
            vis.title("Depth")
            vis.subplot(1, 2, 2)
            vis.imshow(segmask)
            vis.title("Segmask")
            filename = None
            if self._logging_dir is not None:
                filename = os.path.join(self._logging_dir, "input_images.png")
            vis.show(filename)

        # Sample grasps.
        self._logger.info("Sampling seed set")
        # Draw the initial CEM candidate set from the image grasp sampler.
        grasps = self._grasp_sampler.sample(
            rgbd_im,
            camera_intr,
            self._num_seed_samples,
            segmask=segmask,
            visualize=self.config["vis"]["grasp_sampling"],
            constraint_fn=self._grasp_constraint_fn,
            seed=self._seed)
        num_grasps = len(grasps)
        if num_grasps == 0:
            self._logger.warning("No valid grasps could be found")
            raise NoValidGraspsException()

        # Infer the gripper type from the first sampled grasp so the GMM
        # resampling below knows how to rebuild grasps from feature vectors.
        grasp_type = "parallel_jaw"
        if isinstance(grasps[0], SuctionPoint2D):
            grasp_type = "suction"
        elif isinstance(grasps[0], MultiSuctionPoint2D):
            grasp_type = "multi_suction"

        self._logger.info("Sampled %d grasps" % (len(grasps)))
        self._logger.info("Computing the seed set took %.3f sec" %
                          (time() - seed_set_start))

        # Iteratively refit and sample.
        for j in range(self._num_iters):
            self._logger.info("CEM iter %d" % (j))

            # Predict grasps.
            predict_start = time()
            q_values = self._grasp_quality_fn(state,
                                              grasps,
                                              params=self._config)
            self._logger.info("Prediction took %.3f sec" %
                              (time() - predict_start))

            # Sort grasps.
            # Pair each q-value with its candidate index, best first.
            resample_start = time()
            q_values_and_indices = zip(q_values, np.arange(num_grasps))
            q_values_and_indices = sorted(q_values_and_indices,
                                          key=lambda x: x[0],
                                          reverse=True)

            if self.config["vis"]["grasp_candidates"]:
                # Display each grasp on the original image, colored by
                # predicted success.
# norm_q_values = ((q_values - np.min(q_values)) / # (np.max(q_values) - np.min(q_values))) norm_q_values = q_values title = "Sampled Grasps Iter %d" % (j) if self._vis_grasp_affordance_map: self._plot_grasp_affordance_map( state, grasp_affordance_map, grasps=grasps, q_values=norm_q_values, scale=2.0, title=title, save_fname="cem_iter_{}.png".format(j), save_path=state_output_dir) display_grasps_and_q_values = zip(grasps, q_values) display_grasps_and_q_values = sorted( display_grasps_and_q_values, key=lambda x: x[1]) vis.figure(size=(GeneralConstants.FIGSIZE, GeneralConstants.FIGSIZE)) vis.imshow(rgbd_im.depth, vmin=self.config["vis"]["vmin"], vmax=self.config["vis"]["vmax"]) for grasp, q in display_grasps_and_q_values: vis.grasp(grasp, scale=2.0, jaw_width=2.0, show_center=False, show_axis=True, color=plt.cm.RdYlBu(q)) vis.title("Sampled grasps iter %d" % (j)) filename = None if self._logging_dir is not None: filename = os.path.join(self._logging_dir, "cem_iter_%d.png" % (j)) vis.show(filename) # Fit elite set. elite_start = time() num_refit = max(int(np.ceil(self._gmm_refit_p * num_grasps)), 1) elite_q_values = [i[0] for i in q_values_and_indices[:num_refit]] elite_grasp_indices = [ i[1] for i in q_values_and_indices[:num_refit] ] elite_grasps = [grasps[i] for i in elite_grasp_indices] elite_grasp_arr = np.array([g.feature_vec for g in elite_grasps]) if self.config["vis"]["elite_grasps"]: # Display each grasp on the original image, colored by # predicted success. 
                # NOTE(review): divides by zero if all elite q-values are
                # equal — confirm upstream variability before relying on this
                # visualization path.
                norm_q_values = (elite_q_values - np.min(elite_q_values)) / (
                    np.max(elite_q_values) - np.min(elite_q_values))
                vis.figure(size=(GeneralConstants.FIGSIZE,
                                 GeneralConstants.FIGSIZE))
                vis.imshow(rgbd_im.depth,
                           vmin=self.config["vis"]["vmin"],
                           vmax=self.config["vis"]["vmax"])
                for grasp, q in zip(elite_grasps, norm_q_values):
                    vis.grasp(grasp,
                              scale=1.5,
                              show_center=False,
                              show_axis=True,
                              color=plt.cm.RdYlBu(q))
                vis.title("Elite grasps iter %d" % (j))
                filename = None
                if self._logging_dir is not None:
                    filename = os.path.join(self._logging_dir,
                                            "elite_set_iter_%d.png" % (j))
                vis.show(filename)

            # Normalize elite set.
            elite_grasp_mean = np.mean(elite_grasp_arr, axis=0)
            elite_grasp_std = np.std(elite_grasp_arr, axis=0)
            # Guard against zero variance in any feature dimension.
            elite_grasp_std[elite_grasp_std == 0] = 1e-6
            elite_grasp_arr = (elite_grasp_arr -
                               elite_grasp_mean) / elite_grasp_std
            self._logger.info("Elite set computation took %.3f sec" %
                              (time() - elite_start))

            # Fit a GMM to the top samples.
            num_components = max(
                int(np.ceil(self._gmm_component_frac * num_refit)), 1)
            # Initialize mixture weights uniformly.
            uniform_weights = (1.0 / num_components) * np.ones(num_components)
            gmm = GaussianMixture(n_components=num_components,
                                  weights_init=uniform_weights,
                                  reg_covar=self._gmm_reg_covar)
            train_start = time()
            gmm.fit(elite_grasp_arr)
            self._logger.info("GMM fitting with %d components took %.3f sec" %
                              (num_components, time() - train_start))

            # Sample the next grasps.
            # Resample from the fitted GMM until enough in-bounds grasps are
            # collected or the per-iteration resample budget runs out.
            grasps = []
            loop_start = time()
            num_tries = 0
            while (len(grasps) < self._num_gmm_samples
                   and num_tries < self._max_resamples_per_iteration):
                # Sample from GMM.
                sample_start = time()
                grasp_vecs, _ = gmm.sample(n_samples=self._num_gmm_samples)
                # Un-normalize back to image-space feature coordinates.
                grasp_vecs = elite_grasp_std * grasp_vecs + elite_grasp_mean
                self._logger.info("GMM sampling took %.3f sec" %
                                  (time() - sample_start))

                # Convert features to grasps and store if in segmask.
                for k, grasp_vec in enumerate(grasp_vecs):
                    feature_start = time()
                    if grasp_type == "parallel_jaw":
                        # Form grasp object.
                        grasp = Grasp2D.from_feature_vec(
                            grasp_vec,
                            width=self._gripper_width,
                            camera_intr=camera_intr)
                    elif grasp_type == "suction":
                        # Read depth and approach axis.
                        # Clamp the sampled pixel into image bounds; assumes
                        # grasp_vec[0] is x (col) and grasp_vec[1] is y (row)
                        # — consistent with the (j, i) center ordering used
                        # elsewhere in this class.
                        u = int(min(max(grasp_vec[1], 0), depth_im.height - 1))
                        v = int(min(max(grasp_vec[0], 0), depth_im.width - 1))
                        grasp_depth = depth_im[u, v, 0]

                        # Approach axis.
                        grasp_axis = -normal_cloud_im[u, v]

                        # Form grasp object.
                        grasp = SuctionPoint2D.from_feature_vec(
                            grasp_vec,
                            camera_intr=camera_intr,
                            depth=grasp_depth,
                            axis=grasp_axis)
                    elif grasp_type == "multi_suction":
                        # Read depth and approach axis.
                        u = int(min(max(grasp_vec[1], 0), depth_im.height - 1))
                        v = int(min(max(grasp_vec[0], 0), depth_im.width - 1))
                        # NOTE(review): the suction branch indexes
                        # depth_im[u, v, 0] but this one uses depth_im[u, v] —
                        # confirm whether the asymmetry is intentional.
                        grasp_depth = depth_im[u, v]

                        # Approach_axis.
                        grasp_axis = -normal_cloud_im[u, v]

                        # Form grasp object.
                        grasp = MultiSuctionPoint2D.from_feature_vec(
                            grasp_vec,
                            camera_intr=camera_intr,
                            depth=grasp_depth,
                            axis=grasp_axis)
                    self._logger.debug("Feature vec took %.5f sec" %
                                       (time() - feature_start))

                    bounds_start = time()
                    # Check in bounds.
                    # Keep the grasp only if it lies inside the segmask (when
                    # one is provided), its approach angle is within the
                    # limit, and it passes the optional constraint function.
                    if (state.segmask is None or
                        (grasp.center.y >= 0
                         and grasp.center.y < state.segmask.height
                         and grasp.center.x >= 0
                         and grasp.center.x < state.segmask.width
                         and np.any(state.segmask[int(grasp.center.y),
                                                  int(grasp.center.x)] != 0)
                         and grasp.approach_angle < self._max_approach_angle)
                            and (self._grasp_constraint_fn is None
                                 or self._grasp_constraint_fn(grasp))):

                        # Check validity according to filters.
                        grasps.append(grasp)
                    self._logger.debug("Bounds took %.5f sec" %
                                       (time() - bounds_start))
                    num_tries += 1

            # Check num grasps.
            num_grasps = len(grasps)
            if num_grasps == 0:
                self._logger.warning("No valid grasps could be found")
                raise NoValidGraspsException()

            self._logger.info("Resample loop took %.3f sec" %
                              (time() - loop_start))
            self._logger.info("Resampling took %.3f sec" %
                              (time() - resample_start))

        # Predict final set of grasps.
def _action(self, state):
    """Plan the single best grasp for the given RGB-D image state.

    Attributes
    ----------
    state : :obj:`RgbdImageState`
        Image to plan grasps on.

    Returns
    -------
    :obj:`GraspAction`
        Grasp to execute.
    """
    # Unpack the observation.
    rgbd_im = state.rgbd_im
    depth_im = rgbd_im.depth

    # Generate candidate grasps and their predicted quality values.
    grasps, q_values = self.action_set(state)

    # Let the selection strategy pick the grasp to execute.
    best_idx = self.select(grasps, q_values)
    best_grasp = grasps[best_idx]
    best_q = q_values[best_idx]

    # Optionally visualize the planned grasp on the depth image.
    if self.config["vis"]["grasp_plan"]:
        vis.figure()
        vis.imshow(depth_im,
                   vmin=self.config["vis"]["vmin"],
                   vmax=self.config["vis"]["vmax"])
        vis.grasp(best_grasp,
                  scale=5.0,
                  show_center=False,
                  show_axis=True,
                  jaw_width=1.0,
                  grasp_axis_width=0.2)
        vis.title("Best Grasp: d=%.3f, q=%.3f" % (best_grasp.depth, best_q))
        save_path = None
        if self._logging_dir is not None:
            save_path = os.path.join(self._logging_dir, "planned_grasp.png")
        vis.show(save_path)

    # When a GQ-CNN quality function is in use, return the exact network
    # input image for the chosen grasp; otherwise fall back to the full
    # depth image.
    return_im = depth_im
    if isinstance(self._grasp_quality_fn, GQCnnQualityFunction):
        image_arr, _ = self._grasp_quality_fn.grasps_to_tensors([best_grasp],
                                                                state)
        return_im = DepthImage(image_arr[0, ...], frame=rgbd_im.frame)

    return GraspAction(best_grasp, best_q, return_im)
deterministic : bool, optional Whether to set the random seed to enforce deterministic behavior. gripper_width : float, optional Width of the gripper in meters. """ def __init__(self, config): CrossEntropyRobustGraspingPolicy.__init__(self, config) QFunctionRobustGraspingPolicy._parse_config(self) self._setup_gqcnn() def _parse_config(self): """Parses the parameters of the policy.""" self._reinit_pc1 = self.config["reinit_pc1"] self._reinit_fc3 = self.config["reinit_fc3"] self._reinit_fc4 = self.config["reinit_fc4"] self._reinit_fc5 = self.config["reinit_fc5"] def _setup_gqcnn(self): """Sets up the GQ-CNN.""" # Close existing session (from superclass initializer). self.gqcnn.close_session() # Check valid output size. if self.gqcnn.fc5_out_size != 1 and not self._reinit_fc5: raise ValueError("Q function must return scalar values") # Reinitialize layers if self._reinit_fc5: self.gqcnn.fc5_out_size = 1 # TODO(jmahler): Implement reinitialization of pc0. self.gqcnn.reinitialize_layers(self._reinit_fc3, self._reinit_fc4, self._reinit_fc5) self.gqcnn.initialize_network(add_softmax=False) class EpsilonGreedyQFunctionRobustGraspingPolicy(QFunctionRobustGraspingPolicy ): """Optimizes a set of antipodal grasp candidates in image space using the cross entropy method with a GQ-CNN that estimates the Q-function for use in Q-learning, and chooses a random antipodal grasp with probability epsilon. Notes ----- Required configuration parameters are specified in Other Parameters. Other Parameters ---------------- epsilon : float """ def __init__(self, config): QFunctionRobustGraspingPolicy.__init__(self, config) self._parse_config() def _parse_config(self): """Parses the parameters of the policy.""" self._epsilon = self.config["epsilon"] @property def epsilon(self): return self._epsilon @epsilon.setter def epsilon(self, val): self._epsilon = val def greedy_action(self, state): """Plans the grasp with the highest probability of success on the given RGB-D image. 
Attributes ---------- state : :obj:`RgbdImageState` Image to plan grasps on. Returns ------- :obj:`GraspAction` Grasp to execute. """ return CrossEntropyRobustGraspingPolicy.action(self, state) def _action(self, state): """Plans the grasp with the highest probability of success on the given RGB-D image. Attributes ---------- state : :obj:`RgbdImageState` Image to plan grasps on. Returns ------- :obj:`GraspAction` Grasp to execute. """ # Take the greedy action with prob 1-epsilon. if np.random.rand() > self.epsilon: self._logger.debug("Taking greedy action") return CrossEntropyRobustGraspingPolicy.action(self, state) # Otherwise take a random action. self._logger.debug("Taking random action") # Check valid input. if not isinstance(state, RgbdImageState): raise ValueError("Must provide an RGB-D image state.") # Parse state. rgbd_im = state.rgbd_im camera_intr = state.camera_intr segmask = state.segmask # Sample random antipodal grasps. grasps = self._grasp_sampler.sample( rgbd_im, camera_intr, self._num_seed_samples, segmask=segmask, visualize=self.config["vis"]["grasp_sampling"], constraint_fn=self._grasp_constraint_fn, seed=self._seed) num_grasps = len(grasps) if num_grasps == 0: self._logger.warning("No valid grasps could be found") raise NoValidGraspsException() # Choose a grasp uniformly at random. grasp_ind = np.random.choice(num_grasps, size=1)[0] grasp = grasps[grasp_ind] depth = grasp.depth # Create transformed image. image_tensor, pose_tensor = self.grasps_to_tensors([grasp], state) image = DepthImage(image_tensor[0, ...]) # Predict prob success. output_arr = self.gqcnn.predict(image_tensor, pose_tensor) q_value = output_arr[0, -1] # Visualize planned grasp. 
if self.config["vis"]["grasp_plan"]: scale_factor = self.gqcnn.im_width / self._crop_width scaled_camera_intr = camera_intr.resize(scale_factor) vis_grasp = Grasp2D(Point(image.center), 0.0, depth, width=self._gripper_width, camera_intr=scaled_camera_intr) vis.figure() vis.imshow(image) vis.grasp(vis_grasp, scale=1.5, show_center=False, show_axis=True) vis.title("Best Grasp: d=%.3f, q=%.3f" % (depth, q_value)) vis.show() # Return action. return GraspAction(grasp, q_value, image) class CompositeGraspingPolicy(Policy): """Grasping policy composed of multiple sub-policies. Attributes ---------- policies : dict mapping str to `gqcnn.GraspingPolicy` Key-value dict mapping policy names to grasping policies. """ def __init__(self, policies): self._policies = policies self._logger = Logger.get_logger(self.__class__.__name__, log_file=None, global_log_file=True) @property def policies(self): return self._policies def subpolicy(self, name): return self._policies[name] def set_constraint_fn(self, constraint_fn): for policy in self._policies: policy.set_constraint_fn(constraint_fn) class PriorityCompositeGraspingPolicy(CompositeGraspingPolicy): def __init__(self, policies, priority_list): # Check validity. for name in priority_list: if str(name) not in policies: raise ValueError( "Policy named %s is not in the list of policies!" % (name)) self._priority_list = priority_list CompositeGraspingPolicy.__init__(self, policies) @property def priority_list(self): return self._priority_list def action(self, state, policy_subset=None, min_q_value=-1.0): """Returns an action for a given state. 
class GreedyCompositeGraspingPolicy(CompositeGraspingPolicy):
    """Composite policy that queries every sub-policy and greedily returns
    the action with the highest q-value."""

    def __init__(self, policies):
        CompositeGraspingPolicy.__init__(self, policies)

    def action(self, state, policy_subset=None, min_q_value=-1.0):
        """Returns an action for a given state.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            State to plan on.
        policy_subset : list of str, optional
            If provided, only sub-policies with these names are queried.
        min_q_value : float, optional
            Accepted for interface compatibility with
            `PriorityCompositeGraspingPolicy.action`; not used here.

        Raises
        ------
        NoValidGraspsException
            If no sub-policy produces a valid action.
        """
        # Compute all possible actions.
        actions = []
        for name, policy in self.policies.items():
            if policy_subset is not None and name not in policy_subset:
                continue
            try:
                action = policy.action(state)
                action.policy_name = name
                # BUG FIX: was `actions.append()` (no argument), which
                # raised TypeError on the first successful sub-policy.
                actions.append(action)
            except NoValidGraspsException:
                # Best-effort: a failing sub-policy should not abort the
                # search over the remaining ones.
                pass

        if len(actions) == 0:
            raise NoValidGraspsException()

        # Rank based on q value.
        actions = sorted(actions, key=lambda x: x.q_value, reverse=True)
        return actions[0]

    def action_set(self, state, policy_subset=None, min_q_value=-1.0):
        """Returns the union of all sub-policies' action sets for a state.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            State to plan on.
        policy_subset : list of str, optional
            If provided, only sub-policies with these names are queried.
        min_q_value : float, optional
            Accepted for interface compatibility; not used here.

        Raises
        ------
        NoValidGraspsException
            If no sub-policy produces any valid actions.
        """
        actions = []
        q_values = []
        for name, policy in self.policies.items():
            if policy_subset is not None and name not in policy_subset:
                continue
            try:
                # Consistency fix: use the loop's `policy` instead of
                # re-indexing `self.policies[name]` (same object).
                action_set, q_vals = policy.action_set(state)
                for action in action_set:
                    action.policy_name = name
                actions.extend(action_set)
                q_values.extend(q_vals)
            except NoValidGraspsException:
                continue

        # BUG FIX: `actions` is always a list, so the old check
        # `if actions is None` could never fire and the all-failed case
        # silently returned empty lists.
        if not actions:
            raise NoValidGraspsException()
        return actions, q_values
REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Factory functions to obtain `GQCNN`/`FCGQCNN` class based on backend. Author: Vishal Satish """ from .tf import GQCNNTF, FCGQCNNTF from autolab_core import Logger def get_gqcnn_model(backend="tf", verbose=True): """Get the GQ-CNN model for the provided backend. Note: Currently only TensorFlow is supported. Parameters ---------- backend : str The backend to use, currently only "tf" is supported. verbose : bool Whether or not to log initialization output to `stdout`. Returns ------- :obj:`gqcnn.model.tf.GQCNNTF` GQ-CNN model with TensorFlow backend. """ # Set up logger. logger = Logger.get_logger("GQCNNModelFactory", silence=(not verbose)) # Return desired GQ-CNN instance based on backend. if backend == "tf": logger.info("Initializing GQ-CNN with Tensorflow as backend...") return GQCNNTF else: raise ValueError("Invalid backend: {}".format(backend)) def get_fc_gqcnn_model(backend="tf", verbose=True): """Get the FC-GQ-CNN model for the provided backend. Note: Currently only TensorFlow is supported. Parameters ---------- backend : str The backend to use, currently only "tf" is supported. verbose : bool Whether or not to log initialization output to `stdout`. Returns ------- :obj:`gqcnn.model.tf.FCGQCNNTF` FC-GQ-CNN model with TensorFlow backend. """ # Set up logger. logger = Logger.get_logger("FCGQCNNModelFactory", silence=(not verbose)) # Return desired Fully-Convolutional GQ-CNN instance based on backend. if backend == "tf": logger.info("Initializing FC-GQ-CNN with Tensorflow as backend...") return FCGQCNNTF else: raise ValueError("Invalid backend: {}".format(backend)) ================================================ FILE: gqcnn/model/tf/__init__.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. 
Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. """ from .network_tf import GQCNNTF from .fc_network_tf import FCGQCNNTF __all__ = ["GQCNNTF", "FCGQCNNTF"] ================================================ FILE: gqcnn/model/tf/fc_network_tf.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. 
class FCGQCNNTF(GQCNNTF):
    """FC-GQ-CNN network implemented in Tensorflow.

    Converts the fully-connected layers of a pre-trained GQ-CNN into
    1x1 convolutions so the network can be evaluated densely over a full
    image in a single forward pass.

    Note
    ----
    FC-GQ-CNNs are never directly trained, but instead a pre-trained GQ-CNN
    is converted to an FC-GQ-CNN at inference time.
    """

    def __init__(self, gqcnn_config, fc_config, verbose=True, log_file=None):
        """
        Parameters
        ----------
        gqcnn_config : dict
            Python dictionary of pre-trained GQ-CNN model configuration
            parameters.
        fc_config : dict
            Python dictionary of FC-GQ-CNN model configuration parameters.
        verbose : bool
            Whether or not to log model output to `stdout`.
            NOTE(review): `verbose` is accepted but not forwarded to the
            parent constructor below — confirm whether this is intentional.
        log_file : str
            If provided, model output will also be logged to this file.
        """
        super(FCGQCNNTF, self).__init__(gqcnn_config, log_file=log_file)
        super(FCGQCNNTF, self)._parse_config(gqcnn_config)
        # We call the child `_parse_config` again (even though it gets called
        # in the parent `GQCNNTF` constructor) because the call to the parent
        # `_parse_config` overwrites the first call.
        self._parse_config(fc_config)

        # Check that conv layers of GQ-CNN were trained with VALID padding.
        # SAME padding would change output sizes and break the fc->conv
        # weight reshape performed in `_build_fully_conv_layer`.
        for layer_name, layer_config in self._architecture["im_stream"].items(
        ):
            if layer_config["type"] == "conv":
                invalid_pad_msg = ("GQ-CNN used for FC-GQ-CNN must have VALID"
                                   " padding for conv layers. Found layer: {}"
                                   " with padding: {}")
                assert layer_config["pad"] == "VALID", invalid_pad_msg.format(
                    layer_name, layer_config["pad"])

    @staticmethod
    def load(model_dir, fc_config, log_file=None):
        """Load an FC-GQ-CNN from a pre-trained GQ-CNN.

        Parameters
        ----------
        model_dir : str
            Path to pre-trained GQ-CNN model.
        fc_config : dict
            Python dictionary of FC-GQ-CNN model configuration parameters.
        log_file : str
            If provided, model output will also be logged to this file.

        Returns
        -------
        :obj:`FCGQCNNTF`
            Initialized FC-GQ-CNN.

        Raises
        ------
        ValueError
            If the saved training mode is neither classification nor
            regression.
        """
        # Read the training config saved with the pre-trained model.
        config_file = os.path.join(model_dir, GQCNNFilenames.SAVED_CFG)
        with open(config_file) as data_file:
            train_config = json.load(data_file, object_pairs_hook=OrderedDict)

        gqcnn_config = train_config["gqcnn"]

        # Initialize weights and Tensorflow network.
        fcgqcnn = FCGQCNNTF(gqcnn_config, fc_config, log_file=log_file)
        fcgqcnn.init_weights_file(
            os.path.join(model_dir, GQCNNFilenames.FINAL_MODEL))
        fcgqcnn.init_mean_and_std(model_dir)
        # Classification nets need a softmax on the output; regression
        # nets return the raw output.
        training_mode = train_config["training_mode"]
        if training_mode == TrainingMode.CLASSIFICATION:
            fcgqcnn.initialize_network(add_softmax=True)
        elif training_mode == TrainingMode.REGRESSION:
            fcgqcnn.initialize_network()
        else:
            raise ValueError("Invalid training mode: {}".format(training_mode))
        return fcgqcnn

    def _parse_config(self, cfg):
        # Override GQ-CNN image height and width.
        # The FC network runs on full images rather than crops.
        self._im_width = cfg["im_width"]
        self._im_height = cfg["im_height"]

    def _pack(self, dim_h, dim_w, data, vector=False):
        """Tile `data` spatially to a dim_h x dim_w map (for bias/pose adds).

        With `vector=True` the input is a rank-1 tensor; otherwise the
        input is assumed to be batched with features in dim 1 —
        TODO confirm against callers.
        """
        if vector:
            # First reshape vector into 3-dimensional tensor.
            reshaped = tf.reshape(data, tf.concat([[1, 1], tf.shape(data)],
                                                  0))
            # Then tile into tensor of shape dim_h x dim_w x `data.dim0`.
            packed = tf.tile(reshaped, [dim_h, dim_w, 1])
        else:
            # First reshape second dimension of tensor into 3-dimensional
            # tensor.
            reshaped = tf.reshape(
                data,
                tf.concat([tf.shape(data)[0:1], [1, 1],
                           tf.shape(data)[1:]], 0))
            # Then tile into tensor of shape
            # bsize=1 x dim_h x dim_w x `data.dim1`.
            packed = tf.tile(reshaped, [1, dim_h, dim_w, 1])
        return packed

    def _build_fully_conv_layer(self,
                                input_node,
                                filter_dim,
                                fc_name,
                                final_fc_layer=False):
        """Rebuild a pre-trained fc layer as an equivalent conv layer.

        The fc weight matrix is reshaped into a filter_dim x filter_dim
        convolution kernel; the leaky-ReLU activation is skipped for the
        network's final layer.
        """
        self._logger.info(
            "Converting fc layer {} to fully convolutional...".format(
                fc_name))

        # Create new set of weights by reshaping fully connected layer
        # weights.
        fcW = self._weights.weights["{}_weights".format(fc_name)]
        convW = tf.Variable(tf.reshape(
            fcW,
            tf.concat([[filter_dim, filter_dim],
                       [tf.shape(fcW)[0] // (filter_dim * filter_dim)],
                       tf.shape(fcW)[1:]], 0)),
                            name="{}_fully_conv_weights".format(fc_name))
        self._weights.weights["{}_fully_conv_weights".format(fc_name)] = convW

        # Get bias.
        convb = self._weights.weights["{}_bias".format(fc_name)]

        # Compute conv out (note that we use padding="VALID" here because we
        # want an output size of 1 x 1 x num_filts for the original input
        # size).
        convh = tf.nn.conv2d(input_node,
                             convW,
                             strides=[1, 1, 1, 1],
                             padding="VALID")

        # Pack bias into tensor of shape `tf.shape(convh)`.
        # TODO(vsatish): This is unnecessary and can be removed for potential
        # performance improvements.
        bias_packed = self._pack(tf.shape(convh)[1],
                                 tf.shape(convh)[2],
                                 convb,
                                 vector=True)

        # Add bias term.
        convh = convh + bias_packed

        # Apply activation.
        if not final_fc_layer:
            convh = self._leaky_relu(convh, alpha=self._relu_coeff)

        # Add output to feature_dict.
        self._feature_tensors[fc_name] = convh

        return convh

    def _build_fully_conv_merge_layer(self, input_node_im, input_node_pose,
                                      filter_dim, fc_name):
        """Rebuild a pre-trained fc merge layer (image + pose streams) as a
        convolution over the image stream plus a spatially-tiled pose term.
        """
        self._logger.info(
            "Converting fc merge layer {} to fully convolutional...".format(
                fc_name))

        # Create new set of weights for image stream by reshaping
        # fully-connected layer weights.
        fcW_im = self._weights.weights["{}_input_1_weights".format(fc_name)]
        convW = tf.Variable(tf.reshape(
            fcW_im,
            tf.concat([[filter_dim, filter_dim],
                       [tf.shape(fcW_im)[0] // (filter_dim * filter_dim)],
                       tf.shape(fcW_im)[1:]], 0)),
                            name="{}_im_fully_conv_weights".format(fc_name))
        self._weights.weights["{}_im_fully_conv_weights".format(
            fc_name)] = convW

        # Compute im stream conv out (note that we use padding="VALID" here
        # because we want an output size of 1 x 1 x num_filts for the original
        # input size).
        convh_im = tf.nn.conv2d(input_node_im,
                                convW,
                                strides=[1, 1, 1, 1],
                                padding="VALID")

        # Get pose stream fully-connected weights.
        fcW_pose = self._weights.weights["{}_input_2_weights".format(fc_name)]

        # Compute matmul for pose stream.
        pose_out = tf.matmul(input_node_pose, fcW_pose)

        # Pack pose_out into a tensor of shape=tf.shape(convh_im).
        # TODO(vsatish): This is unnecessary and can be removed for potential
        # performance improvements.
        pose_packed = self._pack(
            tf.shape(convh_im)[1],
            tf.shape(convh_im)[2], pose_out)

        # Add the im and pose tensors.
        convh = convh_im + pose_packed

        # Pack bias.
        # TODO(vsatish): This is unnecessary and can be removed for potential
        # performance improvements.
        fc_bias = self._weights.weights["{}_bias".format(fc_name)]
        bias_packed = self._pack(tf.shape(convh_im)[1],
                                 tf.shape(convh_im)[2],
                                 fc_bias,
                                 vector=True)

        # Add bias and apply activation.
        convh = self._leaky_relu(convh + bias_packed, alpha=self._relu_coeff)
        return convh

    def _build_im_stream(self,
                         input_node,
                         input_pose_node,
                         input_height,
                         input_width,
                         input_channels,
                         drop_rate,
                         layers,
                         only_stream=False):
        """Build the image stream, converting fc layers to fully-conv.

        `filter_dim` tracks the receptive-field size a fc layer must cover
        and shrinks through each conv/pool layer. Returns the output node
        and -1 (fan-in is not tracked for the fully-conv stream).
        """
        self._logger.info("Building Image Stream...")

        if self._input_depth_mode == InputDepthMode.SUB:
            # Subtract the gripper depth from the image and normalize.
            sub_mean = tf.constant(self._im_depth_sub_mean, dtype=tf.float32)
            sub_std = tf.constant(self._im_depth_sub_std, dtype=tf.float32)
            sub_im = tf.subtract(
                input_node,
                tf.tile(
                    tf.reshape(input_pose_node, tf.constant((-1, 1, 1, 1))),
                    tf.constant((1, input_height, input_width, 1))))
            norm_sub_im = tf.div(tf.subtract(sub_im, sub_mean), sub_std)
            input_node = norm_sub_im

        output_node = input_node
        prev_layer = "start"  # Dummy placeholder.
        filter_dim = self._train_im_width
        last_index = len(layers) - 1
        for layer_index, (layer_name,
                          layer_config) in enumerate(layers.items()):
            layer_type = layer_config["type"]
            if layer_type == "conv":
                if prev_layer == "fc":
                    raise ValueError("Cannot have conv layer after fc layer!")
                output_node, input_height, input_width, input_channels = \
                    self._build_conv_layer(
                        output_node,
                        input_height,
                        input_width,
                        input_channels,
                        layer_config["filt_dim"],
                        layer_config["filt_dim"],
                        layer_config["num_filt"],
                        layer_config["pool_stride"],
                        layer_config["pool_stride"],
                        layer_config["pool_size"],
                        layer_name,
                        norm=layer_config["norm"],
                        pad=layer_config["pad"])
                prev_layer = layer_type
                # Track the remaining receptive-field size for the eventual
                # fc->conv conversion.
                if layer_config["pad"] == "SAME":
                    filter_dim //= layer_config["pool_stride"]
                else:
                    filter_dim = ((filter_dim - layer_config["filt_dim"]) //
                                  layer_config["pool_stride"]) + 1
            elif layer_type == "fc":
                if layer_index == last_index and only_stream:
                    output_node = self._build_fully_conv_layer(
                        output_node,
                        filter_dim,
                        layer_name,
                        final_fc_layer=True)
                else:
                    output_node = self._build_fully_conv_layer(
                        output_node, filter_dim, layer_name)
                prev_layer = layer_type
                # Because fully-convolutional layers at this point in the
                # network have a filter_dim of 1.
                filter_dim = 1
            elif layer_type == "pc":
                raise ValueError(
                    "Cannot have pose connected layer in image stream!")
            elif layer_type == "fc_merge":
                raise ValueError("Cannot have merge layer in image stream!")
            else:
                raise ValueError(
                    "Unsupported layer type: {}".format(layer_type))
        return output_node, -1

    def _build_merge_stream(self, input_stream_1, input_stream_2, fan_in_1,
                            fan_in_2, drop_rate, layers):
        """Build the merge stream, converting fc layers to fully-conv.

        Returns the output node and -1 (fan-in is not tracked for the
        fully-conv stream).
        """
        self._logger.info("Building Merge Stream...")

        # First check if first layer is a merge layer.
        if layers[list(layers)[0]]["type"] != "fc_merge":
            raise ValueError(
                "First layer in merge stream must be of type fc_merge!")

        last_index = len(layers) - 1
        # Because fully-convolutional layers at this point in the network have
        # a filter_dim of 1.
        filter_dim = 1
        fan_in = -1
        output_node = None  # Will be overridden.
        for layer_index, (layer_name,
                          layer_config) in enumerate(layers.items()):
            layer_type = layer_config["type"]
            if layer_type == "conv":
                raise ValueError("Cannot have conv layer in merge stream!")
            elif layer_type == "fc":
                if layer_index == last_index:
                    output_node = self._build_fully_conv_layer(
                        output_node,
                        filter_dim,
                        layer_name,
                        final_fc_layer=True)
                else:
                    output_node = self._build_fully_conv_layer(
                        output_node, filter_dim, layer_name)
            elif layer_type == "pc":
                raise ValueError(
                    "Cannot have pose connected layer in merge stream!")
            elif layer_type == "fc_merge":
                output_node = self._build_fully_conv_merge_layer(
                    input_stream_1, input_stream_2, filter_dim, layer_name)
            else:
                raise ValueError(
                    "Unsupported layer type: {}".format(layer_type))
        return output_node, fan_in
Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. GQ-CNN network implemented in Tensorflow. 
class GQCNNWeights(object):
    """Helper struct for storing network weights."""

    def __init__(self):
        # Map from weight/bias name to TF variable.
        self.weights = {}


class GQCNNTF(object):
    """GQ-CNN network implemented in Tensorflow."""

    def __init__(self, gqcnn_config, verbose=True, log_file=None):
        """
        Parameters
        ----------
        gqcnn_config : dict
            Python dictionary of model configuration parameters.
        verbose : bool
            Whether or not to log model output to `stdout`.
        log_file : str
            If provided, model output will also be logged to this file.
        """
        # Session is created lazily; graph holds all network ops.
        self._sess = None
        self._graph = tf.Graph()

        # Set up logger.
        self._logger = Logger.get_logger(self.__class__.__name__,
                                         log_file=log_file,
                                         silence=(not verbose),
                                         global_log_file=verbose)

        self._weights = GQCNNWeights()
        self._parse_config(gqcnn_config)

    @staticmethod
    def load(model_dir, verbose=True, log_file=None):
        """Instantiate a trained GQ-CNN for fine-tuning or inference.

        Parameters
        ----------
        model_dir : str
            Path to trained GQ-CNN model.
        verbose : bool
            Whether or not to log model output to `stdout`.
        log_file : str
            If provided, model output will also be logged to this file.

        Returns
        -------
        :obj:`GQCNNTF`
            Initialized GQ-CNN.

        Raises
        ------
        ValueError
            If the saved training mode is neither classification nor
            regression.
        """
        config_file = os.path.join(model_dir, GQCNNFilenames.SAVED_CFG)
        with open(config_file) as data_file:
            train_config = json.load(data_file, object_pairs_hook=OrderedDict)

        # Support for legacy configs.
        try:
            gqcnn_config = train_config["gqcnn"]
        except KeyError:
            gqcnn_config = train_config["gqcnn_config"]

            # Convert old networks to new flexible arch format.
            gqcnn_config["debug"] = 0
            gqcnn_config["seed"] = 0
            # Legacy networks had no angular support.
            gqcnn_config["num_angular_bins"] = 0
            # Legacy networks only supported depth integration through pose
            # stream.
            gqcnn_config["input_depth_mode"] = InputDepthMode.POSE_STREAM
            arch_config = gqcnn_config["architecture"]
            if "im_stream" not in arch_config:
                new_arch_config = OrderedDict()
                new_arch_config["im_stream"] = OrderedDict()
                new_arch_config["pose_stream"] = OrderedDict()
                new_arch_config["merge_stream"] = OrderedDict()

                def _convert_conv_layer(layer_name):
                    # Copy a legacy conv layer config, tagging its type and
                    # padding ("SAME" unless explicitly recorded).
                    new_arch_config["im_stream"][layer_name] = arch_config[
                        layer_name]
                    new_arch_config["im_stream"][layer_name]["type"] = "conv"
                    new_arch_config["im_stream"][layer_name]["pad"] = "SAME"
                    if "padding" in arch_config[layer_name]:
                        new_arch_config["im_stream"][layer_name][
                            "pad"] = arch_config[layer_name]["padding"]

                # First two conv blocks are required in legacy
                # architectures (missing layers raise KeyError, matching
                # the old behavior).
                for layer_name in ("conv1_1", "conv1_2", "conv2_1",
                                   "conv2_2"):
                    _convert_conv_layer(layer_name)
                # The third conv block is optional.
                for layer_name in ("conv3_1", "conv3_2"):
                    if layer_name in arch_config:
                        _convert_conv_layer(layer_name)

                # Remaining layers: (stream, name, new type, optional).
                # `pc2` is the only optional non-conv layer.
                for stream, layer_name, layer_type, optional in (
                        ("im_stream", "fc3", "fc", False),
                        ("pose_stream", "pc1", "pc", False),
                        ("pose_stream", "pc2", "pc", True),
                        ("merge_stream", "fc4", "fc_merge", False),
                        ("merge_stream", "fc5", "fc", False)):
                    if optional and layer_name not in arch_config:
                        continue
                    new_arch_config[stream][layer_name] = arch_config[
                        layer_name]
                    new_arch_config[stream][layer_name]["type"] = layer_type

                gqcnn_config["architecture"] = new_arch_config

        # Initialize weights and Tensorflow network.
        gqcnn = GQCNNTF(gqcnn_config, verbose=verbose, log_file=log_file)
        gqcnn.init_weights_file(
            os.path.join(model_dir, GQCNNFilenames.FINAL_MODEL))
        gqcnn.init_mean_and_std(model_dir)
        # Classification nets need a softmax on the output; regression nets
        # return the raw output.
        training_mode = train_config["training_mode"]
        if training_mode == TrainingMode.CLASSIFICATION:
            gqcnn.initialize_network(add_softmax=True)
        elif training_mode == TrainingMode.REGRESSION:
            gqcnn.initialize_network()
        else:
            raise ValueError("Invalid training mode: {}".format(training_mode))
        return gqcnn
if len(self._pose_mean.shape ) > 1 and self._pose_mean.shape[1] == self._pose_dim: self._pose_mean = self._pose_mean[0, :] self._pose_std = self._pose_std[0, :] else: self._pose_mean = read_pose_data(self._pose_mean, self._gripper_mode) self._pose_std = read_pose_data(self._pose_std, self._gripper_mode) elif self._input_depth_mode == InputDepthMode.SUB: self._im_depth_sub_mean = np.load( os.path.join(model_dir, GQCNNFilenames.IM_DEPTH_SUB_MEAN)) self._im_depth_sub_std = np.load( os.path.join(model_dir, GQCNNFilenames.IM_DEPTH_SUB_STD)) elif self._input_depth_mode == InputDepthMode.IM_ONLY: self._im_mean = np.load( os.path.join(model_dir, GQCNNFilenames.IM_MEAN)) self._im_std = np.load( os.path.join(model_dir, GQCNNFilenames.IM_STD)) else: raise ValueError("Unsupported input depth mode: {}".format( self._input_depth_mode)) def set_base_network(self, model_dir): """Initialize network weights for the base network. Used during fine-tuning. Parameters ---------- model_dir : str Path to pre-trained GQ-CNN model. """ # Check architecture. if "base_model" not in self._architecture: self._logger.warning( "Architecuture has no base model. The network has not been" " modified.") return False base_model_config = self._architecture["base_model"] output_layer = base_model_config["output_layer"] # Read model. ckpt_file = os.path.join(model_dir, GQCNNFilenames.FINAL_MODEL) config_file = os.path.join(model_dir, GQCNNFilenames.SAVED_ARCH) base_arch = json.load(open(config_file, "r"), object_pairs_hook=OrderedDict) # Read base layer names. 
self._base_layer_names = [] found_base_layer = False use_legacy = not ("im_stream" in base_arch) if use_legacy: layer_iter = iter(base_arch) while not found_base_layer: layer_name = next(layer_iter) self._base_layer_names.append(layer_name) if layer_name == output_layer: found_base_layer = True else: stream_iter = iter(base_arch) while not found_base_layer: stream_name = next(stream_iter) stream_arch = base_arch[stream_name] layer_iter = iter(stream_arch) stop = False while not found_base_layer and not stop: try: layer_name = next(layer_iter) self._base_layer_names.append(layer_name) if layer_name == output_layer: found_base_layer = True except StopIteration: stop = True with self._graph.as_default(): # Create new tf checkpoint reader. reader = tf.train.NewCheckpointReader(ckpt_file) # Create empty weights object. self._weights = GQCNNWeights() # Read/generate weight/bias variable names. ckpt_vars = tcf.list_variables(ckpt_file) full_var_names = [] short_names = [] for variable, shape in ckpt_vars: full_var_names.append(variable) short_names.append(variable.split("/")[-1]) # Load variables. for full_var_name, short_name in zip(full_var_names, short_names): # Check valid weights. layer_name = weight_name_to_layer_name(short_name) # Add weights. if layer_name in self._base_layer_names: self._weights.weights[short_name] = tf.Variable( reader.get_tensor(full_var_name), name=full_var_name) def init_weights_file(self, ckpt_file): """Load trained GQ-CNN weights. Parameters ---------- ckpt_file : str Tensorflow checkpoint file from which to load model weights. """ with self._graph.as_default(): # Create new tf checkpoint reader. reader = tf.train.NewCheckpointReader(ckpt_file) # Create empty weight object. self._weights = GQCNNWeights() # Read/generate weight/bias variable names. 
ckpt_vars = tcf.list_variables(ckpt_file) full_var_names = [] short_names = [] for variable, shape in ckpt_vars: full_var_names.append(variable) short_names.append(variable.split("/")[-1]) # Load variables. for full_var_name, short_name in zip(full_var_names, short_names): self._weights.weights[short_name] = tf.Variable( reader.get_tensor(full_var_name), name=full_var_name) def _parse_config(self, gqcnn_config): """Parse configuration file. Parameters ---------- gqcnn_config : dict Python dictionary of model configuration parameters. """ # Parse GQ-CNN config. # Load tensor params. self._batch_size = gqcnn_config["batch_size"] self._train_im_height = gqcnn_config["im_height"] self._train_im_width = gqcnn_config["im_width"] self._im_height = self._train_im_height self._im_width = self._train_im_width self._num_channels = gqcnn_config["im_channels"] try: self._gripper_mode = gqcnn_config["gripper_mode"] except KeyError: # Legacy support. self._input_data_mode = gqcnn_config["input_data_mode"] if self._input_data_mode == "tf_image": self._gripper_mode = GripperMode.LEGACY_PARALLEL_JAW elif self._input_data_mode == "tf_image_suction": self._gripper_mode = GripperMode.LEGACY_SUCTION elif self._input_data_mode == "suction": self._gripper_mode = GripperMode.SUCTION elif self._input_data_mode == "multi_suction": self._gripper_mode = GripperMode.MULTI_SUCTION elif self._input_data_mode == "parallel_jaw": self._gripper_mode = GripperMode.PARALLEL_JAW else: raise ValueError( "Legacy input data mode: {} not supported!".format( self._input_data_mode)) self._logger.warning("Could not read gripper mode. Attempting" " legacy conversion to: {}".format( self._gripper_mode)) # Setup gripper pose dimension depending on gripper mode. self._pose_dim = pose_dim(self._gripper_mode) # Load architecture. self._architecture = gqcnn_config["architecture"] # Get input depth mode. self._input_depth_mode = InputDepthMode.POSE_STREAM # Legacy support. 
if "input_depth_mode" in gqcnn_config: self._input_depth_mode = gqcnn_config["input_depth_mode"] # Load network local response normalization layer constants. self._normalization_radius = gqcnn_config["radius"] self._normalization_alpha = gqcnn_config["alpha"] self._normalization_beta = gqcnn_config["beta"] self._normalization_bias = gqcnn_config["bias"] # Get ReLU coefficient. self._relu_coeff = 0.0 # Legacy support. if "relu_coeff" in gqcnn_config: self._relu_coeff = gqcnn_config["relu_coeff"] # Debugging. self._debug = gqcnn_config["debug"] self._rand_seed = gqcnn_config["seed"] # Initialize means and standard deviations to be 0 and 1, respectively. if self._input_depth_mode == InputDepthMode.POSE_STREAM: self._im_mean = 0 self._im_std = 1 self._pose_mean = np.zeros(self._pose_dim) self._pose_std = np.ones(self._pose_dim) elif self._input_depth_mode == InputDepthMode.SUB: self._im_depth_sub_mean = 0 self._im_depth_sub_std = 1 elif self._input_depth_mode == InputDepthMode.IM_ONLY: self._im_mean = 0 self._im_std = 1 # Get number of angular bins. self._angular_bins = 0 # Legacy support. if "angular_bins" in gqcnn_config: self._angular_bins = gqcnn_config["angular_bins"] # Get max angle. self._max_angle = np.pi if "max_angle" in gqcnn_config: self._max_angle = np.deg2rad(gqcnn_config["max_angle"]) # If using angular bins, make sure output size of final fully connected # layer is 2x number of angular bins (because of failure/success probs # for each bin). if self._angular_bins > 0: final_out_size = list( list(self._architecture.values())[-1].values())[-1]["out_size"] ang_mismatch_msg = ("When predicting angular outputs, output" " size of final fully connected layer must" " be 2x number of angular bins.") assert final_out_size == 2 * self._angular_bins, ang_mismatch_msg # Intermediate network feature handles. self._feature_tensors = {} # Base layer names for fine-tuning. 
self._base_layer_names = [] def initialize_network(self, train_im_node=None, train_pose_node=None, add_softmax=False, add_sigmoid=False): """Set up input placeholders and build network. Parameters ---------- train_im_node : :obj:`tf.placeholder` Images for training. train_pose_node : :obj:`tf.placeholder` Poses for training. add_softmax : bool Whether or not to add a softmax layer to output of network. add_sigmoid : bool Whether or not to add a sigmoid layer to output of network. """ with self._graph.as_default(): # Set TF random seed if debugging. if self._debug: tf.set_random_seed(self._rand_seed) # Setup input placeholders. if train_im_node is not None: # Training. self._input_im_node = tf.placeholder_with_default( train_im_node, (None, self._im_height, self._im_width, self._num_channels)) self._input_pose_node = tf.placeholder_with_default( train_pose_node, (None, self._pose_dim)) else: # Inference only using GQ-CNN instantiated from `GQCNNTF.load`. self._input_im_node = tf.placeholder( tf.float32, (self._batch_size, self._im_height, self._im_width, self._num_channels)) self._input_pose_node = tf.placeholder( tf.float32, (self._batch_size, self._pose_dim)) self._input_drop_rate_node = tf.placeholder_with_default( tf.constant(0.0), ()) # Build network. self._output_tensor = self._build_network( self._input_im_node, self._input_pose_node, self._input_drop_rate_node) # Add softmax function to output of network (this is optional # because 1) we might be doing regression or 2) we are training and # Tensorflow has an optimized cross-entropy loss with the softmax # already built-in). if add_softmax: self.add_softmax_to_output() # Add sigmoid function to output of network (for weighted # cross-entropy loss). if add_sigmoid: self.add_sigmoid_to_output() # Create feed tensors for prediction. 
self._input_im_arr = np.zeros((self._batch_size, self._im_height, self._im_width, self._num_channels)) self._input_pose_arr = np.zeros((self._batch_size, self._pose_dim)) def open_session(self): """Open Tensorflow session.""" if self._sess is not None: self._logger.warning("Found already initialized TF Session...") return self._sess self._logger.info("Initializing TF Session...") with self._graph.as_default(): init = tf.global_variables_initializer() self.tf_config = tf.ConfigProto() # Allow Tensorflow gpu growth so Tensorflow does not lock-up all # GPU memory. self.tf_config.gpu_options.allow_growth = True self._sess = tf.Session(graph=self._graph, config=self.tf_config) self._sess.run(init) return self._sess def close_session(self): """Close Tensorflow session.""" if self._sess is None: self._logger.warning("No TF Session to close...") return self._logger.info("Closing TF Session...") with self._graph.as_default(): self._sess.close() self._sess = None def __del__(self): """Destructor that basically just makes sure the Tensorflow session has been closed.""" if self._sess is not None: self.close_session() @property def input_depth_mode(self): return self._input_depth_mode @property def batch_size(self): return self._batch_size @property def im_height(self): return self._im_height @property def im_width(self): return self._im_width @property def num_channels(self): return self._num_channels @property def pose_dim(self): return self._pose_dim @property def gripper_mode(self): return self._gripper_mode @property def input_im_node(self): return self._input_im_node @property def input_pose_node(self): return self._input_pose_node @property def input_drop_rate_node(self): return self._input_drop_rate_node @property def output(self): return self._output_tensor @property def weights(self): return self._weights.weights @property def tf_graph(self): return self._graph @property def sess(self): return self._sess @property def angular_bins(self): return self._angular_bins 
@property def max_angle(self): return self._max_angle @property def stride(self): return reduce(operator.mul, [ layer["pool_stride"] for layer in self._architecture["im_stream"].values() if layer["type"] == "conv" ]) @property def filters(self): """Evaluate the filters of the first convolution layer. Returns ------- :obj:`numpy.ndarray` Filters (weights) from first convolution layer of the network. """ close_sess = False if self._sess is None: close_sess = True self.open_session() first_layer_name = list(self._architecture["im_stream"])[0] try: filters = self._sess.run( self._weights.weights["{}_weights".format(first_layer_name)]) except KeyError: # Legacy support. filters = self._sess.run( self._weights.weights["{}W".format(first_layer_name)]) if close_sess: self.close_session() return filters def set_batch_size(self, batch_size): """Update the batch size to be used for during inference. Parameters ---------- batch_size : int The new batch size. """ self._batch_size = batch_size def set_im_mean(self, im_mean): """Update image mean to be used for normalization during inference. Parameters ---------- im_mean : float The new image mean. """ self._im_mean = im_mean def get_im_mean(self): """Get the current image mean used for normalization during inference. Returns ------- : float The image mean. """ return self.im_mean def set_im_std(self, im_std): """Update image standard deviation to be used for normalization during inference. Parameters ---------- im_std : float The new image standard deviation. """ self._im_std = im_std def get_im_std(self): """Get the current image standard deviation to be used for normalization during inference. Returns ------- : float The image standard deviation. """ return self.im_std def set_pose_mean(self, pose_mean): """Update pose mean to be used for normalization during inference. Parameters ---------- pose_mean : :obj:`numpy.ndarray` The new pose mean. 
""" self._pose_mean = pose_mean def get_pose_mean(self): """Get the current pose mean to be used for normalization during inference. Returns ------- :obj:`numpy.ndarray` The pose mean. """ return self._pose_mean def set_pose_std(self, pose_std): """Update pose standard deviation to be used for normalization during inference. Parameters ---------- pose_std : :obj:`numpy.ndarray` The new pose standard deviation. """ self._pose_std = pose_std def get_pose_std(self): """Get the current pose standard deviation to be used for normalization during inference. Returns ------- :obj:`numpy.ndarray` The pose standard deviation. """ return self._pose_std def set_im_depth_sub_mean(self, im_depth_sub_mean): """Update mean of subtracted image and gripper depth to be used for normalization during inference. Parameters ---------- im_depth_sub_mean : float The new mean of subtracted image and gripper depth. """ self._im_depth_sub_mean = im_depth_sub_mean def set_im_depth_sub_std(self, im_depth_sub_std): """Update standard deviation of subtracted image and gripper depth to be used for normalization during inference. Parameters ---------- im_depth_sub_std : float The standard deviation of subtracted image and gripper depth. 
""" self._im_depth_sub_std = im_depth_sub_std def add_softmax_to_output(self): """Adds softmax to output of network.""" with tf.name_scope("softmax"): if self._angular_bins > 0: self._logger.info("Building Pair-wise Softmax Layer...") binwise_split_output = tf.split(self._output_tensor, self._angular_bins, axis=-1) binwise_split_output_soft = [ tf.nn.softmax(s) for s in binwise_split_output ] self._output_tensor = tf.concat(binwise_split_output_soft, -1) else: self._logger.info("Building Softmax Layer...") self._output_tensor = tf.nn.softmax(self._output_tensor) def add_sigmoid_to_output(self): """Adds sigmoid to output of network.""" with tf.name_scope("sigmoid"): self._logger.info("Building Sigmoid Layer...") self._output_tensor = tf.nn.sigmoid(self._output_tensor) def update_batch_size(self, batch_size): """Update the inference batch size. Parameters ---------- batch_size : float The new batch size. """ self._batch_size = batch_size def _predict(self, image_arr, pose_arr, verbose=False): """Query predictions from the network. Parameters ---------- image_arr : :obj:`numpy.ndarray` Input images. pose_arr : :obj:`numpy.ndarray` Input gripper poses. verbose : bool Whether or not to log progress to `stdout`, useful to turn off during training. """ # Get prediction start time. start_time = time.time() if verbose: self._logger.info("Predicting...") # Setup for prediction. num_batches = math.ceil(image_arr.shape[0] / self._batch_size) num_images = image_arr.shape[0] num_poses = pose_arr.shape[0] output_arr = None if num_images != num_poses: raise ValueError("Must provide same number of images as poses!") # Predict in batches. with self._graph.as_default(): if self._sess is None: raise RuntimeError( "No TF Session open. 
Please call open_session() first.") i = 0 batch_idx = 0 while i < num_images: if verbose: self._logger.info("Predicting batch {} of {}...".format( batch_idx, num_batches)) batch_idx += 1 dim = min(self._batch_size, num_images - i) cur_ind = i end_ind = cur_ind + dim if self._input_depth_mode == InputDepthMode.POSE_STREAM: self._input_im_arr[:dim, ...] = (image_arr[cur_ind:end_ind, ...] - self._im_mean) / self._im_std self._input_pose_arr[:dim, :] = ( pose_arr[cur_ind:end_ind, :] - self._pose_mean) / self._pose_std elif self._input_depth_mode == InputDepthMode.SUB: self._input_im_arr[:dim, ...] = image_arr[cur_ind:end_ind, ...] self._input_pose_arr[:dim, :] = pose_arr[ cur_ind:end_ind, :] elif self._input_depth_mode == InputDepthMode.IM_ONLY: self._input_im_arr[:dim, ...] = (image_arr[cur_ind:end_ind, ...] - self._im_mean) / self._im_std gqcnn_output = self._sess.run( self._output_tensor, feed_dict={ self._input_im_node: self._input_im_arr, self._input_pose_node: self._input_pose_arr }) # Allocate output tensor. if output_arr is None: output_arr = np.zeros([num_images] + list(gqcnn_output.shape[1:])) output_arr[cur_ind:end_ind, :] = gqcnn_output[:dim, :] i = end_ind # Get total prediction time. pred_time = time.time() - start_time if verbose: self._logger.info("Prediction took {} seconds.".format(pred_time)) return output_arr def predict(self, image_arr, pose_arr, verbose=False): """Predict the probability of grasp success given a depth image and gripper pose. Parameters ---------- image_arr : :obj:`numpy ndarray` 4D tensor of depth images. pose_arr : :obj:`numpy ndarray` Tensor of gripper poses. verbose : bool Whether or not to log progress to stdout, useful to turn off during training. """ return self._predict(image_arr, pose_arr, verbose=verbose) def featurize(self, image_arr, pose_arr=None, feature_layer="conv1_1", verbose=False): """Featurize a set of inputs. Parameters ---------- image_arr : :obj:`numpy ndarray` 4D tensor of depth images. 
pose_arr : :obj:`numpy ndarray` Optional tensor of gripper poses. feature_layer : str The network layer to featurize. verbose : bool Whether or not to log progress to `stdout`. """ # Get featurization start time. start_time = time.time() if verbose: self._logger.info("Featurizing...") if feature_layer not in self._feature_tensors: raise ValueError( "Feature layer: {} not recognized.".format(feature_layer)) # Setup for featurization. num_images = image_arr.shape[0] if pose_arr is not None: num_poses = pose_arr.shape[0] if num_images != num_poses: raise ValueError( "Must provide same number of images as poses!") output_arr = None # Featurize in batches. with self._graph.as_default(): if self._sess is None: raise RuntimeError( "No TF Session open. Please call open_session() first.") i = 0 while i < num_images: if verbose: self._logger.info("Featurizing {} of {}...".format( i, num_images)) dim = min(self._batch_size, num_images - i) cur_ind = i end_ind = cur_ind + dim self._input_im_arr[:dim, :, :, :] = ( image_arr[cur_ind:end_ind, :, :, :] - self._im_mean) / self._im_std if pose_arr is not None: self._input_pose_arr[:dim, :] = ( pose_arr[cur_ind:end_ind, :] - self._pose_mean) / self._pose_std if pose_arr is not None: gqcnn_output = self._sess.run( self._feature_tensors[feature_layer], feed_dict={ self._input_im_node: self._input_im_arr, self._input_pose_node: self._input_pose_arr }) else: gqcnn_output = self._sess.run( self._feature_tensors[feature_layer], feed_dict={self._input_im_node: self._input_im_arr}) if output_arr is None: output_arr = np.zeros([num_images] + list(gqcnn_output.shape[1:])) output_arr[cur_ind:end_ind, :] = gqcnn_output[:dim, :] i = end_ind if verbose: self._logger.info( "Featurization took {} seconds".format(time.time() - start_time)) # Truncate extraneous values off of end of `output_arr`. # TODO(vsatish): This isn't needed, right? 
output_arr = output_arr[:num_images] return output_arr def _leaky_relu(self, x, alpha=.1): return tf.maximum(alpha * x, x) def _build_conv_layer(self, input_node, input_height, input_width, input_channels, filter_h, filter_w, num_filt, pool_stride_h, pool_stride_w, pool_size, name, norm=False, pad="SAME"): self._logger.info("Building convolutional layer: {}...".format(name)) with tf.name_scope(name): # Initialize weights. if "{}_weights".format(name) in self._weights.weights: convW = self._weights.weights["{}_weights".format(name)] convb = self._weights.weights["{}_bias".format(name)] elif "{}W".format( name) in self._weights.weights: # Legacy support. self._logger.info( "Using old format for layer {}.".format(name)) convW = self._weights.weights["{}W".format(name)] convb = self._weights.weights["{}b".format(name)] else: self._logger.info("Reinitializing layer {}.".format(name)) convW_shape = [filter_h, filter_w, input_channels, num_filt] fan_in = filter_h * filter_w * input_channels std = np.sqrt(2 / fan_in) convW = tf.Variable(tf.truncated_normal(convW_shape, stddev=std), name="{}_weights".format(name)) convb = tf.Variable(tf.truncated_normal([num_filt], stddev=std), name="{}_bias".format(name)) self._weights.weights["{}_weights".format(name)] = convW self._weights.weights["{}_bias".format(name)] = convb if pad == "SAME": out_height = input_height // pool_stride_h out_width = input_width // pool_stride_w else: out_height = math.ceil( (input_height - filter_h + 1) / pool_stride_h) out_width = math.ceil( (input_width - filter_w + 1) / pool_stride_w) out_channels = num_filt # Build layer. 
convh = tf.nn.conv2d( input_node, convW, strides=[1, 1, 1, 1], padding=pad) + convb convh = self._leaky_relu(convh, alpha=self._relu_coeff) if norm: convh = tf.nn.local_response_normalization( convh, depth_radius=self._normalization_radius, alpha=self._normalization_alpha, beta=self._normalization_beta, bias=self._normalization_bias) pool = tf.nn.max_pool(convh, ksize=[1, pool_size, pool_size, 1], strides=[1, pool_stride_h, pool_stride_w, 1], padding="SAME") # Add output to feature dict. self._feature_tensors[name] = pool return pool, out_height, out_width, out_channels def _build_fc_layer(self, input_node, fan_in, out_size, name, input_is_multi, drop_rate, final_fc_layer=False): self._logger.info("Building fully connected layer: {}...".format(name)) # Initialize weights. if "{}_weights".format(name) in self._weights.weights: fcW = self._weights.weights["{}_weights".format(name)] fcb = self._weights.weights["{}_bias".format(name)] elif "{}W".format(name) in self._weights.weights: # Legacy support. self._logger.info("Using old format for layer {}.".format(name)) fcW = self._weights.weights["{}W".format(name)] fcb = self._weights.weights["{}b".format(name)] else: self._logger.info("Reinitializing layer {}.".format(name)) std = np.sqrt(2 / fan_in) fcW = tf.Variable(tf.truncated_normal([fan_in, out_size], stddev=std), name="{}_weights".format(name)) if final_fc_layer: fcb = tf.Variable(tf.constant(0.0, shape=[out_size]), name="{}_bias".format(name)) else: fcb = tf.Variable(tf.truncated_normal([out_size], stddev=std), name="{}_bias".format(name)) self._weights.weights["{}_weights".format(name)] = fcW self._weights.weights["{}_bias".format(name)] = fcb # Build layer. 
if input_is_multi: reduced_dim1 = reduce_shape(input_node.get_shape()) input_node = tf.reshape(input_node, [-1, reduced_dim1]) if final_fc_layer: fc = tf.matmul(input_node, fcW) + fcb else: fc = self._leaky_relu(tf.matmul(input_node, fcW) + fcb, alpha=self._relu_coeff) fc = tf.nn.dropout(fc, 1 - drop_rate) # Add output to feature dict. self._feature_tensors[name] = fc return fc, out_size # TODO(vsatish): This really doesn't need to it's own layer type...it does # the same thing as `_build_fc_layer`. def _build_pc_layer(self, input_node, fan_in, out_size, name): self._logger.info( "Building Fully Connected Pose Layer: {}...".format(name)) # Initialize weights. if "{}_weights".format(name) in self._weights.weights: pcW = self._weights.weights["{}_weights".format(name)] pcb = self._weights.weights["{}_bias".format(name)] elif "{}W".format(name) in self._weights.weights: # Legacy support. self._logger.info("Using old format for layer {}".format(name)) pcW = self._weights.weights["{}W".format(name)] pcb = self._weights.weights["{}b".format(name)] else: self._logger.info("Reinitializing layer {}".format(name)) std = np.sqrt(2 / fan_in) pcW = tf.Variable(tf.truncated_normal([fan_in, out_size], stddev=std), name="{}_weights".format(name)) pcb = tf.Variable(tf.truncated_normal([out_size], stddev=std), name="{}_bias".format(name)) self._weights.weights["{}_weights".format(name)] = pcW self._weights.weights["{}_bias".format(name)] = pcb # Build layer. pc = self._leaky_relu(tf.matmul(input_node, pcW) + pcb, alpha=self._relu_coeff) # Add output to feature dict. self._feature_tensors[name] = pc return pc, out_size def _build_fc_merge(self, input_fc_node_1, input_fc_node_2, fan_in_1, fan_in_2, out_size, drop_rate, name): self._logger.info("Building Merge Layer: {}...".format(name)) # Initialize weights. 
if "{}_input_1_weights".format(name) in self._weights.weights: input1W = self._weights.weights["{}_input_1_weights".format(name)] input2W = self._weights.weights["{}_input_2_weights".format(name)] fcb = self._weights.weights["{}_bias".format(name)] elif "{}W_im".format(name) in self._weights.weights: # Legacy support. self._logger.info("Using old format for layer {}.".format(name)) input1W = self._weights.weights["{}W_im".format(name)] input2W = self._weights.weights["{}W_pose".format(name)] fcb = self._weights.weights["{}b".format(name)] else: self._logger.info("Reinitializing layer {}.".format(name)) std = np.sqrt(2 / (fan_in_1 + fan_in_2)) input1W = tf.Variable(tf.truncated_normal([fan_in_1, out_size], stddev=std), name="{}_input_1_weights".format(name)) input2W = tf.Variable(tf.truncated_normal([fan_in_2, out_size], stddev=std), name="{}_input_2_weights".format(name)) fcb = tf.Variable(tf.truncated_normal([out_size], stddev=std), name="{}_bias".format(name)) self._weights.weights["{}_input_1_weights".format(name)] = input1W self._weights.weights["{}_input_2_weights".format(name)] = input2W self._weights.weights["{}_bias".format(name)] = fcb # Build layer. fc = self._leaky_relu(tf.matmul(input_fc_node_1, input1W) + tf.matmul(input_fc_node_2, input2W) + fcb, alpha=self._relu_coeff) fc = tf.nn.dropout(fc, 1 - drop_rate) # Add output to feature dict. 
    def _build_im_stream(self,
                         input_node,
                         input_pose_node,
                         input_height,
                         input_width,
                         input_channels,
                         drop_rate,
                         layers,
                         only_stream=False):
        """Build the image stream of the GQ-CNN graph.

        Parameters
        ----------
        input_node : :obj:`tf.Tensor`
            Batch of input images.
        input_pose_node : :obj:`tf.Tensor`
            Batch of gripper poses; only consumed here when the input depth
            mode is `InputDepthMode.SUB`, where the pose (gripper depth) is
            subtracted from every pixel of the image.
        input_height : int
            Height of the input images.
        input_width : int
            Width of the input images.
        input_channels : int
            Number of channels of the input images.
        drop_rate : :obj:`tf.Tensor`
            Drop rate forwarded to the fully-connected layers.
        layers : :obj:`collections.OrderedDict`
            Layer configurations for this stream, in network order.
        only_stream : bool
            True when this is the only stream of the network, in which case
            the last fc layer is built as the final (output) layer.

        Returns
        -------
        :obj:`tuple` of :obj:`tf.Tensor`, int
            Output tensor of the stream and its fan-out.
        """
        self._logger.info("Building Image Stream...")

        # In SUB mode, tile the scalar gripper depth over the image, subtract
        # it from the input, and normalize with the precomputed statistics.
        if self._input_depth_mode == InputDepthMode.SUB:
            sub_mean = tf.constant(self._im_depth_sub_mean, dtype=tf.float32)
            sub_std = tf.constant(self._im_depth_sub_std, dtype=tf.float32)
            sub_im = tf.subtract(
                input_node,
                tf.tile(
                    tf.reshape(input_pose_node, tf.constant((-1, 1, 1, 1))),
                    tf.constant((1, input_height, input_width, 1))))
            norm_sub_im = tf.div(tf.subtract(sub_im, sub_mean), sub_std)
            input_node = norm_sub_im

        output_node = input_node
        prev_layer = "start"  # Dummy placeholder.
        last_index = len(layers) - 1
        for layer_index, (layer_name,
                          layer_config) in enumerate(layers.items()):
            layer_type = layer_config["type"]
            if layer_type == "conv":
                # Conv layers may only appear before the first fc layer.
                if prev_layer == "fc":
                    raise ValueError("Cannot have conv layer after fc layer!")
                output_node, input_height, input_width, input_channels = \
                    self._build_conv_layer(
                        output_node,
                        input_height,
                        input_width,
                        input_channels,
                        layer_config["filt_dim"],
                        layer_config["filt_dim"],
                        layer_config["num_filt"],
                        layer_config["pool_stride"],
                        layer_config["pool_stride"],
                        layer_config["pool_size"],
                        layer_name,
                        norm=layer_config["norm"],
                        pad=layer_config["pad"])
                prev_layer = layer_type
            elif layer_type == "fc":
                # A zero-width fc layer is treated as disabled.
                if layer_config["out_size"] == 0:
                    continue
                prev_layer_is_conv = False
                if prev_layer == "conv":
                    # Transitioning from conv to fc: flatten the spatial
                    # dimensions into the fan-in.
                    prev_layer_is_conv = True
                    fan_in = input_height * input_width * input_channels
                if layer_index == last_index and only_stream:
                    output_node, fan_in = self._build_fc_layer(
                        output_node,
                        fan_in,
                        layer_config["out_size"],
                        layer_name,
                        prev_layer_is_conv,
                        drop_rate,
                        final_fc_layer=True)
                else:
                    output_node, fan_in = self._build_fc_layer(
                        output_node,
                        fan_in,
                        layer_config["out_size"],
                        layer_name,
                        prev_layer_is_conv,
                        drop_rate)
                prev_layer = layer_type
            elif layer_type == "pc":
                raise ValueError(
                    "Cannot have pose connected layer in image stream!")
            elif layer_type == "fc_merge":
                raise ValueError("Cannot have merge layer in image stream!")
            else:
                raise ValueError(
                    "Unsupported layer type: {}".format(layer_type))
        # NOTE(review): `fan_in` is only bound inside the fc branch -- if an
        # architecture's image stream contained no fc layer this would raise
        # NameError. Presumably all valid architectures end in fc; confirm.
        return output_node, fan_in

    def _build_pose_stream(self, input_node, fan_in, layers):
        """Build the pose stream of the GQ-CNN graph.

        Only pose-connected ("pc") layers are permitted in this stream.

        Parameters
        ----------
        input_node : :obj:`tf.Tensor`
            Batch of gripper poses.
        fan_in : int
            Fan-in of the input node.
        layers : :obj:`collections.OrderedDict`
            Layer configurations for this stream, in network order.

        Returns
        -------
        :obj:`tuple` of :obj:`tf.Tensor`, int
            Output tensor of the stream and its fan-out.
        """
        self._logger.info("Building Pose Stream...")
        output_node = input_node
        for layer_name, layer_config in layers.items():
            layer_type = layer_config["type"]
            if layer_type == "conv":
                raise ValueError("Cannot have conv layer in pose stream")
            elif layer_type == "fc":
                raise ValueError(
                    "Cannot have fully connected layer in pose stream")
            elif layer_type == "pc":
                # A zero-width pc layer is treated as disabled.
                if layer_config["out_size"] == 0:
                    continue
                output_node, fan_in = self._build_pc_layer(
                    output_node, fan_in, layer_config["out_size"], layer_name)
            elif layer_type == "fc_merge":
                raise ValueError("Cannot have merge layer in pose stream")
            else:
                raise ValueError(
                    "Unsupported layer type: {}".format(layer_type))
        return output_node, fan_in

    def _build_merge_stream(self, input_stream_1, input_stream_2, fan_in_1,
                            fan_in_2, drop_rate, layers):
        """Build the stream that merges the image and pose streams.

        The first layer must be of type "fc_merge"; subsequent layers must be
        fully connected, with the last one built as the network output.

        Parameters
        ----------
        input_stream_1 : :obj:`tf.Tensor`
            Output of the image stream.
        input_stream_2 : :obj:`tf.Tensor`
            Output of the pose stream.
        fan_in_1 : int
            Fan-out of the image stream.
        fan_in_2 : int
            Fan-out of the pose stream.
        drop_rate : :obj:`tf.Tensor`
            Drop rate forwarded to the fully-connected layers.
        layers : :obj:`collections.OrderedDict`
            Layer configurations for this stream, in network order.

        Returns
        -------
        :obj:`tuple` of :obj:`tf.Tensor`, int
            Output tensor of the stream and its fan-out.
        """
        self._logger.info("Building Merge Stream...")

        # First check if first layer is a merge layer.
        # TODO(vsatish): Can't we just get the first value because it's
        # ordered?
        if layers[list(layers)[0]]["type"] != "fc_merge":
            raise ValueError(
                "First layer in merge stream must be a fc_merge layer!")

        last_index = len(layers) - 1
        fan_in = -1
        output_node = None  # Will be overridden.
        for layer_index, (layer_name,
                          layer_config) in enumerate(layers.items()):
            layer_type = layer_config["type"]
            if layer_type == "conv":
                raise ValueError("Cannot have conv layer in merge stream!")
            elif layer_type == "fc":
                # A zero-width fc layer is treated as disabled.
                if layer_config["out_size"] == 0:
                    continue
                if layer_index == last_index:
                    output_node, fan_in = self._build_fc_layer(
                        output_node,
                        fan_in,
                        layer_config["out_size"],
                        layer_name,
                        False,
                        drop_rate,
                        final_fc_layer=True)
                else:
                    output_node, fan_in = self._build_fc_layer(
                        output_node, fan_in, layer_config["out_size"],
                        layer_name, False, drop_rate)
            elif layer_type == "pc":
                raise ValueError(
                    "Cannot have pose connected layer in merge stream!")
            elif layer_type == "fc_merge":
                if layer_config["out_size"] == 0:
                    continue
                output_node, fan_in = self._build_fc_merge(
                    input_stream_1, input_stream_2, fan_in_1, fan_in_2,
                    layer_config["out_size"], drop_rate, layer_name)
            else:
                raise ValueError(
                    "Unsupported layer type: {}".format(layer_type))
        return output_node, fan_in

    def _build_network(self, input_im_node, input_pose_node,
                       input_drop_rate_node):
        """Build GQ-CNN.

        Dispatches on the input depth mode: `POSE_STREAM` builds image, pose
        and merge streams; `SUB` and `IM_ONLY` build only the image stream.

        Parameters
        ----------
        input_im_node :obj:`tf.placeholder`
            Image placeholder.
        input_pose_node :obj:`tf.placeholder`
            Gripper pose placeholder.
        input_drop_rate_node :obj:`tf.placeholder`
            Drop rate placeholder.

        Returns
        -------
        :obj:`tf.Tensor`
            Tensor output of network.
        """
        self._logger.info("Building Network...")
        if self._input_depth_mode == InputDepthMode.POSE_STREAM:
            # All three streams are required in this mode.
            missing_stream_msg = ("When using input depth mode"
                                  " 'pose_stream', both pose stream and"
                                  " merge stream must be present!")
            assert "pose_stream" in self._architecture and \
                "merge_stream" in self._architecture, missing_stream_msg
            with tf.name_scope("im_stream"):
                output_im_stream, fan_out_im = self._build_im_stream(
                    input_im_node, input_pose_node, self._im_height,
                    self._im_width, self._num_channels, input_drop_rate_node,
                    self._architecture["im_stream"])
            with tf.name_scope("pose_stream"):
                output_pose_stream, fan_out_pose = self._build_pose_stream(
                    input_pose_node, self._pose_dim,
                    self._architecture["pose_stream"])
            with tf.name_scope("merge_stream"):
                return self._build_merge_stream(
                    output_im_stream, output_pose_stream, fan_out_im,
                    fan_out_pose, input_drop_rate_node,
                    self._architecture["merge_stream"])[0]
        elif self._input_depth_mode == InputDepthMode.SUB or \
                self._input_depth_mode == InputDepthMode.IM_ONLY:
            # Only the image stream may exist in these modes.
            extraneous_stream_msg = ("When using input depth mode '{}', only"
                                     " im stream is allowed!")
            assert not ("pose_stream" in self._architecture
                        or "merge_stream" in self._architecture), \
                extraneous_stream_msg.format(self._input_depth_mode)
            with tf.name_scope("im_stream"):
                return self._build_im_stream(input_im_node, input_pose_node,
                                             self._im_height, self._im_width,
                                             self._num_channels,
                                             input_drop_rate_node,
                                             self._architecture["im_stream"],
                                             only_stream=True)[0]
Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. """ from .search import GQCNNSearch __all__ = ["GQCNNSearch"] ================================================ FILE: gqcnn/search/enums.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. 
class TrialConstants(object):
    """Estimated per-trial resource footprints used by the resource manager
    when deciding how many trials fit on the machine."""
    TRIAL_CPU_LOAD = 300  # Decrease to get more aggressive CPU utilization.
    TRIAL_GPU_LOAD = 33  # Decrease to get more aggressive GPU utilization.
    # This really depends on model size (`TRIAL_GPU_LOAD` does too, but it's
    # not a hard limit per se). Ideally we would initialize models one-by-one
    # and monitor the space left, but because model initialization comes after
    # some metric calculation, we set this to be some upper bound based on the
    # largest model and do batch initializations from there.
    TRIAL_GPU_MEM = 2000


class SearchConstants(object):
    """Timing constants for the hyper-parameter search scheduling loop."""
    SEARCH_THREAD_SLEEP = 2  # Seconds between scheduler loop iterations.
    # Minimum seconds between successive attempts to schedule new trials.
    MIN_TIME_BETWEEN_SCHEDULE_ATTEMPTS = 20
CPU_LOAD_SAMPLE_INTERVAL = 4.0  # Seconds over which CPU load is averaged.
# This is a hack because it seems that psutil is returning a lower load than
# htop, which could be because htop takes into account queued tasks.
CPU_LOAD_OFFSET = 50
GPU_STAT_NUM_SAMPLES = 4  # Number of GPU load/memory samples to average.
GPU_STAT_SAMPLE_INTERVAL = 1.0  # Seconds between successive GPU samples.


class ResourceManager(object):
    """Samples CPU/GPU utilization and decides how many hyper-parameter
    search trials can be scheduled, and on which GPU devices."""

    def __init__(self,
                 trial_cpu_load,
                 trial_gpu_load,
                 trial_gpu_mem,
                 monitor_cpu=True,
                 monitor_gpu=True,
                 cpu_cores=None,
                 gpu_devices=None):
        """
        Parameters
        ----------
        trial_cpu_load : float
            Estimated CPU load (percent, summed over cores) of one trial.
        trial_gpu_load : float
            Estimated GPU load (percent) of one trial.
        trial_gpu_mem : float
            Estimated GPU memory (MiB) of one trial.
        monitor_cpu : bool
            Whether to sample CPU load before scheduling.
        monitor_gpu : bool
            Whether to sample GPU load/memory before scheduling.
        cpu_cores : :obj:`list` of int
            CPU cores to use; all available cores if empty or `None`.
        gpu_devices : :obj:`list` of int
            GPU device ids to use; all available devices if empty or `None`.
        """
        # `None` sentinels instead of mutable `[]` defaults.
        if cpu_cores is None:
            cpu_cores = []
        if gpu_devices is None:
            gpu_devices = []

        self._monitor_cpu = monitor_cpu
        self._monitor_gpu = monitor_gpu

        # Set up logger.
        self._logger = Logger.get_logger(self.__class__.__name__)

        if not monitor_cpu:
            self._logger.warning(
                "Not monitoring cpu resources is not advised.")
        if not monitor_gpu:
            self._logger.warning(
                "Not monitoring gpu resources is not advised.")

        self._trial_cpu_load = trial_cpu_load
        self._trial_gpu_load = trial_gpu_load
        self._trial_gpu_mem = trial_gpu_mem

        self._cpu_cores = cpu_cores
        if len(self._cpu_cores) == 0:
            self._logger.warning(
                "No CPU cores specified-proceeding to use all available cores."
            )
            self._cpu_cores = range(psutil.cpu_count())
        self._cpu_count = len(self._cpu_cores)

        self._gpu_devices = gpu_devices
        if len(self._gpu_devices) == 0:
            no_gpus_specified_msg = ("No GPU devices specified-proceeding to"
                                     " use all available devices.")
            self._logger.warning(no_gpus_specified_msg)
            self._gpu_devices = range(len(GPUtil.getGPUs()))

    @property
    def cpu_cores(self):
        return self._cpu_cores

    def _get_cpu_load(self):
        """Return the summed load (percent) over the managed cores plus
        `CPU_LOAD_OFFSET`."""
        self._logger.info("Sampling cpu load...")
        cpu_core_loads = psutil.cpu_percent(interval=CPU_LOAD_SAMPLE_INTERVAL,
                                            percpu=True)
        total_load = 0
        for core in self._cpu_cores:
            total_load += cpu_core_loads[core]
        return total_load + CPU_LOAD_OFFSET

    def _get_gpu_stats(self):
        """Sample GPU utilization `GPU_STAT_NUM_SAMPLES` times.

        Returns
        -------
        :obj:`tuple` of :obj:`list`
            Per-device total memory (MiB), mean load (percent), and mean
            used memory (MiB). Devices not managed by this instance are
            reported as fully utilized so they are never chosen.
        """
        self._logger.info("Sampling gpu memory and load...")
        gpu_samples = []
        for _ in range(GPU_STAT_NUM_SAMPLES):
            gpu_samples.append(GPUtil.getGPUs())
            time.sleep(GPU_STAT_SAMPLE_INTERVAL)
        num_gpus = len(gpu_samples[0])
        sample_loads = np.zeros((num_gpus, GPU_STAT_NUM_SAMPLES))
        sample_mems = np.zeros((num_gpus, GPU_STAT_NUM_SAMPLES))
        total_mems = np.zeros((num_gpus, ))
        for i in range(GPU_STAT_NUM_SAMPLES):
            for gpu in gpu_samples[i]:
                if gpu.id in self._gpu_devices:
                    sample_loads[gpu.id, i] = gpu.load * 100
                    sample_mems[gpu.id, i] = gpu.memoryUsed
                else:
                    # Trick the manager into thinking the GPU is fully
                    # utilized so it will never be chosen.
                    sample_loads[gpu.id, i] = 100
                    sample_mems[gpu.id, i] = gpu.memoryTotal
                total_mems[gpu.id] = gpu.memoryTotal
        return total_mems.tolist(), np.mean(
            sample_loads, axis=1).tolist(), np.mean(sample_mems,
                                                    axis=1).tolist()

    def _build_gpu_list(self, max_possible_trials_per_device):
        """Expand per-device trial capacities into a flat, shuffled list of
        device-id strings (one entry per schedulable trial)."""
        gpus_avail = []
        for device_id, max_trials in enumerate(max_possible_trials_per_device):
            for _ in range(max_trials):
                gpus_avail.append(str(device_id))
        # This is because we might truncate this list later because of a more
        # severe resource bottleneck, in which case we want to evenly
        # distribute the load.
        random.shuffle(gpus_avail)
        return gpus_avail

    def num_trials_to_schedule(self, num_pending_trials):
        """Determine how many of the pending trials can be scheduled now.

        Parameters
        ----------
        num_pending_trials : int
            Number of trials waiting to be scheduled.

        Returns
        -------
        :obj:`tuple` of int, :obj:`list` of str
            Number of trials to schedule and the GPU device id (as a string)
            to assign to each.
        """
        num_trials_to_schedule = num_pending_trials
        if self._monitor_cpu:
            # Check cpu bandwith.
            cpu_load = min(self._get_cpu_load(), self._cpu_count * 100)
            max_possible_trials_cpu = int(
                (self._cpu_count * 100 - cpu_load) // self._trial_cpu_load)
            self._logger.info("CPU load: {}%, Max possible trials: {}".format(
                cpu_load, max_possible_trials_cpu))
            num_trials_to_schedule = min(num_trials_to_schedule,
                                         max_possible_trials_cpu)
        if self._monitor_gpu:
            # Check gpu bandwith.
            total_gpu_mems, gpu_loads, gpu_mems = self._get_gpu_stats()
            max_possible_trials_gpu_load_per_device = [
                int((100 - gpu_load) // self._trial_gpu_load)
                for gpu_load in gpu_loads
            ]
            max_possible_trials_gpu_mem_per_device = [
                int((total_gpu_mem - gpu_mem) // self._trial_gpu_mem)
                for total_gpu_mem, gpu_mem in zip(total_gpu_mems, gpu_mems)
            ]
            # BUGFIX: this must be a materialized list, not a Py3 `map`
            # iterator -- it is consumed several times below (the logging
            # `sum`, the `min`, and `_build_gpu_list`), and an iterator would
            # be exhausted after the first pass, silently yielding 0 trials.
            max_possible_trials_gpu_per_device = [
                min(load_cap, mem_cap) for load_cap, mem_cap in zip(
                    max_possible_trials_gpu_load_per_device,
                    max_possible_trials_gpu_mem_per_device)
            ]
            self._logger.info(
                "GPU loads: {}, GPU mems: {}, Max possible trials: {}".format(
                    "% ".join([str(gpu_load) for gpu_load in gpu_loads]) +
                    "%",
                    "MiB ".join([str(gpu_mem) for gpu_mem in gpu_mems]) +
                    "MiB", sum(max_possible_trials_gpu_per_device)))
            num_trials_to_schedule = min(
                num_trials_to_schedule,
                sum(max_possible_trials_gpu_per_device))
            # Build the device list for scheduling trials on specific gpus.
            gpus_avail = self._build_gpu_list(
                max_possible_trials_gpu_per_device)
        else:
            # Just distribute load among gpus. BUGFIX: the original called
            # `self._get_gpu_count()`, which is not defined anywhere in this
            # class and raised `AttributeError`.
            num_gpus = len(self._gpu_devices)
            trials_per_gpu = int(math.ceil(num_trials_to_schedule / num_gpus))
            gpus_avail = self._build_gpu_list([trials_per_gpu] * num_gpus)
        gpus_avail = gpus_avail[:num_trials_to_schedule]
        self._logger.info(
            "Max possible trials overall: {}".format(num_trials_to_schedule))
        return num_trials_to_schedule, gpus_avail
import os
import time

from .resource_manager import ResourceManager
from .trial import (GQCNNTrainingAndAnalysisTrial,
                    GQCNNFineTuningAndAnalysisTrial)
from .utils import gen_trial_params, gen_timestamp, log_trial_status
from .enums import TrialConstants, SearchConstants

from autolab_core import Logger

from ..utils import is_py2, GQCNNTrainingStatus

# `Queue` was renamed to `queue` in Python 3.
if is_py2():
    from Queue import Queue
else:
    from queue import Queue


class GQCNNSearch(object):
    """Hyper-parameter search over GQ-CNN training configurations.

    Expands each master training config into one trial per hyper-parameter
    combination, then schedules trials as subprocesses subject to the
    resources reported by a :obj:`ResourceManager`.
    """

    def __init__(self,
                 analysis_config,
                 train_configs,
                 datasets,
                 split_names,
                 base_models=[],
                 output_dir=None,
                 search_name=None,
                 monitor_cpu=True,
                 monitor_gpu=True,
                 cpu_cores=[],
                 gpu_devices=[]):
        """
        Parameters
        ----------
        analysis_config : dict
            Config for the post-training `GQCNNAnalyzer` run of each trial.
        train_configs : list
            One master training config per dataset; fields given as lists
            are expanded into hyper-parameter combinations.
        datasets : list of str
            Dataset paths, parallel to `train_configs`.
        split_names : list of str
            Dataset split names, parallel to `train_configs`.
        base_models : list of str
            If non-empty, fine-tune from these models (parallel to
            `train_configs`) instead of training from scratch.
        output_dir : str
            Root directory for trial outputs; defaults to "models".
        search_name : str
            Name for this search; defaults to a timestamped name.
        monitor_cpu : bool
            Whether the resource manager samples CPU load.
        monitor_gpu : bool
            Whether the resource manager samples GPU load/memory.
        cpu_cores : list of int
            CPU cores to use; all if empty.
        gpu_devices : list of int
            GPU devices to use; all if empty.
        """
        self._analysis_cfg = analysis_config

        # Create the trial output dir, generating default names as needed.
        if search_name is None:
            search_name = "gqcnn_hyperparam_search_{}".format(gen_timestamp())
        if output_dir is None:
            output_dir = "models"
        self._trial_output_dir = os.path.join(output_dir, search_name)
        if not os.path.exists(self._trial_output_dir):
            os.makedirs(self._trial_output_dir)

        # Set up logger.
        self._logger = Logger.get_logger(self.__class__.__name__,
                                         log_file=os.path.join(
                                             self._trial_output_dir,
                                             "search.log"),
                                         global_log_file=True)

        # Init resource manager.
        self._resource_manager = ResourceManager(TrialConstants.TRIAL_CPU_LOAD,
                                                 TrialConstants.TRIAL_GPU_LOAD,
                                                 TrialConstants.TRIAL_GPU_MEM,
                                                 monitor_cpu=monitor_cpu,
                                                 monitor_gpu=monitor_gpu,
                                                 cpu_cores=cpu_cores,
                                                 gpu_devices=gpu_devices)

        # Parse train configs and generate individual trial parameters.
        if len(base_models) > 0:
            inconsistent_inputs_msg = ("Must have equal number of training"
                                       " configs, datasets, split_names, and"
                                       " base models!")
            assert len(train_configs) == len(datasets) == len(
                split_names) == len(base_models), inconsistent_inputs_msg
        else:
            inconsistent_inputs_msg = ("Must have equal number of training"
                                       " configs, datasets, and split_names!")
            assert len(train_configs) == len(datasets) == len(
                split_names), inconsistent_inputs_msg
        self._logger.info("Generating trial parameters...")
        trial_params = gen_trial_params(train_configs,
                                        datasets,
                                        split_names,
                                        base_models=base_models)

        # Create pending trial queue. Fine-tuning trials carry an extra
        # `base_model` parameter.
        self._trials_pending_queue = Queue()
        if len(base_models) > 0:
            for trial_name, hyperparam_summary, train_cfg, dataset, \
                    base_model, split_name in trial_params:
                self._trials_pending_queue.put(
                    GQCNNFineTuningAndAnalysisTrial(self._analysis_cfg,
                                                    train_cfg, dataset,
                                                    base_model, split_name,
                                                    self._trial_output_dir,
                                                    trial_name,
                                                    hyperparam_summary))
        else:
            for trial_name, hyperparam_summary, train_cfg, dataset, \
                    split_name in trial_params:
                self._trials_pending_queue.put(
                    GQCNNTrainingAndAnalysisTrial(self._analysis_cfg,
                                                  train_cfg, dataset,
                                                  split_name,
                                                  self._trial_output_dir,
                                                  trial_name,
                                                  hyperparam_summary))

        # Create containers to hold running, finished, and errored-out trials.
        self._trials_running = []
        self._trials_finished = []
        self._trials_errored = []

    def search(self):
        """Run the search loop until every trial has finished or errored.

        Repeatedly: query the resource manager, start as many pending trials
        as fit, wait for newly started trials to reach the TRAINING state
        before scheduling again, and move completed/failed trials out of the
        running list.
        """
        self._logger.info("Beginning hyper-parameter search...")
        done = False
        waiting_for_trial_init = False
        last_schedule_attempt_time = -1
        search_start_time = time.time()
        while not done:
            num_trials_pending = self._trials_pending_queue.qsize()
            num_trials_running = len(self._trials_running)
            num_trials_finished = len(self._trials_finished)
            num_trials_errored = len(self._trials_errored)

            self._logger.info(
                "----------------------------------------------------")
            self._logger.info(
                "Num trials pending: {}".format(num_trials_pending))
            self._logger.info(
                "Num trials running: {}".format(num_trials_running))
            self._logger.info(
                "Num trials finished: {}".format(num_trials_finished))
            if num_trials_errored > 0:
                self._logger.info(
                    "Num trials errored: {}".format(num_trials_errored))

            # Attempt scheduling only if not blocked on trial init and the
            # back-off interval since the last attempt has elapsed.
            if num_trials_pending > 0 and not waiting_for_trial_init and (
                    time.time() - last_schedule_attempt_time
            ) > SearchConstants.MIN_TIME_BETWEEN_SCHEDULE_ATTEMPTS:
                self._logger.info("Attempting to schedule more trials...")
                num_trials_to_schedule, gpus_avail = \
                    self._resource_manager.num_trials_to_schedule(
                        num_trials_pending)
                self._logger.info(
                    "Scheduling {} trials".format(num_trials_to_schedule))
                if num_trials_to_schedule > 0:
                    # Start trials, one per assigned GPU slot.
                    for _, gpu in zip(range(num_trials_to_schedule),
                                      gpus_avail):
                        trial = self._trials_pending_queue.get()
                        trial.begin(
                            gpu_avail=gpu,
                            cpu_cores_avail=self._resource_manager.cpu_cores)
                        self._trials_running.append(trial)
                    # Block scheduling until trials have started training
                    # (this is when we know what resources are still
                    # available).
                    waiting_for_trial_init = True
                last_schedule_attempt_time = time.time()

            # Check if trials have started training.
            if waiting_for_trial_init:
                training_has_started = [
                    trial.training_status == GQCNNTrainingStatus.TRAINING
                    for trial in self._trials_running
                ]
                if all(training_has_started):
                    waiting_for_trial_init = False

            # Log trial status.
            if len(self._trials_running) > 0:
                self._logger.info(log_trial_status(self._trials_running))

            # Check if any trials have finished running or errored-out.
            finished_trials_to_move = []
            errored_trials_to_move = []
            for trial in self._trials_running:
                if trial.finished:
                    finished_trials_to_move.append(trial)
                elif trial.errored_out:
                    errored_trials_to_move.append(trial)
            self._trials_finished.extend(finished_trials_to_move)
            self._trials_errored.extend(errored_trials_to_move)
            for trial in finished_trials_to_move:
                self._trials_running.remove(trial)
            for trial in errored_trials_to_move:
                self._trials_running.remove(trial)

            # Update stopping criteria and sleep. Note: the counts were
            # sampled at the top of the iteration, so the loop makes one
            # final pass after the last trial completes.
            done = (num_trials_pending == 0) and (num_trials_running == 0)
            time.sleep(SearchConstants.SEARCH_THREAD_SLEEP)

        self._logger.info(
            "------------------Successful Trials------------------")
        self._logger.info(log_trial_status(self._trials_finished))
        if len(self._trials_errored) > 0:
            self._logger.info(
                "--------------------Failed Trials--------------------")
            self._logger.info(log_trial_status(self._trials_errored))
        self._logger.info(
            "Hyper-parameter search finished in {} seconds.".format(
                time.time() - search_start_time))
class TrialStatus:
    """Lifecycle states of a hyper-parameter search trial."""
    PENDING = "pending"
    RUNNING = "running"
    FINISHED = "finished"
    EXCEPTION = "exception"


class GQCNNTrialWithAnalysis(ABC):
    """A single search trial: train (or fine-tune) a GQ-CNN in a subprocess,
    then analyze the resulting model.

    Progress is communicated back to the parent through two
    `multiprocessing.Manager` dicts: one for training progress and one for
    overall trial status.
    """

    def __init__(self, analysis_cfg, train_cfg, dataset_dir, split_name,
                 output_dir, model_name, hyperparam_summary):
        """
        Parameters
        ----------
        analysis_cfg : dict
            Config for the post-training `GQCNNAnalyzer`.
        train_cfg : dict
            Training config (must contain a "gqcnn" sub-config).
        dataset_dir : str
            Path to the training dataset.
        split_name : str
            Name of the train/validation split.
        output_dir : str
            Directory in which the trained model is written.
        model_name : str
            Name of this trial's model (also its sub-directory name).
        hyperparam_summary : dict
            Summary of the hyper-parameter values chosen for this trial.
        """
        self._analysis_cfg = analysis_cfg
        self._train_cfg = train_cfg
        self._dataset_dir = dataset_dir
        self._split_name = split_name
        self._output_dir = output_dir
        self._model_name = model_name
        self._hyperparam_summary = hyperparam_summary

        self._manager = multiprocessing.Manager()
        # To communicate with training.
        self._train_progress_dict = self._build_train_progress_dict()
        # To communicate with trial.
        self._trial_progress_dict = self._build_trial_progress_dict()

        self._process = None

    def _build_train_progress_dict(self):
        # Shared dict written by the training subprocess.
        progress_dict = self._manager.dict(
            training_status=GQCNNTrainingStatus.NOT_STARTED,
            epoch=np.nan,
            analysis=None)
        return progress_dict

    def _build_trial_progress_dict(self):
        # Shared dict tracking overall trial status.
        progress_dict = self._manager.dict(status=TrialStatus.PENDING)
        return progress_dict

    @abstractmethod
    def _run(self, trainer):
        """Run the trainer (subclasses choose train vs. fine-tune)."""
        pass

    def _run_trial(self,
                   analysis_config,
                   train_config,
                   dataset_dir,
                   split_name,
                   output_dir,
                   model_name,
                   train_progress_dict,
                   trial_progress_dict,
                   hyperparam_summary,
                   gpu_avail="",
                   cpu_cores_avail=None,
                   backend="tf"):
        """Subprocess entry point: train, dump the hyper-parameter summary,
        analyze, and record status in the shared dicts."""
        if cpu_cores_avail is None:  # Avoid mutable default argument.
            cpu_cores_avail = []
        trial_progress_dict["status"] = TrialStatus.RUNNING
        try:
            # Pin this subprocess to the allotted cores (Linux-only, best
            # effort) and restrict it to the assigned GPU.
            os.system("taskset -pc {} {}".format(
                ",".join(str(i) for i in cpu_cores_avail), os.getpid()))
            os.environ["CUDA_VISIBLE_DEVICES"] = gpu_avail
            gqcnn = get_gqcnn_model(backend,
                                    verbose=False)(train_config["gqcnn"],
                                                   verbose=False)
            trainer = get_gqcnn_trainer(backend)(
                gqcnn,
                dataset_dir,
                split_name,
                output_dir,
                train_config,
                name=model_name,
                progress_dict=train_progress_dict,
                verbose=False)
            self._run(trainer)
            # BUGFIX: open in text mode ("w"), not binary ("wb") --
            # `json.dump` writes `str`, which raises `TypeError` on a binary
            # handle under Python 3.
            with open(
                    os.path.join(output_dir, model_name,
                                 "hyperparam_summary.json"), "w") as fhandle:
                json.dump(hyperparam_summary,
                          fhandle,
                          indent=GeneralConstants.JSON_INDENT)
            train_progress_dict["training_status"] = "analyzing"
            analyzer = GQCNNAnalyzer(analysis_config, verbose=False)
            _, _, init_train_error, final_train_error, init_train_loss, \
                final_train_loss, init_val_error, final_val_error, \
                norm_final_val_error = analyzer.analyze(
                    os.path.join(output_dir, model_name), output_dir)
            analysis_dict = {}
            analysis_dict["init_train_error"] = init_train_error
            analysis_dict["final_train_error"] = final_train_error
            analysis_dict["init_train_loss"] = init_train_loss
            analysis_dict["final_train_loss"] = final_train_loss
            analysis_dict["init_val_error"] = init_val_error
            analysis_dict["final_val_error"] = final_val_error
            analysis_dict["norm_final_val_error"] = norm_final_val_error
            train_progress_dict["analysis"] = analysis_dict
            train_progress_dict["training_status"] = "finished"
            trial_progress_dict["status"] = TrialStatus.FINISHED
            sys.exit(0)
        except Exception as e:
            # Failures are reported via the shared dict; the subprocess still
            # exits 0 on purpose -- the parent polls `errored_out`.
            trial_progress_dict["status"] = TrialStatus.EXCEPTION
            trial_progress_dict["error_msg"] = str(e)
            sys.exit(0)

    @property
    def finished(self):
        return self._trial_progress_dict["status"] == TrialStatus.FINISHED

    @property
    def errored_out(self):
        return self._trial_progress_dict["status"] == TrialStatus.EXCEPTION

    @property
    def error_msg(self):
        # Only populated once `errored_out` is True.
        return self._trial_progress_dict["error_msg"]

    @property
    def training_status(self):
        return self._train_progress_dict["training_status"]

    def begin(self, gpu_avail="", cpu_cores_avail=None):
        """Launch the trial in a subprocess on the given GPU/cores."""
        if cpu_cores_avail is None:  # Avoid mutable default argument.
            cpu_cores_avail = []
        self._status = TrialStatus.RUNNING
        self._process = multiprocessing.Process(
            target=self._run_trial,
            args=(self._analysis_cfg, self._train_cfg, self._dataset_dir,
                  self._split_name, self._output_dir, self._model_name,
                  self._train_progress_dict, self._trial_progress_dict,
                  self._hyperparam_summary, gpu_avail, cpu_cores_avail))
        self._process.start()

    def __str__(self):
        trial_str = "Trial: {}, Training Stage: {}".format(
            self._model_name, self.training_status)
        if self.training_status == GQCNNTrainingStatus.TRAINING and not \
                np.isnan(self._train_progress_dict["epoch"]):
            trial_str += ", Epoch: {}/{}".format(
                self._train_progress_dict["epoch"],
                self._train_cfg["num_epochs"])
        if self.errored_out:
            trial_str += ", Error message: {}".format(self.error_msg)
        if self.training_status == "finished":
            finished_msg = (", Initial train error: {}, Final train error: {},"
                            " Initial train loss: {}, Final train loss: {},"
                            " Initial val error: {}, Final val error: {}, Norm"
                            " final val error: {}")
            trial_str += finished_msg.format(
                self._train_progress_dict["analysis"]["init_train_error"],
                self._train_progress_dict["analysis"]["final_train_error"],
                self._train_progress_dict["analysis"]["init_train_loss"],
                self._train_progress_dict["analysis"]["final_train_loss"],
                self._train_progress_dict["analysis"]["init_val_error"],
                self._train_progress_dict["analysis"]["final_val_error"],
                self._train_progress_dict["analysis"]["norm_final_val_error"])
        return trial_str


class GQCNNTrainingAndAnalysisTrial(GQCNNTrialWithAnalysis):
    """Trial that trains a GQ-CNN from scratch, then analyzes it."""

    def _run(self, trainer):
        trainer.train()


class GQCNNFineTuningAndAnalysisTrial(GQCNNTrialWithAnalysis):
    """Trial that fine-tunes a GQ-CNN from a base model, then analyzes it."""

    def __init__(self, analysis_cfg, train_cfg, dataset_dir, base_model_dir,
                 split_name, output_dir, model_name, hyperparam_summary):
        GQCNNTrialWithAnalysis.__init__(self, analysis_cfg, train_cfg,
                                        dataset_dir, split_name, output_dir,
                                        model_name, hyperparam_summary)
        self._base_model_dir = base_model_dir

    def _run(self, trainer):
        trainer.finetune(self._base_model_dir)
Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Utility functions for hyper-parameter search. 
from collections import OrderedDict, defaultdict
import copy
from datetime import datetime
import itertools


def get_fields_to_search_over(train_config, prev_keys=[]):
    """Recursively collect config fields whose values are lists.

    A list value marks a field to search over. If its first element is a
    string starting with "anchor_", the field is grouped under that anchor
    tag (fields sharing a tag vary together) and the tag element is stripped.
    NOTE(review): stripping the tag mutates `train_config` in place.

    Returns
    -------
    :obj:`tuple` of :obj:`list`, :obj:`defaultdict`
        Key paths of un-anchored search fields, and a mapping from anchor
        tag to the key paths grouped under it. Each key path is a list of
        nested keys. Only `OrderedDict` sub-configs are recursed into.
    """
    fields = []
    anchored_fields = defaultdict(list)
    for key in train_config:
        if isinstance(train_config[key], list):
            prev_keys_copy = copy.deepcopy(prev_keys)
            prev_keys_copy.append(key)
            if isinstance(train_config[key][0], str) and \
                    train_config[key][0].startswith("anchor_"):
                anchored_fields[train_config[key][0]].append(prev_keys_copy)
                train_config[key] = train_config[key][1:]
            else:
                fields.append(prev_keys_copy)
        elif isinstance(train_config[key], OrderedDict):
            prev_keys_copy = copy.deepcopy(prev_keys)
            prev_keys_copy.append(key)
            sub_fields, sub_anchored_fields = get_fields_to_search_over(
                train_config[key], prev_keys=prev_keys_copy)
            fields.extend(sub_fields)
            update_dict(anchored_fields, sub_anchored_fields)
    return fields, anchored_fields


def update_dict(dict1, dict2):
    """Merge `dict2` of lists into `dict1` in place, extending lists for
    keys present in both."""
    for key, val in dict2.items():
        if key in dict1:
            dict1[key].extend(val)
        else:
            dict1[key] = val


def get_nested_key(cfg, key):
    """Look up `key` (a list of nested keys) in `cfg`."""
    val = cfg
    for k in key:
        val = val[k]
    return val


def set_nested_key(cfg, key, val):
    """Set `key` (a list of nested keys) in `cfg` to `val` in place."""
    root_field = cfg
    for k in key[:-1]:
        root_field = root_field[k]
    root_field[key[-1]] = val


def gen_config_summary_dict(hyperparam_combination):
    """Summarize one (key path, value) combination as a flat dict keyed by
    "/"-joined key paths."""
    summary_dict = {}
    for key, val in hyperparam_combination:
        summary_dict["/".join(key)] = val
    return summary_dict


def parse_master_train_config(train_config):
    """Expand a master training config into concrete configs, one per
    hyper-parameter combination.

    Returns
    -------
    :obj:`list` of :obj:`tuple`
        (hyper-parameter summary dict, config) pairs. If the config has no
        search fields, a single pair with an empty-string summary is
        returned.
    """
    configs = []
    hyperparam_search_fields, hyperparam_anchored_search_fields = \
        get_fields_to_search_over(train_config)

    # Ensure a one-to-one mapping between hyperparameters of fields with
    # matching anchor tags.
    for anchor_tag, fields in hyperparam_anchored_search_fields.items():
        num_params = []
        for field in fields:
            num_params.append(len(get_nested_key(train_config, field)))
        invalid_anchor_tag_msg = ("All fields in anchor tag '{}' do not have"
                                  " the same # of parameters to search over!")
        assert max(num_params) == min(
            num_params), invalid_anchor_tag_msg.format(anchor_tag)

    # If there is nothing to search over just return the given config.
    if len(hyperparam_search_fields) == 0 and len(
            hyperparam_anchored_search_fields) == 0:
        return [("", train_config)]

    # Generate a list of all the possible hyper-parameters to search over.
    # Normal fields: one candidate list per field.
    hyperparam_search_params = []
    for search_field in hyperparam_search_fields:
        search_field_params = []
        for val in get_nested_key(train_config, search_field):
            search_field_params.append((search_field, val))
        hyperparam_search_params.append(search_field_params)
    # Anchored fields: all fields of one tag vary together, so each
    # candidate is a list of (field, value) pairs at the same index.
    for anchored_fields in hyperparam_anchored_search_fields.values():
        combinations = [[] for _ in range(
            len(get_nested_key(train_config, anchored_fields[0])))]
        for field in anchored_fields:
            for idx, val in enumerate(get_nested_key(train_config, field)):
                combinations[idx].append((field, val))
        hyperparam_search_params.append(combinations)

    # Get all permutations of the possible hyper-parameters.
    hyperparam_combinations = list(
        itertools.product(*hyperparam_search_params))

    # Flatten the anchored sub-lists so every combination is a flat list of
    # (field, value) pairs.
    def flatten_combo(combo):
        flattened = []
        for item in combo:
            if isinstance(item, list):
                for sub_item in item:
                    flattened.append(sub_item)
            else:
                flattened.append(item)
        return flattened

    hyperparam_combinations = [
        flatten_combo(combo) for combo in hyperparam_combinations
    ]

    # Generate possible configs to search over.
    for combo in hyperparam_combinations:
        config = copy.deepcopy(train_config)
        for field, val in combo:
            set_nested_key(config, field, val)
        configs.append((gen_config_summary_dict(combo), config))
    return configs


def gen_timestamp():
    """Return a filesystem-friendly timestamp (seconds resolution)."""
    return str(datetime.now()).split(".")[0].replace(" ", "_")


def gen_trial_params_train(master_train_configs, datasets, split_names):
    """Generate per-trial parameters for training from scratch.

    Returns a list of (trial name, hyper-parameter summary, config, dataset,
    split name) tuples, one per hyper-parameter combination per dataset.
    """
    trial_params = []
    for master_train_config, dataset, split_name in zip(
            master_train_configs, datasets, split_names):
        train_configs = parse_master_train_config(master_train_config)
        for i, (hyperparam_summary_dict,
                train_config) in enumerate(train_configs):
            # NOTE(review): the trial name uses the third-from-last path
            # component of the dataset -- assumes a dataset layout like
            # .../<name>/tensors/<split>; confirm against callers.
            trial_name = "{}_{}_trial_{}_{}".format(
                dataset.split("/")[-3], split_name, i, gen_timestamp())
            trial_params.append((trial_name, hyperparam_summary_dict,
                                 train_config, dataset, split_name))
    return trial_params


def gen_trial_params_finetune(master_train_configs, datasets, base_models,
                              split_names):
    """Generate per-trial parameters for fine-tuning from base models.

    Like `gen_trial_params_train`, with the base model inserted before the
    split name in each tuple.
    """
    trial_params = []
    for master_train_config, dataset, base_model, split_name in zip(
            master_train_configs, datasets, base_models, split_names):
        train_configs = parse_master_train_config(master_train_config)
        for i, (hyperparam_summary_dict,
                train_config) in enumerate(train_configs):
            trial_name = "{}_{}_trial_{}_{}".format(
                dataset.split("/")[-3], split_name, i, gen_timestamp())
            trial_params.append(
                (trial_name, hyperparam_summary_dict, train_config, dataset,
                 base_model, split_name))
    return trial_params


def gen_trial_params(master_train_configs,
                     datasets,
                     split_names,
                     base_models=[]):
    """Dispatch to the fine-tuning or from-scratch trial-parameter
    generator depending on whether base models were provided."""
    if len(base_models) > 0:
        return gen_trial_params_finetune(master_train_configs, datasets,
                                         base_models, split_names)
    else:
        return gen_trial_params_train(master_train_configs, datasets,
                                      split_names)


def log_trial_status(trials):
    """Format the status of every trial as a multi-line string for
    logging."""
    status_str = "--------------------TRIAL STATUS--------------------"
    for trial in trials:
        status_str += "\n"
        status_str += "[{}]".format(str(trial))
    return status_str
def get_gqcnn_trainer(backend="tf"):
    """Return the GQ-CNN Trainer class for the requested backend.

    Note
    ----
    Currently only TensorFlow is supported.

    Parameters
    ----------
    backend : str
        The backend to use, currently only "tf" is supported.

    Returns
    -------
    :obj:`gqcnn.training.tf.GQCNNTrainerTF`
        GQ-CNN Trainer with TensorFlow backend.

    Raises
    ------
    ValueError
        If `backend` does not name a supported backend.
    """
    # Guard clause: anything other than TensorFlow is rejected up front.
    if backend != "tf":
        raise ValueError("Invalid backend: {}".format(backend))
    return GQCNNTrainerTF
Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Trains a GQ-CNN network using Tensorflow backend. 
Author
------
Vishal Satish & Jeff Mahler
"""
import collections
import json
import multiprocessing as mp
import os
import random
import shutil
import signal
import subprocess
import sys
import time

from past.builtins import xrange
import cv2
import numpy as np
import scipy.stats as ss
import tensorflow as tf

from autolab_core import (BinaryClassificationResult, RegressionResult,
                          TensorDataset, Logger)
from autolab_core.constants import JSON_INDENT
import autolab_core.utils as utils

from ...utils import (TrainingMode, GripperMode, InputDepthMode,
                      GeneralConstants, TrainStatsLogger,
                      GQCNNTrainingStatus, GQCNNFilenames, pose_dim,
                      read_pose_data, weight_name_to_layer_name, is_py2,
                      imresize)

# Py2/Py3 compatibility: the `Queue` module was renamed `queue` in Python 3.
if is_py2():
    import Queue
else:
    import queue as Queue


class GQCNNTrainerTF(object):
    """Trains a GQ-CNN with Tensorflow backend."""

    def __init__(self,
                 gqcnn,
                 dataset_dir,
                 split_name,
                 output_dir,
                 config,
                 name=None,
                 progress_dict=None,
                 verbose=True):
        """
        Parameters
        ----------
        gqcnn : :obj:`GQCNN`
            Grasp quality neural network to optimize.
        dataset_dir : str
            Path to the training/validation dataset.
        split_name : str
            Name of the split to train on. If `None`, the default
            image-wise split is used (a warning is logged).
        output_dir : str
            Path to save the model output.
        config : dict
            Dictionary of configuration parameters. Mutated in-place to
            record `dataset_dir` and `split_name` for later saving.
        name : str
            Name of the the model. If `None`, a name is generated from a
            fresh experiment id.
        progress_dict : dict, optional
            Shared dict updated with training status/progress (used by the
            hyperparameter search to monitor trials).
        verbose : bool, optional
            Whether to log to the console in addition to the log file.
        """
        self.gqcnn = gqcnn
        self.dataset_dir = dataset_dir
        self.split_name = split_name
        self.output_dir = output_dir
        self.cfg = config
        self.tensorboard_has_launched = False
        self.model_name = name
        self.progress_dict = progress_dict
        self.finetuning = False

        # Create a directory for the model.
        if self.model_name is None:
            model_id = utils.gen_experiment_id()
            self.model_name = "model_{}".format(model_id)
        self.model_dir = os.path.join(self.output_dir, self.model_name)
        if not os.path.exists(self.model_dir):
            os.mkdir(self.model_dir)

        # Set up logger (logs to `<model_dir>/training.log`).
        self.logger = Logger.get_logger(self.__class__.__name__,
                                        log_file=os.path.join(
                                            self.model_dir, "training.log"),
                                        silence=(not verbose),
                                        global_log_file=verbose)

        # Check default split.
        if split_name is None:
            self.logger.warning("Using default image-wise split.")
            self.split_name = "image_wise"

        # Update cfg for saving.
        self.cfg["dataset_dir"] = self.dataset_dir
        self.cfg["split_name"] = self.split_name

    def _create_loss(self):
        """Creates a loss based on config file.

        Supported values for `self.cfg["loss"]` are "l2", "sparse"
        (softmax cross-entropy, with an angular-bin masked variant), and
        "weighted_cross_entropy".

        Returns
        -------
        :obj:`tensorflow Tensor`
            Loss.
        """
        # NOTE(review): an unrecognized loss type falls through and
        # implicitly returns None — consider raising ValueError.
        if self.cfg["loss"] == "l2":
            return (1.0 / self.train_batch_size) * tf.nn.l2_loss(
                tf.subtract(tf.nn.sigmoid(self.train_net_output),
                            self.train_labels_node))
        elif self.cfg["loss"] == "sparse":
            if self._angular_bins > 0:
                # Mask out all but the predicted angular bin's logits
                # before computing the cross-entropy.
                log = tf.reshape(
                    tf.dynamic_partition(self.train_net_output,
                                         self.train_pred_mask_node, 2)[1],
                    (-1, 2))
                return tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        _sentinel=None,
                        labels=self.train_labels_node,
                        logits=log))
            else:
                return tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        _sentinel=None,
                        labels=self.train_labels_node,
                        logits=self.train_net_output,
                        name=None))
        elif self.cfg["loss"] == "weighted_cross_entropy":
            return tf.reduce_mean(
                tf.nn.weighted_cross_entropy_with_logits(
                    targets=tf.reshape(self.train_labels_node, [-1, 1]),
                    logits=self.train_net_output,
                    pos_weight=self.pos_weight,
                    name=None))

    def _create_optimizer(self, loss, batch, var_list, learning_rate):
        """Create optimizer based on config file.

        Parameters
        ----------
        loss : :obj:`tensorflow Tensor`
            Loss to use, generated with `_create_loss`.
        batch : :obj:`tf.Variable`
            Variable to keep track of the current gradient step number.
        var_list : :obj:`lst`
            List of tf.Variable objects to update to minimize loss
            (ex. network weights).
        learning_rate : float
            Learning rate for training.

        Returns
        -------
        :obj:`tf.train.Optimizer`
            Optimizer.

        Raises
        ------
        ValueError
            If `self.cfg["optimizer"]` is not "momentum", "adam" or
            "rmsprop".
        """
        # Instantiate optimizer.
        if self.cfg["optimizer"] == "momentum":
            optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                   self.momentum_rate)
        elif self.cfg["optimizer"] == "adam":
            optimizer = tf.train.AdamOptimizer(learning_rate)
        elif self.cfg["optimizer"] == "rmsprop":
            optimizer = tf.train.RMSPropOptimizer(learning_rate)
        else:
            raise ValueError("Optimizer '{}' not supported".format(
                self.cfg["optimizer"]))

        # Compute gradients.
        gradients, variables = zip(
            *optimizer.compute_gradients(loss, var_list=var_list))

        # Clip gradients to prevent exploding gradient problem.
        gradients, global_grad_norm = tf.clip_by_global_norm(
            gradients, self.max_global_grad_norm)

        # Generate op to apply gradients.
        apply_grads = optimizer.apply_gradients(zip(gradients, variables),
                                                global_step=batch)

        return apply_grads, global_grad_norm

    def _launch_tensorboard(self):
        """Launches Tensorboard to visualize training.

        Spawns a `tensorboard` subprocess pointed at `self.summary_dir`;
        stdout is discarded. The process handle is kept in
        `self._tensorboard_proc` so `_close_tensorboard` can terminate it.
        """
        FNULL = open(os.devnull, "w")
        tensorboard_launch_msg = ("Launching Tensorboard, please navigate to"
                                  " localhost:{} in your favorite web browser"
                                  " to view summaries.")
        self.logger.info(tensorboard_launch_msg.format(
            self._tensorboard_port))
        self._tensorboard_proc = subprocess.Popen([
            "tensorboard", "--port",
            str(self._tensorboard_port), "--logdir", self.summary_dir
        ],
                                                  stdout=FNULL)

    def _close_tensorboard(self):
        """Closes Tensorboard process."""
        self.logger.info("Closing Tensorboard...")
        self._tensorboard_proc.terminate()

    def train(self):
        """Perform optimization.

        Public entry point; runs `_train` inside the GQ-CNN's TF graph
        context.
        """
        with self.gqcnn.tf_graph.as_default():
            self._train()

    def _train(self):
        """Perform optimization.

        Runs setup, builds the network, then optimizes the weights,
        reporting status via `self.progress_dict` when provided.
        """
        # Run setup.
        if self.progress_dict is not None:
            self.progress_dict[
                "training_status"] = GQCNNTrainingStatus.SETTING_UP
        self._setup()

        # Build network.
        self.gqcnn.initialize_network(self.input_im_node,
                                      self.input_pose_node)

        # Optimize weights.
        if self.progress_dict is not None:
            self.progress_dict[
                "training_status"] = GQCNNTrainingStatus.TRAINING
        self._optimize_weights()
    def finetune(self, base_model_dir):
        """Perform fine-tuning.

        Public entry point; runs `_finetune` inside the GQ-CNN's TF graph
        context.

        Parameters
        ----------
        base_model_dir : str
            Path to the pre-trained base model to use.
        """
        with self.gqcnn.tf_graph.as_default():
            self._finetune(base_model_dir)

    def _finetune(self, base_model_dir):
        """Perform fine-tuning.

        Parameters
        ----------
        base_model_dir : str
            Path to the pre-trained base model to use.
        """
        # Set flag and base model for fine-tuning.
        self.finetuning = True
        self.base_model_dir = base_model_dir

        # Run setup.
        self._setup()

        # Build network on top of the pre-trained base network.
        self.gqcnn.set_base_network(base_model_dir)
        self.gqcnn.initialize_network(self.input_im_node,
                                      self.input_pose_node)

        # Optimize weights.
        if self.progress_dict is not None:
            self.progress_dict[
                "training_status"] = GQCNNTrainingStatus.TRAINING
        self._optimize_weights(finetune=True)

    def _optimize_weights(self, finetune=False):
        """Optimize the network weights.

        Builds the loss/optimizer graph, spawns prefetch-queue workers,
        then runs the main training loop with periodic logging, validation
        and checkpointing.

        Parameters
        ----------
        finetune : bool, optional
            When True, only non-base layers are optimized unless
            `self.optimize_base_layers` is set.
        """
        start_time = time.time()

        # Setup output: attach sigmoid/softmax to the raw network output
        # depending on training mode and loss.
        self.train_net_output = self.gqcnn.output
        if self.training_mode == TrainingMode.CLASSIFICATION:
            if self.cfg["loss"] == "weighted_cross_entropy":
                self.gqcnn.add_sigmoid_to_output()
            else:
                self.gqcnn.add_softmax_to_output()
        elif self.training_mode == TrainingMode.REGRESSION:
            self.gqcnn.add_sigmoid_to_output()
        else:
            raise ValueError("Training mode: {} not supported !".format(
                self.training_mode))
        train_predictions = self.gqcnn.output
        drop_rate_in = self.gqcnn.input_drop_rate_node
        self.weights = self.gqcnn.weights

        # Once weights have been initialized, create TF saver for weights.
        self.saver = tf.train.Saver()

        # Form loss.
        with tf.name_scope("loss"):
            # Part 1: error.
            loss = self._create_loss()
            unregularized_loss = loss

            # Part 2: L2 regularization over all layer weights.
            layer_weights = list(self.weights.values())
            with tf.name_scope("regularization"):
                regularizers = tf.nn.l2_loss(layer_weights[0])
                for w in layer_weights[1:]:
                    regularizers = regularizers + tf.nn.l2_loss(w)
            loss += self.train_l2_regularizer * regularizers

        # Setup learning rate: staircase exponential decay over dataset
        # passes.
        batch = tf.Variable(0)
        learning_rate = tf.train.exponential_decay(
            self.base_lr,  # Base learning rate.
            batch * self.train_batch_size,  # Current index into the dataset.
            self.decay_step,  # Decay step.
            self.decay_rate,  # decay rate.
            staircase=True)

        # Setup variable list. When fine-tuning, base-network layers are
        # frozen unless `optimize_base_layers` is set.
        var_list = list(self.weights.values())
        if finetune:
            var_list = []
            for weights_name, weights_val in self.weights.items():
                layer_name = weight_name_to_layer_name(weights_name)
                if self.optimize_base_layers or \
                        layer_name not in self.gqcnn._base_layer_names:
                    var_list.append(weights_val)

        # Create optimizer.
        with tf.name_scope("optimizer"):
            apply_grad_op, global_grad_norm = self._create_optimizer(
                loss, batch, var_list, learning_rate)

        # Add a handler for SIGINT for graceful exit.
        def handler(signum, frame):
            self.logger.info("caught CTRL+C, exiting...")
            self._cleanup()
            exit(0)

        signal.signal(signal.SIGINT, handler)

        # Now that everything in our graph is set up, we write the graph to
        # the summary event so it can be visualized in Tensorboard.
        self.summary_writer.add_graph(self.gqcnn.tf_graph)

        # Begin optimization loop.
        try:
            # Start prefetch queue workers. Each worker gets its own seed
            # unless running single-worker in debug mode.
            self.prefetch_q_workers = []
            seed = self._seed
            for i in range(self.num_prefetch_q_workers):
                if self.num_prefetch_q_workers > 1 or not self._debug:
                    seed = np.random.randint(GeneralConstants.SEED_SAMPLE_MAX)
                p = mp.Process(target=self._load_and_enqueue, args=(seed, ))
                p.start()
                self.prefetch_q_workers.append(p)

            # Init TF variables.
            init = tf.global_variables_initializer()
            self.sess.run(init)

            self.logger.info("Beginning Optimization...")

            # Create a `TrainStatsLogger` object to log training statistics
            # at certain intervals.
            self.train_stats_logger = TrainStatsLogger(self.model_dir)

            # Loop through training steps.
            training_range = xrange(
                int(self.num_epochs * self.num_train) //
                self.train_batch_size)
            for step in training_range:
                # Run optimization. With angular bins, the prefetch queue
                # also yields a prediction mask.
                step_start = time.time()
                if self._angular_bins > 0:
                    images, poses, labels, masks = self.prefetch_q.get()
                    _, l, ur_l, lr, predictions, raw_net_output = \
                        self.sess.run(
                            [
                                apply_grad_op, loss, unregularized_loss,
                                learning_rate, train_predictions,
                                self.train_net_output
                            ],
                            feed_dict={
                                drop_rate_in: self.drop_rate,
                                self.input_im_node: images,
                                self.input_pose_node: poses,
                                self.train_labels_node: labels,
                                self.train_pred_mask_node: masks
                            },
                            options=GeneralConstants.timeout_option)
                else:
                    images, poses, labels = self.prefetch_q.get()
                    _, l, ur_l, lr, predictions, raw_net_output = \
                        self.sess.run(
                            [
                                apply_grad_op, loss, unregularized_loss,
                                learning_rate, train_predictions,
                                self.train_net_output
                            ],
                            feed_dict={
                                drop_rate_in: self.drop_rate,
                                self.input_im_node: images,
                                self.input_pose_node: poses,
                                self.train_labels_node: labels
                            },
                            options=GeneralConstants.timeout_option)
                step_stop = time.time()
                self.logger.info("Step took {} sec.".format(
                    str(round(step_stop - step_start, 3))))

                # Log prediction statistics. The softmax/sigmoid is
                # recomputed in numpy from the raw logits for logging.
                # NOTE(review): assumes a 2-class output in the softmax
                # branch — confirm against network config.
                if self.training_mode == TrainingMode.REGRESSION:
                    self.logger.info("Max " + str(np.max(predictions)))
                    self.logger.info("Min " + str(np.min(predictions)))
                elif self.cfg["loss"] != "weighted_cross_entropy":
                    if self._angular_bins == 0:
                        ex = np.exp(raw_net_output - np.tile(
                            np.max(raw_net_output, axis=1)[:, np.newaxis],
                            [1, 2]))
                        softmax = ex / np.tile(
                            np.sum(ex, axis=1)[:, np.newaxis], [1, 2])

                        self.logger.info("Max " + str(np.max(softmax[:, 1])))
                        self.logger.info("Min " + str(np.min(softmax[:, 1])))
                        self.logger.info("Pred nonzero " +
                                         str(np.sum(softmax[:, 1] > 0.5)))
                        self.logger.info("True nonzero " +
                                         str(np.sum(labels)))
                else:
                    sigmoid = 1.0 / (1.0 + np.exp(-raw_net_output))
                    self.logger.info("Max " + str(np.max(sigmoid)))
                    self.logger.info("Min " + str(np.min(sigmoid)))
                    self.logger.info("Pred nonzero " +
                                     str(np.sum(sigmoid > 0.5)))
                    self.logger.info("True nonzero " +
                                     str(np.sum(labels > 0.5)))

                # Abort on numerical blow-up.
                if np.isnan(l) or np.any(np.isnan(poses)):
                    self.logger.error(
                        "Encountered NaN in loss or training poses!")
                    raise Exception

                # Log output.
                if step % self.log_frequency == 0:
                    elapsed_time = time.time() - start_time
                    start_time = time.time()
                    self.logger.info("Step {} (epoch {}), {} s".format(
                        step,
                        str(
                            round(
                                step * self.train_batch_size /
                                self.num_train, 3)),
                        str(round(1000 * elapsed_time / self.eval_frequency,
                                  2))))
                    self.logger.info(
                        "Minibatch loss: {}, learning rate: {}".format(
                            str(round(l, 3)), str(round(lr, 6))))

                    if self.progress_dict is not None:
                        self.progress_dict["epoch"] = str(
                            round(
                                step * self.train_batch_size /
                                self.num_train, 2))

                    train_error = l
                    if self.training_mode == TrainingMode.CLASSIFICATION:
                        if self._angular_bins > 0:
                            # Keep only the masked (predicted-bin) outputs.
                            predictions = predictions[masks.astype(
                                bool)].reshape((-1, 2))
                        classification_result = BinaryClassificationResult(
                            predictions[:, 1], labels)
                        train_error = classification_result.error_rate
                        self.logger.info("Minibatch error: {}".format(
                            str(round(train_error, 3))))

                    self.summary_writer.add_summary(
                        self.sess.run(
                            self.merged_log_summaries,
                            feed_dict={
                                self.minibatch_error_placeholder:
                                    train_error,
                                self.minibatch_loss_placeholder: l,
                                self.learning_rate_placeholder: lr
                            }), step)
                    sys.stdout.flush()

                    # Update the `TrainStatsLogger`.
                    self.train_stats_logger.update(train_eval_iter=step,
                                                   train_loss=l,
                                                   train_error=train_error,
                                                   total_train_error=None,
                                                   val_eval_iter=None,
                                                   val_error=None,
                                                   learning_rate=lr)

                # Evaluate model.
                if step % self.eval_frequency == 0 and step > 0:
                    if self.cfg["eval_total_train_error"]:
                        train_result = self._error_rate_in_batches(
                            validation_set=False)
                        self.logger.info("Training error: {}".format(
                            str(round(train_result.error_rate, 3))))

                        # Update the `TrainStatsLogger` and save.
                        self.train_stats_logger.update(
                            train_eval_iter=None,
                            train_loss=None,
                            train_error=None,
                            total_train_error=train_result.error_rate,
                            total_train_loss=train_result.cross_entropy_loss,
                            val_eval_iter=None,
                            val_error=None,
                            learning_rate=None)
                        self.train_stats_logger.log()

                    # Validation only runs when a validation split exists.
                    if self.train_pct < 1.0:
                        val_result = self._error_rate_in_batches()
                        self.summary_writer.add_summary(
                            self.sess.run(
                                self.merged_eval_summaries,
                                feed_dict={
                                    self.val_error_placeholder:
                                        val_result.error_rate
                                }), step)
                        self.logger.info("Validation error: {}".format(
                            str(round(val_result.error_rate, 3))))
                        self.logger.info("Validation loss: {}".format(
                            str(round(val_result.cross_entropy_loss, 3))))
                    sys.stdout.flush()

                    # Update the `TrainStatsLogger`.
                    if self.train_pct < 1.0:
                        self.train_stats_logger.update(
                            train_eval_iter=None,
                            train_loss=None,
                            train_error=None,
                            total_train_error=None,
                            val_eval_iter=step,
                            val_loss=val_result.cross_entropy_loss,
                            val_error=val_result.error_rate,
                            learning_rate=None)
                    else:
                        self.train_stats_logger.update(train_eval_iter=None,
                                                       train_loss=None,
                                                       train_error=None,
                                                       total_train_error=None,
                                                       val_eval_iter=step,
                                                       learning_rate=None)

                    # Save everything!
                    self.train_stats_logger.log()

                # Save the model (both step checkpoint and rolling final).
                if step % self.save_frequency == 0 and step > 0:
                    self.saver.save(
                        self.sess,
                        os.path.join(self.model_dir,
                                     GQCNNFilenames.INTER_MODEL.format(step)))
                    self.saver.save(
                        self.sess,
                        os.path.join(self.model_dir,
                                     GQCNNFilenames.FINAL_MODEL))

                # Launch tensorboard only after the first iteration.
                if not self.tensorboard_has_launched:
                    self.tensorboard_has_launched = True
                    self._launch_tensorboard()

            # Get final errors and flush the stdout pipeline.
            final_val_result = self._error_rate_in_batches()
            self.logger.info("Final validation error: {}".format(
                str(round(final_val_result.error_rate, 3))))
            self.logger.info("Final validation loss: {}".format(
                str(round(final_val_result.cross_entropy_loss, 3))))
            if self.cfg["eval_total_train_error"]:
                final_train_result = self._error_rate_in_batches(
                    validation_set=False)
                self.logger.info("Final training error: {}".format(
                    final_train_result.error_rate))
                self.logger.info("Final training loss: {}".format(
                    final_train_result.cross_entropy_loss))
            sys.stdout.flush()

            # Update the `TrainStatsLogger`.
            self.train_stats_logger.update(
                train_eval_iter=None,
                train_loss=None,
                train_error=None,
                total_train_error=None,
                val_eval_iter=step,
                val_loss=final_val_result.cross_entropy_loss,
                val_error=final_val_result.error_rate,
                learning_rate=None)

            # Log & save everything!
            self.train_stats_logger.log()
            self.saver.save(
                self.sess,
                os.path.join(self.model_dir, GQCNNFilenames.FINAL_MODEL))
        except Exception as e:
            # Clean up workers/session before propagating the failure.
            self._cleanup()
            raise e

        self._cleanup()
format(k + 1, random_file_indices.shape[0])) im_data = self.dataset.tensor(self.im_field_name, i).arr train_indices = self.train_index_map[i] if train_indices.shape[0] > 0: self.im_mean += np.sum(im_data[train_indices, ...]) num_summed += self.train_index_map[i].shape[ 0] * im_data.shape[1] * im_data.shape[2] self.im_mean = self.im_mean / num_summed # Compute std. self.logger.info("Computing image std") for k, i in enumerate(random_file_indices): if k % self.preproc_log_frequency == 0: self.logger.info( "Adding file {} of {} to image std estimate". format(k + 1, random_file_indices.shape[0])) im_data = self.dataset.tensor(self.im_field_name, i).arr train_indices = self.train_index_map[i] if train_indices.shape[0] > 0: self.im_std += np.sum( (im_data[train_indices, ...] - self.im_mean)**2) self.im_std = np.sqrt(self.im_std / num_summed) # Save. np.save(im_mean_filename, self.im_mean) np.save(im_std_filename, self.im_std) # Update GQ-CNN instance. self.gqcnn.set_im_mean(self.im_mean) self.gqcnn.set_im_std(self.im_std) # Compute pose stats. pose_mean_filename = os.path.join(self.model_dir, GQCNNFilenames.POSE_MEAN) pose_std_filename = os.path.join(self.model_dir, GQCNNFilenames.POSE_STD) if os.path.exists(pose_mean_filename) and os.path.exists( pose_std_filename): self.pose_mean = np.load(pose_mean_filename) self.pose_std = np.load(pose_std_filename) else: self.pose_mean = np.zeros(self.raw_pose_shape) self.pose_std = np.zeros(self.raw_pose_shape) # Compute mean. num_summed = 0 self.logger.info("Computing pose mean") for k, i in enumerate(random_file_indices): if k % self.preproc_log_frequency == 0: self.logger.info( "Adding file {} of {} to pose mean estimate". 
format(k + 1, random_file_indices.shape[0])) pose_data = self.dataset.tensor(self.pose_field_name, i).arr train_indices = self.train_index_map[i] if self.gripper_mode == GripperMode.SUCTION: rand_indices = np.random.choice( pose_data.shape[0], size=pose_data.shape[0] // 2, replace=False) pose_data[rand_indices, 4] = -pose_data[rand_indices, 4] elif self.gripper_mode == GripperMode.LEGACY_SUCTION: rand_indices = np.random.choice( pose_data.shape[0], size=pose_data.shape[0] // 2, replace=False) pose_data[rand_indices, 3] = -pose_data[rand_indices, 3] if train_indices.shape[0] > 0: pose_data = pose_data[train_indices, :] pose_data = pose_data[np.isfinite(pose_data[:, 3]), :] self.pose_mean += np.sum(pose_data, axis=0) num_summed += pose_data.shape[0] self.pose_mean = self.pose_mean / num_summed # Compute std. self.logger.info("Computing pose std") for k, i in enumerate(random_file_indices): if k % self.preproc_log_frequency == 0: self.logger.info( "Adding file {} of {} to pose std estimate".format( k + 1, random_file_indices.shape[0])) pose_data = self.dataset.tensor(self.pose_field_name, i).arr train_indices = self.train_index_map[i] if self.gripper_mode == GripperMode.SUCTION: rand_indices = np.random.choice( pose_data.shape[0], size=pose_data.shape[0] // 2, replace=False) pose_data[rand_indices, 4] = -pose_data[rand_indices, 4] elif self.gripper_mode == GripperMode.LEGACY_SUCTION: rand_indices = np.random.choice( pose_data.shape[0], size=pose_data.shape[0] // 2, replace=False) pose_data[rand_indices, 3] = -pose_data[rand_indices, 3] if train_indices.shape[0] > 0: pose_data = pose_data[train_indices, :] pose_data = pose_data[np.isfinite(pose_data[:, 3]), :] self.pose_std += np.sum( (pose_data - self.pose_mean)**2, axis=0) self.pose_std = np.sqrt(self.pose_std / num_summed) self.pose_std[self.pose_std == 0] = 1.0 # Save. 
self.pose_mean = read_pose_data(self.pose_mean, self.gripper_mode) self.pose_std = read_pose_data(self.pose_std, self.gripper_mode) np.save(pose_mean_filename, self.pose_mean) np.save(pose_std_filename, self.pose_std) # Update GQ-CNN instance. self.gqcnn.set_pose_mean(self.pose_mean) self.gqcnn.set_pose_std(self.pose_std) # Check for invalid values. if np.any(np.isnan(self.pose_mean)) or np.any( np.isnan(self.pose_std)): self.logger.error( "Pose mean or pose std is NaN! Check the input dataset") exit(0) elif self.gqcnn.input_depth_mode == InputDepthMode.SUB: # Compute (image - depth) stats. im_depth_sub_mean_filename = os.path.join( self.model_dir, GQCNNFilenames.IM_DEPTH_SUB_MEAN) im_depth_sub_std_filename = os.path.join( self.model_dir, GQCNNFilenames.IM_DEPTH_SUB_STD) if os.path.exists(im_depth_sub_mean_filename) and os.path.exists( im_depth_sub_std_filename): self.im_depth_sub_mean = np.load(im_depth_sub_mean_filename) self.im_depth_sub_std = np.load(im_depth_sub_std_filename) else: self.im_depth_sub_mean = 0 self.im_depth_sub_std = 0 # Compute mean. self.logger.info("Computing (image - depth) mean.") num_summed = 0 for k, i in enumerate(random_file_indices): if k % self.preproc_log_frequency == 0: self.logger.info("Adding file {} of {}...".format( k + 1, random_file_indices.shape[0])) im_data = self.dataset.tensor(self.im_field_name, i).arr depth_data = read_pose_data( self.dataset.tensor(self.pose_field_name, i).arr, self.gripper_mode) sub_data = im_data - np.tile( np.reshape(depth_data, (-1, 1, 1, 1)), (1, im_data.shape[1], im_data.shape[2], 1)) train_indices = self.train_index_map[i] if train_indices.shape[0] > 0: self.im_depth_sub_mean += np.sum( sub_data[train_indices, ...]) num_summed += self.train_index_map[i].shape[ 0] * im_data.shape[1] * im_data.shape[2] self.im_depth_sub_mean = self.im_depth_sub_mean / num_summed # Compute std. 
self.logger.info("Computing (image - depth) std.") for k, i in enumerate(random_file_indices): if k % self.preproc_log_frequency == 0: self.logger.info("Adding file {} of {}...".format( k + 1, random_file_indices.shape[0])) im_data = self.dataset.tensor(self.im_field_name, i).arr depth_data = read_pose_data( self.dataset.tensor(self.pose_field_name, i).arr, self.gripper_mode) sub_data = im_data - np.tile( np.reshape(depth_data, (-1, 1, 1, 1)), (1, im_data.shape[1], im_data.shape[2], 1)) train_indices = self.train_index_map[i] if train_indices.shape[0] > 0: self.im_depth_sub_std += np.sum( (sub_data[train_indices, ...] - self.im_depth_sub_mean)**2) self.im_depth_sub_std = np.sqrt(self.im_depth_sub_std / num_summed) # Save. np.save(im_depth_sub_mean_filename, self.im_depth_sub_mean) np.save(im_depth_sub_std_filename, self.im_depth_sub_std) # Update GQ-CNN instance. self.gqcnn.set_im_depth_sub_mean(self.im_depth_sub_mean) self.gqcnn.set_im_depth_sub_std(self.im_depth_sub_std) elif self.gqcnn.input_depth_mode == InputDepthMode.IM_ONLY: # Compute image stats. im_mean_filename = os.path.join(self.model_dir, GQCNNFilenames.IM_MEAN) im_std_filename = os.path.join(self.model_dir, GQCNNFilenames.IM_STD) if os.path.exists(im_mean_filename) and os.path.exists( im_std_filename): self.im_mean = np.load(im_mean_filename) self.im_std = np.load(im_std_filename) else: self.im_mean = 0 self.im_std = 0 # Compute mean. self.logger.info("Computing image mean.") num_summed = 0 for k, i in enumerate(random_file_indices): if k % self.preproc_log_frequency == 0: self.logger.info("Adding file {} of {}...".format( k + 1, random_file_indices.shape[0])) im_data = self.dataset.tensor(self.im_field_name, i).arr train_indices = self.train_index_map[i] if train_indices.shape[0] > 0: self.im_mean += np.sum(im_data[train_indices, ...]) num_summed += self.train_index_map[i].shape[ 0] * im_data.shape[1] * im_data.shape[2] self.im_mean = self.im_mean / num_summed # Compute std. 
self.logger.info("Computing image std.") for k, i in enumerate(random_file_indices): if k % self.preproc_log_frequency == 0: self.logger.info("Adding file {} of {}...".format( k + 1, random_file_indices.shape[0])) im_data = self.dataset.tensor(self.im_field_name, i).arr train_indices = self.train_index_map[i] if train_indices.shape[0] > 0: self.im_std += np.sum( (im_data[train_indices, ...] - self.im_mean)**2) self.im_std = np.sqrt(self.im_std / num_summed) # Save. np.save(im_mean_filename, self.im_mean) np.save(im_std_filename, self.im_std) # Update GQ-CNN instance. self.gqcnn.set_im_mean(self.im_mean) self.gqcnn.set_im_std(self.im_std) # Compute normalization parameters of the network. pct_pos_train_filename = os.path.join(self.model_dir, GQCNNFilenames.PCT_POS_TRAIN) pct_pos_val_filename = os.path.join(self.model_dir, GQCNNFilenames.PCT_POS_VAL) if os.path.exists(pct_pos_train_filename) and os.path.exists( pct_pos_val_filename): pct_pos_train = np.load(pct_pos_train_filename) pct_pos_val = np.load(pct_pos_val_filename) else: self.logger.info("Computing grasp quality metric stats.") all_train_metrics = None all_val_metrics = None # Read metrics. for k, i in enumerate(random_file_indices): if k % self.preproc_log_frequency == 0: self.logger.info("Adding file {} of {}...".format( k + 1, random_file_indices.shape[0])) metric_data = self.dataset.tensor(self.label_field_name, i).arr train_indices = self.train_index_map[i] val_indices = self.val_index_map[i] if train_indices.shape[0] > 0: train_metric_data = metric_data[train_indices] if all_train_metrics is None: all_train_metrics = train_metric_data else: all_train_metrics = np.r_[all_train_metrics, train_metric_data] if val_indices.shape[0] > 0: val_metric_data = metric_data[val_indices] if all_val_metrics is None: all_val_metrics = val_metric_data else: all_val_metrics = np.r_[all_val_metrics, val_metric_data] # Compute train stats. 
            # Summary statistics of the grasp-quality metric over the
            # training split (used downstream for analysis/normalization).
            self.min_metric = np.min(all_train_metrics)
            self.max_metric = np.max(all_train_metrics)
            self.mean_metric = np.mean(all_train_metrics)
            self.median_metric = np.median(all_train_metrics)

            # Save metrics.
            # Fraction of training examples whose metric clears the success
            # threshold (i.e. fraction of positive labels).
            pct_pos_train = np.sum(all_train_metrics > self.metric_thresh
                                   ) / all_train_metrics.shape[0]
            np.save(pct_pos_train_filename, np.array(pct_pos_train))

            # Only compute val stats when a validation split exists
            # (train_pct < 1.0 means some data was held out).
            if self.train_pct < 1.0:
                pct_pos_val = np.sum(all_val_metrics > self.metric_thresh
                                     ) / all_val_metrics.shape[0]
                np.save(pct_pos_val_filename, np.array(pct_pos_val))

        self.logger.info("Percent positive in train: " + str(pct_pos_train))
        if self.train_pct < 1.0:
            self.logger.info("Percent positive in val: " + str(pct_pos_val))

        if self._angular_bins > 0:
            # Histogram how many grasps fall in each angular bin, purely for
            # logging/diagnostics. The angle-wrapping below mirrors the mask
            # construction in `_load_and_enqueue`.
            self.logger.info("Calculating angular bin statistics...")
            bin_counts = np.zeros((self._angular_bins, ))
            for m in range(self.num_tensors):
                pose_arr = self.dataset.tensor(self.pose_field_name, m).arr
                # NOTE(review): assumes the grasp angle lives in pose column
                # 3 for this gripper mode — confirm against pose schema.
                angles = pose_arr[:, 3]
                neg_ind = np.where(angles < 0)
                # Wrap angles into [-max_angle/2, max_angle/2].
                angles = np.abs(angles) % self._max_angle
                angles[neg_ind] *= -1
                g_90 = np.where(angles > (self._max_angle / 2))
                l_neg_90 = np.where(angles < (-1 * (self._max_angle / 2)))
                angles[g_90] -= self._max_angle
                angles[l_neg_90] += self._max_angle
                # TODO(vsatish): Actually fix this.
                angles *= -1  # Hack to fix reverse angle convention.
                # Shift to [0, max_angle] so bin index is non-negative.
                angles += (self._max_angle / 2)
                for i in range(angles.shape[0]):
                    bin_counts[int(angles[i] // self._bin_width)] += 1
            self.logger.info("Bin counts: {}.".format(bin_counts))

    def _compute_split_indices(self):
        """Compute train and validation indices for each tensor to speed data
        accesses.

        Populates `self.train_index_map` and `self.val_index_map`, each a
        dict mapping tensor (file) index -> np.ndarray of datapoint indices
        *relative to the start of that tensor file*.
        """
        # Read indices (global, dataset-wide datapoint indices).
        train_indices, val_indices, _ = self.dataset.split(self.split_name)

        # Loop through tensors, assigning indices to each file.
        self.train_index_map = {}
        for i in range(self.dataset.num_tensors):
            self.train_index_map[i] = []
        for i in train_indices:
            tensor_index = self.dataset.tensor_index(i)
            datapoint_indices = self.dataset.datapoint_indices_for_tensor(
                tensor_index)
            # Subtracting the lowest global index in the file converts a
            # global datapoint index into a within-file offset.
            lowest = np.min(datapoint_indices)
            self.train_index_map[tensor_index].append(i - lowest)
        for i, indices in self.train_index_map.items():
            self.train_index_map[i] = np.array(indices)

        self.val_index_map = {}
        for i in range(self.dataset.num_tensors):
            self.val_index_map[i] = []
        for i in val_indices:
            tensor_index = self.dataset.tensor_index(i)
            # NOTE(review): this membership check is redundant — every key
            # was pre-initialized in the loop above.
            if tensor_index not in self.val_index_map:
                self.val_index_map[tensor_index] = []
            datapoint_indices = self.dataset.datapoint_indices_for_tensor(
                tensor_index)
            lowest = np.min(datapoint_indices)
            self.val_index_map[tensor_index].append(i - lowest)
        for i, indices in self.val_index_map.items():
            self.val_index_map[i] = np.array(indices)

    def _setup_output_dirs(self):
        """Setup output directories for model checkpoints, Tensorboard
        summaries, and filter visualizations."""
        self.logger.info("Saving model to: {}".format(self.model_dir))

        # Create the summary dir.
        self.summary_dir = os.path.join(self.model_dir,
                                        "tensorboard_summaries")
        if not os.path.exists(self.summary_dir):
            os.mkdir(self.summary_dir)
        else:
            # If the summary directory already exists, clean it out by deleting
            # all files in it. We don't want Tensorboard to get confused with
            # old logs while reusing the same directory.
            old_files = os.listdir(self.summary_dir)
            for f in old_files:
                os.remove(os.path.join(self.summary_dir, f))

        # Setup filter directory.
        self.filter_dir = os.path.join(self.model_dir, "filters")
        if not os.path.exists(self.filter_dir):
            os.mkdir(self.filter_dir)

    def _save_configs(self):
        """Save training configuration, training script, and network
        architecture to the model directory for reproducibility."""
        # Update config for fine-tuning.
        if self.finetuning:
            self.cfg["base_model_dir"] = self.base_model_dir

        # Save config.
out_config_filename = os.path.join(self.model_dir, GQCNNFilenames.SAVED_CFG) tempOrderedDict = collections.OrderedDict() for key in self.cfg: tempOrderedDict[key] = self.cfg[key] with open(out_config_filename, "w") as outfile: json.dump(tempOrderedDict, outfile, indent=JSON_INDENT) # Save training script. this_filename = sys.argv[0] out_train_filename = os.path.join(self.model_dir, "training_script.py") shutil.copyfile(this_filename, out_train_filename) # Save architecture. out_architecture_filename = os.path.join(self.model_dir, GQCNNFilenames.SAVED_ARCH) json.dump(self.cfg["gqcnn"]["architecture"], open(out_architecture_filename, "w"), indent=JSON_INDENT) def _read_training_params(self): """Read training parameters from configuration file.""" # Splits. self.train_pct = self.cfg["train_pct"] self.total_pct = self.cfg["total_pct"] # Training sizes. self.train_batch_size = self.cfg["train_batch_size"] self.val_batch_size = self.cfg["val_batch_size"] self.max_files_eval = None if "max_files_eval" in self.cfg: self.max_files_eval = self.cfg["max_files_eval"] # Logging. self.num_epochs = self.cfg["num_epochs"] self.eval_frequency = self.cfg["eval_frequency"] self.save_frequency = self.cfg["save_frequency"] self.log_frequency = self.cfg["log_frequency"] # Optimization. self.train_l2_regularizer = self.cfg["train_l2_regularizer"] self.base_lr = self.cfg["base_lr"] self.decay_step_multiplier = self.cfg["decay_step_multiplier"] self.decay_rate = self.cfg["decay_rate"] self.momentum_rate = self.cfg["momentum_rate"] self.max_training_examples_per_load = self.cfg[ "max_training_examples_per_load"] self.drop_rate = self.cfg["drop_rate"] self.max_global_grad_norm = self.cfg["max_global_grad_norm"] self.optimize_base_layers = False if "optimize_base_layers" in self.cfg: self.optimize_base_layers = self.cfg["optimize_base_layers"] # Metrics. 
self.target_metric_name = self.cfg["target_metric_name"] self.metric_thresh = self.cfg["metric_thresh"] self.training_mode = self.cfg["training_mode"] if self.training_mode != TrainingMode.CLASSIFICATION: raise ValueError( "Training mode '{}' not currently supported!".format( self.training_mode)) # Tensorboad. self._tensorboard_port = self.cfg["tensorboard_port"] # Preprocessing. self.preproc_log_frequency = self.cfg["preproc_log_frequency"] self.num_random_files = self.cfg["num_random_files"] self.max_prefetch_q_size = GeneralConstants.MAX_PREFETCH_Q_SIZE if "max_prefetch_q_size" in self.cfg: self.max_prefetch_q_size = self.cfg["max_prefetch_q_size"] self.num_prefetch_q_workers = GeneralConstants.NUM_PREFETCH_Q_WORKERS if "num_prefetch_q_workers" in self.cfg: self.num_prefetch_q_workers = self.cfg["num_prefetch_q_workers"] # Re-weighting positives/negatives. self.pos_weight = 0.0 if "pos_weight" in self.cfg: self.pos_weight = self.cfg["pos_weight"] self.pos_accept_prob = 1.0 self.neg_accept_prob = 1.0 if self.pos_weight > 1: self.neg_accept_prob = 1 / self.pos_weight else: self.pos_accept_prob = self.pos_weight if self.train_pct < 0 or self.train_pct > 1: raise ValueError("Train percentage must be in range [0,1]") if self.total_pct < 0 or self.total_pct > 1: raise ValueError("Total percentage must be in range [0,1]") # Input normalization. self._norm_inputs = True if self.gqcnn.input_depth_mode == InputDepthMode.SUB: self._norm_inputs = False # Angular training. self._angular_bins = self.gqcnn.angular_bins self._max_angle = self.gqcnn.max_angle # During angular training, make sure symmetrization in denoising is # turned off and also set the angular bin width. if self._angular_bins > 0: symmetrization_msg = ("Symmetrization denoising must be turned off" " during angular training.") assert not self.cfg["symmetrize"], symmetrization_msg self._bin_width = self._max_angle / self._angular_bins # Debugging. 
self._debug = self.cfg["debug"] self._seed = self.cfg["seed"] if self._debug: if self.num_prefetch_q_workers > 1: self.logger.warning( "Deterministic execution is not possible with " "more than one prefetch queue worker even in debug mode.") # This reduces initialization time for fast debugging. self.num_random_files = self.cfg["debug_num_files"] np.random.seed(self._seed) random.seed(self._seed) def _setup_denoising_and_synthetic(self): """Setup denoising and synthetic data parameters.""" # Multiplicative denoising. if self.cfg["multiplicative_denoising"]: self.gamma_shape = self.cfg["gamma_shape"] self.gamma_scale = 1 / self.gamma_shape # Gaussian process noise. if self.cfg["gaussian_process_denoising"]: self.gp_rescale_factor = self.cfg[ "gaussian_process_scaling_factor"] self.gp_sample_height = int(self.im_height / self.gp_rescale_factor) self.gp_sample_width = int(self.im_width / self.gp_rescale_factor) self.gp_num_pix = self.gp_sample_height * self.gp_sample_width self.gp_sigma = self.cfg["gaussian_process_sigma"] def _open_dataset(self): """Open the dataset.""" # Read in filenames of training data (poses, images, labels). self.dataset = TensorDataset.open(self.dataset_dir) self.num_datapoints = self.dataset.num_datapoints self.num_tensors = self.dataset.num_tensors self.datapoints_per_file = self.dataset.datapoints_per_file self.num_random_files = min(self.num_tensors, self.num_random_files) # Read split. if not self.dataset.has_split(self.split_name): self.logger.info( "Dataset split: {} not found. Creating new split...".format( self.split_name)) self.dataset.make_split(self.split_name, train_pct=self.train_pct) else: self.logger.info("Training split: {} found in dataset.".format( self.split_name)) self._compute_split_indices() def _compute_data_params(self): """Compute parameters of the dataset.""" # Image params. 
self.im_field_name = self.cfg["image_field_name"] self.im_height = self.dataset.config["fields"][ self.im_field_name]["height"] self.im_width = self.dataset.config["fields"][ self.im_field_name]["width"] self.im_channels = self.dataset.config["fields"][ self.im_field_name]["channels"] # NOTE: There was originally some weird math going on here... self.im_center = np.array([self.im_height // 2, self.im_width // 2]) # Poses. self.pose_field_name = self.cfg["pose_field_name"] self.gripper_mode = self.gqcnn.gripper_mode self.pose_dim = pose_dim(self.gripper_mode) self.raw_pose_shape = self.dataset.config["fields"][ self.pose_field_name]["height"] # Outputs. self.label_field_name = self.target_metric_name self.num_categories = 2 # Compute the number of train and val examples. self.num_train = 0 self.num_val = 0 for train_indices in self.train_index_map.values(): self.num_train += train_indices.shape[0] for val_indices in self.train_index_map.values(): self.num_val += val_indices.shape[0] # Set params based on the number of training examples (convert epochs # to steps). self.eval_frequency = int( np.ceil(self.eval_frequency * (self.num_train / self.train_batch_size))) self.save_frequency = int( np.ceil(self.save_frequency * (self.num_train / self.train_batch_size))) self.decay_step = self.decay_step_multiplier * self.num_train def _setup_tensorflow(self): """Setup Tensorflow placeholders, session, and queue.""" # Set up training label and NumPy data types. if self.training_mode == TrainingMode.REGRESSION: train_label_dtype = tf.float32 self.numpy_dtype = np.float32 elif self.training_mode == TrainingMode.CLASSIFICATION: train_label_dtype = tf.int64 self.numpy_dtype = np.int64 if self.cfg["loss"] == "weighted_cross_entropy": train_label_dtype = tf.float32 self.numpy_dtype = np.float32 else: raise ValueError("Training mode '{}' not supported".format( self.training_mode)) # Set up placeholders. 
self.train_labels_node = tf.placeholder(train_label_dtype, (self.train_batch_size, )) self.input_im_node = tf.placeholder( tf.float32, (self.train_batch_size, self.im_height, self.im_width, self.im_channels)) self.input_pose_node = tf.placeholder( tf.float32, (self.train_batch_size, self.pose_dim)) if self._angular_bins > 0: self.train_pred_mask_node = tf.placeholder( tf.int32, (self.train_batch_size, self._angular_bins * 2)) # Create data prefetch queue. self.prefetch_q = mp.Queue(self.max_prefetch_q_size) # Get weights. self.weights = self.gqcnn.weights # Open a TF session for the GQ-CNN instance and store it also as the # optimizer session. self.sess = self.gqcnn.open_session() # Setup data prefetch queue worker termination event. self.term_event = mp.Event() self.term_event.clear() def _setup_summaries(self): """Sets up placeholders for summary values and creates summary writer.""" # Create placeholders for Python values because `tf.summary.scalar` # expects a placeholder. self.val_error_placeholder = tf.placeholder(tf.float32, []) self.minibatch_error_placeholder = tf.placeholder(tf.float32, []) self.minibatch_loss_placeholder = tf.placeholder(tf.float32, []) self.learning_rate_placeholder = tf.placeholder(tf.float32, []) # Tag the `tf.summary.scalar`s so that we can group them together and # write different batches of summaries at different intervals. tf.summary.scalar("val_error", self.val_error_placeholder, collections=["eval_frequency"]) tf.summary.scalar("minibatch_error", self.minibatch_error_placeholder, collections=["log_frequency"]) tf.summary.scalar("minibatch_loss", self.minibatch_loss_placeholder, collections=["log_frequency"]) tf.summary.scalar("learning_rate", self.learning_rate_placeholder, collections=["log_frequency"]) self.merged_eval_summaries = tf.summary.merge_all("eval_frequency") self.merged_log_summaries = tf.summary.merge_all("log_frequency") # Create a TF summary writer with the specified summary directory. 
self.summary_writer = tf.summary.FileWriter(self.summary_dir) # Initialize the variables again now that we have added some new ones. with self.sess.as_default(): tf.global_variables_initializer().run() def _cleanup(self): self.logger.info("Cleaning and preparing to exit optimization...") # Set termination event for prefetch queue workers. self.logger.info("Terminating prefetch queue workers...") self.term_event.set() # Flush prefetch queue. # NOTE: This prevents a deadlock with the worker process queue buffers. self._flush_prefetch_queue() # Join prefetch queue worker processes. for p in self.prefetch_q_workers: p.join() # Close tensorboard if started. if self.tensorboard_has_launched: self._close_tensorboard() # Close Tensorflow session. self.gqcnn.close_session() def _flush_prefetch_queue(self): """Flush prefetch queue.""" self.logger.info("Flushing prefetch queue...") for i in range(self.prefetch_q.qsize()): self.prefetch_q.get() def _setup(self): """Setup for training.""" # Initialize data prefetch queue thread exit booleans. self.queue_thread_exited = False self.forceful_exit = False # Setup output directories. self._setup_output_dirs() # Save training configuration. self._save_configs() # Read training parameters from config file. self._read_training_params() # Setup image and pose data files. self._open_dataset() # Compute data parameters. self._compute_data_params() # Setup denoising and synthetic data parameters. self._setup_denoising_and_synthetic() # Compute means, std's, and normalization metrics. self._compute_data_metrics() # Setup Tensorflow session/placeholders/queue. self._setup_tensorflow() # Setup summaries for visualizing metrics in Tensorboard. self._setup_summaries() def _load_and_enqueue(self, seed): """Loads and enqueues a batch of images for training.""" # When the parent process receives a SIGINT, it will itself handle # cleaning up child processes. 
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        # Set the random seed explicitly to prevent all workers from possible
        # inheriting the same seed on process initialization.
        np.random.seed(seed)
        random.seed(seed)

        # Open dataset. Each worker opens its own handle rather than sharing
        # the parent's.
        dataset = TensorDataset.open(self.dataset_dir)

        while not self.term_event.is_set():
            # Loop through data.
            num_queued = 0
            start_i = 0
            end_i = 0
            file_num = 0
            queue_start = time.time()

            # Init buffers.
            train_images = np.zeros([
                self.train_batch_size, self.im_height, self.im_width,
                self.im_channels
            ]).astype(np.float32)
            train_poses = np.zeros([self.train_batch_size,
                                    self.pose_dim]).astype(np.float32)
            train_labels = np.zeros(self.train_batch_size).astype(
                self.numpy_dtype)
            if self._angular_bins > 0:
                train_pred_mask = np.zeros(
                    (self.train_batch_size, self._angular_bins * 2),
                    dtype=bool)

            # Fill the batch buffers from randomly-chosen tensor files until
            # a full batch has been assembled.
            while start_i < self.train_batch_size:
                # Compute num remaining.
                num_remaining = self.train_batch_size - num_queued

                # Generate tensor index uniformly at random.
                file_num = np.random.choice(self.num_tensors, size=1)[0]

                read_start = time.time()
                train_images_tensor = dataset.tensor(self.im_field_name,
                                                     file_num)
                train_poses_tensor = dataset.tensor(self.pose_field_name,
                                                    file_num)
                train_labels_tensor = dataset.tensor(self.label_field_name,
                                                     file_num)
                read_stop = time.time()
                self.logger.debug("Reading data took {} sec".format(
                    str(round(read_stop - read_start, 3))))
                self.logger.debug("File num: {}".format(file_num))

                # Get batch indices uniformly at random.
                # NOTE(review): this shuffles the stored index array in
                # place (`train_ind` aliases `self.train_index_map` entry) —
                # presumably intentional, as the map is per-worker here.
                train_ind = self.train_index_map[file_num]
                np.random.shuffle(train_ind)
                if self.gripper_mode == GripperMode.LEGACY_SUCTION:
                    # Drop datapoints with non-finite approach angles.
                    tp_tmp = read_pose_data(train_poses_tensor.data,
                                            self.gripper_mode)
                    train_ind = train_ind[np.isfinite(tp_tmp[train_ind, 1])]

                # Filter positives and negatives to re-balance the batch
                # when `pos_weight` is configured.
                if self.training_mode == TrainingMode.CLASSIFICATION and \
                        self.pos_weight != 0.0:
                    labels = 1 * (train_labels_tensor.arr > self.metric_thresh)
                    np.random.shuffle(train_ind)
                    filtered_ind = []
                    for index in train_ind:
                        if labels[index] == 0 and np.random.rand(
                        ) < self.neg_accept_prob:
                            filtered_ind.append(index)
                        elif labels[index] == 1 and np.random.rand(
                        ) < self.pos_accept_prob:
                            filtered_ind.append(index)
                    train_ind = np.array(filtered_ind)

                # Sample train indices.
                upper = min(num_remaining, train_ind.shape[0],
                            self.max_training_examples_per_load)
                ind = train_ind[:upper]
                num_loaded = ind.shape[0]
                if num_loaded == 0:
                    self.logger.warning("Queueing zero examples!!!!")
                    continue

                # Subsample data.
                train_images_arr = train_images_tensor.arr[ind, ...]
                train_poses_arr = train_poses_tensor.arr[ind, ...]
                # NOTE(review): assumes grasp angle is raw-pose column 3 —
                # confirm against pose schema.
                angles = train_poses_arr[:, 3]
                train_label_arr = train_labels_tensor.arr[ind]
                num_images = train_images_arr.shape[0]

                # Resize images when the stored resolution differs from the
                # network input resolution.
                rescale_factor = self.im_height / train_images_arr.shape[1]
                if rescale_factor != 1.0:
                    resized_train_images_arr = np.zeros([
                        num_images, self.im_height, self.im_width,
                        self.im_channels
                    ]).astype(np.float32)
                    for i in range(num_images):
                        for c in range(train_images_arr.shape[3]):
                            resized_train_images_arr[i, :, :, c] = imresize(
                                train_images_arr[i, :, :, c],
                                rescale_factor,
                                interp="bicubic")
                    train_images_arr = resized_train_images_arr

                # Add noises to images.
                train_images_arr, train_poses_arr = self._distort(
                    train_images_arr, train_poses_arr)

                # Slice poses down to the gripper-mode-specific columns.
                train_poses_arr = read_pose_data(train_poses_arr,
                                                 self.gripper_mode)

                # Standardize inputs and outputs.
                if self._norm_inputs:
                    train_images_arr = (train_images_arr -
                                        self.im_mean) / self.im_std
                    if self.gqcnn.input_depth_mode == \
                            InputDepthMode.POSE_STREAM:
                        train_poses_arr = (train_poses_arr -
                                           self.pose_mean) / self.pose_std
                # Threshold metric into binary success labels.
                train_label_arr = 1 * (train_label_arr > self.metric_thresh)
                train_label_arr = train_label_arr.astype(self.numpy_dtype)
                if self._angular_bins > 0:
                    bins = np.zeros_like(train_label_arr)
                    # Form prediction mask to use when calculating loss.
                    # Angle wrapping mirrors `_compute_data_metrics`.
                    neg_ind = np.where(angles < 0)
                    angles = np.abs(angles) % self._max_angle
                    angles[neg_ind] *= -1
                    g_90 = np.where(angles > (self._max_angle / 2))
                    l_neg_90 = np.where(angles < (-1 * (self._max_angle / 2)))
                    angles[g_90] -= self._max_angle
                    angles[l_neg_90] += self._max_angle
                    # TODO(vsatish): Actually fix this.
                    angles *= -1  # Hack to fix reverse angle convention.
                    angles += (self._max_angle / 2)
                    # Two mask columns per bin (one per class logit).
                    train_pred_mask_arr = np.zeros(
                        (train_label_arr.shape[0], self._angular_bins * 2))
                    for i in range(angles.shape[0]):
                        bins[i] = angles[i] // self._bin_width
                        train_pred_mask_arr[i,
                                            int((angles[i] // self._bin_width)
                                                * 2)] = 1
                        train_pred_mask_arr[
                            i,
                            int((angles[i] // self._bin_width) * 2 + 1)] = 1

                # Compute the number of examples loaded.
                num_loaded = train_images_arr.shape[0]
                end_i = start_i + num_loaded

                # Enqueue training data batch.
                train_images[start_i:end_i, ...] = train_images_arr.copy()
                train_poses[start_i:end_i, :] = train_poses_arr.copy()
                train_labels[start_i:end_i] = train_label_arr.copy()
                if self._angular_bins > 0:
                    train_pred_mask[start_i:end_i] = train_pred_mask_arr.copy()

                # Update start index.
                start_i = end_i
                num_queued += num_loaded

            # Send data to queue (skip if shutdown was requested while the
            # batch was being assembled).
            if not self.term_event.is_set():
                try:
                    if self._angular_bins > 0:
                        self.prefetch_q.put_nowait(
                            (train_images, train_poses, train_labels,
                             train_pred_mask))
                    else:
                        self.prefetch_q.put_nowait(
                            (train_images, train_poses, train_labels))
                except Queue.Full:
                    # Queue is full; back off briefly and drop this batch.
                    time.sleep(GeneralConstants.QUEUE_SLEEP)
                queue_stop = time.time()
                self.logger.debug("Queue batch took {} sec".format(
                    str(round(queue_stop - queue_start, 3))))

    def _distort(self, image_arr, pose_arr):
        """Adds noise to a batch of images.

        Applies (per config flags) multiplicative gamma noise, correlated
        Gaussian-process noise, and random symmetrization (rotation/flips),
        mutating `pose_arr` angle columns to stay consistent with flips.
        """
        # Read params.
        num_images = image_arr.shape[0]

        # Denoising and synthetic data generation.
        if self.cfg["multiplicative_denoising"]:
            # Gamma samples have mean 1 (scale = 1/shape), so intensity is
            # perturbed without biasing the mean.
            mult_samples = ss.gamma.rvs(self.gamma_shape,
                                        scale=self.gamma_scale,
                                        size=num_images)
            mult_samples = mult_samples[:, np.newaxis, np.newaxis, np.newaxis]
            image_arr = image_arr * np.tile(
                mult_samples,
                [1, self.im_height, self.im_width, self.im_channels])

        # Add correlated Gaussian noise: sample at low resolution and
        # upsample so neighboring pixels get correlated noise.
        if self.cfg["gaussian_process_denoising"]:
            for i in range(num_images):
                if np.random.rand() < self.cfg["gaussian_process_rate"]:
                    train_image = image_arr[i, :, :, 0]
                    gp_noise = ss.norm.rvs(scale=self.gp_sigma,
                                           size=self.gp_num_pix).reshape(
                                               self.gp_sample_height,
                                               self.gp_sample_width)
                    gp_noise = imresize(gp_noise,
                                        self.gp_rescale_factor,
                                        interp="bicubic")
                    # Only perturb pixels with valid (non-zero) depth.
                    train_image[train_image > 0] += gp_noise[train_image > 0]
                    image_arr[i, :, :, 0] = train_image

        # Symmetrize images.
        if self.cfg["symmetrize"]:
            for i in range(num_images):
                train_image = image_arr[i, :, :, 0]
                # Rotate with 50% probability.
                if np.random.rand() < 0.5:
                    theta = 180.0
                    rot_map = cv2.getRotationMatrix2D(tuple(self.im_center),
                                                      theta, 1)
                    train_image = cv2.warpAffine(
                        train_image,
                        rot_map, (self.im_height, self.im_width),
                        flags=cv2.INTER_NEAREST)
                    # Negate the angle component to match the rotation.
                    if self.gripper_mode == GripperMode.LEGACY_SUCTION:
                        pose_arr[:, 3] = -pose_arr[:, 3]
                    elif self.gripper_mode == GripperMode.SUCTION:
                        pose_arr[:, 4] = -pose_arr[:, 4]
                # Reflect left right with 50% probability.
if np.random.rand() < 0.5: train_image = np.fliplr(train_image) # Reflect up down with 50% probability. if np.random.rand() < 0.5: train_image = np.flipud(train_image) if self.gripper_mode == GripperMode.LEGACY_SUCTION: pose_arr[:, 3] = -pose_arr[:, 3] elif self.gripper_mode == GripperMode.SUCTION: pose_arr[:, 4] = -pose_arr[:, 4] image_arr[i, :, :, 0] = train_image return image_arr, pose_arr def _error_rate_in_batches(self, num_files_eval=None, validation_set=True): """Compute error and loss over either training or validation set. Returns ------- :obj:"autolab_core.BinaryClassificationResult` validation error """ all_predictions = [] all_labels = [] # Subsample files. file_indices = np.arange(self.num_tensors) if num_files_eval is None: num_files_eval = self.max_files_eval np.random.shuffle(file_indices) if self.max_files_eval is not None and num_files_eval > 0: file_indices = file_indices[:num_files_eval] for i in file_indices: # Load next file. images = self.dataset.tensor(self.im_field_name, i).arr poses = self.dataset.tensor(self.pose_field_name, i).arr raw_poses = np.array(poses, copy=True) labels = self.dataset.tensor(self.label_field_name, i).arr # If no datapoints from this file are in validation then just # continue. if validation_set: indices = self.val_index_map[i] else: indices = self.train_index_map[i] if len(indices) == 0: continue images = images[indices, ...] poses = read_pose_data(poses[indices, :], self.gripper_mode) raw_poses = raw_poses[indices, :] labels = labels[indices] if self.training_mode == TrainingMode.CLASSIFICATION: labels = 1 * (labels > self.metric_thresh) labels = labels.astype(np.uint8) if self._angular_bins > 0: # Form mask to extract predictions from ground-truth angular # bins. 
angles = raw_poses[:, 3] neg_ind = np.where(angles < 0) angles = np.abs(angles) % self._max_angle angles[neg_ind] *= -1 g_90 = np.where(angles > (self._max_angle / 2)) l_neg_90 = np.where(angles < (-1 * (self._max_angle / 2))) angles[g_90] -= self._max_angle angles[l_neg_90] += self._max_angle # TODO(vsatish): Actually fix this. angles *= -1 # Hack to fix reverse angle convention. angles += (self._max_angle / 2) pred_mask = np.zeros((labels.shape[0], self._angular_bins * 2), dtype=bool) for i in range(angles.shape[0]): pred_mask[i, int( (angles[i] // self._bin_width) * 2)] = True pred_mask[i, int((angles[i] // self._bin_width) * 2 + 1)] = True # Get predictions. predictions = self.gqcnn.predict(images, poses) if self._angular_bins > 0: predictions = predictions[pred_mask].reshape((-1, 2)) # Update. all_predictions.extend(predictions[:, 1].tolist()) all_labels.extend(labels.tolist()) # Get learning result. result = None if self.training_mode == TrainingMode.CLASSIFICATION: result = BinaryClassificationResult(all_predictions, all_labels) else: result = RegressionResult(all_predictions, all_labels) return result ================================================ FILE: gqcnn/utils/__init__.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. 
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. """ from .enums import (ImageMode, TrainingMode, GripperMode, InputDepthMode, GeneralConstants, GQCNNTrainingStatus, GQCNNFilenames) from .policy_exceptions import (NoValidGraspsException, NoAntipodalPairsFoundException) from .train_stats_logger import TrainStatsLogger from .utils import (is_py2, set_cuda_visible_devices, pose_dim, read_pose_data, reduce_shape, weight_name_to_layer_name, imresize) __all__ = [ "is_py2", "set_cuda_visible_devices", "pose_dim", "read_pose_data", "reduce_shape", "weight_name_to_layer_name", "imresize", "ImageMode", "TrainingMode", "GripperMode", "InputDepthMode", "GeneralConstants", "GQCNNTrainingStatus", "NoValidGraspsException", "NoAntipodalPairsFoundException", "TrainStatsLogger", "GQCNNFilenames" ] ================================================ FILE: gqcnn/utils/enums.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. 
Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck
Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial
licensing opportunities.

IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS
SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

Constants/enums.

Author
------
Vishal Satish
"""
import math

import tensorflow as tf


# Other constants.
class GeneralConstants(object):
    SEED = 3472134
    SEED_SAMPLE_MAX = 2**32 - 1  # Max range for `np.random.seed`.
    # Per-run timeout for blocking TF session calls.
    timeout_option = tf.RunOptions(timeout_in_ms=1000000)
    # Defaults for the training data prefetch queue.
    MAX_PREFETCH_Q_SIZE = 250
    NUM_PREFETCH_Q_WORKERS = 3
    QUEUE_SLEEP = 0.001  # Back-off (sec) when the prefetch queue is full.
    PI = math.pi
    FIGSIZE = 16  # For visualization.


# Enum for image modalities.
class ImageMode(object):
    BINARY = "binary"
    DEPTH = "depth"
    BINARY_TF = "binary_tf"
    COLOR_TF = "color_tf"
    GRAY_TF = "gray_tf"
    DEPTH_TF = "depth_tf"
    DEPTH_TF_TABLE = "depth_tf_table"
    TF_DEPTH_IMS = "tf_depth_ims"


# Enum for training modes.
class TrainingMode(object):
    CLASSIFICATION = "classification"
    REGRESSION = "regression"  # Has not been tested, for experimentation only!


# Enum for input pose data formats.
class GripperMode(object):
    PARALLEL_JAW = "parallel_jaw"
    SUCTION = "suction"
    MULTI_SUCTION = "multi_suction"
    LEGACY_PARALLEL_JAW = "legacy_parallel_jaw"
    LEGACY_SUCTION = "legacy_suction"


# Enum for input depth mode.
class InputDepthMode(object):
    POSE_STREAM = "pose_stream"
    SUB = "im_depth_sub"
    IM_ONLY = "im_only"


# Enum for training status.
class GQCNNTrainingStatus(object):
    NOT_STARTED = "not_started"
    SETTING_UP = "setting_up"
    TRAINING = "training"


# Enum for filenames.
class GQCNNFilenames(object):
    # Dataset statistics.
    PCT_POS_VAL = "pct_pos_val.npy"
    PCT_POS_TRAIN = "pct_pos_train.npy"

    # Training curves.
    LEARNING_RATES = "learning_rates.npy"
    TRAIN_ITERS = "train_eval_iters.npy"
    TRAIN_LOSSES = "train_losses.npy"
    TRAIN_ERRORS = "train_errors.npy"
    TOTAL_TRAIN_LOSSES = "total_train_losses.npy"
    TOTAL_TRAIN_ERRORS = "total_train_errors.npy"
    VAL_ITERS = "val_eval_iters.npy"
    VAL_LOSSES = "val_losses.npy"
    VAL_ERRORS = "val_errors.npy"

    # Input normalization statistics ("LEG_*" are the legacy filenames).
    LEG_MEAN = "mean.npy"
    LEG_STD = "std.npy"
    IM_MEAN = "im_mean.npy"
    IM_STD = "im_std.npy"
    IM_DEPTH_SUB_MEAN = "im_depth_sub_mean.npy"
    IM_DEPTH_SUB_STD = "im_depth_sub_std.npy"
    POSE_MEAN = "pose_mean.npy"
    POSE_STD = "pose_std.npy"

    # Model checkpoints and saved configuration.
    FINAL_MODEL = "model.ckpt"
    INTER_MODEL = "model_{}.ckpt"
    SAVED_ARCH = "architecture.json"
    SAVED_CFG = "config.json"


================================================
FILE: gqcnn/utils/policy_exceptions.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and
not-for-profit purposes, without fee and without a signed licensing
agreement, is hereby granted, provided that the above copyright notice,
this paragraph and the following two paragraphs appear in all copies,
modifications, and distributions. Contact The Office of Technology
Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA
94720-1620, (510) 643-7201, otl@berkeley.edu,
http://ipira.berkeley.edu/industry-info for commercial licensing
opportunities.
class NoValidGraspsException(Exception):
    """Raised when antipodal point pairs exist in the depth image but none
    of them yields a grasp that can actually be executed."""

    def __init__(self, in_collision=True, not_confident=False, *args,
                 **kwargs):
        # Delegate the standard message handling to the base class, then
        # record why the candidate grasps were rejected.
        super(NoValidGraspsException, self).__init__(*args, **kwargs)
        self.in_collision = in_collision
        self.not_confident = not_confident


class NoAntipodalPairsFoundException(Exception):
    """Raised when no antipodal point pairs can be found in the depth image
    at all."""
class TrainStatsLogger(object):
    """Logger for training statistics.

    Accumulates per-iteration training/validation metrics in memory and
    flushes them to `.npy` files in the experiment directory via `log`.
    """

    # Maps the stat names accepted by `update` to the attribute holding the
    # corresponding history list.
    _STAT_TO_ATTR = {
        "train_eval_iter": "train_eval_iters",
        "train_loss": "train_losses",
        "train_error": "train_errors",
        "total_train_error": "total_train_errors",
        "total_train_loss": "total_train_losses",
        "val_eval_iter": "val_eval_iters",
        "val_loss": "val_losses",
        "val_error": "val_errors",
        "learning_rate": "learning_rates",
    }

    def __init__(self, experiment_dir):
        """
        Parameters
        ----------
        experiment_dir : str
            The experiment directory to save statistics to.
        """
        self.experiment_dir = experiment_dir
        # Fix: the original initialized `val_losses` twice; each history
        # list is now initialized exactly once.
        self.train_eval_iters = []
        self.train_losses = []
        self.train_errors = []
        self.total_train_errors = []
        self.total_train_losses = []
        self.val_eval_iters = []
        self.val_losses = []
        self.val_errors = []
        self.learning_rates = []

    def log(self):
        """Flush all of the statistics to the given experiment directory.

        Each history list is written as a `.npy` file named by the
        corresponding `GQCNNFilenames` constant.
        """
        history = [
            (GQCNNFilenames.TRAIN_ITERS, self.train_eval_iters),
            (GQCNNFilenames.TRAIN_LOSSES, self.train_losses),
            (GQCNNFilenames.TRAIN_ERRORS, self.train_errors),
            (GQCNNFilenames.TOTAL_TRAIN_ERRORS, self.total_train_errors),
            (GQCNNFilenames.TOTAL_TRAIN_LOSSES, self.total_train_losses),
            (GQCNNFilenames.VAL_ITERS, self.val_eval_iters),
            (GQCNNFilenames.VAL_LOSSES, self.val_losses),
            (GQCNNFilenames.VAL_ERRORS, self.val_errors),
            (GQCNNFilenames.LEARNING_RATES, self.learning_rates),
        ]
        for filename, data in history:
            np.save(os.path.join(self.experiment_dir, filename), data)

    def update(self, **stats):
        """Update training statistics.

        Note
        ----
        Any statistic that is `None` in the argument dict will not be
        updated; unrecognized statistic names are silently ignored
        (matching the original elif-chain behavior).

        Parameters
        ----------
        stats : dict
            Dict of statistics and values to be updated.
        """
        for stat, val in stats.items():
            attr = self._STAT_TO_ATTR.get(stat)
            if attr is not None and val is not None:
                getattr(self, attr).append(val)
def is_py2():
    """Return True iff the running interpreter is Python 2.

    Fix: uses `sys.version_info` instead of comparing the first character
    of the `sys.version` string, which would misreport any interpreter
    whose major version has more than one digit.
    """
    return sys.version_info[0] == 2


def set_cuda_visible_devices(gpu_list):
    """Sets CUDA_VISIBLE_DEVICES environment variable to only show certain
    gpus.

    Note
    ----
    If gpu_list is empty does nothing.

    Parameters
    ----------
    gpu_list : list
        List of gpus to set as visible.
    """
    if len(gpu_list) == 0:
        return

    # Build a comma-separated device string; the trailing comma is harmless
    # to CUDA and preserves the historical output format.
    cuda_visible_devices = "".join(str(gpu) + "," for gpu in gpu_list)

    logger.info(
        "Setting CUDA_VISIBLE_DEVICES = {}".format(cuda_visible_devices))
    os.environ["CUDA_VISIBLE_DEVICES"] = cuda_visible_devices
""" if gripper_mode == GripperMode.PARALLEL_JAW: return 1 elif gripper_mode == GripperMode.SUCTION: return 2 elif gripper_mode == GripperMode.MULTI_SUCTION: return 1 elif gripper_mode == GripperMode.LEGACY_PARALLEL_JAW: return 1 elif gripper_mode == GripperMode.LEGACY_SUCTION: return 2 else: raise ValueError( "Gripper mode '{}' not supported.".format(gripper_mode)) def read_pose_data(pose_arr, gripper_mode): """Read the pose data and slice it according to the specified gripper mode. Parameters ---------- pose_arr: :obj:`numpy.ndarray` Full pose data array read in from file. gripper_mode: :obj:`GripperMode` Enum for gripper mode, see optimizer_constants.py for all possible gripper modes. Returns ------- :obj:`numpy.ndarray` Sliced pose_data corresponding to input data mode. """ if gripper_mode == GripperMode.PARALLEL_JAW: if pose_arr.ndim == 1: return pose_arr[2:3] else: return pose_arr[:, 2:3] elif gripper_mode == GripperMode.SUCTION: if pose_arr.ndim == 1: return np.r_[pose_arr[2], pose_arr[4]] else: return np.c_[pose_arr[:, 2], pose_arr[:, 4]] elif gripper_mode == GripperMode.MULTI_SUCTION: if pose_arr.ndim == 1: return pose_arr[2:3] else: return pose_arr[:, 2:3] elif gripper_mode == GripperMode.LEGACY_PARALLEL_JAW: if pose_arr.ndim == 1: return pose_arr[2:3] else: return pose_arr[:, 2:3] elif gripper_mode == GripperMode.LEGACY_SUCTION: if pose_arr.ndim == 1: return pose_arr[2:4] else: return pose_arr[:, 2:4] else: raise ValueError( "Gripper mode '{}' not supported.".format(gripper_mode)) def reduce_shape(shape): """Get shape of a layer for flattening.""" shape = [x.value for x in shape[1:]] f = lambda x, y: 1 if y is None else x * y # noqa: E731 return reduce(f, shape, 1) def weight_name_to_layer_name(weight_name): """Convert the name of weights to the layer name.""" tokens = weight_name.split("_") type_name = tokens[-1] # Modern naming convention. 
if type_name == "weights" or type_name == "bias": if len(tokens) >= 3 and tokens[-3] == "input": return weight_name[:weight_name.rfind("input") - 1] return weight_name[:weight_name.rfind(type_name) - 1] # Legacy. if type_name == "im": return weight_name[:-4] if type_name == "pose": return weight_name[:-6] return weight_name[:-1] def imresize(image, size, interp="nearest"): """Wrapper over `skimage.transform.resize` to mimic `scipy.misc.imresize`. Copied from https://github.com/BerkeleyAutomation/autolab_core/blob/master/autolab_core/image.py#L32. # noqa: E501 Since `scipy.misc.imresize` has been removed in version 1.3.*, instead use `skimage.transform.resize`. The "lanczos" and "cubic" interpolation methods are not supported by `skimage.transform.resize`, however there is now "biquadratic", "biquartic", and "biquintic". Parameters ---------- image : :obj:`numpy.ndarray` The image to resize. size : int, float, or tuple * int - Percentage of current size. * float - Fraction of current size. * tuple - Size of the output image. interp : :obj:`str`, optional Interpolation to use for re-sizing ("neartest", "bilinear", "biquadratic", "bicubic", "biquartic", "biquintic"). Default is "nearest". Returns ------- :obj:`np.ndarray` The resized image. 
""" skt_interp_map = { "nearest": 0, "bilinear": 1, "biquadratic": 2, "bicubic": 3, "biquartic": 4, "biquintic": 5 } if interp in ("lanczos", "cubic"): raise ValueError("'lanczos' and 'cubic'" " interpolation are no longer supported.") assert interp in skt_interp_map, ("Interpolation '{}' not" " supported.".format(interp)) if isinstance(size, (tuple, list)): output_shape = size elif isinstance(size, (float)): np_shape = np.asarray(image.shape).astype(np.float32) np_shape[0:2] *= size output_shape = tuple(np_shape.astype(int)) elif isinstance(size, (int)): np_shape = np.asarray(image.shape).astype(np.float32) np_shape[0:2] *= size / 100.0 output_shape = tuple(np_shape.astype(int)) else: raise ValueError("Invalid type for size '{}'.".format(type(size))) return skt.resize(image, output_shape, order=skt_interp_map[interp], anti_aliasing=False, mode="constant") ================================================ FILE: gqcnn/version.py ================================================ __version__ = "1.3.0" ================================================ FILE: launch/grasp_planning_service.launch ================================================ ================================================ FILE: msg/Action.msg ================================================ # Copyright ©2017. The Regents of the University of California (Regents). # All Rights Reserved. Permission to use, copy, modify, and distribute this # software and its documentation for educational, research, and not-for-profit # purposes, without fee and without a signed licensing agreement, is hereby # granted, provided that the above copyright notice, this paragraph and the # following two paragraphs appear in all copies, modifications, and # distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 # Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, # otl@berkeley.edu, # http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. 
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF # THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED # HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE # MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. uint32 width uint32 height uint8[] mask_data string action_type float32[] action_data float32 confidence ================================================ FILE: msg/BoundingBox.msg ================================================ # Copyright ©2017. The Regents of the University of California (Regents). # All Rights Reserved. Permission to use, copy, modify, and distribute this # software and its documentation for educational, research, and not-for-profit # purposes, without fee and without a signed licensing agreement, is hereby # granted, provided that the above copyright notice, this paragraph and the # following two paragraphs appear in all copies, modifications, and # distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 # Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, # otl@berkeley.edu, # http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. # IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF # THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED # HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE # MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. float64 minX float64 minY float64 maxX float64 maxY ================================================ FILE: msg/GQCNNGrasp.msg ================================================ # Copyright ©2017. The Regents of the University of California (Regents). # All Rights Reserved. Permission to use, copy, modify, and distribute this # software and its documentation for educational, research, and not-for-profit # purposes, without fee and without a signed licensing agreement, is hereby # granted, provided that the above copyright notice, this paragraph and the # following two paragraphs appear in all copies, modifications, and # distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 # Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, # otl@berkeley.edu, # http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. # IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF # THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED # HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE # MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
geometry_msgs/Pose pose float64 q_value uint8 PARALLEL_JAW=0 uint8 SUCTION=1 uint8 grasp_type float64[2] center_px float64 angle float64 depth sensor_msgs/Image thumbnail ================================================ FILE: msg/Observation.msg ================================================ # Copyright ©2017. The Regents of the University of California (Regents). # All Rights Reserved. Permission to use, copy, modify, and distribute this # software and its documentation for educational, research, and not-for-profit # purposes, without fee and without a signed licensing agreement, is hereby # granted, provided that the above copyright notice, this paragraph and the # following two paragraphs appear in all copies, modifications, and # distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 # Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, # otl@berkeley.edu, # http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. # IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF # THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED # HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE # MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. uint32 width uint32 height float32[] image_data ================================================ FILE: package.xml ================================================ gqcnn 1.3.0 ROS package for deploying Grasp Quality Convolutional Neural Networks (GQ-CNNs). 
Vishal Satish Regents catkin rospy rospy message_generation message_runtime ================================================ FILE: post-checkout ================================================ #!/bin/sh # # An example hook script to prepare a packed repository for use over # dumb transports. # # To enable this hook, rename this file to "post-update". find . -name "*.pyc" -exec rm '{}' ';' ================================================ FILE: requirements/cpu_requirements.txt ================================================ autolab-core autolab-perception visualization numpy opencv-python scipy matplotlib tensorflow<=1.15.0 scikit-learn scikit-image gputil psutil ================================================ FILE: requirements/docs_requirements.txt ================================================ sphinx sphinxcontrib-napoleon sphinx_rtd_theme ================================================ FILE: requirements/gpu_requirements.txt ================================================ autolab-core autolab-perception visualization numpy opencv-python scipy matplotlib tensorflow-gpu<=1.15.0 scikit-learn scikit-image gputil psutil ================================================ FILE: ros_nodes/grasp_planner_node.py ================================================ #!/usr/bin/env python # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. 
class GraspPlanner(object):
    """ROS service backend that plans GQ-CNN grasps from request images."""

    def __init__(self, cfg, cv_bridge, grasping_policy, grasp_pose_publisher):
        """
        Parameters
        ----------
        cfg : dict
            Dictionary of configuration parameters.
        cv_bridge: :obj:`CvBridge`
            ROS `CvBridge`.
        grasping_policy: :obj:`GraspingPolicy`
            Grasping policy to use.
        grasp_pose_publisher: :obj:`Publisher`
            ROS publisher to publish pose of planned grasp for visualization.
        """
        self.cfg = cfg
        self.cv_bridge = cv_bridge
        self.grasping_policy = grasping_policy
        self.grasp_pose_publisher = grasp_pose_publisher

        # Set minimum input dimensions.
        # Fully-convolutional policies need at least the GQ-CNN receptive
        # field; crop-based policies additionally pad by sqrt(2) * crop/2
        # per side -- presumably to leave room for rotated crops (TODO
        # confirm).
        policy_type = "cem"
        if "type" in self.cfg["policy"]:
            policy_type = self.cfg["policy"]["type"]
        fully_conv_policy_types = {"fully_conv_suction", "fully_conv_pj"}
        if policy_type in fully_conv_policy_types:
            self.min_width = self.cfg["policy"]["gqcnn_recep_w"]
            self.min_height = self.cfg["policy"]["gqcnn_recep_h"]
        else:
            pad = max(
                math.ceil(
                    np.sqrt(2) *
                    (float(self.cfg["policy"]["metric"]["crop_width"]) / 2)),
                math.ceil(
                    np.sqrt(2) *
                    (float(self.cfg["policy"]["metric"]["crop_height"]) / 2)))
            self.min_width = 2 * pad + self.cfg["policy"]["metric"][
                "crop_width"]
            self.min_height = 2 * pad + self.cfg["policy"]["metric"][
                "crop_height"]

    def read_images(self, req):
        """Reads images from a ROS service request.

        Parameters
        ---------
        req: :obj:`ROS ServiceRequest`
            ROS ServiceRequest for grasp planner service.
        """
        # Get the raw depth and color images as ROS `Image` objects.
        raw_color = req.color_image
        raw_depth = req.depth_image

        # Get the raw camera info as ROS `CameraInfo`.
        raw_camera_info = req.camera_info

        # Wrap the camera info in a BerkeleyAutomation/autolab_core
        # `CameraIntrinsics` object. K is the row-major 3x3 intrinsics
        # matrix: fx=K[0], fy=K[4], cx=K[2], cy=K[5], skew=K[1].
        camera_intr = CameraIntrinsics(
            raw_camera_info.header.frame_id, raw_camera_info.K[0],
            raw_camera_info.K[4], raw_camera_info.K[2], raw_camera_info.K[5],
            raw_camera_info.K[1], raw_camera_info.height,
            raw_camera_info.width)

        # Create wrapped BerkeleyAutomation/autolab_core RGB and depth images
        # by unpacking the ROS images using ROS `CvBridge`
        # NOTE(review): on CvBridgeError this only logs; color_im/depth_im
        # stay undefined and the size checks below would raise NameError.
        try:
            color_im = ColorImage(self.cv_bridge.imgmsg_to_cv2(
                raw_color, "rgb8"),
                                  frame=camera_intr.frame)
            depth_im = DepthImage(self.cv_bridge.imgmsg_to_cv2(
                raw_depth, desired_encoding="passthrough"),
                                  frame=camera_intr.frame)
        except CvBridgeError as cv_bridge_exception:
            rospy.logerr(cv_bridge_exception)

        # Check image sizes: color and depth must match exactly.
        if color_im.height != depth_im.height or \
           color_im.width != depth_im.width:
            msg = ("Color image and depth image must be the same shape! Color"
                   " is %d x %d but depth is %d x %d") % (
                       color_im.height, color_im.width, depth_im.height,
                       depth_im.width)
            rospy.logerr(msg)
            raise rospy.ServiceException(msg)

        # Reject images smaller than the minimums computed in `__init__`.
        if (color_im.height < self.min_height
                or color_im.width < self.min_width):
            msg = ("Color image is too small! Must be at least %d x %d"
                   " resolution but the requested image is only %d x %d") % (
                       self.min_height, self.min_width, color_im.height,
                       color_im.width)
            rospy.logerr(msg)
            raise rospy.ServiceException(msg)

        return color_im, depth_im, camera_intr

    def plan_grasp(self, req):
        """Grasp planner request handler.

        Parameters
        ---------
        req: :obj:`ROS ServiceRequest`
            ROS `ServiceRequest` for grasp planner service.
        """
        color_im, depth_im, camera_intr = self.read_images(req)
        return self._plan_grasp(color_im, depth_im, camera_intr)

    def plan_grasp_bb(self, req):
        """Grasp planner request handler for requests with a bounding box.

        Same as `plan_grasp` but restricts grasp sampling to the request's
        bounding box.

        Parameters
        ---------
        req: :obj:`ROS ServiceRequest`
            `ROS ServiceRequest` for grasp planner service.
        """
        color_im, depth_im, camera_intr = self.read_images(req)
        return self._plan_grasp(color_im,
                                depth_im,
                                camera_intr,
                                bounding_box=req.bounding_box)
    def plan_grasp_segmask(self, req):
        """Grasp planner request handler for requests with a segmask.

        Parameters
        ---------
        req: :obj:`ROS ServiceRequest`
            ROS `ServiceRequest` for grasp planner service.
        """
        color_im, depth_im, camera_intr = self.read_images(req)
        raw_segmask = req.segmask
        # Unpack the segmask. NOTE(review): like `read_images`, a
        # CvBridgeError is only logged, leaving `segmask` undefined below.
        try:
            segmask = BinaryImage(self.cv_bridge.imgmsg_to_cv2(
                raw_segmask, desired_encoding="passthrough"),
                                  frame=camera_intr.frame)
        except CvBridgeError as cv_bridge_exception:
            rospy.logerr(cv_bridge_exception)
        # Segmask must match the color image shape exactly.
        if color_im.height != segmask.height or \
           color_im.width != segmask.width:
            msg = ("Images and segmask must be the same shape! Color image is"
                   " %d x %d but segmask is %d x %d") % (
                       color_im.height, color_im.width, segmask.height,
                       segmask.width)
            rospy.logerr(msg)
            raise rospy.ServiceException(msg)

        return self._plan_grasp(color_im,
                                depth_im,
                                camera_intr,
                                segmask=segmask)

    def _plan_grasp(self,
                    color_im,
                    depth_im,
                    camera_intr,
                    bounding_box=None,
                    segmask=None):
        """Plan a grasp on already-unpacked images.

        Parameters
        ----------
        color_im: :obj:`ColorImage`
            Color image from the request.
        depth_im: :obj:`DepthImage`
            Depth image from the request.
        camera_intr: :obj:`CameraIntrinsics`
            Intrinsics of the requesting camera.
        bounding_box: optional
            Region of interest; converted to a segmask below.
        segmask: :obj:`BinaryImage`, optional
            Segmentation mask restricting grasp sampling.
        """
        rospy.loginfo("Planning Grasp")

        # Inpaint images to fill missing pixels before planning.
        color_im = color_im.inpaint(
            rescale_factor=self.cfg["inpaint_rescale_factor"])
        depth_im = depth_im.inpaint(
            rescale_factor=self.cfg["inpaint_rescale_factor"])

        # Init segmask: default to an all-255 mask (everything valid).
        if segmask is None:
            segmask = BinaryImage(255 *
                                  np.ones(depth_im.shape).astype(np.uint8),
                                  frame=color_im.frame)

        # Visualize (each flag is read from the "vis" config section).
        if self.cfg["vis"]["color_image"]:
            vis.imshow(color_im)
            vis.show()
        if self.cfg["vis"]["depth_image"]:
            vis.imshow(depth_im)
            vis.show()
        if self.cfg["vis"]["segmask"] and segmask is not None:
            vis.imshow(segmask)
            vis.show()

        # Aggregate color and depth images into a single
        # BerkeleyAutomation/autolab_core `RgbdImage`.
        rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)

        # Mask bounding box.
        if bounding_box is not None:
            # Calc bb parameters.
            # NOTE(review): the BoundingBox msg fields are float64; the
            # array slicing below assumes integral values -- confirm that
            # callers send whole numbers.
            min_x = bounding_box.minX
            min_y = bounding_box.minY
            max_x = bounding_box.maxX
            max_y = bounding_box.maxY

            # Contain box to image->don't let it exceed image height/width
            # bounds.
            if min_x < 0:
                min_x = 0
            if min_y < 0:
                min_y = 0
            if max_x > rgbd_im.width:
                max_x = rgbd_im.width
            if max_y > rgbd_im.height:
                max_y = rgbd_im.height

            # Mask: intersect the request segmask with the box region.
            bb_segmask_arr = np.zeros([rgbd_im.height, rgbd_im.width])
            bb_segmask_arr[min_y:max_y, min_x:max_x] = 255
            bb_segmask = BinaryImage(bb_segmask_arr.astype(np.uint8),
                                     segmask.frame)
            segmask = segmask.mask_binary(bb_segmask)

        # Visualize.
        if self.cfg["vis"]["rgbd_state"]:
            masked_rgbd_im = rgbd_im.mask_binary(segmask)
            vis.figure()
            vis.subplot(1, 2, 1)
            vis.imshow(masked_rgbd_im.color)
            vis.subplot(1, 2, 2)
            vis.imshow(masked_rgbd_im.depth)
            vis.show()

        # Create an `RgbdImageState` with the cropped `RgbdImage` and
        # `CameraIntrinsics`.
        rgbd_state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)

        # Execute policy; a policy failure is surfaced to the ROS caller as
        # a `ServiceException`.
        try:
            return self.execute_policy(rgbd_state, self.grasping_policy,
                                       self.grasp_pose_publisher,
                                       camera_intr.frame)
        except NoValidGraspsException:
            rospy.logerr(
                ("While executing policy found no valid grasps from sampled"
                 " antipodal point pairs. Aborting Policy!"))
            raise rospy.ServiceException(
                ("While executing policy found no valid grasps from sampled"
                 " antipodal point pairs. Aborting Policy!"))

    def execute_policy(self, rgbd_image_state, grasping_policy,
                       grasp_pose_publisher, pose_frame):
        """Executes a grasping policy on an `RgbdImageState`.

        Parameters
        ----------
        rgbd_image_state: :obj:`RgbdImageState`
            `RgbdImageState` to encapsulate depth and color image along with
            camera intrinsics.
        grasping_policy: :obj:`GraspingPolicy`
            Grasping policy to use.
        grasp_pose_publisher: :obj:`Publisher`
            ROS publisher to publish pose of planned grasp for visualization.
        pose_frame: :obj:`str`
            Frame of reference to publish pose in.
        """
        # Execute the policy's action and time the planning.
        grasp_planning_start_time = time.time()
        grasp = grasping_policy(rgbd_image_state)

        # Create `GQCNNGrasp` return msg and populate it. The grasp type
        # is derived from the concrete grasp class returned by the policy.
        gqcnn_grasp = GQCNNGrasp()
        gqcnn_grasp.q_value = grasp.q_value
        gqcnn_grasp.pose = grasp.grasp.pose().pose_msg
        if isinstance(grasp.grasp, Grasp2D):
            gqcnn_grasp.grasp_type = GQCNNGrasp.PARALLEL_JAW
        elif isinstance(grasp.grasp, SuctionPoint2D):
            gqcnn_grasp.grasp_type = GQCNNGrasp.SUCTION
        else:
            rospy.logerr("Grasp type not supported!")
            raise rospy.ServiceException("Grasp type not supported!")

        # Store grasp representation in image space.
        gqcnn_grasp.center_px[0] = grasp.grasp.center[0]
        gqcnn_grasp.center_px[1] = grasp.grasp.center[1]
        gqcnn_grasp.angle = grasp.grasp.angle
        gqcnn_grasp.depth = grasp.grasp.depth
        gqcnn_grasp.thumbnail = grasp.image.rosmsg

        # Create and publish the pose alone for easy visualization of grasp
        # pose in Rviz.
        pose_stamped = PoseStamped()
        pose_stamped.pose = grasp.grasp.pose().pose_msg
        header = Header()
        header.stamp = rospy.Time.now()
        header.frame_id = pose_frame
        pose_stamped.header = header
        grasp_pose_publisher.publish(pose_stamped)

        # Return `GQCNNGrasp` msg.
        rospy.loginfo("Total grasp planning time: " +
                      str(time.time() - grasp_planning_start_time) + " secs.")

        return gqcnn_grasp


if __name__ == "__main__":
    # Initialize the ROS node.
    rospy.init_node("Grasp_Sampler_Server")

    # Initialize `CvBridge`.
    cv_bridge = CvBridge()

    # Get configs.
model_name = rospy.get_param("~model_name") model_dir = rospy.get_param("~model_dir") fully_conv = rospy.get_param("~fully_conv") if model_dir.lower() == "default": model_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../models") model_dir = os.path.join(model_dir, model_name) model_config = json.load(open(os.path.join(model_dir, "config.json"), "r")) try: gqcnn_config = model_config["gqcnn"] gripper_mode = gqcnn_config["gripper_mode"] except KeyError: gqcnn_config = model_config["gqcnn_config"] input_data_mode = gqcnn_config["input_data_mode"] if input_data_mode == "tf_image": gripper_mode = GripperMode.LEGACY_PARALLEL_JAW elif input_data_mode == "tf_image_suction": gripper_mode = GripperMode.LEGACY_SUCTION elif input_data_mode == "suction": gripper_mode = GripperMode.SUCTION elif input_data_mode == "multi_suction": gripper_mode = GripperMode.MULTI_SUCTION elif input_data_mode == "parallel_jaw": gripper_mode = GripperMode.PARALLEL_JAW else: raise ValueError( "Input data mode {} not supported!".format(input_data_mode)) # Set config. if fully_conv: config_filename = os.path.join( os.path.dirname(os.path.realpath(__file__)), "..", "cfg/examples/ros/fc_gqcnn_suction.yaml") else: config_filename = os.path.join( os.path.dirname(os.path.realpath(__file__)), "..", "cfg/examples/ros/gqcnn_suction.yaml") if (gripper_mode == GripperMode.LEGACY_PARALLEL_JAW or gripper_mode == GripperMode.PARALLEL_JAW): if fully_conv: config_filename = os.path.join( os.path.dirname(os.path.realpath(__file__)), "..", "cfg/examples/ros/fc_gqcnn_pj.yaml") else: config_filename = os.path.join( os.path.dirname(os.path.realpath(__file__)), "..", "cfg/examples/ros/gqcnn_pj.yaml") # Read config. cfg = YamlConfig(config_filename) policy_cfg = cfg["policy"] policy_cfg["metric"]["gqcnn_model"] = model_dir # Create publisher to publish pose of final grasp. grasp_pose_publisher = rospy.Publisher("/gqcnn_grasp/pose", PoseStamped, queue_size=10) # Create a grasping policy. 
    rospy.loginfo("Creating Grasping Policy")
    if fully_conv:
        # TODO(vsatish): We should really be doing this in some factory
        # policy.
        if policy_cfg["type"] == "fully_conv_suction":
            grasping_policy = \
                FullyConvolutionalGraspingPolicySuction(policy_cfg)
        elif policy_cfg["type"] == "fully_conv_pj":
            grasping_policy = \
                FullyConvolutionalGraspingPolicyParallelJaw(policy_cfg)
        else:
            raise ValueError(
                "Invalid fully-convolutional policy type: {}".format(
                    policy_cfg["type"]))
    else:
        # Non-fully-convolutional policies default to CEM when no explicit
        # type is configured.
        policy_type = "cem"
        if "type" in policy_cfg:
            policy_type = policy_cfg["type"]
        if policy_type == "ranking":
            grasping_policy = RobustGraspingPolicy(policy_cfg)
        elif policy_type == "cem":
            grasping_policy = CrossEntropyRobustGraspingPolicy(policy_cfg)
        else:
            raise ValueError("Invalid policy type: {}".format(policy_type))

    # Create a grasp planner.
    grasp_planner = GraspPlanner(cfg, cv_bridge, grasping_policy,
                                 grasp_pose_publisher)

    # Initialize the ROS services: one per request flavor (plain, bounding
    # box, segmask), all backed by the same planner instance.
    grasp_planning_service = rospy.Service("grasp_planner", GQCNNGraspPlanner,
                                           grasp_planner.plan_grasp)
    grasp_planning_service_bb = rospy.Service("grasp_planner_bounding_box",
                                              GQCNNGraspPlannerBoundingBox,
                                              grasp_planner.plan_grasp_bb)
    grasp_planning_service_segmask = rospy.Service(
        "grasp_planner_segmask", GQCNNGraspPlannerSegmask,
        grasp_planner.plan_grasp_segmask)
    rospy.loginfo("Grasping Policy Initialized")

    # Spin forever.
    rospy.spin()


================================================
FILE: scripts/docker/build-docker.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions.
# Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

# Build the CPU and GPU docker images.
# Snapshot the current `master` branch into a tarball that the Dockerfiles
# copy into the images, build both images from scratch (no cache), then
# remove the temporary tarball.
git archive --format=tar -o docker/gqcnn.tar --prefix=gqcnn/ master
docker build --no-cache -t gqcnn/gpu -f docker/gpu/Dockerfile .
docker build --no-cache -t gqcnn/cpu -f docker/cpu/Dockerfile .
rm docker/gqcnn.tar


================================================
FILE: scripts/policies/run_all_dex-net_2.0_examples.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

# Run the Dex-Net 2.0 GQ-CNN policy on each of the ten single-object
# example images; abort on the first failing example.
set -e

echo "RUNNING EXAMPLE 1"
python examples/policy.py GQCNN-2.0 --depth_image data/examples/single_object/primesense/depth_0.npy --segmask data/examples/single_object/primesense/segmask_0.png --config_filename cfg/examples/replication/dex-net_2.0.yaml

echo "RUNNING EXAMPLE 2"
python examples/policy.py GQCNN-2.0 --depth_image data/examples/single_object/primesense/depth_1.npy --segmask data/examples/single_object/primesense/segmask_1.png --config_filename cfg/examples/replication/dex-net_2.0.yaml

echo "RUNNING EXAMPLE 3"
python examples/policy.py GQCNN-2.0 --depth_image data/examples/single_object/primesense/depth_2.npy --segmask data/examples/single_object/primesense/segmask_2.png --config_filename cfg/examples/replication/dex-net_2.0.yaml

echo "RUNNING EXAMPLE 4"
python examples/policy.py GQCNN-2.0 --depth_image data/examples/single_object/primesense/depth_3.npy --segmask data/examples/single_object/primesense/segmask_3.png --config_filename cfg/examples/replication/dex-net_2.0.yaml

echo "RUNNING EXAMPLE 5"
python examples/policy.py GQCNN-2.0 --depth_image data/examples/single_object/primesense/depth_4.npy --segmask data/examples/single_object/primesense/segmask_4.png --config_filename cfg/examples/replication/dex-net_2.0.yaml

echo "RUNNING EXAMPLE 6"
python examples/policy.py GQCNN-2.0 --depth_image data/examples/single_object/primesense/depth_5.npy --segmask data/examples/single_object/primesense/segmask_5.png --config_filename cfg/examples/replication/dex-net_2.0.yaml

echo "RUNNING EXAMPLE 7"
python examples/policy.py GQCNN-2.0 --depth_image data/examples/single_object/primesense/depth_6.npy --segmask data/examples/single_object/primesense/segmask_6.png --config_filename cfg/examples/replication/dex-net_2.0.yaml

echo "RUNNING EXAMPLE 8"
python examples/policy.py GQCNN-2.0 --depth_image data/examples/single_object/primesense/depth_7.npy --segmask data/examples/single_object/primesense/segmask_7.png --config_filename cfg/examples/replication/dex-net_2.0.yaml

echo "RUNNING EXAMPLE 9"
python examples/policy.py GQCNN-2.0 --depth_image data/examples/single_object/primesense/depth_8.npy --segmask data/examples/single_object/primesense/segmask_8.png --config_filename cfg/examples/replication/dex-net_2.0.yaml

echo "RUNNING EXAMPLE 10"
python examples/policy.py GQCNN-2.0 --depth_image data/examples/single_object/primesense/depth_9.npy --segmask data/examples/single_object/primesense/segmask_9.png --config_filename cfg/examples/replication/dex-net_2.0.yaml


================================================
FILE: scripts/policies/run_all_dex-net_2.1_examples.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions.
# Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

# Run the Dex-Net 2.1 GQ-CNN policy on each of the five clutter example
# images; abort on the first failing example.
set -e

echo "RUNNING EXAMPLE 1"
python examples/policy.py GQCNN-2.1 --depth_image data/examples/clutter/primesense/depth_0.npy --segmask data/examples/clutter/primesense/segmask_0.png --config_filename cfg/examples/replication/dex-net_2.1.yaml

echo "RUNNING EXAMPLE 2"
python examples/policy.py GQCNN-2.1 --depth_image data/examples/clutter/primesense/depth_1.npy --segmask data/examples/clutter/primesense/segmask_1.png --config_filename cfg/examples/replication/dex-net_2.1.yaml

echo "RUNNING EXAMPLE 3"
python examples/policy.py GQCNN-2.1 --depth_image data/examples/clutter/primesense/depth_2.npy --segmask data/examples/clutter/primesense/segmask_2.png --config_filename cfg/examples/replication/dex-net_2.1.yaml

echo "RUNNING EXAMPLE 4"
python examples/policy.py GQCNN-2.1 --depth_image data/examples/clutter/primesense/depth_3.npy --segmask data/examples/clutter/primesense/segmask_3.png --config_filename cfg/examples/replication/dex-net_2.1.yaml

echo "RUNNING EXAMPLE 5"
python examples/policy.py GQCNN-2.1 --depth_image data/examples/clutter/primesense/depth_4.npy --segmask data/examples/clutter/primesense/segmask_4.png --config_filename cfg/examples/replication/dex-net_2.1.yaml


================================================
FILE: scripts/policies/run_all_dex-net_3.0_examples.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
# Run the Dex-Net 3.0 GQ-CNN suction policy on each of the ten
# single-object example images; abort on the first failing example.
set -e

echo "RUNNING EXAMPLE 1"
python examples/policy.py GQCNN-3.0 --depth_image data/examples/single_object/primesense/depth_0.npy --segmask data/examples/single_object/primesense/segmask_0.png --config_filename cfg/examples/replication/dex-net_3.0.yaml

echo "RUNNING EXAMPLE 2"
python examples/policy.py GQCNN-3.0 --depth_image data/examples/single_object/primesense/depth_1.npy --segmask data/examples/single_object/primesense/segmask_1.png --config_filename cfg/examples/replication/dex-net_3.0.yaml

echo "RUNNING EXAMPLE 3"
python examples/policy.py GQCNN-3.0 --depth_image data/examples/single_object/primesense/depth_2.npy --segmask data/examples/single_object/primesense/segmask_2.png --config_filename cfg/examples/replication/dex-net_3.0.yaml

echo "RUNNING EXAMPLE 4"
python examples/policy.py GQCNN-3.0 --depth_image data/examples/single_object/primesense/depth_3.npy --segmask data/examples/single_object/primesense/segmask_3.png --config_filename cfg/examples/replication/dex-net_3.0.yaml

echo "RUNNING EXAMPLE 5"
python examples/policy.py GQCNN-3.0 --depth_image data/examples/single_object/primesense/depth_4.npy --segmask data/examples/single_object/primesense/segmask_4.png --config_filename cfg/examples/replication/dex-net_3.0.yaml

echo "RUNNING EXAMPLE 6"
python examples/policy.py GQCNN-3.0 --depth_image data/examples/single_object/primesense/depth_5.npy --segmask data/examples/single_object/primesense/segmask_5.png --config_filename cfg/examples/replication/dex-net_3.0.yaml

echo "RUNNING EXAMPLE 7"
python examples/policy.py GQCNN-3.0 --depth_image data/examples/single_object/primesense/depth_6.npy --segmask data/examples/single_object/primesense/segmask_6.png --config_filename cfg/examples/replication/dex-net_3.0.yaml

echo "RUNNING EXAMPLE 8"
python examples/policy.py GQCNN-3.0 --depth_image data/examples/single_object/primesense/depth_7.npy --segmask data/examples/single_object/primesense/segmask_7.png --config_filename cfg/examples/replication/dex-net_3.0.yaml

echo "RUNNING EXAMPLE 9"
python examples/policy.py GQCNN-3.0 --depth_image data/examples/single_object/primesense/depth_8.npy --segmask data/examples/single_object/primesense/segmask_8.png --config_filename cfg/examples/replication/dex-net_3.0.yaml

echo "RUNNING EXAMPLE 10"
python examples/policy.py GQCNN-3.0 --depth_image data/examples/single_object/primesense/depth_9.npy --segmask data/examples/single_object/primesense/segmask_9.png --config_filename cfg/examples/replication/dex-net_3.0.yaml


================================================
FILE: scripts/policies/run_all_dex-net_4.0_fc_pj_examples.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS".
# REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

# Run the fully-convolutional Dex-Net 4.0 parallel-jaw policy on each of
# the five PhoXi clutter example images; abort on the first failure.
set -e

echo "RUNNING EXAMPLE 1"
python examples/policy.py FC-GQCNN-4.0-PJ --fully_conv --depth_image data/examples/clutter/phoxi/fcgqcnn/depth_0.npy --segmask data/examples/clutter/phoxi/fcgqcnn/segmask_0.png --config_filename cfg/examples/replication/dex-net_4.0_fc_pj.yaml --camera_intr data/calib/phoxi/phoxi.intr

echo "RUNNING EXAMPLE 2"
python examples/policy.py FC-GQCNN-4.0-PJ --fully_conv --depth_image data/examples/clutter/phoxi/fcgqcnn/depth_1.npy --segmask data/examples/clutter/phoxi/fcgqcnn/segmask_1.png --config_filename cfg/examples/replication/dex-net_4.0_fc_pj.yaml --camera_intr data/calib/phoxi/phoxi.intr

echo "RUNNING EXAMPLE 3"
python examples/policy.py FC-GQCNN-4.0-PJ --fully_conv --depth_image data/examples/clutter/phoxi/fcgqcnn/depth_2.npy --segmask data/examples/clutter/phoxi/fcgqcnn/segmask_2.png --config_filename cfg/examples/replication/dex-net_4.0_fc_pj.yaml --camera_intr data/calib/phoxi/phoxi.intr

echo "RUNNING EXAMPLE 4"
python examples/policy.py FC-GQCNN-4.0-PJ --fully_conv --depth_image data/examples/clutter/phoxi/fcgqcnn/depth_3.npy --segmask data/examples/clutter/phoxi/fcgqcnn/segmask_3.png --config_filename cfg/examples/replication/dex-net_4.0_fc_pj.yaml --camera_intr data/calib/phoxi/phoxi.intr

echo "RUNNING EXAMPLE 5"
python examples/policy.py FC-GQCNN-4.0-PJ --fully_conv --depth_image data/examples/clutter/phoxi/fcgqcnn/depth_4.npy --segmask data/examples/clutter/phoxi/fcgqcnn/segmask_4.png --config_filename cfg/examples/replication/dex-net_4.0_fc_pj.yaml --camera_intr data/calib/phoxi/phoxi.intr


================================================
FILE: scripts/policies/run_all_dex-net_4.0_fc_suction_examples.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved.
Permission to use, copy, modify, and distribute this # software and its documentation for educational, research, and not-for-profit # purposes, without fee and without a signed licensing agreement, is hereby # granted, provided that the above copyright notice, this paragraph and the # following two paragraphs appear in all copies, modifications, and # distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 # Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, # otl@berkeley.edu, # http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. # IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF # THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED # HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE # MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
# Run the fully-convolutional Dex-Net 4.0 suction policy on each of the
# five PhoXi clutter example images; abort on the first failure.
set -e

echo "RUNNING EXAMPLE 1"
python examples/policy.py FC-GQCNN-4.0-SUCTION --fully_conv --depth_image data/examples/clutter/phoxi/fcgqcnn/depth_0.npy --segmask data/examples/clutter/phoxi/fcgqcnn/segmask_0.png --config_filename cfg/examples/replication/dex-net_4.0_fc_suction.yaml --camera_intr data/calib/phoxi/phoxi.intr

echo "RUNNING EXAMPLE 2"
python examples/policy.py FC-GQCNN-4.0-SUCTION --fully_conv --depth_image data/examples/clutter/phoxi/fcgqcnn/depth_1.npy --segmask data/examples/clutter/phoxi/fcgqcnn/segmask_1.png --config_filename cfg/examples/replication/dex-net_4.0_fc_suction.yaml --camera_intr data/calib/phoxi/phoxi.intr

echo "RUNNING EXAMPLE 3"
python examples/policy.py FC-GQCNN-4.0-SUCTION --fully_conv --depth_image data/examples/clutter/phoxi/fcgqcnn/depth_2.npy --segmask data/examples/clutter/phoxi/fcgqcnn/segmask_2.png --config_filename cfg/examples/replication/dex-net_4.0_fc_suction.yaml --camera_intr data/calib/phoxi/phoxi.intr

echo "RUNNING EXAMPLE 4"
python examples/policy.py FC-GQCNN-4.0-SUCTION --fully_conv --depth_image data/examples/clutter/phoxi/fcgqcnn/depth_3.npy --segmask data/examples/clutter/phoxi/fcgqcnn/segmask_3.png --config_filename cfg/examples/replication/dex-net_4.0_fc_suction.yaml --camera_intr data/calib/phoxi/phoxi.intr

echo "RUNNING EXAMPLE 5"
python examples/policy.py FC-GQCNN-4.0-SUCTION --fully_conv --depth_image data/examples/clutter/phoxi/fcgqcnn/depth_4.npy --segmask data/examples/clutter/phoxi/fcgqcnn/segmask_4.png --config_filename cfg/examples/replication/dex-net_4.0_fc_suction.yaml --camera_intr data/calib/phoxi/phoxi.intr


================================================
FILE: scripts/policies/run_all_dex-net_4.0_pj_examples.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved.
Permission to use, copy, modify, and distribute this # software and its documentation for educational, research, and not-for-profit # purposes, without fee and without a signed licensing agreement, is hereby # granted, provided that the above copyright notice, this paragraph and the # following two paragraphs appear in all copies, modifications, and # distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 # Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, # otl@berkeley.edu, # http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. # IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF # THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED # HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE # MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
# Run the Dex-Net 4.0 parallel-jaw policy on each of the five PhoXi
# clutter example images; abort on the first failure.
set -e

echo "RUNNING EXAMPLE 1"
python examples/policy.py GQCNN-4.0-PJ --depth_image data/examples/clutter/phoxi/dex-net_4.0/depth_0.npy --segmask data/examples/clutter/phoxi/dex-net_4.0/segmask_0.png --config_filename cfg/examples/replication/dex-net_4.0_pj.yaml --camera_intr data/calib/phoxi/phoxi.intr

echo "RUNNING EXAMPLE 2"
python examples/policy.py GQCNN-4.0-PJ --depth_image data/examples/clutter/phoxi/dex-net_4.0/depth_1.npy --segmask data/examples/clutter/phoxi/dex-net_4.0/segmask_1.png --config_filename cfg/examples/replication/dex-net_4.0_pj.yaml --camera_intr data/calib/phoxi/phoxi.intr

echo "RUNNING EXAMPLE 3"
python examples/policy.py GQCNN-4.0-PJ --depth_image data/examples/clutter/phoxi/dex-net_4.0/depth_2.npy --segmask data/examples/clutter/phoxi/dex-net_4.0/segmask_2.png --config_filename cfg/examples/replication/dex-net_4.0_pj.yaml --camera_intr data/calib/phoxi/phoxi.intr

echo "RUNNING EXAMPLE 4"
python examples/policy.py GQCNN-4.0-PJ --depth_image data/examples/clutter/phoxi/dex-net_4.0/depth_3.npy --segmask data/examples/clutter/phoxi/dex-net_4.0/segmask_3.png --config_filename cfg/examples/replication/dex-net_4.0_pj.yaml --camera_intr data/calib/phoxi/phoxi.intr

echo "RUNNING EXAMPLE 5"
python examples/policy.py GQCNN-4.0-PJ --depth_image data/examples/clutter/phoxi/dex-net_4.0/depth_4.npy --segmask data/examples/clutter/phoxi/dex-net_4.0/segmask_4.png --config_filename cfg/examples/replication/dex-net_4.0_pj.yaml --camera_intr data/calib/phoxi/phoxi.intr


================================================
FILE: scripts/policies/run_all_dex-net_4.0_suction_examples.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved.
Permission to use, copy, modify, and distribute this # software and its documentation for educational, research, and not-for-profit # purposes, without fee and without a signed licensing agreement, is hereby # granted, provided that the above copyright notice, this paragraph and the # following two paragraphs appear in all copies, modifications, and # distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 # Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, # otl@berkeley.edu, # http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. # IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF # THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED # HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE # MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
# Run the Dex-Net 4.0 suction policy on each of the five PhoXi clutter
# example images; abort on the first failure.
set -e

echo "RUNNING EXAMPLE 1"
python examples/policy.py GQCNN-4.0-SUCTION --depth_image data/examples/clutter/phoxi/dex-net_4.0/depth_0.npy --segmask data/examples/clutter/phoxi/dex-net_4.0/segmask_0.png --config_filename cfg/examples/replication/dex-net_4.0_suction.yaml --camera_intr data/calib/phoxi/phoxi.intr

echo "RUNNING EXAMPLE 2"
python examples/policy.py GQCNN-4.0-SUCTION --depth_image data/examples/clutter/phoxi/dex-net_4.0/depth_1.npy --segmask data/examples/clutter/phoxi/dex-net_4.0/segmask_1.png --config_filename cfg/examples/replication/dex-net_4.0_suction.yaml --camera_intr data/calib/phoxi/phoxi.intr

echo "RUNNING EXAMPLE 3"
python examples/policy.py GQCNN-4.0-SUCTION --depth_image data/examples/clutter/phoxi/dex-net_4.0/depth_2.npy --segmask data/examples/clutter/phoxi/dex-net_4.0/segmask_2.png --config_filename cfg/examples/replication/dex-net_4.0_suction.yaml --camera_intr data/calib/phoxi/phoxi.intr

echo "RUNNING EXAMPLE 4"
python examples/policy.py GQCNN-4.0-SUCTION --depth_image data/examples/clutter/phoxi/dex-net_4.0/depth_3.npy --segmask data/examples/clutter/phoxi/dex-net_4.0/segmask_3.png --config_filename cfg/examples/replication/dex-net_4.0_suction.yaml --camera_intr data/calib/phoxi/phoxi.intr

echo "RUNNING EXAMPLE 5"
python examples/policy.py GQCNN-4.0-SUCTION --depth_image data/examples/clutter/phoxi/dex-net_4.0/depth_4.npy --segmask data/examples/clutter/phoxi/dex-net_4.0/segmask_4.png --config_filename cfg/examples/replication/dex-net_4.0_suction.yaml --camera_intr data/calib/phoxi/phoxi.intr


================================================
FILE: scripts/training/train_dex-net_2.0.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved.
# Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

# Train GQ-CNN 2.0 on the Dex-Net 2.0 dataset.
python tools/train.py data/training/dex-net_2.0 --config_filename cfg/train_dex-net_2.0.yaml --name GQCNN-2.0


================================================
FILE: scripts/training/train_dex-net_2.1.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

# Dex-Net 2.1 training is not automated; direct users to the maintainers.
echo "Please contact Vishal Satish (vsatish@berkeley.edu) or Jeffrey Mahler (jmahler@berkeley.edu) for instructions on training Dex-Net 2.1."


================================================
FILE: scripts/training/train_dex-net_3.0.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

# Train GQ-CNN 3.0 on the Dex-Net 3.0 dataset.
python tools/train.py data/training/dex-net_3.0 --config_filename cfg/train_dex-net_3.0.yaml --name GQCNN-3.0


================================================
FILE: scripts/training/train_dex-net_4.0_fc_pj.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

# Train the fully-convolutional parallel-jaw GQ-CNN 4.0.
python tools/train.py data/training/dex-net_4.0_fc_pj --config_filename cfg/train_dex-net_4.0_fc_pj.yaml --name GQCNN-4.0-FC-PJ


================================================
FILE: scripts/training/train_dex-net_4.0_fc_suction.sh
================================================
#!/bin/bash
# Copyright ©2017. The Regents of the University of California (Regents).
# All Rights Reserved. Permission to use, copy, modify, and distribute this
# software and its documentation for educational, research, and not-for-profit
# purposes, without fee and without a signed licensing agreement, is hereby
# granted, provided that the above copyright notice, this paragraph and the
# following two paragraphs appear in all copies, modifications, and
# distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
# Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
# otl@berkeley.edu,
# http://ipira.berkeley.edu/industry-info for commercial licensing opportunities.
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
# THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
# HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
python tools/train.py data/training/dex-net_4.0_fc_suction --config_filename cfg/train_dex-net_4.0_fc_suction.yaml --name GQCNN-4.0-FC-Suction ================================================ FILE: scripts/training/train_dex-net_4.0_pj.sh ================================================ #!/bin/bash # Copyright ©2017. The Regents of the University of California (Regents). # All Rights Reserved. Permission to use, copy, modify, and distribute this # software and its documentation for educational, research, and not-for-profit # purposes, without fee and without a signed licensing agreement, is hereby # granted, provided that the above copyright notice, this paragraph and the # following two paragraphs appear in all copies, modifications, and # distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 # Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, # otl@berkeley.edu, # http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. # IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF # THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED # HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE # MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. python tools/train.py data/training/dexnet_4_pj --config_filename cfg/train_dex-net_4.0_pj.yaml --name GQCNN-4.0-PJ ================================================ FILE: scripts/training/train_dex-net_4.0_suction.sh ================================================ #!/bin/bash # Copyright ©2017. The Regents of the University of California (Regents). 
# All Rights Reserved. Permission to use, copy, modify, and distribute this # software and its documentation for educational, research, and not-for-profit # purposes, without fee and without a signed licensing agreement, is hereby # granted, provided that the above copyright notice, this paragraph and the # following two paragraphs appear in all copies, modifications, and # distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 # Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, # otl@berkeley.edu, # http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. # IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF # THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED # HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE # MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. python tools/train.py data/training/dexnet_4_suction --config_filename cfg/train_dex-net_4.0_suction.yaml --name GQCNN-4.0-Suction ================================================ FILE: setup.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. 
Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Setup of `gqcnn` Python codebase. Author ------ Vishal Satish & Jeff Mahler """ import logging import os from setuptools import setup, find_packages from setuptools.command.develop import develop from setuptools.command.install import install import subprocess import sys TF_MAX_VERSION = "1.15.0" # Set up logger. logging.basicConfig() # Configure the root logger. logger = logging.getLogger("setup.py") logger.setLevel(logging.INFO) def get_tf_dep(): # Check whether or not the Nvidia driver and GPUs are available and add the # corresponding Tensorflow dependency. tf_dep = "tensorflow<={}".format(TF_MAX_VERSION) try: gpus = subprocess.check_output( ["nvidia-smi", "--query-gpu=gpu_name", "--format=csv"]).decode().strip().split("\n")[1:] if len(gpus) > 0: tf_dep = "tensorflow-gpu<={}".format(TF_MAX_VERSION) else: no_device_msg = ("Found Nvidia device driver but no" " devices...installing Tensorflow for CPU.") logger.warning(no_device_msg) except OSError: no_driver_msg = ("Could not find Nvidia device driver...installing" " Tensorflow for CPU.") logger.warning(no_driver_msg) return tf_dep # TODO(vsatish): Use inheritance here. 
class DevelopCmd(develop): user_options_custom = [ ("docker", None, "installing in Docker"), ] user_options = getattr(develop, "user_options", []) + user_options_custom def initialize_options(self): develop.initialize_options(self) # Initialize options. self.docker = False def finalize_options(self): develop.finalize_options(self) def run(self): # Install Tensorflow dependency. if not self.docker: tf_dep = get_tf_dep() subprocess.Popen([sys.executable, "-m", "pip", "install", tf_dep]).wait() else: # If we're using Docker, this will already have been installed # explicitly through the correct `{cpu/gpu}_requirements.txt`; # there is no way to check for CUDA/GPUs at Docker build time # because there is no easy way to set the Nvidia runtime. # TODO(vsatish): Figure out why this isn't printed. skip_tf_msg = ("Omitting Tensorflow dependency because of Docker" " installation.") logger.warning(skip_tf_msg) # Run installation. develop.run(self) class InstallCmd(install, object): user_options_custom = [ ("docker", None, "installing in Docker"), ] user_options = getattr(install, "user_options", []) + user_options_custom def initialize_options(self): install.initialize_options(self) # Initialize options. self.docker = False def finalize_options(self): install.finalize_options(self) def run(self): # Install Tensorflow dependency. if not self.docker: tf_dep = get_tf_dep() subprocess.Popen([sys.executable, "-m", "pip", "install", tf_dep]).wait() else: # If we're using Docker, this will already have been installed # explicitly through the correct `{cpu/gpu}_requirements.txt`; # there is no way to check for CUDA/GPUs at Docker build time # because there is no easy way to set the Nvidia runtime. # TODO (vsatish): Figure out why this isn't printed. skip_tf_msg = ("Omitting Tensorflow dependency because of Docker" " installation.") logger.warning(skip_tf_msg) # Run installation. 
install.run(self) requirements = [ "autolab-core", "autolab-perception", "visualization", "numpy", "scipy", "matplotlib", "opencv-python", "scikit-learn", "scikit-image", "psutil", "gputil" ] exec( open( os.path.join(os.path.dirname(os.path.realpath(__file__)), "gqcnn/version.py")).read()) setup( name="gqcnn", version=__version__, # noqa F821 description=("Project code for running Grasp Quality Convolutional" " Neural Networks"), author="Vishal Satish", author_email="vsatish@berkeley.edu", license="Berkeley Copyright", url="https://github.com/BerkeleyAutomation/gqcnn", keywords="robotics grasping vision deep learning", classifiers=[ "Development Status :: 4 - Beta", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Natural Language :: English", # yapf: disable "Topic :: Scientific/Engineering" ], packages=find_packages(), install_requires=requirements, extras_require={ "docs": ["sphinx", "sphinxcontrib-napoleon", "sphinx_rtd_theme"], }, cmdclass={ "install": InstallCmd, "develop": DevelopCmd }) ================================================ FILE: srv/GQCNNGraspPlanner.srv ================================================ # Copyright ©2017. The Regents of the University of California (Regents). # All Rights Reserved. Permission to use, copy, modify, and distribute this # software and its documentation for educational, research, and not-for-profit # purposes, without fee and without a signed licensing agreement, is hereby # granted, provided that the above copyright notice, this paragraph and the # following two paragraphs appear in all copies, modifications, and # distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 # Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, # otl@berkeley.edu, # http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. 
# IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF # THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED # HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE # MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. # request params sensor_msgs/Image color_image sensor_msgs/Image depth_image sensor_msgs/CameraInfo camera_info --- # response params GQCNNGrasp grasp ================================================ FILE: srv/GQCNNGraspPlannerBoundingBox.srv ================================================ # Copyright ©2017. The Regents of the University of California (Regents). # All Rights Reserved. Permission to use, copy, modify, and distribute this # software and its documentation for educational, research, and not-for-profit # purposes, without fee and without a signed licensing agreement, is hereby # granted, provided that the above copyright notice, this paragraph and the # following two paragraphs appear in all copies, modifications, and # distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 # Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, # otl@berkeley.edu, # http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. # IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF # THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED # HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE # MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. # request params sensor_msgs/Image color_image sensor_msgs/Image depth_image sensor_msgs/CameraInfo camera_info BoundingBox bounding_box --- # response params GQCNNGrasp grasp ================================================ FILE: srv/GQCNNGraspPlannerFull.srv ================================================ # Copyright ©2017. The Regents of the University of California (Regents). # All Rights Reserved. Permission to use, copy, modify, and distribute this # software and its documentation for educational, research, and not-for-profit # purposes, without fee and without a signed licensing agreement, is hereby # granted, provided that the above copyright notice, this paragraph and the # following two paragraphs appear in all copies, modifications, and # distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 # Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, # otl@berkeley.edu, # http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. # IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF # THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED # HEREUNDER IS PROVIDED "AS IS". 
REGENTS HAS NO OBLIGATION TO PROVIDE # MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. # request params sensor_msgs/Image color_image sensor_msgs/Image depth_image sensor_msgs/CameraInfo camera_info BoundingBox bounding_box sensor_msgs/Image segmask --- # response params GQCNNGrasp grasp ================================================ FILE: srv/GQCNNGraspPlannerSegmask.srv ================================================ # Copyright ©2017. The Regents of the University of California (Regents). # All Rights Reserved. Permission to use, copy, modify, and distribute this # software and its documentation for educational, research, and not-for-profit # purposes, without fee and without a signed licensing agreement, is hereby # granted, provided that the above copyright notice, this paragraph and the # following two paragraphs appear in all copies, modifications, and # distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 # Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, # otl@berkeley.edu, # http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. # IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF # THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED # HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE # MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
# request params sensor_msgs/Image color_image sensor_msgs/Image depth_image sensor_msgs/CameraInfo camera_info sensor_msgs/Image segmask --- # response params GQCNNGrasp grasp ================================================ FILE: tools/analyze_gqcnn_performance.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Analyzes a GQ-CNN model. Author ------ Vishal Satish & Jeff Mahler """ import argparse import os from autolab_core import YamlConfig, Logger from gqcnn import GQCNNAnalyzer # Setup logger. logger = Logger.get_logger("tools/analyze_gqcnn_performance.py") if __name__ == "__main__": # Parse args. 
parser = argparse.ArgumentParser( description=("Analyze a Grasp Quality Convolutional Neural Network" " with TensorFlow")) parser.add_argument("model_name", type=str, default=None, help="name of model to analyze") parser.add_argument("--output_dir", type=str, default=None, help="path to save the analysis") parser.add_argument( "--dataset_config_filename", type=str, default=None, help="path to a configuration file for testing on a custom dataset") parser.add_argument("--config_filename", type=str, default=None, help="path to the configuration file to use") parser.add_argument("--model_dir", type=str, default=None, help="path to the model") args = parser.parse_args() model_name = args.model_name output_dir = args.output_dir dataset_config_filename = args.dataset_config_filename config_filename = args.config_filename model_dir = args.model_dir # Create model dir. if model_dir is None: model_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../models") model_dir = os.path.join(model_dir, model_name) # If `model_dir` contains many models, analyze all of them. model_dir = [model_dir] if "config.json" not in os.listdir(model_dir[0]): logger.warning( "Found multiple models in model_dir, analyzing all of them...") models = os.listdir(model_dir[0]) model_dir = [os.path.join(model_dir[0], model) for model in models] # Set defaults. if output_dir is None: output_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../analysis") if config_filename is None: config_filename = os.path.join( os.path.dirname(os.path.realpath(__file__)), "..", "cfg/tools/analyze_gqcnn_performance.yaml") # Turn relative paths absolute. 
if not os.path.isabs(output_dir): output_dir = os.path.join(os.getcwd(), output_dir) if not os.path.isabs(config_filename): config_filename = os.path.join(os.getcwd(), config_filename) if dataset_config_filename is not None and not os.path.isabs( dataset_config_filename): dataset_config_filename = os.path.join(os.getcwd(), dataset_config_filename) # Make the output dir. if not os.path.exists(output_dir): os.mkdir(output_dir) # Read config. config = YamlConfig(config_filename) dataset_config = None if dataset_config_filename is not None: dataset_config = YamlConfig(dataset_config_filename) # Run the analyzer. analyzer = GQCNNAnalyzer(config, plot_backend="pdf") for model in model_dir: analyzer.analyze(model, output_dir, dataset_config) ================================================ FILE: tools/finetune.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Script for fine-tuning a Grasp Quality Convolutional Neural Network (GQ-CNN). Author ------ Vishal Satish & Jeff Mahler """ import argparse import os import time from autolab_core import YamlConfig, Logger import autolab_core.utils as utils from gqcnn import get_gqcnn_model, get_gqcnn_trainer from gqcnn import utils as gqcnn_utils # Setup logger. logger = Logger.get_logger("tools/finetune.py") if __name__ == "__main__": # Parse args. parser = argparse.ArgumentParser(description=( "Fine-Tune a pre-trained Grasp Quality Convolutional Neural Network" " with TensorFlow")) parser.add_argument( "dataset_dir", type=str, default=None, help="path to the dataset to use for training and validation") parser.add_argument("base_model_name", type=str, default=None, help="name of the pre-trained model to fine-tune") parser.add_argument("--split_name", type=str, default="image_wise", help="name of the split to train on") parser.add_argument("--output_dir", type=str, default=None, help="path to store the model") parser.add_argument("--tensorboard_port", type=int, default=None, help="port to launch tensorboard on") parser.add_argument("--seed", type=int, default=None, help="random seed for training") parser.add_argument("--config_filename", type=str, default=None, help="path to the configuration file to use") parser.add_argument("--model_dir", type=str, default=None, help="path to the pre-trained model to fine-tune") parser.add_argument("--name", type=str, default=None, help="name for the trained model") parser.add_argument( "--save_datetime", type=bool, default=False, help=("whether or not to save a model with the date and time of" " 
training")) parser.add_argument("--backend", type=str, default="tf", help="the deep learning framework to use") args = parser.parse_args() dataset_dir = args.dataset_dir base_model_name = args.base_model_name split_name = args.split_name output_dir = args.output_dir tensorboard_port = args.tensorboard_port seed = args.seed config_filename = args.config_filename model_dir = args.model_dir name = args.name save_datetime = args.save_datetime backend = args.backend # Set default output dir. if output_dir is None: output_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../models") # Set default config filename. if config_filename is None: config_filename = os.path.join( os.path.dirname(os.path.realpath(__file__)), "..", "cfg/finetune.yaml") # Set default model dir. if model_dir is None: model_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../models") # Turn relative paths absolute. if not os.path.isabs(dataset_dir): dataset_dir = os.path.join(os.getcwd(), dataset_dir) if not os.path.isabs(model_dir): model_dir = os.path.join(os.getcwd(), model_dir) if not os.path.isabs(output_dir): output_dir = os.path.join(os.getcwd(), output_dir) if not os.path.isabs(config_filename): config_filename = os.path.join(os.getcwd(), config_filename) # Create full path to the pre-trained model. model_dir = os.path.join(model_dir, base_model_name) # Create output dir if necessary. utils.mkdir_safe(output_dir) # Open train config. train_config = YamlConfig(config_filename) if seed is not None: train_config["seed"] = seed train_config["gqcnn"]["seed"] = seed if tensorboard_port is not None: train_config["tensorboard_port"] = tensorboard_port gqcnn_params = train_config["gqcnn"] # Create a unique output folder based on the date and time. if save_datetime: # Create output dir. unique_name = time.strftime("%Y%m%d-%H%M%S") output_dir = os.path.join(output_dir, unique_name) utils.mkdir_safe(output_dir) # Set visible devices. 
if "gpu_list" in train_config: gqcnn_utils.set_cuda_visible_devices(train_config["gpu_list"]) # Fine-tune the network. start_time = time.time() gqcnn = get_gqcnn_model(backend)(gqcnn_params) trainer = get_gqcnn_trainer(backend)(gqcnn, dataset_dir, split_name, output_dir, train_config, name=name) trainer.finetune(model_dir) logger.info("Total Fine-tuning Time: " + str(utils.get_elapsed_time(time.time() - start_time))) ================================================ FILE: tools/hyperparam_search.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Script for searching over Grasp Quality Convolutional Neural Network (GQ-CNN) hyper-parameters. 
Author ------ Vishal Satish """ import argparse from autolab_core import YamlConfig, Logger from gqcnn import GQCNNSearch # Set up logger. logger = Logger.get_logger("tools/hyperparam_search.py") if __name__ == "__main__": # Parse args. parser = argparse.ArgumentParser( description="Hyper-parameter search for GQ-CNN.") parser.add_argument("datasets", nargs="+", default=None, help="path to datasets") parser.add_argument("--base_model_dirs", nargs="+", default=[], help="path to pre-trained base models for fine-tuning") parser.add_argument("--train_configs", nargs="+", default=["cfg/train.yaml"], help="path to training configs") parser.add_argument("--analysis_config", type=str, default="cfg/tools/analyze_gqcnn_performance.yaml") parser.add_argument("--split_names", nargs="+", default=["image_wise"], help="dataset splits to use") parser.add_argument("--output_dir", type=str, default="models", help="path to store search data") parser.add_argument("--search_name", type=str, default=None, help="name of search") parser.add_argument("--cpu_cores", nargs="+", default=[], help="CPU cores to use") parser.add_argument("--gpu_devices", nargs="+", default=[], help="GPU devices to use") args = parser.parse_args() datasets = args.datasets base_model_dirs = args.base_model_dirs train_configs = args.train_configs analysis_config = args.analysis_config split_names = args.split_names output_dir = args.output_dir search_name = args.search_name cpu_cores = [int(core) for core in args.cpu_cores] gpu_devices = [int(device) for device in args.gpu_devices] assert len(datasets) == len( train_configs ), "Must have same number of datasets as training configs!" 
if len(base_model_dirs) > 0: models_datasets_mismatch_msg = ("Must have same number of base models" " for fine-tuning as datasets and" " training configs!") assert len(base_model_dirs) == len( datasets), models_datasets_mismatch_msg if len(split_names) < len(datasets): if len(split_names) == 1: logger.warning( "Using split '{}' for all datasets/configs...".format( split_names[0])) split_names *= len(datasets) else: not_enough_splits_msg = ("Can't have fewer splits that" " datasets/configs provided unless there" " is only one.") raise ValueError(not_enough_splits_msg) # Parse configs. analysis_config = YamlConfig(analysis_config) train_configs = [YamlConfig(cfg) for cfg in train_configs] # Search. search = GQCNNSearch(analysis_config, train_configs, datasets, split_names, output_dir=output_dir, search_name=search_name, cpu_cores=cpu_cores, gpu_devices=gpu_devices, base_models=base_model_dirs) search.search() ================================================ FILE: tools/plot_training_losses.py ================================================ # -*- coding: utf-8 -*- """ Copyright ©2017. The Regents of the University of California (Regents). All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research, and not-for-profit purposes, without fee and without a signed licensing agreement, is hereby granted, provided that the above copyright notice, this paragraph and the following two paragraphs appear in all copies, modifications, and distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial licensing opportunities. 
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS
SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS
PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

Script to plot the various errors saved during training.

Author
------
Jeff Mahler

Required Parameters
-------------------
model_dir : str
    Command line argument, the path to the model whose errors are to be
    plotted. All plots and other metrics will be saved to this directory.
"""
import os
import sys

import matplotlib.pyplot as plt
import numpy as np

from autolab_core import Logger

from gqcnn.utils import GeneralConstants, GQCNNFilenames

# Set up logger.
logger = Logger.get_logger("tools/plot_training_losses.py")

if __name__ == "__main__":
    # Positional arg: directory of a trained model containing the saved
    # training/validation stats (.npy files named by GQCNNFilenames).
    result_dir = sys.argv[1]

    train_errors_filename = os.path.join(result_dir,
                                         GQCNNFilenames.TRAIN_ERRORS)
    val_errors_filename = os.path.join(result_dir, GQCNNFilenames.VAL_ERRORS)
    train_iters_filename = os.path.join(result_dir, GQCNNFilenames.TRAIN_ITERS)
    val_iters_filename = os.path.join(result_dir, GQCNNFilenames.VAL_ITERS)
    pct_pos_val_filename = os.path.join(result_dir, GQCNNFilenames.PCT_POS_VAL)
    train_losses_filename = os.path.join(result_dir, GQCNNFilenames.TRAIN_LOSS)
    val_losses_filename = os.path.join(result_dir, GQCNNFilenames.VAL_LOSS)

    raw_train_errors = np.load(train_errors_filename)
    val_errors = np.load(val_errors_filename)
    raw_train_iters = np.load(train_iters_filename)
    val_iters = np.load(val_iters_filename)
    # Fall back to the first recorded validation error when the
    # percent-positive file is missing.
    pct_pos_val = float(val_errors[0])
    if os.path.exists(pct_pos_val_filename):
        # NOTE(review): this branch overwrites the float above with a 0-d
        # numpy array scaled to a percentage -- works with the arithmetic
        # and formatting below, but the types differ between the branches.
        pct_pos_val = 100.0 * np.load(pct_pos_val_filename)
    raw_train_losses = np.load(train_losses_filename)
    # Validation losses are optional; tolerate their absence.
    val_losses = None
    try:
        val_losses = np.load(val_losses_filename)
    except FileNotFoundError:
        pass

    # Prepend an iteration-0 data point so the validation curves start at
    # the percent-positive baseline.
    val_errors = np.r_[pct_pos_val, val_errors]
    val_iters = np.r_[0, val_iters]

    # Window the training error.
    # Smooth the raw per-iteration stats by averaging over consecutive,
    # non-overlapping windows of GeneralConstants.WINDOW iterations.
    i = 0
    train_errors = []
    train_losses = []
    train_iters = []
    while i < raw_train_errors.shape[0]:
        train_errors.append(
            np.mean(raw_train_errors[i:i + GeneralConstants.WINDOW]))
        train_losses.append(
            np.mean(raw_train_losses[i:i + GeneralConstants.WINDOW]))
        train_iters.append(i)
        i += GeneralConstants.WINDOW
    train_errors = np.array(train_errors)
    train_losses = np.array(train_losses)
    train_iters = np.array(train_iters)
    if val_losses is not None:
        # Give the validation-loss curve a starting point matching the
        # first (windowed) training loss.
        val_losses = np.r_[train_losses[0], val_losses]

    # Normalize the curves by the initial validation error.
    init_val_error = val_errors[0]
    norm_train_errors = train_errors / init_val_error
    norm_val_errors = val_errors / init_val_error
    norm_final_val_error = val_errors[-1] / val_errors[0]
    if pct_pos_val > 0:
        norm_final_val_error = val_errors[-1] / pct_pos_val

    logger.info("TRAIN")
    logger.info("Original Error {}".format(train_errors[0]))
    logger.info("Final Error {}".format(train_errors[-1]))
    logger.info("Orig loss {}".format(train_losses[0]))
    logger.info("Final loss {}".format(train_losses[-1]))

    logger.info("VAL")
    logger.info("Original error {}".format(pct_pos_val))
    logger.info("Final error {}".format(val_errors[-1]))
    logger.info("Normalized error {}".format(norm_final_val_error))
    if val_losses is not None:
        logger.info("Orig loss {}".format(val_losses[0]))
        logger.info("Final loss {}".format(val_losses[-1]))

    # Interactive figures: raw error rate, normalized error rate, losses.
    plt.figure()
    plt.plot(train_iters, train_errors, linewidth=4, color="b")
    plt.plot(val_iters, val_errors, linewidth=4, color="g")
    plt.ylim(0, 100)
    plt.legend(("Training (Minibatch)", "Validation"),
               fontsize=15,
               loc="best")
    plt.xlabel("Iteration", fontsize=15)
    plt.ylabel("Error Rate", fontsize=15)

    plt.figure()
    plt.plot(train_iters, norm_train_errors, linewidth=4, color="b")
    plt.plot(val_iters, norm_val_errors, linewidth=4, color="g")
    plt.ylim(0, 2.0)
    plt.legend(("Training (Minibatch)", "Validation"),
               fontsize=15,
               loc="best")
    plt.xlabel("Iteration", fontsize=15)
    plt.ylabel("Normalized Error Rate", fontsize=15)

    # Clamp loss spikes above 100 to 3.0 so they fall outside the fixed
    # (0, 2) y-axis instead of distorting the plot.
    train_losses[train_losses > 100.0] = 3.0
    plt.figure()
    plt.plot(train_iters, train_losses, linewidth=4, color="b")
    plt.ylim(0, 2.0)
    plt.xlabel("Iteration", fontsize=15)
    plt.ylabel("Training Loss", fontsize=15)

    if val_losses is not None:
        val_losses[val_losses > 100.0] = 3.0
        plt.figure()
        plt.plot(val_iters, val_losses, linewidth=4, color="b")
        plt.ylim(0, 2.0)
        plt.xlabel("Iteration", fontsize=15)
        plt.ylabel("Validation Loss", fontsize=15)
    # Displays all figures above; with a blocking matplotlib backend the
    # two savefig figures below are only produced after the windows close.
    plt.show()

    plt.figure(figsize=(8, 6))
    plt.plot(train_iters, train_errors, linewidth=4, color="b")
    plt.plot(val_iters, val_errors, linewidth=4, color="g")
    plt.ylim(0, 100)
    plt.legend(("Training (Minibatch)", "Validation"),
               fontsize=15,
               loc="best")
    plt.xlabel("Iteration", fontsize=15)
    plt.ylabel("Error Rate", fontsize=15)
    plt.savefig(os.path.join(result_dir, "training_curve.jpg"))

    plt.figure(figsize=(8, 6))
    plt.plot(train_iters, norm_train_errors, linewidth=4, color="b")
    plt.plot(val_iters, norm_val_errors, linewidth=4, color="g")
    plt.ylim(0, 2.0)
    plt.legend(("Training (Minibatch)", "Validation"),
               fontsize=15,
               loc="best")
    plt.xlabel("Iteration", fontsize=15)
    plt.ylabel("Normalized Error Rate", fontsize=15)
    plt.savefig(os.path.join(result_dir, "normalized_training_curve.jpg"))

================================================
FILE: tools/run_policy.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial
licensing opportunities.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. REGENTS
SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS
PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

Script to run saved policy output from user.

Author
------
Jeff Mahler
"""
import argparse
import os
import random
import time

import numpy as np

from autolab_core import YamlConfig, Logger
from visualization import Visualizer2D as vis2d

from gqcnn import RgbdImageState, ParallelJawGrasp
from gqcnn import CrossEntropyRobustGraspingPolicy

# Set up logger.
logger = Logger.get_logger("tools/run_policy.py")

if __name__ == "__main__":
    # Parse args.
    parser = argparse.ArgumentParser(
        description=("Run a saved test case through a GQ-CNN policy. For"
                     " debugging purposes only."))
    parser.add_argument("test_case_path",
                        type=str,
                        default=None,
                        help="path to test case")
    parser.add_argument("--config_filename",
                        type=str,
                        default="cfg/tools/run_policy.yaml",
                        help="path to configuration file to use")
    parser.add_argument("--output_dir",
                        type=str,
                        default=None,
                        help="directory to store output")
    parser.add_argument("--seed", type=int, default=None, help="random seed")
    args = parser.parse_args()
    test_case_path = args.test_case_path
    config_filename = args.config_filename
    output_dir = args.output_dir
    seed = args.seed

    # Make output dir.
    if output_dir is not None and not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # Make relative paths absolute.
    # The default config path is relative to the repository root (one level
    # above this script), not the current working directory.
    if not os.path.isabs(config_filename):
        config_filename = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..",
            config_filename)

    # Set random seed.
    # Seed both numpy and the stdlib RNG for reproducible rollouts.
    if seed is not None:
        np.random.seed(seed)
        random.seed(seed)

    # Read config.
    config = YamlConfig(config_filename)
    policy_config = config["policy"]

    # Load test case.
    # A test case directory holds a saved image state ("state") and the
    # originally-planned grasp ("action").
    state_path = os.path.join(test_case_path, "state")
    action_path = os.path.join(test_case_path, "action")
    state = RgbdImageState.load(state_path)
    original_action = ParallelJawGrasp.load(action_path)

    # Init policy.
    policy = CrossEntropyRobustGraspingPolicy(policy_config)

    if policy_config["vis"]["input_images"]:
        vis2d.figure()
        if state.segmask is None:
            # Two panels: color and depth.
            vis2d.subplot(1, 2, 1)
            vis2d.imshow(state.rgbd_im.color)
            vis2d.title("COLOR")
            vis2d.subplot(1, 2, 2)
            vis2d.imshow(state.rgbd_im.depth,
                         vmin=policy_config["vis"]["vmin"],
                         vmax=policy_config["vis"]["vmax"])
            vis2d.title("DEPTH")
        else:
            # Three panels: color, depth, and segmentation mask.
            vis2d.subplot(1, 3, 1)
            vis2d.imshow(state.rgbd_im.color)
            vis2d.title("COLOR")
            vis2d.subplot(1, 3, 2)
            vis2d.imshow(state.rgbd_im.depth,
                         vmin=policy_config["vis"]["vmin"],
                         vmax=policy_config["vis"]["vmax"])
            vis2d.title("DEPTH")
            vis2d.subplot(1, 3, 3)
            vis2d.imshow(state.segmask)
            vis2d.title("SEGMASK")
        # When an output dir is given, the figure is written there instead
        # of only being shown.
        filename = None
        if output_dir is not None:
            filename = os.path.join(output_dir, "input_images.png")
        vis2d.show(filename)

    # Query policy.
    policy_start = time.time()
    action = policy(state)
    logger.info("Planning took %.3f sec" % (time.time() - policy_start))

    # Vis final grasp.
    # Side-by-side comparison of the saved grasp and the newly-planned one,
    # each annotated with its quality (Q) and grasp depth (Z).
    if policy_config["vis"]["final_grasp"]:
        vis2d.figure(size=(10, 10))
        vis2d.subplot(1, 2, 1)
        vis2d.imshow(state.rgbd_im.depth,
                     vmin=policy_config["vis"]["vmin"],
                     vmax=policy_config["vis"]["vmax"])
        vis2d.grasp(original_action.grasp,
                    scale=policy_config["vis"]["grasp_scale"],
                    show_center=False,
                    show_axis=True,
                    color="r")
        vis2d.title("Original (Q=%.3f) (Z=%.3f)" %
                    (original_action.q_value, original_action.grasp.depth))
        vis2d.subplot(1, 2, 2)
        vis2d.imshow(state.rgbd_im.depth,
                     vmin=policy_config["vis"]["vmin"],
                     vmax=policy_config["vis"]["vmax"])
        vis2d.grasp(action.grasp,
                    scale=policy_config["vis"]["grasp_scale"],
                    show_center=False,
                    show_axis=True,
                    color="r")
        vis2d.title("New (Q=%.3f) (Z=%.3f)" %
                    (action.q_value, action.grasp.depth))
        filename = None
        if output_dir is not None:
            filename = os.path.join(output_dir, "planned_grasp.png")
        vis2d.show(filename)

================================================
FILE: tools/train.py
================================================
# -*- coding: utf-8 -*-
"""
Copyright ©2017. The Regents of the University of California (Regents).
All Rights Reserved. Permission to use, copy, modify, and distribute this
software and its documentation for educational, research, and not-for-profit
purposes, without fee and without a signed licensing agreement, is hereby
granted, provided that the above copyright notice, this paragraph and the
following two paragraphs appear in all copies, modifications, and
distributions. Contact The Office of Technology Licensing, UC Berkeley, 2150
Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201,
otl@berkeley.edu, http://ipira.berkeley.edu/industry-info for commercial
licensing opportunities.

IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Script for training a Grasp Quality Convolutional Neural Network (GQ-CNN). Author ------ Vishal Satish & Jeff Mahler """ import argparse import os import time from autolab_core import YamlConfig, Logger import autolab_core.utils as utils from gqcnn import get_gqcnn_model, get_gqcnn_trainer, utils as gqcnn_utils # Setup logger. logger = Logger.get_logger("tools/train.py") if __name__ == "__main__": # Parse args. parser = argparse.ArgumentParser( description=("Train a Grasp Quality Convolutional Neural Network with" " TensorFlow")) parser.add_argument( "dataset_dir", type=str, default=None, help="path to the dataset to use for training and validation") parser.add_argument("--split_name", type=str, default="image_wise", help="name of the split to train on") parser.add_argument("--output_dir", type=str, default=None, help="path to store the model") parser.add_argument("--tensorboard_port", type=int, default=None, help="port to launch tensorboard on") parser.add_argument("--seed", type=int, default=None, help="random seed for training") parser.add_argument("--config_filename", type=str, default=None, help="path to the configuration file to use") parser.add_argument("--name", type=str, default=None, help="name for the trained model") parser.add_argument( "--save_datetime", type=bool, default=False, help=("whether or not to save a model with the date and time of" " training")) parser.add_argument("--backend", type=str, default="tf", help="the deep learning framework to use") args = parser.parse_args() dataset_dir = args.dataset_dir split_name = args.split_name output_dir = args.output_dir tensorboard_port = 
args.tensorboard_port seed = args.seed config_filename = args.config_filename name = args.name save_datetime = args.save_datetime backend = args.backend # Set default output dir. if output_dir is None: output_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../models") # Set default config filename. if config_filename is None: config_filename = os.path.join( os.path.dirname(os.path.realpath(__file__)), "..", "cfg/train.yaml") # Turn relative paths absolute. if not os.path.isabs(dataset_dir): dataset_dir = os.path.join(os.getcwd(), dataset_dir) if not os.path.isabs(output_dir): output_dir = os.path.join(os.getcwd(), output_dir) if not os.path.isabs(config_filename): config_filename = os.path.join(os.getcwd(), config_filename) # Create output dir if necessary. utils.mkdir_safe(output_dir) # Open train config. train_config = YamlConfig(config_filename) if seed is not None: train_config["seed"] = seed train_config["gqcnn"]["seed"] = seed if tensorboard_port is not None: train_config["tensorboard_port"] = tensorboard_port gqcnn_params = train_config["gqcnn"] # Create a unique output folder based on the date and time. if save_datetime: # Create output dir. unique_name = time.strftime("%Y%m%d-%H%M%S") output_dir = os.path.join(output_dir, unique_name) utils.mkdir_safe(output_dir) # Set visible devices. if "gpu_list" in train_config: gqcnn_utils.set_cuda_visible_devices(train_config["gpu_list"]) # Train the network. start_time = time.time() gqcnn = get_gqcnn_model(backend)(gqcnn_params) trainer = get_gqcnn_trainer(backend)(gqcnn, dataset_dir, split_name, output_dir, train_config, name=name) trainer.train() logger.info("Total Training Time: " + str(utils.get_elapsed_time(time.time() - start_time)))