Repository: koide3/monocular_person_following
Branch: master
Commit: ed7f384fe8e5
Files: 45
Total size: 83.6 KB

Directory structure:
gitextract_ntcc8l0f/
├── .github/
│   └── workflows/
│       └── build.yml
├── .travis.yml
├── CMakeLists.txt
├── README.md
├── docker/
│   ├── jp45/
│   │   ├── Dockerfile
│   │   ├── Dockerfile.ros.noetic
│   │   ├── build.sh
│   │   ├── ros_entrypoint.sh
│   │   └── run.sh
│   └── noetic/
│       ├── Dockerfile
│       ├── Dockerfile.ros.noetic
│       ├── build.sh
│       ├── ros_entrypoint.sh
│       └── run.sh
├── include/
│   └── monocular_person_following/
│       ├── context.hpp
│       ├── state/
│       │   ├── initial_state.hpp
│       │   ├── initial_training_state.hpp
│       │   ├── reid_state.hpp
│       │   ├── state.hpp
│       │   └── tracking_state.hpp
│       └── tracklet.hpp
├── launch/
│   ├── jetson_person_following.launch
│   ├── jetson_person_following_sim.launch
│   ├── monocular_person_following.launch
│   └── start_robot.launch
├── msg/
│   ├── BoundingBox2D.msg
│   ├── FaceDetection.msg
│   ├── FaceDetectionArray.msg
│   └── Target.msg
├── package.xml
├── rviz/
│   └── monocular_person_following.rviz
├── scripts/
│   ├── eval/
│   │   ├── convert_dataset2bag.py
│   │   └── generate_bb.py
│   ├── face_detector_node.py
│   ├── robot_controller.py
│   ├── save_bag.sh
│   ├── simple_gesture_recognition.py
│   └── visualization.py
├── src/
│   ├── monocular_person_following/
│   │   ├── context.cpp
│   │   └── state/
│   │       ├── initial_state.cpp
│   │       ├── initial_training_state.cpp
│   │       ├── reid_state.cpp
│   │       └── tracking_state.cpp
│   └── monocular_person_following_node.cpp
└── srv/
    └── Imprint.srv

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/workflows/build.yml
================================================
name: Build

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        ROS_DISTRO: [noetic]

    steps:
    - uses: actions/checkout@v2
      with:
        submodules: true

    - name: Docker login
      uses: docker/login-action@v1
      with:
        username: ${{ secrets.DOCKER_USERNAME }}
        password: ${{ secrets.DOCKER_TOKEN }}

    - name: Docker build
      uses: docker/build-push-action@v2
      with:
        file: ${{github.workspace}}/docker/${{ matrix.ROS_DISTRO }}/Dockerfile
        context: .
        push: false

================================================
FILE: .travis.yml
================================================
sudo: required
language: generic
dist: xenial

services:
  - docker

matrix:
  include:
    - name: "Xenial kinetic"
      env: ROS_DISTRO=kinetic
    - name: "Bionic melodic"
      env: ROS_DISTRO=melodic

script:
  - docker build -f ./docker/$ROS_DISTRO/Dockerfile .
================================================
FILE: CMakeLists.txt
================================================
cmake_minimum_required(VERSION 2.8.3)
project(monocular_person_following)

add_compile_options(-std=c++11)

find_package(catkin REQUIRED
  tf
  rospy
  roscpp
  cv_bridge
  message_filters
  image_transport
  eigen_conversions
  message_generation
  ccf_person_identification
  monocular_people_tracking
)

########################
## service generation ##
########################
add_service_files(FILES
  Imprint.srv
)

########################
## message generation ##
########################
add_message_files(FILES
  BoundingBox2D.msg
  FaceDetection.msg
  FaceDetectionArray.msg
  Target.msg
)

generate_messages(DEPENDENCIES std_msgs sensor_msgs)

###################################
## catkin specific configuration ##
###################################
catkin_package(
# INCLUDE_DIRS include
# LIBRARIES monocular_person_following
# CATKIN_DEPENDS other_catkin_pkg
# DEPENDS system_lib
)

###########
## Build ##
###########
include_directories(
  include
  ${catkin_INCLUDE_DIRS}
)

add_executable(monocular_person_following_node
  src/monocular_person_following/state/initial_state.cpp
  src/monocular_person_following/state/initial_training_state.cpp
  src/monocular_person_following/state/tracking_state.cpp
  src/monocular_person_following/state/reid_state.cpp
  src/monocular_person_following/context.cpp
  src/monocular_person_following_node.cpp
)
add_dependencies(monocular_person_following_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
target_link_libraries(monocular_person_following_node
  ${catkin_LIBRARIES}
)

================================================
FILE: README.md
================================================
# monocular_person_following

This package provides a monocular vision-based person tracking and identification framework for person following robots. It first detects people using *tf-pose-estimation*, and then tracks them in the robot space with an Unscented Kalman filter using the ground plane information. The combination of Convolutional Channel Features and Online Boosting runs on top of the tracking module to keep tracking the target person with a re-identification capability. The entire system is designed so that it can be run on a Jetson TX2/Xavier, and it can easily be reproduced and reused on a new mobile robot platform.
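For reference, the identification result is published as a `monocular_person_following/Target` message on `/monocular_person_following/target`, while the people tracker publishes a `monocular_people_tracking/TrackArray` on `/monocular_people_tracking/tracks`. The minimal consumer below is illustrative only and is not part of the package; the topic and field names are taken from `msg/Target.msg` and `scripts/robot_controller.py`.

```python
#!/usr/bin/env python3
# Illustrative consumer of the tracking/identification outputs.
import rospy
from monocular_people_tracking.msg import TrackArray
from monocular_person_following.msg import Target

target_id = -1

def target_callback(target_msg):
    # state is one of "initial", "initial_training", "tracking", "re-identification"
    global target_id
    target_id = target_msg.target_id
    rospy.loginfo('state=%s target_id=%d', target_msg.state.data, target_msg.target_id)

def tracks_callback(tracks_msg):
    # pick the identified target among all tracked people and read its position
    target = [t for t in tracks_msg.tracks if t.id == target_id]
    if target:
        rospy.loginfo('target position: (%.2f, %.2f)', target[0].pos.x, target[0].pos.y)

rospy.init_node('target_listener')
rospy.Subscriber('/monocular_person_following/target', Target, target_callback)
rospy.Subscriber('/monocular_people_tracking/tracks', TrackArray, tracks_callback)
rospy.spin()
```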
![system](data/imgs/system.png)

[[video]](https://www.youtube.com/watch?v=SsIrXxnOgaQ)

[![Codacy Badge](https://api.codacy.com/project/badge/Grade/c7664fce1722461db5ffdc27eae59e9c)](https://www.codacy.com/app/koide3/monocular_person_following?utm_source=github.com&utm_medium=referral&utm_content=koide3/monocular_person_following&utm_campaign=Badge_Grade)
[![Build](https://github.com/koide3/monocular_person_following/actions/workflows/build.yml/badge.svg)](https://github.com/koide3/monocular_person_following/actions/workflows/build.yml)

## Dependencies

- dlib
- flann
- tensorflow
- tf-pose-estimation

## Quick Test

- [Quick test on a sample rosbag (Desktop PC with CUDA >= 10.2)](https://github.com/koide3/monocular_person_following/wiki/Quick-test-with-sample-rosbag)
- [Quick test using a webcam (Desktop PC with CUDA >= 10.2 / Jetson Xavier with Jetpack 4.5.1)](https://github.com/koide3/monocular_person_following/wiki/Quick-test-with-USB-cam)

![screenshot](data/imgs/screenshot.jpg)

## Install

We tested this package on a Jetson Xavier with Jetpack 4.5.1.

Note: We recommend using docker to avoid the painful installation process.

- [[Install from source (Jetson Xavier with Jetpack 4.5.1)]](https://github.com/koide3/monocular_person_following/wiki/%5BJetpack-4.5%5D-Install-from-source)

## Setup your own person following robot

[[Setup your robot]](https://github.com/koide3/monocular_person_following/wiki/Setup-your-own-person-following-robot)

## Related packages

- [ccf_person_identification](https://github.com/koide3/ccf_person_identification)
- [monocular_people_tracking](https://github.com/koide3/monocular_people_tracking)
- [monocular_person_following](https://github.com/koide3/monocular_person_following)

## Papers

- Kenji Koide, Jun Miura, and Emanuele Menegatti, Monocular Person Tracking and Identification with Online Deep Feature Selection for Person Following Robots, Robotics and Autonomous Systems [[link]](https://www.researchgate.net/publication/336871285_Monocular_Person_Tracking_and_Identification_with_On-line_Deep_Feature_Selection_for_Person_Following_Robots).
- Kenji Koide and Jun Miura, Convolutional Channel Features-based Person Identification for Person Following Robots, 15th International Conference IAS-15, Baden-Baden, Germany, 2018 [[link]](https://www.researchgate.net/publication/325854919_Convolutional_Channel_Features-Based_Person_Identification_for_Person_Following_Robots_Proceedings_of_the_15th_International_Conference_IAS-15).
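Besides the automatic initial imprinting, the tracked target can be re-assigned at runtime through the `~imprint` service (see `srv/Imprint.srv` and `scripts/simple_gesture_recognition.py`, which calls it when a raised-hand gesture is detected). A minimal sketch of a manual caller, for illustration only:

```python
#!/usr/bin/env python3
# Illustrative manual target re-assignment via the imprint service,
# mirroring what scripts/simple_gesture_recognition.py does.
import rospy
from monocular_person_following.srv import Imprint, ImprintRequest

rospy.init_node('imprint_example')
rospy.wait_for_service('/monocular_person_following/imprint')
imprint = rospy.ServiceProxy('/monocular_person_following/imprint', Imprint)

req = ImprintRequest()
req.target_id = 1  # a track id taken from /monocular_people_tracking/tracks
imprint(req)
```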
================================================ FILE: docker/jp45/Dockerfile ================================================ FROM koide3/jetson-ros-noetic-desktop:jp45 # editors RUN apt-get update && \ apt-get install -y --no-install-recommends \ vim nano tmux \ && rm -rf /var/lib/apt/lists/* # flann & openblas RUN apt-get update && \ apt-get install -y --no-install-recommends \ libflann-dev libopenblas-base libopenblas-dev \ && rm -rf /var/lib/apt/lists/* # tensorflow RUN apt-get update && \ apt-get install -y --no-install-recommends \ libhdf5-serial-dev hdf5-tools libhdf5-dev zlib1g-dev \ zip libjpeg8-dev liblapack-dev libblas-dev gfortran \ python3-pip \ && rm -rf /var/lib/apt/lists/* RUN pip3 install -U pip testresources setuptools RUN pip3 install --pre --extra-index-url https://developer.download.nvidia.com/compute/redist/jp/v45 "tensorflow<2" # tf-pose-estimation RUN apt-get update && \ apt-get install -y --no-install-recommends \ llvm-10 llvm-10-dev swig \ && rm -rf /var/lib/apt/lists/* RUN update-alternatives --install /usr/bin/llvm-config llvm-config /usr/bin/llvm-config-10 50 RUN pip3 install numba==0.45.0 WORKDIR /root/catkin_ws/src RUN git clone https://github.com/koide3/tf-pose-estimation WORKDIR /root/catkin_ws/src/tf-pose-estimation RUN pip3 install -r requirements.txt WORKDIR /root/catkin_ws/src/tf-pose-estimation/tf_pose/pafprocess RUN swig -python -c++ pafprocess.i && python3 setup.py build_ext --inplace WORKDIR /root/catkin_ws/src/tf-pose-estimation/models/graph/cmu RUN bash download.sh WORKDIR /root/catkin_ws/src/tf-pose-estimation RUN pip3 install -U matplotlib RUN python3 setup.py install # dlib WORKDIR /root RUN wget http://dlib.net/files/dlib-19.22.tar.bz2 RUN tar xvf dlib-19.22.tar.bz2 ENV DLIB_ROOT="/root/dlib-19.22" # other ros packages WORKDIR /root/catkin_ws/src RUN git clone https://github.com/ros-drivers/usb_cam.git RUN git clone https://github.com/ros-perception/image_transport_plugins.git RUN git clone https://github.com/ros-perception/vision_opencv.git RUN git clone https://github.com/ros-perception/image_common.git RUN git clone https://github.com/ros-perception/image_pipeline.git RUN touch /root/catkin_ws/src/image_transport_plugins/theora_image_transport/CATKIN_IGNORE RUN touch /root/catkin_ws/src/image_pipeline/stereo_image_proc/CATKIN_IGNORE WORKDIR /root/catkin_ws RUN /bin/bash -c '. /opt/ros/noetic/setup.bash; catkin_make -DCMAKE_BUILD_TYPE=Release' # WORKDIR /root/catkin_ws/src # RUN https://github.com/ros/geometry.git # WORKDIR /root/catkin_ws # RUN /bin/bash -c '. /opt/ros/noetic/setup.bash; catkin_make -DCMAKE_BUILD_TYPE=Release' # monocular_person_following WORKDIR /root/catkin_ws/src RUN git clone https://github.com/koide3/open_face_recognition.git RUN git clone https://github.com/koide3/ccf_person_identification.git RUN git clone https://github.com/koide3/monocular_people_tracking.git --recursive RUN git clone https://github.com/koide3/monocular_person_following.git WORKDIR /root/catkin_ws RUN /bin/bash -c '. 
/opt/ros/noetic/setup.bash; catkin_make -DCMAKE_BUILD_TYPE=Release' RUN sed -i "7i source \"/root/catkin_ws/devel/setup.bash\"" /ros_entrypoint.sh WORKDIR /root/catkin_ws/src ENTRYPOINT ["/ros_entrypoint.sh"] CMD ["bash"] ================================================ FILE: docker/jp45/Dockerfile.ros.noetic ================================================ # original dockerfile can be found at: https://github.com/dusty-nv/jetson-containers # # this dockerfile roughly follows the 'Installing from source' from: # http://wiki.ros.org/noetic/Installation/Source # ARG BASE_IMAGE=nvcr.io/nvidia/l4t-base:r32.5.0 FROM ${BASE_IMAGE} ARG ROS_PKG=desktop ENV ROS_DISTRO=noetic ENV ROS_ROOT=/opt/ros/${ROS_DISTRO} ENV ROS_PYTHON_VERSION=3 ENV DEBIAN_FRONTEND=noninteractive WORKDIR /workspace # add the ROS deb repo to the apt sources list RUN apt-get update && \ apt-get install -y --no-install-recommends \ git \ cmake \ build-essential \ curl \ wget \ gnupg2 \ lsb-release \ ca-certificates \ && rm -rf /var/lib/apt/lists/* RUN sh -c 'echo "deb http://packages.ros.org/ros/ubuntu $(lsb_release -sc) main" > /etc/apt/sources.list.d/ros-latest.list' RUN curl -s https://raw.githubusercontent.com/ros/rosdistro/master/ros.asc | apt-key add - # install bootstrap dependencies RUN apt-get update && \ apt-get install -y --no-install-recommends \ libpython3-dev \ python3-rosdep \ python3-rosinstall-generator \ python3-vcstool \ build-essential && \ rosdep init && \ rosdep update && \ rm -rf /var/lib/apt/lists/* RUN ln -s /usr/lib/aarch64-linux-gnu/libboost_python3.a /usr/lib/aarch64-linux-gnu/libboost_python37.a RUN ln -s /usr/lib/aarch64-linux-gnu/libboost_python3.so /usr/lib/aarch64-linux-gnu/libboost_python37.so # download/build the ROS source RUN mkdir ros_catkin_ws && \ cd ros_catkin_ws && \ rosinstall_generator ${ROS_PKG} vision_msgs --rosdistro ${ROS_DISTRO} --deps --tar > ${ROS_DISTRO}-${ROS_PKG}.rosinstall && \ mkdir src && \ vcs import --input ${ROS_DISTRO}-${ROS_PKG}.rosinstall ./src && \ apt-get update && \ rosdep install --from-paths ./src --ignore-packages-from-source --rosdistro ${ROS_DISTRO} --skip-keys python3-pykdl -y && \ python3 ./src/catkin/bin/catkin_make_isolated --install --install-space ${ROS_ROOT} -DCMAKE_BUILD_TYPE=Release && \ rm -rf /var/lib/apt/lists/* RUN rm -rf /workspace # setup entrypoint COPY ros_entrypoint.sh /ros_entrypoint.sh RUN echo 'source /opt/ros/${ROS_DISTRO}/setup.bash' >> /root/.bashrc ENTRYPOINT ["/ros_entrypoint.sh"] CMD ["bash"] WORKDIR / ================================================ FILE: docker/jp45/build.sh ================================================ #!/bin/bash # sudo docker build --tag koide3/jetson-ros-noetic-desktop:jp45 -f Dockerfile.ros.noetic . sudo docker build --tag koide3/monocular_person_following:jp45 -f Dockerfile . 
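The jp45 Dockerfile above installs an NVIDIA-built TensorFlow 1.x wheel and builds tf-pose-estimation (the swig pafprocess extension plus the CMU graph download). A quick way to sanity-check that layer inside the built image is a short Python session; this is only a sketch and not part of the repository, and `TfPoseEstimator` / `get_graph_path` are assumed from the upstream tf-pose-estimation API.

# Hypothetical smoke test for the TensorFlow / tf-pose-estimation layer
# of the image built above (run inside the container).
import tensorflow as tf
from tf_pose.estimator import TfPoseEstimator  # assumed upstream tf-pose-estimation API
from tf_pose.networks import get_graph_path    # assumed upstream tf-pose-estimation API

print(tf.__version__)              # expected to be a 1.x release ("tensorflow<2")
print(tf.test.is_gpu_available())  # True if the GPU is visible inside the container

# the CMU graph was fetched by models/graph/cmu/download.sh during the image build
estimator = TfPoseEstimator(get_graph_path('cmu'), target_size=(432, 368))
print('tf-pose-estimation graph loaded')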
================================================ FILE: docker/jp45/ros_entrypoint.sh ================================================ #!/bin/bash set -e ros_env_setup="/opt/ros/$ROS_DISTRO/setup.bash" echo "sourcing $ros_env_setup" source "$ros_env_setup" echo "ROS_ROOT $ROS_ROOT" echo "ROS_DISTRO $ROS_DISTRO" exec "$@" ================================================ FILE: docker/jp45/run.sh ================================================ #/bin/bash sudo docker run -it --rm \ --net host \ --gpus all \ --device /dev/video0:/dev/video0 \ -v $(realpath ~/.ros/camera_info):/root/.ros/camera_info \ koide3/monocular_person_following:jp45 $@ ================================================ FILE: docker/noetic/Dockerfile ================================================ FROM koide3/jetson-ros-noetic-desktop:noetic RUN apt-get update && \ apt-get install -y --no-install-recommends \ # editors vim nano tmux \ # flann & openblas libflann-dev libopenblas-base libopenblas-dev \ # tf-pose-estimation llvm-10 llvm-10-dev swig \ && rm -rf /var/lib/apt/lists/* RUN update-alternatives --install /usr/bin/llvm-config llvm-config /usr/bin/llvm-config-10 50 WORKDIR /root/catkin_ws/src RUN git clone https://github.com/koide3/tf-pose-estimation WORKDIR /root/catkin_ws/src/tf-pose-estimation RUN pip3 install numba==0.45.0 RUN pip3 install -r requirements.txt WORKDIR /root/catkin_ws/src/tf-pose-estimation/tf_pose/pafprocess RUN swig -python -c++ pafprocess.i && python3 setup.py build_ext --inplace WORKDIR /root/catkin_ws/src/tf-pose-estimation/models/graph/cmu RUN bash download.sh WORKDIR /root/catkin_ws/src/tf-pose-estimation RUN pip3 install -U matplotlib RUN python3 setup.py install # dlib WORKDIR /root RUN wget http://dlib.net/files/dlib-19.22.tar.bz2 RUN tar xvf dlib-19.22.tar.bz2 ENV DLIB_ROOT="/root/dlib-19.22" # other ros packages WORKDIR /root/catkin_ws/src RUN git clone https://github.com/ros-drivers/usb_cam.git RUN git clone https://github.com/ros-perception/image_transport_plugins.git RUN git clone https://github.com/ros-perception/vision_opencv.git RUN git clone https://github.com/ros-perception/image_common.git RUN git clone https://github.com/ros-perception/image_pipeline.git RUN touch /root/catkin_ws/src/image_transport_plugins/theora_image_transport/CATKIN_IGNORE RUN touch /root/catkin_ws/src/image_pipeline/stereo_image_proc/CATKIN_IGNORE WORKDIR /root/catkin_ws RUN /bin/bash -c '. /opt/ros/noetic/setup.bash; catkin_make -DCMAKE_BUILD_TYPE=Release' # monocular_person_following WORKDIR /root/catkin_ws/src RUN git clone https://github.com/koide3/open_face_recognition.git RUN git clone https://github.com/koide3/ccf_person_identification.git RUN git clone https://github.com/koide3/monocular_people_tracking.git --recursive COPY . /root/catkin_ws/monocular_person_following WORKDIR /root/catkin_ws RUN /bin/bash -c '. 
/opt/ros/noetic/setup.bash; catkin_make -DCMAKE_BUILD_TYPE=Release' RUN sed -i "7i source \"/root/catkin_ws/devel/setup.bash\"" /ros_entrypoint.sh WORKDIR /root/catkin_ws/src ENTRYPOINT ["/ros_entrypoint.sh"] CMD ["bash"] ================================================ FILE: docker/noetic/Dockerfile.ros.noetic ================================================ # original dockerfile can be found at: https://github.com/dusty-nv/jetson-containers # # this dockerfile roughly follows the 'Installing from source' from: # http://wiki.ros.org/noetic/Installation/Source # FROM tensorflow/tensorflow:1.15.5-gpu-py3 ARG ROS_PKG=desktop ENV ROS_DISTRO=noetic ENV ROS_ROOT=/opt/ros/${ROS_DISTRO} ENV ROS_PYTHON_VERSION=3 ENV DEBIAN_FRONTEND=noninteractive WORKDIR /workspace # add the ROS deb repo to the apt sources list RUN apt-get update && \ apt-get install -y --no-install-recommends \ git cmake build-essential curl wget \ gnupg2 lsb-release ca-certificates \ && rm -rf /var/lib/apt/lists/* RUN sh -c 'echo "deb http://packages.ros.org/ros/ubuntu $(lsb_release -sc) main" > /etc/apt/sources.list.d/ros-latest.list' RUN curl -s https://raw.githubusercontent.com/ros/rosdistro/master/ros.asc | apt-key add - # install bootstrap dependencies RUN apt-get update && \ apt-get install -y --no-install-recommends \ libpython3-dev python3-rosdep python3-rosinstall-generator \ python3-vcstool build-essential && \ rosdep init && \ rosdep update && \ rm -rf /var/lib/apt/lists/* RUN ln -s /usr/lib/x86_64-linux-gnu/libboost_python3.a /usr/lib/x86_64-linux-gnu/libboost_python37.a RUN ln -s /usr/lib/x86_64-linux-gnu/libboost_python3.so /usr/lib/x86_64-linux-gnu/libboost_python37.so # download/build the ROS source RUN mkdir ros_catkin_ws && \ cd ros_catkin_ws && \ rosinstall_generator ${ROS_PKG} vision_msgs --rosdistro ${ROS_DISTRO} --deps --tar > ${ROS_DISTRO}-${ROS_PKG}.rosinstall && \ mkdir src && \ vcs import --input ${ROS_DISTRO}-${ROS_PKG}.rosinstall ./src && \ apt-get update && \ rosdep install --from-paths ./src --ignore-packages-from-source --rosdistro ${ROS_DISTRO} --skip-keys python3-pykdl -y && \ python ./src/catkin/bin/catkin_make_isolated --install --install-space ${ROS_ROOT} -DCMAKE_BUILD_TYPE=Release && \ rm -rf /var/lib/apt/lists/* && \ rm -rf /workspace # setup entrypoint COPY ros_entrypoint.sh /ros_entrypoint.sh RUN echo 'source /opt/ros/${ROS_DISTRO}/setup.bash' >> /root/.bashrc ENTRYPOINT ["/ros_entrypoint.sh"] CMD ["bash"] WORKDIR / ================================================ FILE: docker/noetic/build.sh ================================================ #!/bin/bash # sudo docker build --tag koide3/jetson-ros-noetic-desktop:noetic -f Dockerfile.ros.noetic . sudo docker build --tag koide3/monocular_person_following:noetic -f Dockerfile . 
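Both run.sh scripts (see below) mount `~/.ros/camera_info` into the container so that the camera driver can load an existing calibration, which the monocular people tracking relies on. The hypothetical helper below only illustrates the standard ROS camera calibration YAML layout with placeholder values; it is not part of the repository. Generate a real file with the camera_calibration tool and match the file name to the camera name used by usb_cam.

# Hypothetical helper: write a placeholder calibration file into ~/.ros/camera_info
# using the standard ROS camera calibration YAML layout. Replace the placeholder
# values with a real calibration before use.
import os

calib = """image_width: 640
image_height: 480
camera_name: head_camera
camera_matrix:
  rows: 3
  cols: 3
  data: [600.0, 0.0, 320.0, 0.0, 600.0, 240.0, 0.0, 0.0, 1.0]
distortion_model: plumb_bob
distortion_coefficients:
  rows: 1
  cols: 5
  data: [0.0, 0.0, 0.0, 0.0, 0.0]
rectification_matrix:
  rows: 3
  cols: 3
  data: [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0]
projection_matrix:
  rows: 3
  cols: 4
  data: [600.0, 0.0, 320.0, 0.0, 0.0, 600.0, 240.0, 0.0, 0.0, 0.0, 1.0, 0.0]
"""

info_dir = os.path.expanduser('~/.ros/camera_info')
os.makedirs(info_dir, exist_ok=True)
with open(os.path.join(info_dir, 'head_camera.yaml'), 'w') as f:
    f.write(calib)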
================================================ FILE: docker/noetic/ros_entrypoint.sh ================================================ #!/bin/bash set -e ros_env_setup="/opt/ros/$ROS_DISTRO/setup.bash" echo "sourcing $ros_env_setup" source "$ros_env_setup" echo "ROS_ROOT $ROS_ROOT" echo "ROS_DISTRO $ROS_DISTRO" exec "$@" ================================================ FILE: docker/noetic/run.sh ================================================ #/bin/bash sudo docker run -it --rm \ --net host \ --gpus all \ --device /dev/video0:/dev/video0 \ -v $(realpath ~/.ros/camera_info):/root/.ros/camera_info \ koide3/monocular_person_following:noetic $@ ================================================ FILE: include/monocular_person_following/context.hpp ================================================ #ifndef MONOCULAR_PERSON_FOLLOWING_CONTEXT_HPP #define MONOCULAR_PERSON_FOLLOWING_CONTEXT_HPP #include #include #include #include #include #include namespace ccf_person_classifier { class PersonInput; class PersonFeatures; class PersonClassifier; } namespace monocular_person_following { class Context { public: using PersonInput = ccf_person_classifier::PersonInput; using PersonFeatures = ccf_person_classifier::PersonFeatures; using PersonClassifier = ccf_person_classifier::PersonClassifier; Context(ros::NodeHandle& nh); ~Context(); public: void extract_features(const cv::Mat& bgr_image, std::unordered_map& tracks); bool update_classifier(double label, const Tracklet::Ptr& track); boost::optional predict(const Tracklet::Ptr& track); std::vector classifier_names() const; cv::Mat visualize_body_features(); private: std::mt19937 mt; std::unique_ptr classifier; std::unordered_map> classifier_confidences; boost::circular_buffer> pos_feature_bank; boost::circular_buffer> neg_feature_bank; }; } #endif ================================================ FILE: include/monocular_person_following/state/initial_state.hpp ================================================ #ifndef MONOCULAR_PERSON_FOLLOWING_INITIAL_STATE_HPP #define MONOCULAR_PERSON_FOLLOWING_INITIAL_STATE_HPP #include namespace monocular_person_following { class InitialState : public State { public: InitialState() {} virtual ~InitialState() override {} virtual std::string state_name() const override { return "initial"; } virtual State* update(ros::NodeHandle& nh, Context& context, const std::unordered_map& tracks) override; private: long select_target(ros::NodeHandle& nh, const std::unordered_map& tracks); }; } #endif // INITIAL_STATE_HPP ================================================ FILE: include/monocular_person_following/state/initial_training_state.hpp ================================================ #ifndef MONOCULAR_PERSON_FOLLOWING_INITIAL_TRAINING_STATE_HPP #define MONOCULAR_PERSON_FOLLOWING_INITIAL_TRAINING_STATE_HPP #include namespace monocular_person_following { class InitialTrainingState : public State { public: InitialTrainingState(long target_id) : target_id(target_id), num_pos_samples(0) {} virtual ~InitialTrainingState() override {} virtual long target() const override { return target_id; } virtual std::string state_name() const override { return "initial_training"; } virtual State* update(ros::NodeHandle& nh, Context& context, const std::unordered_map& tracks) override; private: long target_id; long num_pos_samples; }; } #endif // INITIAL_TRAINING_STATE_HPP ================================================ FILE: include/monocular_person_following/state/reid_state.hpp ================================================ #ifndef 
REID_STATE_HPP #define REID_STATE_HPP #include namespace monocular_person_following { class ReidState : public State { public: ReidState() {} virtual ~ReidState() override {} virtual std::string state_name() const override { return "re-identification"; } virtual State* update(ros::NodeHandle& nh, Context& context, const std::unordered_map& tracks) override; private: std::unordered_map positive_count; }; } #endif // REID_STATE_HPP ================================================ FILE: include/monocular_person_following/state/state.hpp ================================================ #ifndef MONOCULAR_PERSON_FOLLOWING_STATE_HPP #define MONOCULAR_PERSON_FOLLOWING_STATE_HPP #include namespace monocular_person_following { class State { public: State() {} virtual ~State() {} virtual long target() const { return -1; } virtual std::string state_name() const = 0; virtual State* update(ros::NodeHandle& nh, Context& context, const std::unordered_map& tracks) = 0; private: }; } #endif ================================================ FILE: include/monocular_person_following/state/tracking_state.hpp ================================================ #ifndef TRACKING_STATE_HPP #define TRACKING_STATE_HPP #include namespace monocular_person_following { class TrackingState : public State { public: TrackingState(long target_id) : target_id(target_id) {} virtual ~TrackingState() override {} virtual long target() const override { return target_id; } virtual std::string state_name() const override { return "tracking"; } virtual State* update(ros::NodeHandle& nh, Context& context, const std::unordered_map& tracks) override; private: long target_id; }; } #endif // TRACKING_STATE_HPP ================================================ FILE: include/monocular_person_following/tracklet.hpp ================================================ #ifndef TRACKLET_HPP #define TRACKLET_HPP #include #include #include #include #include #include #include namespace monocular_person_following { struct Tracklet { public: EIGEN_MAKE_ALIGNED_OPERATOR_NEW using Ptr = std::shared_ptr; Tracklet(tf::TransformListener& tf_listener, const std_msgs::Header& header, const monocular_people_tracking::Track& track_msg) : confidence(boost::none), track_msg(&track_msg) { geometry_msgs::PointStamped point; point.header = header; point.point = track_msg.pos; geometry_msgs::PointStamped transformed; tf_listener.transformPoint("base_link", point, transformed); pos_in_baselink = Eigen::Vector2f(transformed.point.x, transformed.point.y); } public: boost::optional confidence; std::vector classifier_confidences; cv::Mat face_image; boost::optional person_region; ccf_person_classifier::Input::Ptr input; ccf_person_classifier::Features::Ptr features; Eigen::Vector2f pos_in_baselink; const monocular_people_tracking::Track* track_msg; }; } #endif // TRACKLET_HPP ================================================ FILE: launch/jetson_person_following.launch ================================================ ================================================ FILE: launch/jetson_person_following_sim.launch ================================================ ================================================ FILE: launch/monocular_person_following.launch ================================================ ================================================ FILE: launch/start_robot.launch ================================================ ================================================ FILE: msg/BoundingBox2D.msg ================================================ int32 x int32 y int32 
width int32 height ================================================ FILE: msg/FaceDetection.msg ================================================ int64 track_id BoundingBox2D[] face_roi BoundingBox2D[] face_region sensor_msgs/Image[] face_image ================================================ FILE: msg/FaceDetectionArray.msg ================================================ std_msgs/Header header uint32 image_width uint32 image_height FaceDetection[] faces ================================================ FILE: msg/Target.msg ================================================ std_msgs/Header header std_msgs/String state int64 target_id int64[] track_ids float32[] confidences std_msgs/String[] classifier_names float32[] classifier_confidences ================================================ FILE: package.xml ================================================ monocular_person_following 0.0.0 The monocular_person_following package koide BSD catkin tf rospy roscpp cv_bridge image_transport message_filters message_generation eigen_conversions ccf_person_identification monocular_people_tracking ================================================ FILE: rviz/monocular_person_following.rviz ================================================ Panels: - Class: rviz/Displays Help Height: 84 Name: Displays Property Tree Widget: Expanded: - /Global Options1 - /TF1/Frames1 Splitter Ratio: 0.5 Tree Height: 100 - Class: rviz/Selection Name: Selection - Class: rviz/Tool Properties Expanded: - /2D Pose Estimate1 - /2D Nav Goal1 - /Publish Point1 Name: Tool Properties Splitter Ratio: 0.5886790156364441 - Class: rviz/Views Expanded: - /Current View1 Name: Views Splitter Ratio: 0.5 - Class: rviz/Time Experimental: false Name: Time SyncMode: 0 SyncSource: "" Preferences: PromptSaveOnExit: true Visualization Manager: Class: "" Displays: - Alpha: 0.5 Cell Size: 1 Class: rviz/Grid Color: 160; 160; 164 Enabled: true Line Style: Line Width: 0.029999999329447746 Value: Lines Name: Grid Normal Cell Count: 0 Offset: X: 0 Y: 0 Z: 0 Plane: XY Plane Cell Count: 10 Reference Frame: Value: true - Alpha: 1 Autocompute Intensity Bounds: true Autocompute Value Bounds: Max Value: 10 Min Value: -10 Value: true Axis: Z Channel Name: intensity Class: rviz/LaserScan Color: 255; 255; 255 Color Transformer: Intensity Decay Time: 0 Enabled: false Invert Rainbow: false Max Color: 255; 255; 255 Max Intensity: 13068 Min Color: 0; 0; 0 Min Intensity: 0 Name: LaserScan Position Transformer: XYZ Queue Size: 10 Selectable: true Size (Pixels): 3 Size (m): 0.009999999776482582 Style: Flat Squares Topic: /scan_tf Unreliable: false Use Fixed Frame: true Use rainbow: true Value: false - Alpha: 1 Autocompute Intensity Bounds: true Autocompute Value Bounds: Max Value: 10 Min Value: -10 Value: true Axis: Z Channel Name: intensity Class: rviz/LaserScan Color: 255; 255; 255 Color Transformer: Intensity Decay Time: 0 Enabled: false Invert Rainbow: false Max Color: 255; 255; 255 Max Intensity: 12958 Min Color: 0; 0; 0 Min Intensity: 0 Name: LaserScan Position Transformer: XYZ Queue Size: 10 Selectable: true Size (Pixels): 3 Size (m): 0.009999999776482582 Style: Flat Squares Topic: /scan_tr Unreliable: false Use Fixed Frame: true Use rainbow: true Value: false - Alpha: 1 Autocompute Intensity Bounds: true Autocompute Value Bounds: Max Value: 10 Min Value: -10 Value: true Axis: Z Channel Name: intensity Class: rviz/LaserScan Color: 255; 255; 255 Color Transformer: Intensity Decay Time: 0 Enabled: false Invert Rainbow: false Max Color: 255; 255; 255 Max Intensity: 15634 
Min Color: 0; 0; 0 Min Intensity: 0 Name: LaserScan Position Transformer: XYZ Queue Size: 10 Selectable: true Size (Pixels): 3 Size (m): 0.009999999776482582 Style: Flat Squares Topic: /scan_bf Unreliable: false Use Fixed Frame: true Use rainbow: true Value: false - Alpha: 1 Autocompute Intensity Bounds: true Autocompute Value Bounds: Max Value: 10 Min Value: -10 Value: true Axis: Z Channel Name: intensity Class: rviz/LaserScan Color: 255; 255; 255 Color Transformer: Intensity Decay Time: 0 Enabled: false Invert Rainbow: false Max Color: 255; 255; 255 Max Intensity: 11609 Min Color: 0; 0; 0 Min Intensity: 0 Name: LaserScan Position Transformer: XYZ Queue Size: 10 Selectable: true Size (Pixels): 3 Size (m): 0.009999999776482582 Style: Flat Squares Topic: /scan_br Unreliable: false Use Fixed Frame: true Use rainbow: true Value: false - Class: rviz/TF Enabled: true Frame Timeout: 15 Frames: All Enabled: false Marker Scale: 1 Name: TF Show Arrows: true Show Axes: true Show Names: true Tree: {} Update Interval: 0 Value: true - Alpha: 1 Autocompute Intensity Bounds: true Autocompute Value Bounds: Max Value: 10 Min Value: -10 Value: true Axis: Z Channel Name: intensity Class: rviz/LaserScan Color: 255; 255; 255 Color Transformer: Intensity Decay Time: 0 Enabled: true Invert Rainbow: false Max Color: 255; 255; 255 Max Intensity: 4101 Min Color: 0; 0; 0 Min Intensity: 0 Name: LaserScan Position Transformer: XYZ Queue Size: 10 Selectable: true Size (Pixels): 3 Size (m): 0.009999999776482582 Style: Flat Squares Topic: /scan_bottom Unreliable: false Use Fixed Frame: true Use rainbow: true Value: true - Alpha: 1 Autocompute Intensity Bounds: true Autocompute Value Bounds: Max Value: 10 Min Value: -10 Value: true Axis: Z Channel Name: intensity Class: rviz/LaserScan Color: 255; 255; 255 Color Transformer: Intensity Decay Time: 0 Enabled: true Invert Rainbow: false Max Color: 255; 255; 255 Max Intensity: 11846 Min Color: 0; 0; 0 Min Intensity: 0 Name: LaserScan Position Transformer: XYZ Queue Size: 10 Selectable: true Size (Pixels): 3 Size (m): 0.009999999776482582 Style: Flat Squares Topic: /scan_top Unreliable: false Use Fixed Frame: true Use rainbow: true Value: true - Alpha: 1 Autocompute Intensity Bounds: true Autocompute Value Bounds: Max Value: 10 Min Value: -10 Value: true Axis: Z Channel Name: intensity Class: rviz/LaserScan Color: 255; 255; 255 Color Transformer: Intensity Decay Time: 0 Enabled: true Invert Rainbow: false Max Color: 255; 255; 255 Max Intensity: 892 Min Color: 0; 0; 0 Min Intensity: 148 Name: LaserScan Position Transformer: XYZ Queue Size: 10 Selectable: true Size (Pixels): 3 Size (m): 0.009999999776482582 Style: Flat Squares Topic: /scan Unreliable: false Use Fixed Frame: true Use rainbow: true Value: true - Class: rviz/MarkerArray Enabled: true Marker Topic: /monocular_people_tracking/markers Name: MarkerArray Namespaces: {} Queue Size: 100 Value: true - Class: rviz/Image Enabled: true Image Topic: /monocular_person_following/features Max Value: 1 Median window: 5 Min Value: 0 Name: Image Normalize Range: true Queue Size: 2 Transport Hint: raw Unreliable: false Value: true - Class: rviz/Image Enabled: true Image Topic: /visualization_node/visualize Max Value: 1 Median window: 5 Min Value: 0 Name: Image Normalize Range: true Queue Size: 2 Transport Hint: raw Unreliable: false Value: true Enabled: true Global Options: Background Color: 48; 48; 48 Default Light: true Fixed Frame: base_link Frame Rate: 30 Name: root Tools: - Class: rviz/Interact Hide Inactive Objects: true - 
Class: rviz/MoveCamera - Class: rviz/Select - Class: rviz/FocusCamera - Class: rviz/Measure - Class: rviz/SetInitialPose Topic: /initialpose - Class: rviz/SetGoal Topic: /move_base_simple/goal - Class: rviz/PublishPoint Single click: true Topic: /clicked_point Value: true Views: Current: Class: rviz/Orbit Distance: 12.880599975585938 Enable Stereo Rendering: Stereo Eye Separation: 0.05999999865889549 Stereo Focal Distance: 1 Swap Stereo Eyes: false Value: false Focal Point: X: 1.4971981048583984 Y: -0.11331728100776672 Z: -1.1117677688598633 Focal Shape Fixed Size: true Focal Shape Size: 0.05000000074505806 Invert Z Axis: false Name: Current View Near Clip Distance: 0.009999999776482582 Pitch: 0.9347967505455017 Target Frame: Value: Orbit (rviz) Yaw: 2.420490026473999 Saved: ~ Window Geometry: "&Displays": collapsed: false "&Image": collapsed: false "&Time": collapsed: false "&Views": collapsed: false Height: 1021 Hide Left Dock: false Hide Right Dock: false I&mage: collapsed: false QMainWindow State: 000000ff00000000fd0000000400000000000003e800000345fc0200000012fb0000001200530065006c0065006300740069006f006e00000001e10000009b0000007601000003fb0000001e0054006f006f006c002000500072006f007000650072007400690065007302000001ed000001df00000185000000a3fb000000120056006900650077007300200054006f006f02000001df000002110000018500000122fb000000200054006f006f006c002000500072006f0070006500720074006900650073003203000002880000011d000002210000017afb000000100044006900730070006c006100790073010000004400000105000000f701000003fb0000002000730065006c0065006300740069006f006e00200062007500660066006500720200000138000000aa0000023a00000294fb00000014005700690064006500530074006500720065006f02000000e6000000d2000003ee0000030bfb0000000c004b0069006e0065006300740200000186000001060000030c00000261fb0000000a0049006d006100670065010000014a0000018a0000002201000003fb0000000a0049006d00610067006501000002d5000000b40000002201000003fb0000000a0049006d006100670065000000022e000000cc0000000000000000fb0000000a0049006d00610067006500000001c3000000e90000000000000000fb0000000a0049006d0061006700650100000245000001530000000000000000fb0000000a0049006d0061006700650000000238000001600000000000000000fb0000000a0049006d006100670065010000016c000001890000000000000000fb0000000a0049006d00610067006501000002f8000000a00000000000000000fb0000000a0049006d00610067006501000001120000028d0000000000000000fb0000000a0049006d006100670065010000029e000000eb0000000000000000000000010000011000000345fc0200000003fb0000001e0054006f006f006c002000500072006f00700065007200740069006500730100000041000000780000000000000000fc0000004400000345000000d000fffffffa000000010100000002fb0000000a0049006d0061006700650000000000ffffffff0000000000000000fb0000000a0056006900650077007301000006300000010f0000011001000003fb0000001200530065006c0065006300740069006f006e010000025a000000b200000000000000000000000200000490000000a9fc0100000001fb0000000a00560069006500770073030000004e00000080000002e100000197000000030000073f0000004cfc0100000002fb0000000800540069006d006501000000000000073f000002ad01000003fb0000000800540069006d00650100000000000004500000000000000000000002450000034500000004000000040000000800000008fc0000000100000002000000010000000a0054006f006f006c00730100000000ffffffff0000000000000000 Selection: collapsed: false Tool Properties: collapsed: false Width: 1855 X: 57 Y: 0 ================================================ FILE: scripts/eval/convert_dataset2bag.py ================================================ #!/usr/bin/python import os import sys import cv2 import numpy import rospy import rosbag from 
sensor_msgs.msg import * def main(): input_dir = sys.argv[1] output_bag = sys.argv[2] camera_info = CameraInfo() camera_info.K = [300, 0, 320, 0, 300, 240, 0, 0, 1] camera_info.D = [0, 0, 0, 0] image_files = sorted([input_dir + '/' + x for x in os.listdir(input_dir) if '.jpg' in x]) with rosbag.Bag(output_bag, 'w') as dst_bag: stamp = rospy.Time(0) for image_file in image_files: print image_file image = cv2.imread(image_file) if image is None: print 'failed to read image...' continue stamp += rospy.Duration(1.0 / 14.2) compressed_msg = CompressedImage() compressed_msg.header.stamp = stamp compressed_msg.header.frame_id = 'camera_optical_frame' camera_info.header = compressed_msg.header ret, encoded = cv2.imencode('.jpg', image) buf = numpy.vstack([encoded]).tostring() compressed_msg.data = buf dst_bag.write('/top_front_camera/camera_info', camera_info, stamp) dst_bag.write('/top_front_camera/image_raw/compressed', compressed_msg, stamp) if rospy.is_shutdown(): break if __name__ == '__main__': main() ================================================ FILE: scripts/eval/generate_bb.py ================================================ #!/usr/bin/python import sys import numpy import rosbag def main(): images_bag = sys.argv[1] results_bag = images_bag[:images_bag.find('.bag')] + '_results.bag' dst_file = sys.argv[2] 'read image timestamps' image_stamps = [] with rosbag.Bag(images_bag, 'r') as bag: for topic, msg, stamp in bag.read_messages(): if 'compressed' not in topic: continue image_stamps.append(msg.header.stamp) timestamp_map = {} for i, stamp in enumerate(image_stamps): timestamp_map[stamp] = i init_bbox = None with rosbag.Bag(results_bag, 'r') as bag: print 'read results' all_msgs = sorted([x for x in bag.read_messages()], key=lambda x: x[2]) target_id = -1 bbs = {} for topic, msg, stamp in all_msgs: if 'target' in topic: target_id = msg.target_id if 'tracks' in topic: target = [x for x in msg.tracks if x.id == target_id] if not len(target): continue if False: neck_ankle = target[0].expected_measurement_mean neck = numpy.float32(neck_ankle[:2]) ankle = numpy.float32(neck_ankle[2:]) else: if not len(target[0].associated_neck_ankle): continue neck_ankle = target[0].associated_neck_ankle neck = numpy.float32([neck_ankle[0].x, neck_ankle[0].y]) ankle = numpy.float32([neck_ankle[1].x, neck_ankle[1].y]) center = (neck + ankle) / 2.0 height = (ankle[1] - neck[1]) width = height * 0.25 center[1] -= height * 0.1 height = height * 1.1 if width > 1e6 or height > 1e6: continue tl = numpy.int32((center - (width / 2, height / 2))) wh = numpy.int32((width, height)) image_id = timestamp_map[msg.header.stamp] bbs[image_id] = tl.tolist() + wh.tolist() if init_bbox is None: init_bbox = tl.tolist() + wh.tolist() print init_bbox with open(dst_file, 'w') as file: bbox = init_bbox for i, timestamp in enumerate(image_stamps): if i in bbs: bbox = bbs[i] print >> sys.stdout, '%d\t%d\t%d\t%d\t%d' % tuple([i] + bbox) print >> file, '%d\t%d\t%d\t%d\t%d' % tuple([i] + bbox) if __name__ == '__main__': main() ================================================ FILE: scripts/face_detector_node.py ================================================ #!/usr/bin/python import cv2 import numpy import dlib import rospy import cv_bridge import message_filters from tfpose_ros.msg import * from sensor_msgs.msg import * from monocular_people_tracking.msg import * from monocular_person_following.msg import * class FaceDetectorNode: def __init__(self): self.cv_bridge = cv_bridge.CvBridge() self.face_detector = 
dlib.get_frontal_face_detector() self.roi_scale = rospy.get_param('~roi_scale', 1.0) self.face_detection_upscale_pyramid = rospy.get_param('~face_detection_upscale_pyramid', 1) self.confidence_thresh = rospy.get_param('~confidence_thresh', -0.5) self.face_expantion_scale = rospy.get_param('~face_expantion_scale', 1.5) subs = [ message_filters.Subscriber('image_rect', Image), message_filters.Subscriber('/monocular_people_tracking/tracks', TrackArray) ] self.sync = message_filters.TimeSynchronizer(subs, 30) self.sync.registerCallback(self.callback) self.faces_pub = rospy.Publisher('~faces', FaceDetectionArray, queue_size=5) def callback(self, image_msg, tracks_msg): image = self.cv_bridge.imgmsg_to_cv2(image_msg, 'bgr8') faces_msg = FaceDetectionArray() faces_msg.header = image_msg.header faces_msg.image_width = image_msg.width faces_msg.image_height = image_msg.height for person in tracks_msg.tracks: face_msg = FaceDetection() face_msg.track_id = person.id faces_msg.faces.append(face_msg) roi = self.calc_roi(image.shape[1], image.shape[0], person) if roi is None: continue face_msg.face_roi.append(self.tlbr2bb(roi[0], roi[1])) face = self.detect_face(image, roi) if face is None: continue face_msg.face_region.append(self.tlbr2bb(face[0], face[1])) face_image = self.extract_roi(image, face[0], face[1]) face_msg.face_image.append(self.cv_bridge.cv2_to_imgmsg(face_image, 'bgr8')) self.faces_pub.publish(faces_msg) def calc_roi(self, width, height, track_msg): if not len(track_msg.associated): return None pose = track_msg.associated[0] head = [x for x in pose.body_part if x.part_id == 0] neck = [x for x in pose.body_part if x.part_id == 1] if not len(head) or not len(neck): return None image_size = numpy.float32([width, height]) head = numpy.float32([head[0].x, head[0].y]) * image_size neck = numpy.float32([neck[0].x, neck[0].y]) * image_size neck2head = (neck[1] - head[1]) tl = head - neck2head * self.roi_scale br = head + neck2head * self.roi_scale tl = max(0, min(width, tl[0])), max(0, min(height, tl[1])) br = max(0, min(width, br[0])), max(0, min(height, br[1])) return tuple(numpy.int32(tl)), tuple(numpy.int32(br)) def detect_face(self, image, roi): if roi[1][0] - roi[0][0] < 10 or roi[1][1] - roi[0][1] < 10: print 'too small roi!!' 
return None tl, br = roi roi_image = self.extract_roi(image, tl, br) dets, confidences, labels = self.face_detector.run(roi_image, self.face_detection_upscale_pyramid, self.confidence_thresh) if not len(dets): return None largest = sorted(dets, key=lambda x: -x.area())[0] center = largest.dcenter() face_left = int(center.x - (center.x - largest.left()) * self.face_expantion_scale) face_top = int(center.y - (center.y - largest.top()) * self.face_expantion_scale) face_right = int(center.x + (center.x - largest.left()) * self.face_expantion_scale) face_bottom = int(center.y + (center.y - largest.top()) * self.face_expantion_scale) face_tl = (tl[0] + face_left, tl[1] + face_top) face_br = (tl[0] + face_right, tl[1] + face_bottom) return face_tl, face_br def extract_roi(self, image, tl, br): return image[tl[1]: br[1], tl[0]: br[0], :] def tlbr2bb(self, tl, br): bb = BoundingBox2D() bb.x = tl[0] bb.y = tl[1] bb.width = br[0] - tl[0] bb.height = br[1] - tl[1] return bb def main(): print '--- face_detector_node ---' rospy.init_node('face_detector_node') node = FaceDetectorNode() print 'ready' rospy.spin() if __name__ == '__main__': main() ================================================ FILE: scripts/robot_controller.py ================================================ #!/usr/bin/python import tf import math import rospy from geometry_msgs.msg import * from monocular_people_tracking.msg import * from monocular_person_following.msg import * class RobotControllerNode: def __init__(self): self.tf_listener = tf.TransformListener() self.enable_back = rospy.get_param('~enable_back', True) self.max_vx = rospy.get_param('~max_vx', 0.2) self.max_va = rospy.get_param('~max_va', 0.5) self.gain_vx = rospy.get_param('~gain_vx', 0.3) self.gain_va = rospy.get_param('~gain_va', 0.3) self.distance = rospy.get_param('~distance', 3.25) self.timeout = rospy.get_param('~timeout', 1.0) self.last_time = rospy.Time(0) self.target_id = -1 self.cmd_vel_pub = rospy.Publisher('/cmd_vel', Twist, queue_size=1) self.target_sub = rospy.Subscriber('/monocular_person_following/target', Target, self.target_callback) self.tracks_sub = rospy.Subscriber('/monocular_people_tracking/tracks', TrackArray, self.tracks_callback) def target_callback(self, target_msg): self.target_id = target_msg.target_id def tracks_callback(self, tracks_msg): target = [x for x in tracks_msg.tracks if x.id == self.target_id] if not len(target): self.cmd_vel_pub.publish(Twist()) return point = PointStamped() point.header = tracks_msg.header point.point = target[0].pos try: self.tf_listener.waitForTransform('base_link', tracks_msg.header.frame_id, tracks_msg.header.stamp, rospy.Duration(0.5)) target_pos = self.tf_listener.transformPoint('base_link', point) except: print 'failed to lookup transform between base_link and', tracks_msg.header.frame_id self.cmd_vel_pub.publish(Twist()) return theta = math.atan2(target_pos.point.y, target_pos.point.x) print 'x, y, theta:', target_pos.point.x, target_pos.point.y, theta, math.radians(45) va = min(self.max_va, max(-self.max_va, theta * self.gain_va)) vx = 0.0 if abs(theta) < math.radians(45): vx = (target_pos.point.x - self.distance) * self.gain_vx print 'raw vx', vx min_vx = -self.max_vx if self.enable_back else 0.0 vx = min(self.max_vx, max(min_vx, vx)) else: print 'rotation too big' twist = Twist() twist.linear.x = vx twist.angular.z = va self.cmd_vel_pub.publish(twist) self.last_time = tracks_msg.header.stamp def spin(self): if (rospy.Time.now() - self.last_time).to_sec() > self.timeout: print 'timeout!!' 
self.cmd_vel_pub.publish(Twist()) def main(): rospy.init_node('robot_controller_node') node = RobotControllerNode() rate = rospy.Rate(10) while not rospy.is_shutdown(): node.spin() rate.sleep() if __name__ == '__main__': main() ================================================ FILE: scripts/save_bag.sh ================================================ #!/bin/bash rosbag record -O $1 "/visualization_node/visualize/compressed" "/tf" "/scan" -e ".*/markers" -e ".*/tracks" -e ".*/target" -e ".*/features/compressed" -e ".*/pose" ================================================ FILE: scripts/simple_gesture_recognition.py ================================================ #!/usr/bin/env python3 import rospy from monocular_people_tracking.msg import * from monocular_person_following.srv import * class GestureRecognizer: def __init__(self, track_id): self.track_id = track_id self.last_stamp = rospy.Time.now() def callback(self, track_msg, imprint): if not len(track_msg.associated): self.last_stamp = rospy.Time.now() return skeleton = track_msg.associated[0] neck = [x for x in skeleton.body_part if x.part_id == 1] r_elbow = [x for x in skeleton.body_part if x.part_id == 3] r_hand = [x for x in skeleton.body_part if x.part_id == 4] if not len(neck) or not len(r_elbow) or not len(r_hand): self.last_stamp = rospy.Time.now() return if r_elbow[0].y < neck[0].y and r_hand[0].y < neck[0].y: print(self.track_id, rospy.Time.now() - self.last_stamp) if rospy.Time.now() - self.last_stamp > rospy.Duration(5.0): imprint(track_msg.id) self.last_stamp = rospy.Time.now() class SimpleGestureRecognitionNode: def __init__(self): print('--- simple_gesture_recognition ---') self.recognizers = {} print('wait for service') rospy.wait_for_service('/monocular_person_following/imprint') self.imprint_service = rospy.ServiceProxy('/monocular_person_following/imprint', Imprint) self.sub = rospy.Subscriber('/monocular_people_tracking/tracks', TrackArray, self.callback) print('done') def callback(self, track_msg): for track in track_msg.tracks: if track.id not in self.recognizers: self.recognizers[track.id] = GestureRecognizer(track.id) self.recognizers[track.id].callback(track, self.imprint) def imprint(self, target_id): print('reset target', target_id) req = ImprintRequest() req.target_id = target_id self.imprint_service(req) def main(): rospy.init_node('simple_gesture_recognition') node = SimpleGestureRecognitionNode() rospy.spin() if __name__ == '__main__': main() ================================================ FILE: scripts/visualization.py ================================================ #!/usr/bin/env python3 import cv2 import math import numpy import tf import rospy import cv_bridge import message_filters from tfpose_ros.msg import * from sensor_msgs.msg import * from monocular_people_tracking.msg import * from monocular_person_following.msg import * from tf_pose import common from tf_pose.estimator import Human, BodyPart class VisualizationNode: def __init__(self): self.tf_listener = tf.TransformListener() self.image_pub = rospy.Publisher('~visualize', Image, queue_size=1) color_palette = numpy.uint8([(180 / 10 * i, 255, 255) for i in range(10)]).reshape(-1, 1, 3) self.color_palette = cv2.cvtColor(color_palette, cv2.COLOR_HSV2BGR).reshape(-1, 3) self.target_id = 0 self.state_name = "NONE" self.confidences = {} self.use_face = rospy.get_param('~use_face', True) self.target_sub = rospy.Subscriber('/monocular_person_following/target', Target, self.target_callback) self.image = numpy.zeros((128, 128, 3), dtype=numpy.uint8) subs 
= [ message_filters.Subscriber('image_rect', Image), message_filters.Subscriber('/pose_estimator/pose', Persons), message_filters.Subscriber('/monocular_people_tracking/tracks', TrackArray) ] if self.use_face: subs.append(message_filters.Subscriber('/face_detector/faces', FaceDetectionArray)) self.sync = message_filters.TimeSynchronizer(subs, 50) self.sync.registerCallback(self.callback) else: self.sync = message_filters.TimeSynchronizer(subs, 50) self.sync.registerCallback(self.callback_wo_face) def target_callback(self, target_msg): self.state_name = target_msg.state.data self.target_id = target_msg.target_id for i in range(len(target_msg.track_ids)): track_id = target_msg.track_ids[i] confidence = target_msg.confidences[i] if track_id not in self.confidences: self.confidences[track_id] = {} self.confidences[track_id]['conf'] = confidence num_classifiers = len(target_msg.classifier_names) for j in range(num_classifiers): self.confidences[track_id][target_msg.classifier_names[j].data] = target_msg.classifier_confidences[num_classifiers * i + j] def callback_wo_face(self, image_msg, poses_msg, tracks_msg): self.callback(image_msg, poses_msg, tracks_msg, None) def callback(self, image_msg, poses_msg, tracks_msg, faces_msg): image = cv_bridge.CvBridge().imgmsg_to_cv2(image_msg, 'bgr8') humans = [] for p_idx, person in enumerate(poses_msg.persons): human = Human([]) for body_part in person.body_part: part = BodyPart('', body_part.part_id, body_part.x, body_part.y, body_part.confidence) human.body_parts[body_part.part_id] = part humans.append(human) image = self.draw_humans(image, humans, imgcopy=False) for track in tracks_msg.tracks: self.draw_expected_measurement(image, track) self.draw_bounding_box(image, track) if track.id == self.target_id: self.draw_target_icon(image, track) if faces_msg is not None: face_scale = image_msg.width / float(faces_msg.image_width) def draw_face(bb, color, line_width): tl = numpy.float32((bb.x, bb.y)) * face_scale br = numpy.float32((bb.x + bb.width, bb.y + bb.height)) * face_scale cv2.rectangle(image, tuple(tl.astype(numpy.int32)), tuple(br.astype(numpy.int32)), color, line_width) for face in faces_msg.faces: if not len(face.face_roi): continue draw_face(face.face_roi[0], (255, 0, 0), 1) if not len(face.face_region): continue face_confidence = 0.0 if face.track_id in self.confidences: face_confidence = self.confidences[face.track_id]['face'] face_confidence = (face_confidence + 1.0) / 2.0 color = (0, int(face_confidence * 255), int((1.0 - face_confidence) * 255)) draw_face(face.face_region[0], color, 2) cv2.putText(image, self.state_name, (15, 30), cv2.FONT_HERSHEY_PLAIN, 1.5, (0, 0, 0), 3) cv2.putText(image, self.state_name, (15, 30), cv2.FONT_HERSHEY_PLAIN, 1.5, (255, 255, 255), 1) self.image = image # taken from tfpose_ros/tf_pose/estimator.py def draw_humans(self, npimg, humans, imgcopy=False): if imgcopy: npimg = np.copy(npimg) canvas = npimg.copy() image_h, image_w = npimg.shape[:2] centers = {} for human in humans: # draw point for i in range(common.CocoPart.Background.value): if i not in human.body_parts.keys(): continue body_part = human.body_parts[i] center = (int(body_part.x * image_w + 0.5), int(body_part.y * image_h + 0.5)) centers[i] = center cv2.circle(canvas, center, 3, common.CocoColors[i], thickness=3, lineType=8, shift=0) # draw line for pair_order, pair in enumerate(common.CocoPairsRender): if pair[0] not in human.body_parts.keys() or pair[1] not in human.body_parts.keys(): continue # npimg = cv2.line(npimg, centers[pair[0]], 
centers[pair[1]], common.CocoColors[pair_order], 3) cv2.line(canvas, centers[pair[0]], centers[pair[1]], common.CocoColors[pair_order], 3) npimg = npimg / 2 + canvas / 2 return npimg def draw_expected_measurement(self, image, track): meas_mean = numpy.float32(track.expected_measurement_mean).flatten() meas_cov = numpy.float32(track.expected_measurement_cov).reshape(4, 4) def error_ellipse(cov, kai): w, v = numpy.linalg.eig(cov) extents = numpy.sqrt(kai * kai * w) angle = math.atan2(v[0, 1], v[1, 1]) return (extents[0], extents[1], angle) neck_ellipse = error_ellipse(meas_cov[:2, :2], 3.0) ankle_ellipse = error_ellipse(meas_cov[2:, 2:], 3.0) neck_pos = tuple(meas_mean[:2].astype(numpy.int32)) ankle_pos = tuple(meas_mean[2:].astype(numpy.int32)) color = self.color_palette[track.id % len(self.color_palette)] color = tuple(int(x) for x in color) cv2.ellipse(image, neck_pos, neck_ellipse[:2], neck_ellipse[-1], 0, 360, color, 2) cv2.ellipse(image, ankle_pos, ankle_ellipse[:2], neck_ellipse[-1], 0, 360, color, 2) cv2.line(image, neck_pos, ankle_pos, color, 2) def draw_bounding_box(self, image, track): neck_ankle = numpy.float32(track.expected_measurement_mean).flatten() center = (neck_ankle[:2] + neck_ankle[2:]) / 2.0 height = (neck_ankle[-1] - neck_ankle[1]) * 1.5 width = height * 0.25 half_extents = (width / 2.0, height / 2.0) tl = tuple(numpy.int32(center - half_extents)) br = tuple(numpy.int32(center + half_extents)) cv2.putText(image, "id:%d" % track.id, (tl[0] + 5, tl[1] + 15), cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 0), 2) cv2.putText(image, "id:%d" % track.id, (tl[0] + 5, tl[1] + 15), cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), 1) body_confidence = 0.0 if track.id in self.confidences: body_confidence = self.confidences[track.id]['body'] def order(name): return {'conf': 0, 'body': 1, 'face': 2}[name] keys = sorted(self.confidences[track.id].keys(), key=order) for i, key in enumerate(keys): conf = self.confidences[track.id][key] cv2.putText(image, "%s:%.2f" % (key, conf), (tl[0] + 5, br[1] - 5 - 12 * i), cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 0), 2) cv2.putText(image, "%s:%.2f" % (key, conf), (tl[0] + 5, br[1] - 5 - 12 * i), cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), 1) body_confidence = body_confidence + 0.5 color = (0, int(255 * body_confidence), int(255 * (1 - body_confidence))) cv2.rectangle(image, tl, br, color, 2) def draw_target_icon(self, image, track): neck_ankle = numpy.float32(track.expected_measurement_mean).flatten() height = neck_ankle[-1] - neck_ankle[1] pt = (neck_ankle[:2] + (0, -height * 0.33)).astype(numpy.int32) pts = numpy.array([pt, pt + (15, -15), pt + (-15, -15)]).reshape(-1, 1, 2) cv2.polylines(image, [pts], True, (0, 0, 255), 2) def spin(self): if rospy.get_param('~show', True): cv2.imshow('image', self.image) cv2.waitKey(10) if self.image_pub.get_num_connections(): img_msg = cv_bridge.CvBridge().cv2_to_imgmsg(self.image.astype(numpy.uint8), 'bgr8') self.image_pub.publish(img_msg) def main(): rospy.init_node('visualization_node') node = VisualizationNode() rate = rospy.Rate(15) while not rospy.is_shutdown(): node.spin() rate.sleep() if __name__ == '__main__': main() ================================================ FILE: src/monocular_person_following/context.cpp ================================================ #include #include #include namespace monocular_person_following { Context::Context(ros::NodeHandle& nh) { classifier.reset(new PersonClassifier(nh)); pos_feature_bank.resize(nh.param("feature_bank_size", 32)); 
neg_feature_bank.resize(nh.param("feature_bank_size", 32)); } Context::~Context() {} void Context::extract_features(const cv::Mat& bgr_image, std::unordered_map& tracks) { for(auto& track: tracks) { if(!track.second->person_region) { continue; } if(track.second->person_region->width < 20 || track.second->person_region->height < 20) { continue; } track.second->input.reset(new PersonInput()); track.second->features.reset(new PersonFeatures()); std::unordered_map images; images["body"] = cv::Mat(bgr_image, *track.second->person_region); images["face"] = track.second->face_image; if(!classifier->extractInput(track.second->input, images)) { ROS_WARN_STREAM("failed to extract input data"); continue; } if(!classifier->extractFeatures(track.second->features, track.second->input)) { ROS_WARN_STREAM("failed to extract input features"); continue; } } } bool Context::update_classifier(double label, const Tracklet::Ptr& track) { auto pred = classifier->predict(track->features, classifier_confidences[track->track_msg->id]); if(pred) { track->confidence = *pred; } track->classifier_confidences = classifier_confidences[track->track_msg->id]; auto& p_bank = label > 0.0 ? pos_feature_bank : neg_feature_bank; auto& n_bank = label > 0.0 ? neg_feature_bank : pos_feature_bank; if(!n_bank.empty()) { size_t i = std::uniform_int_distribution<>(0, n_bank.size())(mt); classifier->update(-label, n_bank[i]); } if(!p_bank.full()) { p_bank.push_back(track->features); } else { size_t i = std::uniform_int_distribution<>(0, p_bank.size())(mt); p_bank[i] = track->features; } return classifier->update(label, track->features); } boost::optional Context::predict(const Tracklet::Ptr& track) { auto pred = classifier->predict(track->features, classifier_confidences[track->track_msg->id]); if(pred) { track->confidence = *pred; } track->classifier_confidences = classifier_confidences[track->track_msg->id]; return pred; } std::vector Context::classifier_names() const { return classifier->classifierNames(); } cv::Mat Context::visualize_body_features() { ccf_person_classifier::BodyClassifier::Ptr body_classifier = classifier->getClassifier("body"); if(body_classifier) { cv::Mat feature_map = body_classifier->visualize(); return feature_map; } return cv::Mat(); } } ================================================ FILE: src/monocular_person_following/state/initial_state.cpp ================================================ #include #include namespace monocular_person_following { State* InitialState::update(ros::NodeHandle& nh, Context& context, const std::unordered_map& tracks) { long target_id = select_target(nh, tracks); if(target_id < 0) { return this; } return new InitialTrainingState(target_id); } long InitialState::select_target(ros::NodeHandle& nh, const std::unordered_map& tracks) { long target_id = -1; double distance = 0.0; for(const auto& track: tracks) { const auto& pos = track.second->pos_in_baselink; if(pos.x() > nh.param("imprinting_max_dist", 4.0) || std::abs(pos.y()) > pos.x()) { continue; } if(target_id == -1 || distance > pos.norm()) { target_id = track.first; distance = pos.norm(); } } return target_id; } } ================================================ FILE: src/monocular_person_following/state/initial_training_state.cpp ================================================ #include #include #include namespace monocular_person_following { State* InitialTrainingState::update(ros::NodeHandle& nh, Context& context, const std::unordered_map& tracks) { auto found = tracks.find(target_id); if(found == tracks.end()) { 
================================================
FILE: src/monocular_person_following/state/initial_training_state.cpp
================================================
#include <monocular_person_following/state/initial_state.hpp>
#include <monocular_person_following/state/initial_training_state.hpp>
#include <monocular_person_following/state/tracking_state.hpp>

namespace monocular_person_following {

State* InitialTrainingState::update(ros::NodeHandle& nh, Context& context, const std::unordered_map<long, Tracklet::Ptr>& tracks) {
    auto found = tracks.find(target_id);
    if(found == tracks.end()) {
        ROS_INFO_STREAM("lost target during the initial training!!");
        return new InitialState();
    }

    for(const auto& track: tracks) {
        // the imprinted track is a positive sample, every other track is a negative sample
        double label = track.first == target_id ? 1.0 : -1.0;

        boost::optional<double> pred = context.predict(track.second);
        context.update_classifier(label, track.second);

        if(label > 0.0) {
            num_pos_samples++;
        }
    }

    if(num_pos_samples >= nh.param("initial_training_num_samples", 10)) {
        return new TrackingState(target_id);
    }

    return this;
}

}


================================================
FILE: src/monocular_person_following/state/reid_state.cpp
================================================
#include <monocular_person_following/state/reid_state.hpp>
#include <monocular_person_following/state/tracking_state.hpp>

namespace monocular_person_following {

State* ReidState::update(ros::NodeHandle& nh, Context& context, const std::unordered_map<long, Tracklet::Ptr>& tracks) {
    for(const auto& track: tracks) {
        boost::optional<double> pred = context.predict(track.second);
        if(!pred || *pred < nh.param("reid_confidence_thresh", 0.2)) {
            continue;
        }

        auto found = positive_count.find(track.first);
        if(found == positive_count.end()) {
            positive_count[track.first] = 0;
        }

        // count how many frames each track has exceeded the re-identification threshold
        positive_count[track.first]++;
        if(positive_count[track.first] >= nh.param("reid_positive_count", 5)) {
            return new TrackingState(track.first);
        }
    }

    return this;
}

}
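ReidState re-acquires the target purely from classifier confidences: every frame, each visible track is scored, a per-track counter is incremented whenever its confidence exceeds reid_confidence_thresh, and the first track whose counter reaches reid_positive_count becomes the target again. A rough Python sketch of that counting rule; the confidence stream is invented for illustration:

from collections import defaultdict

REID_CONFIDENCE_THRESH = 0.2
REID_POSITIVE_COUNT = 5

positive_count = defaultdict(int)

def reid_update(frame_confidences):
    # frame_confidences: {track_id: classifier confidence} for one frame
    for track_id, conf in frame_confidences.items():
        if conf < REID_CONFIDENCE_THRESH:
            continue
        positive_count[track_id] += 1
        if positive_count[track_id] >= REID_POSITIVE_COUNT:
            return track_id  # re-identified: go back to tracking this id
    return None              # keep searching

# invented stream: track 7 keeps scoring above the threshold, track 9 does not
for frame in range(5):
    target = reid_update({7: 0.4, 9: -0.1})
print(target)  # -> 7 after five positive frames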
================================================
FILE: src/monocular_person_following/state/tracking_state.cpp
================================================
#include <monocular_person_following/state/tracking_state.hpp>
#include <monocular_person_following/state/reid_state.hpp>

namespace monocular_person_following {

State* TrackingState::update(ros::NodeHandle& nh, Context& context, const std::unordered_map<long, Tracklet::Ptr>& tracks) {
    auto found = tracks.find(target_id);
    if(found == tracks.end()) {
        ROS_INFO_STREAM("target lost!!");
        return new ReidState();
    }

    boost::optional<double> pred = context.predict(found->second);
    if(pred && *pred < nh.param("id_switch_detection_thresh", -0.1)) {
        ROS_INFO_STREAM("ID switch detected!!");
        return new ReidState();
    }

    if(!pred || *pred < nh.param("min_target_cofidence", 0.1)) {
        ROS_INFO_STREAM("do not update");
        return this;
    }

    for(const auto& track: tracks) {
        // keep training online: the target is positive, all other tracks are negative
        double label = track.first == target_id ? 1.0 : -1.0;
        context.update_classifier(label, track.second);
    }

    return this;
}

}
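Taken together, the four State subclasses form a small state machine that is advanced once per synchronized frame: InitialState waits for a person to imprint, InitialTrainingState collects initial_training_num_samples positive samples, TrackingState follows and keeps training until the target disappears or an ID switch is suspected, and ReidState searches for the target again. A condensed Python sketch of just the transition logic; the thresholds are the defaults read via nh.param in the sources, and the classifier itself is stubbed out:

ID_SWITCH_THRESH = -0.1    # id_switch_detection_thresh
MIN_TARGET_CONF = 0.1      # min_target_cofidence (sic, parameter name as in the source)
NUM_INITIAL_SAMPLES = 10   # initial_training_num_samples

def next_state(state, target_visible, target_conf, num_pos_samples, reid_hit):
    if state == 'initial':
        # transition as soon as an imprinting candidate is selected
        return 'initial_training' if target_visible else 'initial'
    if state == 'initial_training':
        if not target_visible:
            return 'initial'
        return 'tracking' if num_pos_samples >= NUM_INITIAL_SAMPLES else 'initial_training'
    if state == 'tracking':
        if not target_visible or (target_conf is not None and target_conf < ID_SWITCH_THRESH):
            return 'reid'
        # note: when target_conf < MIN_TARGET_CONF the classifier is simply not updated
        return 'tracking'
    if state == 'reid':
        return 'tracking' if reid_hit else 'reid'

print(next_state('tracking', target_visible=True, target_conf=-0.5,
                 num_pos_samples=0, reid_hit=False))  # -> 'reid'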
================================================
FILE: src/monocular_person_following_node.cpp
================================================
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include

namespace monocular_person_following {

class MonocularPersonFollowingNode {
public:
    MonocularPersonFollowingNode()
        : nh(),
          private_nh("~"),
          target_pub(nh.advertise<Target>("/monocular_person_following/target", 1)),
          image_trans(nh),
          features_pub(image_trans.advertise("/monocular_person_following/features", 1)),
          image_sub(nh, "image", 10),
          tracks_sub(nh, "/monocular_people_tracking/tracks", 10),
          faces_sub(nh, "/face_detector/faces", 10),
          sync(image_sub, tracks_sub, 30),
          sync_w_face(image_sub, tracks_sub, faces_sub, 30),
          reset_sub(private_nh.subscribe("reset", 10, &MonocularPersonFollowingNode::reset_callback, this)),
          reset_service_server(private_nh.advertiseService("reset", &MonocularPersonFollowingNode::reset_service, this)),
          imprint_service_server(private_nh.advertiseService("imprint", &MonocularPersonFollowingNode::imprint_service, this))
    {
        state.reset(new InitialState());
        context.reset(new Context(private_nh));

        if(private_nh.param("use_face", true)) {
            sync_w_face.registerCallback(boost::bind(&MonocularPersonFollowingNode::callback, this, _1, _2, _3));
        } else {
            sync.registerCallback(boost::bind(&MonocularPersonFollowingNode::callback, this, _1, _2, nullptr));
        }
    }

    void callback(const sensor_msgs::ImageConstPtr& image_msg,
                  const monocular_people_tracking::TrackArrayConstPtr& tracks_msg,
                  const monocular_person_following::FaceDetectionArrayConstPtr& faces_msg)
    {
        auto cv_image = cv_bridge::toCvCopy(image_msg, "bgr8");

        std::unordered_map<long, Tracklet::Ptr> tracks;
        std::unordered_map<long, const FaceDetection*> face_msgs;
        if(faces_msg != nullptr) {
            for(const auto& face : faces_msg->faces) {
                face_msgs[face.track_id] = &face;
            }
        }

        for(const auto& track : tracks_msg->tracks) {
            tracks[track.id].reset(new Tracklet(tf_listener, tracks_msg->header, track));
            if(track.associated_neck_ankle.empty()) {
                continue;
            }

            cv::Rect person_region = calc_person_region(track, cv_image->image.size());
            tracks[track.id]->person_region = person_region;

            auto face = face_msgs.find(track.id);
            if(face != face_msgs.end() && !face->second->face_image.empty()) {
                auto face_image = cv_bridge::toCvCopy(face->second->face_image[0], "bgr8");
                tracks[track.id]->face_image = face_image->image;
            }
        }

        std::lock_guard<std::mutex> lock(context_mutex);
        context->extract_features(cv_image->image, tracks);

        State* next_state = state->update(private_nh, *context, tracks);
        if(next_state != state.get()) {
            state.reset(next_state);
        }

        if(target_pub.getNumSubscribers()) {
            Target target;
            target.header = image_msg->header;
            target.state.data = state->state_name();
            target.target_id = state->target();

            target.track_ids.reserve(tracks_msg->tracks.size());
            target.confidences.reserve(tracks_msg->tracks.size());
            target.classifier_confidences.reserve(tracks_msg->tracks.size() * 2);

            std::vector<std::string> classifier_names = context->classifier_names();
            for(const auto& name: classifier_names) {
                std_msgs::String classifier_name;
                classifier_name.data = name;
                target.classifier_names.push_back(classifier_name);
            }

            for(const auto& track : tracks) {
                if(track.second->confidence) {
                    target.track_ids.push_back(track.first);
                    target.confidences.push_back(*track.second->confidence);

                    if(track.second->classifier_confidences.size() != target.classifier_names.size()) {
                        ROS_ERROR_STREAM("num_classifiers did not match!!");
                        ROS_ERROR_STREAM(track.second->classifier_confidences.size() << " : " << target.classifier_names.size());
                    }

                    std::copy(track.second->classifier_confidences.begin(), track.second->classifier_confidences.end(), std::back_inserter(target.classifier_confidences));
                }
            }

            target_pub.publish(target);
        }

        if(features_pub.getNumSubscribers()) {
            cv::Mat features = context->visualize_body_features();
            if(features.data) {
                cv_bridge::CvImage cv_image(image_msg->header, "bgr8", features);
                features_pub.publish(cv_image.toImageMsg());
            }
        }
    }

    bool imprint_service(ImprintRequest& req, ImprintResponse& res) {
        reset(req.target_id);
        res.success = true;
        return true;
    }

    bool reset_service(std_srvs::EmptyRequest& req, std_srvs::EmptyResponse& res) {
        reset();
        return true;
    }

    void reset_callback(const std_msgs::EmptyConstPtr& empty_msg) {
        reset();
    }

private:
    cv::Rect2f calc_person_region(const monocular_people_tracking::Track& track, const cv::Size& image_size) {
        Eigen::Vector2f neck(track.associated_neck_ankle[0].x, track.associated_neck_ankle[0].y);
        Eigen::Vector2f ankle(track.associated_neck_ankle[1].x, track.associated_neck_ankle[1].y);

        Eigen::Vector2f center = (neck + ankle) / 2.0f;
        float height = (ankle.y() - neck.y()) * 1.25f;
        float width = height * 0.25f;

        cv::Rect rect(center.x() - width / 2.0f, center.y() - height / 2.0f, width, height);
        cv::Point tl = rect.tl();
        cv::Point br = rect.br();

        // clip the region to the image bounds
        tl.x = std::min(image_size.width, std::max(0, tl.x));
        tl.y = std::min(image_size.height, std::max(0, tl.y));
        br.x = std::min(image_size.width, std::max(0, br.x));
        br.y = std::min(image_size.height, std::max(0, br.y));

        return cv::Rect(tl, br);
    }

    void reset(long target_id = -1) {
        ROS_INFO_STREAM("reset identification!!");
        std::lock_guard<std::mutex> lock(context_mutex);
        if(target_id < 0) {
            state.reset(new InitialState());
        } else {
            state.reset(new InitialTrainingState(target_id));
        }
        context.reset(new Context(private_nh));
    }

private:
    ros::NodeHandle nh;
    ros::NodeHandle private_nh;

    tf::TransformListener tf_listener;

    ros::Publisher target_pub;
    image_transport::ImageTransport image_trans;
    image_transport::Publisher features_pub;

    message_filters::Subscriber<sensor_msgs::Image> image_sub;
    message_filters::Subscriber<monocular_people_tracking::TrackArray> tracks_sub;
    message_filters::Subscriber<monocular_person_following::FaceDetectionArray> faces_sub;
    message_filters::TimeSynchronizer<sensor_msgs::Image, monocular_people_tracking::TrackArray> sync;
    message_filters::TimeSynchronizer<sensor_msgs::Image, monocular_people_tracking::TrackArray, monocular_person_following::FaceDetectionArray> sync_w_face;

    // reset service callbacks
    ros::Subscriber reset_sub;
    ros::ServiceServer reset_service_server;
    ros::ServiceServer imprint_service_server;

    std::mutex context_mutex;
    std::shared_ptr<State> state;
    std::unique_ptr<Context> context;
};

}

int main(int argc, char** argv) {
    ros::init(argc, argv, "monocular_person_following");
    std::unique_ptr<monocular_person_following::MonocularPersonFollowingNode> node(new monocular_person_following::MonocularPersonFollowingNode());
    ros::spin();

    return 0;
}


================================================
FILE: srv/Imprint.srv
================================================
int64 target_id
---
bool success
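The node exposes three ways to (re)start identification: the private reset topic and reset service drop back to InitialState, while the imprint service (Imprint.srv above) jumps straight to InitialTrainingState for a given track id. Assuming the node runs under its default name, a minimal rospy client could look like the following sketch; the resolved service names and the example target_id of 3 are assumptions for illustration:

import rospy
from std_srvs.srv import Empty
from monocular_person_following.srv import Imprint

rospy.init_node('imprint_client')

# forget the current target and wait for a new person to be imprinted automatically
rospy.wait_for_service('/monocular_person_following/reset')
rospy.ServiceProxy('/monocular_person_following/reset', Empty)()

# or imprint a specific track id (e.g. one shown by the visualization node)
rospy.wait_for_service('/monocular_person_following/imprint')
imprint = rospy.ServiceProxy('/monocular_person_following/imprint', Imprint)
print(imprint(target_id=3).success)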