Repository: koide3/monocular_person_following
Branch: master
Commit: ed7f384fe8e5
Files: 45
Total size: 83.6 KB

Directory structure:
gitextract_ntcc8l0f/
├── .github/
│   └── workflows/
│       └── build.yml
├── .travis.yml
├── CMakeLists.txt
├── README.md
├── docker/
│   ├── jp45/
│   │   ├── Dockerfile
│   │   ├── Dockerfile.ros.noetic
│   │   ├── build.sh
│   │   ├── ros_entrypoint.sh
│   │   └── run.sh
│   └── noetic/
│       ├── Dockerfile
│       ├── Dockerfile.ros.noetic
│       ├── build.sh
│       ├── ros_entrypoint.sh
│       └── run.sh
├── include/
│   └── monocular_person_following/
│       ├── context.hpp
│       ├── state/
│       │   ├── initial_state.hpp
│       │   ├── initial_training_state.hpp
│       │   ├── reid_state.hpp
│       │   ├── state.hpp
│       │   └── tracking_state.hpp
│       └── tracklet.hpp
├── launch/
│   ├── jetson_person_following.launch
│   ├── jetson_person_following_sim.launch
│   ├── monocular_person_following.launch
│   └── start_robot.launch
├── msg/
│   ├── BoundingBox2D.msg
│   ├── FaceDetection.msg
│   ├── FaceDetectionArray.msg
│   └── Target.msg
├── package.xml
├── rviz/
│   └── monocular_person_following.rviz
├── scripts/
│   ├── eval/
│   │   ├── convert_dataset2bag.py
│   │   └── generate_bb.py
│   ├── face_detector_node.py
│   ├── robot_controller.py
│   ├── save_bag.sh
│   ├── simple_gesture_recognition.py
│   └── visualization.py
├── src/
│   ├── monocular_person_following/
│   │   ├── context.cpp
│   │   └── state/
│   │       ├── initial_state.cpp
│   │       ├── initial_training_state.cpp
│   │       ├── reid_state.cpp
│   │       └── tracking_state.cpp
│   └── monocular_person_following_node.cpp
└── srv/
    └── Imprint.srv

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/workflows/build.yml
================================================
name: Build

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        ROS_DISTRO: [noetic]

    steps:
    - uses: actions/checkout@v2
      with:
        submodules: true

    - name: Docker login
      uses: docker/login-action@v1
      with:
        username: ${{ secrets.DOCKER_USERNAME }}
        password: ${{ secrets.DOCKER_TOKEN }}

    - name: Docker build
      uses: docker/build-push-action@v2
      with:
        file: ${{github.workspace}}/docker/${{ matrix.ROS_DISTRO }}/Dockerfile
        context: .
        push: false

================================================
FILE: .travis.yml
================================================
sudo: required
language: generic
dist: xenial

services:
  - docker

matrix:
  include:
    - name: "Xenial kinetic"
      env: ROS_DISTRO=kinetic
    - name: "Bionic melodic"
      env: ROS_DISTRO=melodic

script:
  - docker build -f ./docker/$ROS_DISTRO/Dockerfile .
================================================
FILE: CMakeLists.txt
================================================
cmake_minimum_required(VERSION 2.8.3)
project(monocular_person_following)

add_compile_options(-std=c++11)

find_package(catkin REQUIRED
  tf
  rospy
  roscpp
  cv_bridge
  message_filters
  image_transport
  eigen_conversions
  message_generation
  ccf_person_identification
  monocular_people_tracking
)

########################
## service generation ##
########################
add_service_files(FILES
  Imprint.srv
)

########################
## message generation ##
########################
add_message_files(FILES
  BoundingBox2D.msg
  FaceDetection.msg
  FaceDetectionArray.msg
  Target.msg
)

generate_messages(DEPENDENCIES std_msgs sensor_msgs)

###################################
## catkin specific configuration ##
###################################
catkin_package(
# INCLUDE_DIRS include
# LIBRARIES monocular_person_following
# CATKIN_DEPENDS other_catkin_pkg
# DEPENDS system_lib
)

###########
## Build ##
###########
include_directories(
  include
  ${catkin_INCLUDE_DIRS}
)

add_executable(monocular_person_following_node
  src/monocular_person_following/state/initial_state.cpp
  src/monocular_person_following/state/initial_training_state.cpp
  src/monocular_person_following/state/tracking_state.cpp
  src/monocular_person_following/state/reid_state.cpp
  src/monocular_person_following/context.cpp
  src/monocular_person_following_node.cpp
)
add_dependencies(monocular_person_following_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
target_link_libraries(monocular_person_following_node
  ${catkin_LIBRARIES}
)

================================================
FILE: README.md
================================================
# monocular_person_following

This package provides a monocular vision-based person tracking and identification framework for person following robots. It first detects people using *tf-pose-estimation*, and then tracks them in the robot space with an Unscented Kalman filter using the ground plane information. The combination of Convolutional Channel Features and Online Boosting runs on top of the tracking module to keep tracking the target person with a re-identification capability. The entire system is designed so that it can be run on a Jetson TX2/Xavier, and it can easily be reproduced and reused on a new mobile robot platform.
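For reference, the identification result is published as a `monocular_person_following/Target` message on `/monocular_person_following/target`, while the people tracker publishes a `monocular_people_tracking/TrackArray` on `/monocular_people_tracking/tracks`. The minimal consumer below is illustrative only and is not part of the package; the topic and field names are taken from `msg/Target.msg` and `scripts/robot_controller.py`.

```python
#!/usr/bin/env python3
# Illustrative consumer of the tracking/identification outputs.
import rospy
from monocular_people_tracking.msg import TrackArray
from monocular_person_following.msg import Target

target_id = -1

def target_callback(target_msg):
    # state is one of "initial", "initial_training", "tracking", "re-identification"
    global target_id
    target_id = target_msg.target_id
    rospy.loginfo('state=%s target_id=%d', target_msg.state.data, target_msg.target_id)

def tracks_callback(tracks_msg):
    # pick the identified target among all tracked people and read its position
    target = [t for t in tracks_msg.tracks if t.id == target_id]
    if target:
        rospy.loginfo('target position: (%.2f, %.2f)', target[0].pos.x, target[0].pos.y)

rospy.init_node('target_listener')
rospy.Subscriber('/monocular_person_following/target', Target, target_callback)
rospy.Subscriber('/monocular_people_tracking/tracks', TrackArray, tracks_callback)
rospy.spin()
```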
![system](data/imgs/system.png)

[[video]](https://www.youtube.com/watch?v=SsIrXxnOgaQ)

[![Codacy Badge](https://api.codacy.com/project/badge/Grade/c7664fce1722461db5ffdc27eae59e9c)](https://www.codacy.com/app/koide3/monocular_person_following?utm_source=github.com&utm_medium=referral&utm_content=koide3/monocular_person_following&utm_campaign=Badge_Grade)
[![Build](https://github.com/koide3/monocular_person_following/actions/workflows/build.yml/badge.svg)](https://github.com/koide3/monocular_person_following/actions/workflows/build.yml)

## Dependencies

- dlib
- flann
- tensorflow
- tf-pose-estimation

## Quick Test

- [Quick test on a sample rosbag (Desktop PC with CUDA >= 10.2)](https://github.com/koide3/monocular_person_following/wiki/Quick-test-with-sample-rosbag)
- [Quick test using a webcam (Desktop PC with CUDA >= 10.2 / Jetson Xavier with Jetpack 4.5.1)](https://github.com/koide3/monocular_person_following/wiki/Quick-test-with-USB-cam)

![screenshot](data/imgs/screenshot.jpg)

## Install

We tested this package on a Jetson Xavier with Jetpack 4.5.1.

Note: We recommend using docker to avoid the painful installation process.

- [[Install from source (Jetson Xavier with Jetpack 4.5.1)]](https://github.com/koide3/monocular_person_following/wiki/%5BJetpack-4.5%5D-Install-from-source)

## Setup your own person following robot

[[Setup your robot]](https://github.com/koide3/monocular_person_following/wiki/Setup-your-own-person-following-robot)

## Related packages

- [ccf_person_identification](https://github.com/koide3/ccf_person_identification)
- [monocular_people_tracking](https://github.com/koide3/monocular_people_tracking)
- [monocular_person_following](https://github.com/koide3/monocular_person_following)

## Papers

- Kenji Koide, Jun Miura, and Emanuele Menegatti, Monocular Person Tracking and Identification with Online Deep Feature Selection for Person Following Robots, Robotics and Autonomous Systems [[link]](https://www.researchgate.net/publication/336871285_Monocular_Person_Tracking_and_Identification_with_On-line_Deep_Feature_Selection_for_Person_Following_Robots).
- Kenji Koide and Jun Miura, Convolutional Channel Features-based Person Identification for Person Following Robots, 15th International Conference IAS-15, Baden-Baden, Germany, 2018 [[link]](https://www.researchgate.net/publication/325854919_Convolutional_Channel_Features-Based_Person_Identification_for_Person_Following_Robots_Proceedings_of_the_15th_International_Conference_IAS-15).
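Besides the automatic initial imprinting, the tracked target can be re-assigned at runtime through the `~imprint` service (see `srv/Imprint.srv` and `scripts/simple_gesture_recognition.py`, which calls it when a raised-hand gesture is detected). A minimal sketch of a manual caller, for illustration only:

```python
#!/usr/bin/env python3
# Illustrative manual target re-assignment via the imprint service,
# mirroring what scripts/simple_gesture_recognition.py does.
import rospy
from monocular_person_following.srv import Imprint, ImprintRequest

rospy.init_node('imprint_example')
rospy.wait_for_service('/monocular_person_following/imprint')
imprint = rospy.ServiceProxy('/monocular_person_following/imprint', Imprint)

req = ImprintRequest()
req.target_id = 1  # a track id taken from /monocular_people_tracking/tracks
imprint(req)
```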
================================================ FILE: docker/jp45/Dockerfile ================================================ FROM koide3/jetson-ros-noetic-desktop:jp45 # editors RUN apt-get update && \ apt-get install -y --no-install-recommends \ vim nano tmux \ && rm -rf /var/lib/apt/lists/* # flann & openblas RUN apt-get update && \ apt-get install -y --no-install-recommends \ libflann-dev libopenblas-base libopenblas-dev \ && rm -rf /var/lib/apt/lists/* # tensorflow RUN apt-get update && \ apt-get install -y --no-install-recommends \ libhdf5-serial-dev hdf5-tools libhdf5-dev zlib1g-dev \ zip libjpeg8-dev liblapack-dev libblas-dev gfortran \ python3-pip \ && rm -rf /var/lib/apt/lists/* RUN pip3 install -U pip testresources setuptools RUN pip3 install --pre --extra-index-url https://developer.download.nvidia.com/compute/redist/jp/v45 "tensorflow<2" # tf-pose-estimation RUN apt-get update && \ apt-get install -y --no-install-recommends \ llvm-10 llvm-10-dev swig \ && rm -rf /var/lib/apt/lists/* RUN update-alternatives --install /usr/bin/llvm-config llvm-config /usr/bin/llvm-config-10 50 RUN pip3 install numba==0.45.0 WORKDIR /root/catkin_ws/src RUN git clone https://github.com/koide3/tf-pose-estimation WORKDIR /root/catkin_ws/src/tf-pose-estimation RUN pip3 install -r requirements.txt WORKDIR /root/catkin_ws/src/tf-pose-estimation/tf_pose/pafprocess RUN swig -python -c++ pafprocess.i && python3 setup.py build_ext --inplace WORKDIR /root/catkin_ws/src/tf-pose-estimation/models/graph/cmu RUN bash download.sh WORKDIR /root/catkin_ws/src/tf-pose-estimation RUN pip3 install -U matplotlib RUN python3 setup.py install # dlib WORKDIR /root RUN wget http://dlib.net/files/dlib-19.22.tar.bz2 RUN tar xvf dlib-19.22.tar.bz2 ENV DLIB_ROOT="/root/dlib-19.22" # other ros packages WORKDIR /root/catkin_ws/src RUN git clone https://github.com/ros-drivers/usb_cam.git RUN git clone https://github.com/ros-perception/image_transport_plugins.git RUN git clone https://github.com/ros-perception/vision_opencv.git RUN git clone https://github.com/ros-perception/image_common.git RUN git clone https://github.com/ros-perception/image_pipeline.git RUN touch /root/catkin_ws/src/image_transport_plugins/theora_image_transport/CATKIN_IGNORE RUN touch /root/catkin_ws/src/image_pipeline/stereo_image_proc/CATKIN_IGNORE WORKDIR /root/catkin_ws RUN /bin/bash -c '. /opt/ros/noetic/setup.bash; catkin_make -DCMAKE_BUILD_TYPE=Release' # WORKDIR /root/catkin_ws/src # RUN https://github.com/ros/geometry.git # WORKDIR /root/catkin_ws # RUN /bin/bash -c '. /opt/ros/noetic/setup.bash; catkin_make -DCMAKE_BUILD_TYPE=Release' # monocular_person_following WORKDIR /root/catkin_ws/src RUN git clone https://github.com/koide3/open_face_recognition.git RUN git clone https://github.com/koide3/ccf_person_identification.git RUN git clone https://github.com/koide3/monocular_people_tracking.git --recursive RUN git clone https://github.com/koide3/monocular_person_following.git WORKDIR /root/catkin_ws RUN /bin/bash -c '. 
/opt/ros/noetic/setup.bash; catkin_make -DCMAKE_BUILD_TYPE=Release' RUN sed -i "7i source \"/root/catkin_ws/devel/setup.bash\"" /ros_entrypoint.sh WORKDIR /root/catkin_ws/src ENTRYPOINT ["/ros_entrypoint.sh"] CMD ["bash"] ================================================ FILE: docker/jp45/Dockerfile.ros.noetic ================================================ # original dockerfile can be found at: https://github.com/dusty-nv/jetson-containers # # this dockerfile roughly follows the 'Installing from source' from: # http://wiki.ros.org/noetic/Installation/Source # ARG BASE_IMAGE=nvcr.io/nvidia/l4t-base:r32.5.0 FROM ${BASE_IMAGE} ARG ROS_PKG=desktop ENV ROS_DISTRO=noetic ENV ROS_ROOT=/opt/ros/${ROS_DISTRO} ENV ROS_PYTHON_VERSION=3 ENV DEBIAN_FRONTEND=noninteractive WORKDIR /workspace # add the ROS deb repo to the apt sources list RUN apt-get update && \ apt-get install -y --no-install-recommends \ git \ cmake \ build-essential \ curl \ wget \ gnupg2 \ lsb-release \ ca-certificates \ && rm -rf /var/lib/apt/lists/* RUN sh -c 'echo "deb http://packages.ros.org/ros/ubuntu $(lsb_release -sc) main" > /etc/apt/sources.list.d/ros-latest.list' RUN curl -s https://raw.githubusercontent.com/ros/rosdistro/master/ros.asc | apt-key add - # install bootstrap dependencies RUN apt-get update && \ apt-get install -y --no-install-recommends \ libpython3-dev \ python3-rosdep \ python3-rosinstall-generator \ python3-vcstool \ build-essential && \ rosdep init && \ rosdep update && \ rm -rf /var/lib/apt/lists/* RUN ln -s /usr/lib/aarch64-linux-gnu/libboost_python3.a /usr/lib/aarch64-linux-gnu/libboost_python37.a RUN ln -s /usr/lib/aarch64-linux-gnu/libboost_python3.so /usr/lib/aarch64-linux-gnu/libboost_python37.so # download/build the ROS source RUN mkdir ros_catkin_ws && \ cd ros_catkin_ws && \ rosinstall_generator ${ROS_PKG} vision_msgs --rosdistro ${ROS_DISTRO} --deps --tar > ${ROS_DISTRO}-${ROS_PKG}.rosinstall && \ mkdir src && \ vcs import --input ${ROS_DISTRO}-${ROS_PKG}.rosinstall ./src && \ apt-get update && \ rosdep install --from-paths ./src --ignore-packages-from-source --rosdistro ${ROS_DISTRO} --skip-keys python3-pykdl -y && \ python3 ./src/catkin/bin/catkin_make_isolated --install --install-space ${ROS_ROOT} -DCMAKE_BUILD_TYPE=Release && \ rm -rf /var/lib/apt/lists/* RUN rm -rf /workspace # setup entrypoint COPY ros_entrypoint.sh /ros_entrypoint.sh RUN echo 'source /opt/ros/${ROS_DISTRO}/setup.bash' >> /root/.bashrc ENTRYPOINT ["/ros_entrypoint.sh"] CMD ["bash"] WORKDIR / ================================================ FILE: docker/jp45/build.sh ================================================ #!/bin/bash # sudo docker build --tag koide3/jetson-ros-noetic-desktop:jp45 -f Dockerfile.ros.noetic . sudo docker build --tag koide3/monocular_person_following:jp45 -f Dockerfile . 
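The jp45 Dockerfile above installs an NVIDIA-built TensorFlow 1.x wheel and builds tf-pose-estimation (the swig pafprocess extension plus the CMU graph download). A quick way to sanity-check that layer inside the built image is a short Python session; this is only a sketch and not part of the repository, and `TfPoseEstimator` / `get_graph_path` are assumed from the upstream tf-pose-estimation API.

# Hypothetical smoke test for the TensorFlow / tf-pose-estimation layer
# of the image built above (run inside the container).
import tensorflow as tf
from tf_pose.estimator import TfPoseEstimator  # assumed upstream tf-pose-estimation API
from tf_pose.networks import get_graph_path    # assumed upstream tf-pose-estimation API

print(tf.__version__)              # expected to be a 1.x release ("tensorflow<2")
print(tf.test.is_gpu_available())  # True if the GPU is visible inside the container

# the CMU graph was fetched by models/graph/cmu/download.sh during the image build
estimator = TfPoseEstimator(get_graph_path('cmu'), target_size=(432, 368))
print('tf-pose-estimation graph loaded')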
================================================ FILE: docker/jp45/ros_entrypoint.sh ================================================ #!/bin/bash set -e ros_env_setup="/opt/ros/$ROS_DISTRO/setup.bash" echo "sourcing $ros_env_setup" source "$ros_env_setup" echo "ROS_ROOT $ROS_ROOT" echo "ROS_DISTRO $ROS_DISTRO" exec "$@" ================================================ FILE: docker/jp45/run.sh ================================================ #/bin/bash sudo docker run -it --rm \ --net host \ --gpus all \ --device /dev/video0:/dev/video0 \ -v $(realpath ~/.ros/camera_info):/root/.ros/camera_info \ koide3/monocular_person_following:jp45 $@ ================================================ FILE: docker/noetic/Dockerfile ================================================ FROM koide3/jetson-ros-noetic-desktop:noetic RUN apt-get update && \ apt-get install -y --no-install-recommends \ # editors vim nano tmux \ # flann & openblas libflann-dev libopenblas-base libopenblas-dev \ # tf-pose-estimation llvm-10 llvm-10-dev swig \ && rm -rf /var/lib/apt/lists/* RUN update-alternatives --install /usr/bin/llvm-config llvm-config /usr/bin/llvm-config-10 50 WORKDIR /root/catkin_ws/src RUN git clone https://github.com/koide3/tf-pose-estimation WORKDIR /root/catkin_ws/src/tf-pose-estimation RUN pip3 install numba==0.45.0 RUN pip3 install -r requirements.txt WORKDIR /root/catkin_ws/src/tf-pose-estimation/tf_pose/pafprocess RUN swig -python -c++ pafprocess.i && python3 setup.py build_ext --inplace WORKDIR /root/catkin_ws/src/tf-pose-estimation/models/graph/cmu RUN bash download.sh WORKDIR /root/catkin_ws/src/tf-pose-estimation RUN pip3 install -U matplotlib RUN python3 setup.py install # dlib WORKDIR /root RUN wget http://dlib.net/files/dlib-19.22.tar.bz2 RUN tar xvf dlib-19.22.tar.bz2 ENV DLIB_ROOT="/root/dlib-19.22" # other ros packages WORKDIR /root/catkin_ws/src RUN git clone https://github.com/ros-drivers/usb_cam.git RUN git clone https://github.com/ros-perception/image_transport_plugins.git RUN git clone https://github.com/ros-perception/vision_opencv.git RUN git clone https://github.com/ros-perception/image_common.git RUN git clone https://github.com/ros-perception/image_pipeline.git RUN touch /root/catkin_ws/src/image_transport_plugins/theora_image_transport/CATKIN_IGNORE RUN touch /root/catkin_ws/src/image_pipeline/stereo_image_proc/CATKIN_IGNORE WORKDIR /root/catkin_ws RUN /bin/bash -c '. /opt/ros/noetic/setup.bash; catkin_make -DCMAKE_BUILD_TYPE=Release' # monocular_person_following WORKDIR /root/catkin_ws/src RUN git clone https://github.com/koide3/open_face_recognition.git RUN git clone https://github.com/koide3/ccf_person_identification.git RUN git clone https://github.com/koide3/monocular_people_tracking.git --recursive COPY . /root/catkin_ws/monocular_person_following WORKDIR /root/catkin_ws RUN /bin/bash -c '. 
/opt/ros/noetic/setup.bash; catkin_make -DCMAKE_BUILD_TYPE=Release' RUN sed -i "7i source \"/root/catkin_ws/devel/setup.bash\"" /ros_entrypoint.sh WORKDIR /root/catkin_ws/src ENTRYPOINT ["/ros_entrypoint.sh"] CMD ["bash"] ================================================ FILE: docker/noetic/Dockerfile.ros.noetic ================================================ # original dockerfile can be found at: https://github.com/dusty-nv/jetson-containers # # this dockerfile roughly follows the 'Installing from source' from: # http://wiki.ros.org/noetic/Installation/Source # FROM tensorflow/tensorflow:1.15.5-gpu-py3 ARG ROS_PKG=desktop ENV ROS_DISTRO=noetic ENV ROS_ROOT=/opt/ros/${ROS_DISTRO} ENV ROS_PYTHON_VERSION=3 ENV DEBIAN_FRONTEND=noninteractive WORKDIR /workspace # add the ROS deb repo to the apt sources list RUN apt-get update && \ apt-get install -y --no-install-recommends \ git cmake build-essential curl wget \ gnupg2 lsb-release ca-certificates \ && rm -rf /var/lib/apt/lists/* RUN sh -c 'echo "deb http://packages.ros.org/ros/ubuntu $(lsb_release -sc) main" > /etc/apt/sources.list.d/ros-latest.list' RUN curl -s https://raw.githubusercontent.com/ros/rosdistro/master/ros.asc | apt-key add - # install bootstrap dependencies RUN apt-get update && \ apt-get install -y --no-install-recommends \ libpython3-dev python3-rosdep python3-rosinstall-generator \ python3-vcstool build-essential && \ rosdep init && \ rosdep update && \ rm -rf /var/lib/apt/lists/* RUN ln -s /usr/lib/x86_64-linux-gnu/libboost_python3.a /usr/lib/x86_64-linux-gnu/libboost_python37.a RUN ln -s /usr/lib/x86_64-linux-gnu/libboost_python3.so /usr/lib/x86_64-linux-gnu/libboost_python37.so # download/build the ROS source RUN mkdir ros_catkin_ws && \ cd ros_catkin_ws && \ rosinstall_generator ${ROS_PKG} vision_msgs --rosdistro ${ROS_DISTRO} --deps --tar > ${ROS_DISTRO}-${ROS_PKG}.rosinstall && \ mkdir src && \ vcs import --input ${ROS_DISTRO}-${ROS_PKG}.rosinstall ./src && \ apt-get update && \ rosdep install --from-paths ./src --ignore-packages-from-source --rosdistro ${ROS_DISTRO} --skip-keys python3-pykdl -y && \ python ./src/catkin/bin/catkin_make_isolated --install --install-space ${ROS_ROOT} -DCMAKE_BUILD_TYPE=Release && \ rm -rf /var/lib/apt/lists/* && \ rm -rf /workspace # setup entrypoint COPY ros_entrypoint.sh /ros_entrypoint.sh RUN echo 'source /opt/ros/${ROS_DISTRO}/setup.bash' >> /root/.bashrc ENTRYPOINT ["/ros_entrypoint.sh"] CMD ["bash"] WORKDIR / ================================================ FILE: docker/noetic/build.sh ================================================ #!/bin/bash # sudo docker build --tag koide3/jetson-ros-noetic-desktop:noetic -f Dockerfile.ros.noetic . sudo docker build --tag koide3/monocular_person_following:noetic -f Dockerfile . 
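Both run.sh scripts (see below) mount `~/.ros/camera_info` into the container so that the camera driver can load an existing calibration, which the monocular people tracking relies on. The hypothetical helper below only illustrates the standard ROS camera calibration YAML layout with placeholder values; it is not part of the repository. Generate a real file with the camera_calibration tool and match the file name to the camera name used by usb_cam.

# Hypothetical helper: write a placeholder calibration file into ~/.ros/camera_info
# using the standard ROS camera calibration YAML layout. Replace the placeholder
# values with a real calibration before use.
import os

calib = """image_width: 640
image_height: 480
camera_name: head_camera
camera_matrix:
  rows: 3
  cols: 3
  data: [600.0, 0.0, 320.0, 0.0, 600.0, 240.0, 0.0, 0.0, 1.0]
distortion_model: plumb_bob
distortion_coefficients:
  rows: 1
  cols: 5
  data: [0.0, 0.0, 0.0, 0.0, 0.0]
rectification_matrix:
  rows: 3
  cols: 3
  data: [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0]
projection_matrix:
  rows: 3
  cols: 4
  data: [600.0, 0.0, 320.0, 0.0, 0.0, 600.0, 240.0, 0.0, 0.0, 0.0, 1.0, 0.0]
"""

info_dir = os.path.expanduser('~/.ros/camera_info')
os.makedirs(info_dir, exist_ok=True)
with open(os.path.join(info_dir, 'head_camera.yaml'), 'w') as f:
    f.write(calib)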
================================================ FILE: docker/noetic/ros_entrypoint.sh ================================================ #!/bin/bash set -e ros_env_setup="/opt/ros/$ROS_DISTRO/setup.bash" echo "sourcing $ros_env_setup" source "$ros_env_setup" echo "ROS_ROOT $ROS_ROOT" echo "ROS_DISTRO $ROS_DISTRO" exec "$@" ================================================ FILE: docker/noetic/run.sh ================================================ #/bin/bash sudo docker run -it --rm \ --net host \ --gpus all \ --device /dev/video0:/dev/video0 \ -v $(realpath ~/.ros/camera_info):/root/.ros/camera_info \ koide3/monocular_person_following:noetic $@ ================================================ FILE: include/monocular_person_following/context.hpp ================================================ #ifndef MONOCULAR_PERSON_FOLLOWING_CONTEXT_HPP #define MONOCULAR_PERSON_FOLLOWING_CONTEXT_HPP #include #include #include #include #include #include namespace ccf_person_classifier { class PersonInput; class PersonFeatures; class PersonClassifier; } namespace monocular_person_following { class Context { public: using PersonInput = ccf_person_classifier::PersonInput; using PersonFeatures = ccf_person_classifier::PersonFeatures; using PersonClassifier = ccf_person_classifier::PersonClassifier; Context(ros::NodeHandle& nh); ~Context(); public: void extract_features(const cv::Mat& bgr_image, std::unordered_map& tracks); bool update_classifier(double label, const Tracklet::Ptr& track); boost::optional predict(const Tracklet::Ptr& track); std::vector classifier_names() const; cv::Mat visualize_body_features(); private: std::mt19937 mt; std::unique_ptr classifier; std::unordered_map> classifier_confidences; boost::circular_buffer> pos_feature_bank; boost::circular_buffer> neg_feature_bank; }; } #endif ================================================ FILE: include/monocular_person_following/state/initial_state.hpp ================================================ #ifndef MONOCULAR_PERSON_FOLLOWING_INITIAL_STATE_HPP #define MONOCULAR_PERSON_FOLLOWING_INITIAL_STATE_HPP #include namespace monocular_person_following { class InitialState : public State { public: InitialState() {} virtual ~InitialState() override {} virtual std::string state_name() const override { return "initial"; } virtual State* update(ros::NodeHandle& nh, Context& context, const std::unordered_map& tracks) override; private: long select_target(ros::NodeHandle& nh, const std::unordered_map& tracks); }; } #endif // INITIAL_STATE_HPP ================================================ FILE: include/monocular_person_following/state/initial_training_state.hpp ================================================ #ifndef MONOCULAR_PERSON_FOLLOWING_INITIAL_TRAINING_STATE_HPP #define MONOCULAR_PERSON_FOLLOWING_INITIAL_TRAINING_STATE_HPP #include namespace monocular_person_following { class InitialTrainingState : public State { public: InitialTrainingState(long target_id) : target_id(target_id), num_pos_samples(0) {} virtual ~InitialTrainingState() override {} virtual long target() const override { return target_id; } virtual std::string state_name() const override { return "initial_training"; } virtual State* update(ros::NodeHandle& nh, Context& context, const std::unordered_map& tracks) override; private: long target_id; long num_pos_samples; }; } #endif // INITIAL_TRAINING_STATE_HPP ================================================ FILE: include/monocular_person_following/state/reid_state.hpp ================================================ #ifndef 
REID_STATE_HPP #define REID_STATE_HPP #include namespace monocular_person_following { class ReidState : public State { public: ReidState() {} virtual ~ReidState() override {} virtual std::string state_name() const override { return "re-identification"; } virtual State* update(ros::NodeHandle& nh, Context& context, const std::unordered_map& tracks) override; private: std::unordered_map positive_count; }; } #endif // REID_STATE_HPP ================================================ FILE: include/monocular_person_following/state/state.hpp ================================================ #ifndef MONOCULAR_PERSON_FOLLOWING_STATE_HPP #define MONOCULAR_PERSON_FOLLOWING_STATE_HPP #include namespace monocular_person_following { class State { public: State() {} virtual ~State() {} virtual long target() const { return -1; } virtual std::string state_name() const = 0; virtual State* update(ros::NodeHandle& nh, Context& context, const std::unordered_map& tracks) = 0; private: }; } #endif ================================================ FILE: include/monocular_person_following/state/tracking_state.hpp ================================================ #ifndef TRACKING_STATE_HPP #define TRACKING_STATE_HPP #include namespace monocular_person_following { class TrackingState : public State { public: TrackingState(long target_id) : target_id(target_id) {} virtual ~TrackingState() override {} virtual long target() const override { return target_id; } virtual std::string state_name() const override { return "tracking"; } virtual State* update(ros::NodeHandle& nh, Context& context, const std::unordered_map& tracks) override; private: long target_id; }; } #endif // TRACKING_STATE_HPP ================================================ FILE: include/monocular_person_following/tracklet.hpp ================================================ #ifndef TRACKLET_HPP #define TRACKLET_HPP #include #include #include #include #include #include #include namespace monocular_person_following { struct Tracklet { public: EIGEN_MAKE_ALIGNED_OPERATOR_NEW using Ptr = std::shared_ptr; Tracklet(tf::TransformListener& tf_listener, const std_msgs::Header& header, const monocular_people_tracking::Track& track_msg) : confidence(boost::none), track_msg(&track_msg) { geometry_msgs::PointStamped point; point.header = header; point.point = track_msg.pos; geometry_msgs::PointStamped transformed; tf_listener.transformPoint("base_link", point, transformed); pos_in_baselink = Eigen::Vector2f(transformed.point.x, transformed.point.y); } public: boost::optional confidence; std::vector classifier_confidences; cv::Mat face_image; boost::optional person_region; ccf_person_classifier::Input::Ptr input; ccf_person_classifier::Features::Ptr features; Eigen::Vector2f pos_in_baselink; const monocular_people_tracking::Track* track_msg; }; } #endif // TRACKLET_HPP ================================================ FILE: launch/jetson_person_following.launch ================================================ ================================================ FILE: launch/jetson_person_following_sim.launch ================================================ ================================================ FILE: launch/monocular_person_following.launch ================================================ ================================================ FILE: launch/start_robot.launch ================================================ ================================================ FILE: msg/BoundingBox2D.msg ================================================ int32 x int32 y int32 
width int32 height ================================================ FILE: msg/FaceDetection.msg ================================================ int64 track_id BoundingBox2D[] face_roi BoundingBox2D[] face_region sensor_msgs/Image[] face_image ================================================ FILE: msg/FaceDetectionArray.msg ================================================ std_msgs/Header header uint32 image_width uint32 image_height FaceDetection[] faces ================================================ FILE: msg/Target.msg ================================================ std_msgs/Header header std_msgs/String state int64 target_id int64[] track_ids float32[] confidences std_msgs/String[] classifier_names float32[] classifier_confidences ================================================ FILE: package.xml ================================================ monocular_person_following 0.0.0 The monocular_person_following package koide BSD catkin tf rospy roscpp cv_bridge image_transport message_filters message_generation eigen_conversions ccf_person_identification monocular_people_tracking ================================================ FILE: rviz/monocular_person_following.rviz ================================================ Panels: - Class: rviz/Displays Help Height: 84 Name: Displays Property Tree Widget: Expanded: - /Global Options1 - /TF1/Frames1 Splitter Ratio: 0.5 Tree Height: 100 - Class: rviz/Selection Name: Selection - Class: rviz/Tool Properties Expanded: - /2D Pose Estimate1 - /2D Nav Goal1 - /Publish Point1 Name: Tool Properties Splitter Ratio: 0.5886790156364441 - Class: rviz/Views Expanded: - /Current View1 Name: Views Splitter Ratio: 0.5 - Class: rviz/Time Experimental: false Name: Time SyncMode: 0 SyncSource: "" Preferences: PromptSaveOnExit: true Visualization Manager: Class: "" Displays: - Alpha: 0.5 Cell Size: 1 Class: rviz/Grid Color: 160; 160; 164 Enabled: true Line Style: Line Width: 0.029999999329447746 Value: Lines Name: Grid Normal Cell Count: 0 Offset: X: 0 Y: 0 Z: 0 Plane: XY Plane Cell Count: 10 Reference Frame: Value: true - Alpha: 1 Autocompute Intensity Bounds: true Autocompute Value Bounds: Max Value: 10 Min Value: -10 Value: true Axis: Z Channel Name: intensity Class: rviz/LaserScan Color: 255; 255; 255 Color Transformer: Intensity Decay Time: 0 Enabled: false Invert Rainbow: false Max Color: 255; 255; 255 Max Intensity: 13068 Min Color: 0; 0; 0 Min Intensity: 0 Name: LaserScan Position Transformer: XYZ Queue Size: 10 Selectable: true Size (Pixels): 3 Size (m): 0.009999999776482582 Style: Flat Squares Topic: /scan_tf Unreliable: false Use Fixed Frame: true Use rainbow: true Value: false - Alpha: 1 Autocompute Intensity Bounds: true Autocompute Value Bounds: Max Value: 10 Min Value: -10 Value: true Axis: Z Channel Name: intensity Class: rviz/LaserScan Color: 255; 255; 255 Color Transformer: Intensity Decay Time: 0 Enabled: false Invert Rainbow: false Max Color: 255; 255; 255 Max Intensity: 12958 Min Color: 0; 0; 0 Min Intensity: 0 Name: LaserScan Position Transformer: XYZ Queue Size: 10 Selectable: true Size (Pixels): 3 Size (m): 0.009999999776482582 Style: Flat Squares Topic: /scan_tr Unreliable: false Use Fixed Frame: true Use rainbow: true Value: false - Alpha: 1 Autocompute Intensity Bounds: true Autocompute Value Bounds: Max Value: 10 Min Value: -10 Value: true Axis: Z Channel Name: intensity Class: rviz/LaserScan Color: 255; 255; 255 Color Transformer: Intensity Decay Time: 0 Enabled: false Invert Rainbow: false Max Color: 255; 255; 255 Max Intensity: 15634 
Min Color: 0; 0; 0 Min Intensity: 0 Name: LaserScan Position Transformer: XYZ Queue Size: 10 Selectable: true Size (Pixels): 3 Size (m): 0.009999999776482582 Style: Flat Squares Topic: /scan_bf Unreliable: false Use Fixed Frame: true Use rainbow: true Value: false - Alpha: 1 Autocompute Intensity Bounds: true Autocompute Value Bounds: Max Value: 10 Min Value: -10 Value: true Axis: Z Channel Name: intensity Class: rviz/LaserScan Color: 255; 255; 255 Color Transformer: Intensity Decay Time: 0 Enabled: false Invert Rainbow: false Max Color: 255; 255; 255 Max Intensity: 11609 Min Color: 0; 0; 0 Min Intensity: 0 Name: LaserScan Position Transformer: XYZ Queue Size: 10 Selectable: true Size (Pixels): 3 Size (m): 0.009999999776482582 Style: Flat Squares Topic: /scan_br Unreliable: false Use Fixed Frame: true Use rainbow: true Value: false - Class: rviz/TF Enabled: true Frame Timeout: 15 Frames: All Enabled: false Marker Scale: 1 Name: TF Show Arrows: true Show Axes: true Show Names: true Tree: {} Update Interval: 0 Value: true - Alpha: 1 Autocompute Intensity Bounds: true Autocompute Value Bounds: Max Value: 10 Min Value: -10 Value: true Axis: Z Channel Name: intensity Class: rviz/LaserScan Color: 255; 255; 255 Color Transformer: Intensity Decay Time: 0 Enabled: true Invert Rainbow: false Max Color: 255; 255; 255 Max Intensity: 4101 Min Color: 0; 0; 0 Min Intensity: 0 Name: LaserScan Position Transformer: XYZ Queue Size: 10 Selectable: true Size (Pixels): 3 Size (m): 0.009999999776482582 Style: Flat Squares Topic: /scan_bottom Unreliable: false Use Fixed Frame: true Use rainbow: true Value: true - Alpha: 1 Autocompute Intensity Bounds: true Autocompute Value Bounds: Max Value: 10 Min Value: -10 Value: true Axis: Z Channel Name: intensity Class: rviz/LaserScan Color: 255; 255; 255 Color Transformer: Intensity Decay Time: 0 Enabled: true Invert Rainbow: false Max Color: 255; 255; 255 Max Intensity: 11846 Min Color: 0; 0; 0 Min Intensity: 0 Name: LaserScan Position Transformer: XYZ Queue Size: 10 Selectable: true Size (Pixels): 3 Size (m): 0.009999999776482582 Style: Flat Squares Topic: /scan_top Unreliable: false Use Fixed Frame: true Use rainbow: true Value: true - Alpha: 1 Autocompute Intensity Bounds: true Autocompute Value Bounds: Max Value: 10 Min Value: -10 Value: true Axis: Z Channel Name: intensity Class: rviz/LaserScan Color: 255; 255; 255 Color Transformer: Intensity Decay Time: 0 Enabled: true Invert Rainbow: false Max Color: 255; 255; 255 Max Intensity: 892 Min Color: 0; 0; 0 Min Intensity: 148 Name: LaserScan Position Transformer: XYZ Queue Size: 10 Selectable: true Size (Pixels): 3 Size (m): 0.009999999776482582 Style: Flat Squares Topic: /scan Unreliable: false Use Fixed Frame: true Use rainbow: true Value: true - Class: rviz/MarkerArray Enabled: true Marker Topic: /monocular_people_tracking/markers Name: MarkerArray Namespaces: {} Queue Size: 100 Value: true - Class: rviz/Image Enabled: true Image Topic: /monocular_person_following/features Max Value: 1 Median window: 5 Min Value: 0 Name: Image Normalize Range: true Queue Size: 2 Transport Hint: raw Unreliable: false Value: true - Class: rviz/Image Enabled: true Image Topic: /visualization_node/visualize Max Value: 1 Median window: 5 Min Value: 0 Name: Image Normalize Range: true Queue Size: 2 Transport Hint: raw Unreliable: false Value: true Enabled: true Global Options: Background Color: 48; 48; 48 Default Light: true Fixed Frame: base_link Frame Rate: 30 Name: root Tools: - Class: rviz/Interact Hide Inactive Objects: true - 
Class: rviz/MoveCamera - Class: rviz/Select - Class: rviz/FocusCamera - Class: rviz/Measure - Class: rviz/SetInitialPose Topic: /initialpose - Class: rviz/SetGoal Topic: /move_base_simple/goal - Class: rviz/PublishPoint Single click: true Topic: /clicked_point Value: true Views: Current: Class: rviz/Orbit Distance: 12.880599975585938 Enable Stereo Rendering: Stereo Eye Separation: 0.05999999865889549 Stereo Focal Distance: 1 Swap Stereo Eyes: false Value: false Focal Point: X: 1.4971981048583984 Y: -0.11331728100776672 Z: -1.1117677688598633 Focal Shape Fixed Size: true Focal Shape Size: 0.05000000074505806 Invert Z Axis: false Name: Current View Near Clip Distance: 0.009999999776482582 Pitch: 0.9347967505455017 Target Frame: Value: Orbit (rviz) Yaw: 2.420490026473999 Saved: ~ Window Geometry: "&Displays": collapsed: false "&Image": collapsed: false "&Time": collapsed: false "&Views": collapsed: false Height: 1021 Hide Left Dock: false Hide Right Dock: false I&mage: collapsed: false QMainWindow State: 000000ff00000000fd0000000400000000000003e800000345fc0200000012fb0000001200530065006c0065006300740069006f006e00000001e10000009b0000007601000003fb0000001e0054006f006f006c002000500072006f007000650072007400690065007302000001ed000001df00000185000000a3fb000000120056006900650077007300200054006f006f02000001df000002110000018500000122fb000000200054006f006f006c002000500072006f0070006500720074006900650073003203000002880000011d000002210000017afb000000100044006900730070006c006100790073010000004400000105000000f701000003fb0000002000730065006c0065006300740069006f006e00200062007500660066006500720200000138000000aa0000023a00000294fb00000014005700690064006500530074006500720065006f02000000e6000000d2000003ee0000030bfb0000000c004b0069006e0065006300740200000186000001060000030c00000261fb0000000a0049006d006100670065010000014a0000018a0000002201000003fb0000000a0049006d00610067006501000002d5000000b40000002201000003fb0000000a0049006d006100670065000000022e000000cc0000000000000000fb0000000a0049006d00610067006500000001c3000000e90000000000000000fb0000000a0049006d0061006700650100000245000001530000000000000000fb0000000a0049006d0061006700650000000238000001600000000000000000fb0000000a0049006d006100670065010000016c000001890000000000000000fb0000000a0049006d00610067006501000002f8000000a00000000000000000fb0000000a0049006d00610067006501000001120000028d0000000000000000fb0000000a0049006d006100670065010000029e000000eb0000000000000000000000010000011000000345fc0200000003fb0000001e0054006f006f006c002000500072006f00700065007200740069006500730100000041000000780000000000000000fc0000004400000345000000d000fffffffa000000010100000002fb0000000a0049006d0061006700650000000000ffffffff0000000000000000fb0000000a0056006900650077007301000006300000010f0000011001000003fb0000001200530065006c0065006300740069006f006e010000025a000000b200000000000000000000000200000490000000a9fc0100000001fb0000000a00560069006500770073030000004e00000080000002e100000197000000030000073f0000004cfc0100000002fb0000000800540069006d006501000000000000073f000002ad01000003fb0000000800540069006d00650100000000000004500000000000000000000002450000034500000004000000040000000800000008fc0000000100000002000000010000000a0054006f006f006c00730100000000ffffffff0000000000000000 Selection: collapsed: false Tool Properties: collapsed: false Width: 1855 X: 57 Y: 0 ================================================ FILE: scripts/eval/convert_dataset2bag.py ================================================ #!/usr/bin/python import os import sys import cv2 import numpy import rospy import rosbag from 
sensor_msgs.msg import * def main(): input_dir = sys.argv[1] output_bag = sys.argv[2] camera_info = CameraInfo() camera_info.K = [300, 0, 320, 0, 300, 240, 0, 0, 1] camera_info.D = [0, 0, 0, 0] image_files = sorted([input_dir + '/' + x for x in os.listdir(input_dir) if '.jpg' in x]) with rosbag.Bag(output_bag, 'w') as dst_bag: stamp = rospy.Time(0) for image_file in image_files: print image_file image = cv2.imread(image_file) if image is None: print 'failed to read image...' continue stamp += rospy.Duration(1.0 / 14.2) compressed_msg = CompressedImage() compressed_msg.header.stamp = stamp compressed_msg.header.frame_id = 'camera_optical_frame' camera_info.header = compressed_msg.header ret, encoded = cv2.imencode('.jpg', image) buf = numpy.vstack([encoded]).tostring() compressed_msg.data = buf dst_bag.write('/top_front_camera/camera_info', camera_info, stamp) dst_bag.write('/top_front_camera/image_raw/compressed', compressed_msg, stamp) if rospy.is_shutdown(): break if __name__ == '__main__': main() ================================================ FILE: scripts/eval/generate_bb.py ================================================ #!/usr/bin/python import sys import numpy import rosbag def main(): images_bag = sys.argv[1] results_bag = images_bag[:images_bag.find('.bag')] + '_results.bag' dst_file = sys.argv[2] 'read image timestamps' image_stamps = [] with rosbag.Bag(images_bag, 'r') as bag: for topic, msg, stamp in bag.read_messages(): if 'compressed' not in topic: continue image_stamps.append(msg.header.stamp) timestamp_map = {} for i, stamp in enumerate(image_stamps): timestamp_map[stamp] = i init_bbox = None with rosbag.Bag(results_bag, 'r') as bag: print 'read results' all_msgs = sorted([x for x in bag.read_messages()], key=lambda x: x[2]) target_id = -1 bbs = {} for topic, msg, stamp in all_msgs: if 'target' in topic: target_id = msg.target_id if 'tracks' in topic: target = [x for x in msg.tracks if x.id == target_id] if not len(target): continue if False: neck_ankle = target[0].expected_measurement_mean neck = numpy.float32(neck_ankle[:2]) ankle = numpy.float32(neck_ankle[2:]) else: if not len(target[0].associated_neck_ankle): continue neck_ankle = target[0].associated_neck_ankle neck = numpy.float32([neck_ankle[0].x, neck_ankle[0].y]) ankle = numpy.float32([neck_ankle[1].x, neck_ankle[1].y]) center = (neck + ankle) / 2.0 height = (ankle[1] - neck[1]) width = height * 0.25 center[1] -= height * 0.1 height = height * 1.1 if width > 1e6 or height > 1e6: continue tl = numpy.int32((center - (width / 2, height / 2))) wh = numpy.int32((width, height)) image_id = timestamp_map[msg.header.stamp] bbs[image_id] = tl.tolist() + wh.tolist() if init_bbox is None: init_bbox = tl.tolist() + wh.tolist() print init_bbox with open(dst_file, 'w') as file: bbox = init_bbox for i, timestamp in enumerate(image_stamps): if i in bbs: bbox = bbs[i] print >> sys.stdout, '%d\t%d\t%d\t%d\t%d' % tuple([i] + bbox) print >> file, '%d\t%d\t%d\t%d\t%d' % tuple([i] + bbox) if __name__ == '__main__': main() ================================================ FILE: scripts/face_detector_node.py ================================================ #!/usr/bin/python import cv2 import numpy import dlib import rospy import cv_bridge import message_filters from tfpose_ros.msg import * from sensor_msgs.msg import * from monocular_people_tracking.msg import * from monocular_person_following.msg import * class FaceDetectorNode: def __init__(self): self.cv_bridge = cv_bridge.CvBridge() self.face_detector = 
dlib.get_frontal_face_detector() self.roi_scale = rospy.get_param('~roi_scale', 1.0) self.face_detection_upscale_pyramid = rospy.get_param('~face_detection_upscale_pyramid', 1) self.confidence_thresh = rospy.get_param('~confidence_thresh', -0.5) self.face_expantion_scale = rospy.get_param('~face_expantion_scale', 1.5) subs = [ message_filters.Subscriber('image_rect', Image), message_filters.Subscriber('/monocular_people_tracking/tracks', TrackArray) ] self.sync = message_filters.TimeSynchronizer(subs, 30) self.sync.registerCallback(self.callback) self.faces_pub = rospy.Publisher('~faces', FaceDetectionArray, queue_size=5) def callback(self, image_msg, tracks_msg): image = self.cv_bridge.imgmsg_to_cv2(image_msg, 'bgr8') faces_msg = FaceDetectionArray() faces_msg.header = image_msg.header faces_msg.image_width = image_msg.width faces_msg.image_height = image_msg.height for person in tracks_msg.tracks: face_msg = FaceDetection() face_msg.track_id = person.id faces_msg.faces.append(face_msg) roi = self.calc_roi(image.shape[1], image.shape[0], person) if roi is None: continue face_msg.face_roi.append(self.tlbr2bb(roi[0], roi[1])) face = self.detect_face(image, roi) if face is None: continue face_msg.face_region.append(self.tlbr2bb(face[0], face[1])) face_image = self.extract_roi(image, face[0], face[1]) face_msg.face_image.append(self.cv_bridge.cv2_to_imgmsg(face_image, 'bgr8')) self.faces_pub.publish(faces_msg) def calc_roi(self, width, height, track_msg): if not len(track_msg.associated): return None pose = track_msg.associated[0] head = [x for x in pose.body_part if x.part_id == 0] neck = [x for x in pose.body_part if x.part_id == 1] if not len(head) or not len(neck): return None image_size = numpy.float32([width, height]) head = numpy.float32([head[0].x, head[0].y]) * image_size neck = numpy.float32([neck[0].x, neck[0].y]) * image_size neck2head = (neck[1] - head[1]) tl = head - neck2head * self.roi_scale br = head + neck2head * self.roi_scale tl = max(0, min(width, tl[0])), max(0, min(height, tl[1])) br = max(0, min(width, br[0])), max(0, min(height, br[1])) return tuple(numpy.int32(tl)), tuple(numpy.int32(br)) def detect_face(self, image, roi): if roi[1][0] - roi[0][0] < 10 or roi[1][1] - roi[0][1] < 10: print 'too small roi!!' 
return None tl, br = roi roi_image = self.extract_roi(image, tl, br) dets, confidences, labels = self.face_detector.run(roi_image, self.face_detection_upscale_pyramid, self.confidence_thresh) if not len(dets): return None largest = sorted(dets, key=lambda x: -x.area())[0] center = largest.dcenter() face_left = int(center.x - (center.x - largest.left()) * self.face_expantion_scale) face_top = int(center.y - (center.y - largest.top()) * self.face_expantion_scale) face_right = int(center.x + (center.x - largest.left()) * self.face_expantion_scale) face_bottom = int(center.y + (center.y - largest.top()) * self.face_expantion_scale) face_tl = (tl[0] + face_left, tl[1] + face_top) face_br = (tl[0] + face_right, tl[1] + face_bottom) return face_tl, face_br def extract_roi(self, image, tl, br): return image[tl[1]: br[1], tl[0]: br[0], :] def tlbr2bb(self, tl, br): bb = BoundingBox2D() bb.x = tl[0] bb.y = tl[1] bb.width = br[0] - tl[0] bb.height = br[1] - tl[1] return bb def main(): print '--- face_detector_node ---' rospy.init_node('face_detector_node') node = FaceDetectorNode() print 'ready' rospy.spin() if __name__ == '__main__': main() ================================================ FILE: scripts/robot_controller.py ================================================ #!/usr/bin/python import tf import math import rospy from geometry_msgs.msg import * from monocular_people_tracking.msg import * from monocular_person_following.msg import * class RobotControllerNode: def __init__(self): self.tf_listener = tf.TransformListener() self.enable_back = rospy.get_param('~enable_back', True) self.max_vx = rospy.get_param('~max_vx', 0.2) self.max_va = rospy.get_param('~max_va', 0.5) self.gain_vx = rospy.get_param('~gain_vx', 0.3) self.gain_va = rospy.get_param('~gain_va', 0.3) self.distance = rospy.get_param('~distance', 3.25) self.timeout = rospy.get_param('~timeout', 1.0) self.last_time = rospy.Time(0) self.target_id = -1 self.cmd_vel_pub = rospy.Publisher('/cmd_vel', Twist, queue_size=1) self.target_sub = rospy.Subscriber('/monocular_person_following/target', Target, self.target_callback) self.tracks_sub = rospy.Subscriber('/monocular_people_tracking/tracks', TrackArray, self.tracks_callback) def target_callback(self, target_msg): self.target_id = target_msg.target_id def tracks_callback(self, tracks_msg): target = [x for x in tracks_msg.tracks if x.id == self.target_id] if not len(target): self.cmd_vel_pub.publish(Twist()) return point = PointStamped() point.header = tracks_msg.header point.point = target[0].pos try: self.tf_listener.waitForTransform('base_link', tracks_msg.header.frame_id, tracks_msg.header.stamp, rospy.Duration(0.5)) target_pos = self.tf_listener.transformPoint('base_link', point) except: print 'failed to lookup transform between base_link and', tracks_msg.header.frame_id self.cmd_vel_pub.publish(Twist()) return theta = math.atan2(target_pos.point.y, target_pos.point.x) print 'x, y, theta:', target_pos.point.x, target_pos.point.y, theta, math.radians(45) va = min(self.max_va, max(-self.max_va, theta * self.gain_va)) vx = 0.0 if abs(theta) < math.radians(45): vx = (target_pos.point.x - self.distance) * self.gain_vx print 'raw vx', vx min_vx = -self.max_vx if self.enable_back else 0.0 vx = min(self.max_vx, max(min_vx, vx)) else: print 'rotation too big' twist = Twist() twist.linear.x = vx twist.angular.z = va self.cmd_vel_pub.publish(twist) self.last_time = tracks_msg.header.stamp def spin(self): if (rospy.Time.now() - self.last_time).to_sec() > self.timeout: print 'timeout!!' 
self.cmd_vel_pub.publish(Twist()) def main(): rospy.init_node('robot_controller_node') node = RobotControllerNode() rate = rospy.Rate(10) while not rospy.is_shutdown(): node.spin() rate.sleep() if __name__ == '__main__': main() ================================================ FILE: scripts/save_bag.sh ================================================ #!/bin/bash rosbag record -O $1 "/visualization_node/visualize/compressed" "/tf" "/scan" -e ".*/markers" -e ".*/tracks" -e ".*/target" -e ".*/features/compressed" -e ".*/pose" ================================================ FILE: scripts/simple_gesture_recognition.py ================================================ #!/usr/bin/env python3 import rospy from monocular_people_tracking.msg import * from monocular_person_following.srv import * class GestureRecognizer: def __init__(self, track_id): self.track_id = track_id self.last_stamp = rospy.Time.now() def callback(self, track_msg, imprint): if not len(track_msg.associated): self.last_stamp = rospy.Time.now() return skeleton = track_msg.associated[0] neck = [x for x in skeleton.body_part if x.part_id == 1] r_elbow = [x for x in skeleton.body_part if x.part_id == 3] r_hand = [x for x in skeleton.body_part if x.part_id == 4] if not len(neck) or not len(r_elbow) or not len(r_hand): self.last_stamp = rospy.Time.now() return if r_elbow[0].y < neck[0].y and r_hand[0].y < neck[0].y: print(self.track_id, rospy.Time.now() - self.last_stamp) if rospy.Time.now() - self.last_stamp > rospy.Duration(5.0): imprint(track_msg.id) self.last_stamp = rospy.Time.now() class SimpleGestureRecognitionNode: def __init__(self): print('--- simple_gesture_recognition ---') self.recognizers = {} print('wait for service') rospy.wait_for_service('/monocular_person_following/imprint') self.imprint_service = rospy.ServiceProxy('/monocular_person_following/imprint', Imprint) self.sub = rospy.Subscriber('/monocular_people_tracking/tracks', TrackArray, self.callback) print('done') def callback(self, track_msg): for track in track_msg.tracks: if track.id not in self.recognizers: self.recognizers[track.id] = GestureRecognizer(track.id) self.recognizers[track.id].callback(track, self.imprint) def imprint(self, target_id): print('reset target', target_id) req = ImprintRequest() req.target_id = target_id self.imprint_service(req) def main(): rospy.init_node('simple_gesture_recognition') node = SimpleGestureRecognitionNode() rospy.spin() if __name__ == '__main__': main() ================================================ FILE: scripts/visualization.py ================================================ #!/usr/bin/env python3 import cv2 import math import numpy import tf import rospy import cv_bridge import message_filters from tfpose_ros.msg import * from sensor_msgs.msg import * from monocular_people_tracking.msg import * from monocular_person_following.msg import * from tf_pose import common from tf_pose.estimator import Human, BodyPart class VisualizationNode: def __init__(self): self.tf_listener = tf.TransformListener() self.image_pub = rospy.Publisher('~visualize', Image, queue_size=1) color_palette = numpy.uint8([(180 / 10 * i, 255, 255) for i in range(10)]).reshape(-1, 1, 3) self.color_palette = cv2.cvtColor(color_palette, cv2.COLOR_HSV2BGR).reshape(-1, 3) self.target_id = 0 self.state_name = "NONE" self.confidences = {} self.use_face = rospy.get_param('~use_face', True) self.target_sub = rospy.Subscriber('/monocular_person_following/target', Target, self.target_callback) self.image = numpy.zeros((128, 128, 3), dtype=numpy.uint8) subs 
= [ message_filters.Subscriber('image_rect', Image), message_filters.Subscriber('/pose_estimator/pose', Persons), message_filters.Subscriber('/monocular_people_tracking/tracks', TrackArray) ] if self.use_face: subs.append(message_filters.Subscriber('/face_detector/faces', FaceDetectionArray)) self.sync = message_filters.TimeSynchronizer(subs, 50) self.sync.registerCallback(self.callback) else: self.sync = message_filters.TimeSynchronizer(subs, 50) self.sync.registerCallback(self.callback_wo_face) def target_callback(self, target_msg): self.state_name = target_msg.state.data self.target_id = target_msg.target_id for i in range(len(target_msg.track_ids)): track_id = target_msg.track_ids[i] confidence = target_msg.confidences[i] if track_id not in self.confidences: self.confidences[track_id] = {} self.confidences[track_id]['conf'] = confidence num_classifiers = len(target_msg.classifier_names) for j in range(num_classifiers): self.confidences[track_id][target_msg.classifier_names[j].data] = target_msg.classifier_confidences[num_classifiers * i + j] def callback_wo_face(self, image_msg, poses_msg, tracks_msg): self.callback(image_msg, poses_msg, tracks_msg, None) def callback(self, image_msg, poses_msg, tracks_msg, faces_msg): image = cv_bridge.CvBridge().imgmsg_to_cv2(image_msg, 'bgr8') humans = [] for p_idx, person in enumerate(poses_msg.persons): human = Human([]) for body_part in person.body_part: part = BodyPart('', body_part.part_id, body_part.x, body_part.y, body_part.confidence) human.body_parts[body_part.part_id] = part humans.append(human) image = self.draw_humans(image, humans, imgcopy=False) for track in tracks_msg.tracks: self.draw_expected_measurement(image, track) self.draw_bounding_box(image, track) if track.id == self.target_id: self.draw_target_icon(image, track) if faces_msg is not None: face_scale = image_msg.width / float(faces_msg.image_width) def draw_face(bb, color, line_width): tl = numpy.float32((bb.x, bb.y)) * face_scale br = numpy.float32((bb.x + bb.width, bb.y + bb.height)) * face_scale cv2.rectangle(image, tuple(tl.astype(numpy.int32)), tuple(br.astype(numpy.int32)), color, line_width) for face in faces_msg.faces: if not len(face.face_roi): continue draw_face(face.face_roi[0], (255, 0, 0), 1) if not len(face.face_region): continue face_confidence = 0.0 if face.track_id in self.confidences: face_confidence = self.confidences[face.track_id]['face'] face_confidence = (face_confidence + 1.0) / 2.0 color = (0, int(face_confidence * 255), int((1.0 - face_confidence) * 255)) draw_face(face.face_region[0], color, 2) cv2.putText(image, self.state_name, (15, 30), cv2.FONT_HERSHEY_PLAIN, 1.5, (0, 0, 0), 3) cv2.putText(image, self.state_name, (15, 30), cv2.FONT_HERSHEY_PLAIN, 1.5, (255, 255, 255), 1) self.image = image # taken from tfpose_ros/tf_pose/estimator.py def draw_humans(self, npimg, humans, imgcopy=False): if imgcopy: npimg = np.copy(npimg) canvas = npimg.copy() image_h, image_w = npimg.shape[:2] centers = {} for human in humans: # draw point for i in range(common.CocoPart.Background.value): if i not in human.body_parts.keys(): continue body_part = human.body_parts[i] center = (int(body_part.x * image_w + 0.5), int(body_part.y * image_h + 0.5)) centers[i] = center cv2.circle(canvas, center, 3, common.CocoColors[i], thickness=3, lineType=8, shift=0) # draw line for pair_order, pair in enumerate(common.CocoPairsRender): if pair[0] not in human.body_parts.keys() or pair[1] not in human.body_parts.keys(): continue # npimg = cv2.line(npimg, centers[pair[0]], 
centers[pair[1]], common.CocoColors[pair_order], 3) cv2.line(canvas, centers[pair[0]], centers[pair[1]], common.CocoColors[pair_order], 3) npimg = npimg / 2 + canvas / 2 return npimg def draw_expected_measurement(self, image, track): meas_mean = numpy.float32(track.expected_measurement_mean).flatten() meas_cov = numpy.float32(track.expected_measurement_cov).reshape(4, 4) def error_ellipse(cov, kai): w, v = numpy.linalg.eig(cov) extents = numpy.sqrt(kai * kai * w) angle = math.atan2(v[0, 1], v[1, 1]) return (extents[0], extents[1], angle) neck_ellipse = error_ellipse(meas_cov[:2, :2], 3.0) ankle_ellipse = error_ellipse(meas_cov[2:, 2:], 3.0) neck_pos = tuple(meas_mean[:2].astype(numpy.int32)) ankle_pos = tuple(meas_mean[2:].astype(numpy.int32)) color = self.color_palette[track.id % len(self.color_palette)] color = tuple(int(x) for x in color) cv2.ellipse(image, neck_pos, neck_ellipse[:2], neck_ellipse[-1], 0, 360, color, 2) cv2.ellipse(image, ankle_pos, ankle_ellipse[:2], neck_ellipse[-1], 0, 360, color, 2) cv2.line(image, neck_pos, ankle_pos, color, 2) def draw_bounding_box(self, image, track): neck_ankle = numpy.float32(track.expected_measurement_mean).flatten() center = (neck_ankle[:2] + neck_ankle[2:]) / 2.0 height = (neck_ankle[-1] - neck_ankle[1]) * 1.5 width = height * 0.25 half_extents = (width / 2.0, height / 2.0) tl = tuple(numpy.int32(center - half_extents)) br = tuple(numpy.int32(center + half_extents)) cv2.putText(image, "id:%d" % track.id, (tl[0] + 5, tl[1] + 15), cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 0), 2) cv2.putText(image, "id:%d" % track.id, (tl[0] + 5, tl[1] + 15), cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), 1) body_confidence = 0.0 if track.id in self.confidences: body_confidence = self.confidences[track.id]['body'] def order(name): return {'conf': 0, 'body': 1, 'face': 2}[name] keys = sorted(self.confidences[track.id].keys(), key=order) for i, key in enumerate(keys): conf = self.confidences[track.id][key] cv2.putText(image, "%s:%.2f" % (key, conf), (tl[0] + 5, br[1] - 5 - 12 * i), cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 0), 2) cv2.putText(image, "%s:%.2f" % (key, conf), (tl[0] + 5, br[1] - 5 - 12 * i), cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), 1) body_confidence = body_confidence + 0.5 color = (0, int(255 * body_confidence), int(255 * (1 - body_confidence))) cv2.rectangle(image, tl, br, color, 2) def draw_target_icon(self, image, track): neck_ankle = numpy.float32(track.expected_measurement_mean).flatten() height = neck_ankle[-1] - neck_ankle[1] pt = (neck_ankle[:2] + (0, -height * 0.33)).astype(numpy.int32) pts = numpy.array([pt, pt + (15, -15), pt + (-15, -15)]).reshape(-1, 1, 2) cv2.polylines(image, [pts], True, (0, 0, 255), 2) def spin(self): if rospy.get_param('~show', True): cv2.imshow('image', self.image) cv2.waitKey(10) if self.image_pub.get_num_connections(): img_msg = cv_bridge.CvBridge().cv2_to_imgmsg(self.image.astype(numpy.uint8), 'bgr8') self.image_pub.publish(img_msg) def main(): rospy.init_node('visualization_node') node = VisualizationNode() rate = rospy.Rate(15) while not rospy.is_shutdown(): node.spin() rate.sleep() if __name__ == '__main__': main() ================================================ FILE: src/monocular_person_following/context.cpp ================================================ #include #include #include namespace monocular_person_following { Context::Context(ros::NodeHandle& nh) { classifier.reset(new PersonClassifier(nh)); pos_feature_bank.resize(nh.param("feature_bank_size", 32)); 
neg_feature_bank.resize(nh.param("feature_bank_size", 32)); } Context::~Context() {} void Context::extract_features(const cv::Mat& bgr_image, std::unordered_map& tracks) { for(auto& track: tracks) { if(!track.second->person_region) { continue; } if(track.second->person_region->width < 20 || track.second->person_region->height < 20) { continue; } track.second->input.reset(new PersonInput()); track.second->features.reset(new PersonFeatures()); std::unordered_map images; images["body"] = cv::Mat(bgr_image, *track.second->person_region); images["face"] = track.second->face_image; if(!classifier->extractInput(track.second->input, images)) { ROS_WARN_STREAM("failed to extract input data"); continue; } if(!classifier->extractFeatures(track.second->features, track.second->input)) { ROS_WARN_STREAM("failed to extract input features"); continue; } } } bool Context::update_classifier(double label, const Tracklet::Ptr& track) { auto pred = classifier->predict(track->features, classifier_confidences[track->track_msg->id]); if(pred) { track->confidence = *pred; } track->classifier_confidences = classifier_confidences[track->track_msg->id]; auto& p_bank = label > 0.0 ? pos_feature_bank : neg_feature_bank; auto& n_bank = label > 0.0 ? neg_feature_bank : pos_feature_bank; if(!n_bank.empty()) { size_t i = std::uniform_int_distribution<>(0, n_bank.size())(mt); classifier->update(-label, n_bank[i]); } if(!p_bank.full()) { p_bank.push_back(track->features); } else { size_t i = std::uniform_int_distribution<>(0, p_bank.size())(mt); p_bank[i] = track->features; } return classifier->update(label, track->features); } boost::optional Context::predict(const Tracklet::Ptr& track) { auto pred = classifier->predict(track->features, classifier_confidences[track->track_msg->id]); if(pred) { track->confidence = *pred; } track->classifier_confidences = classifier_confidences[track->track_msg->id]; return pred; } std::vector Context::classifier_names() const { return classifier->classifierNames(); } cv::Mat Context::visualize_body_features() { ccf_person_classifier::BodyClassifier::Ptr body_classifier = classifier->getClassifier("body"); if(body_classifier) { cv::Mat feature_map = body_classifier->visualize(); return feature_map; } return cv::Mat(); } } ================================================ FILE: src/monocular_person_following/state/initial_state.cpp ================================================ #include #include namespace monocular_person_following { State* InitialState::update(ros::NodeHandle& nh, Context& context, const std::unordered_map& tracks) { long target_id = select_target(nh, tracks); if(target_id < 0) { return this; } return new InitialTrainingState(target_id); } long InitialState::select_target(ros::NodeHandle& nh, const std::unordered_map& tracks) { long target_id = -1; double distance = 0.0; for(const auto& track: tracks) { const auto& pos = track.second->pos_in_baselink; if(pos.x() > nh.param("imprinting_max_dist", 4.0) || std::abs(pos.y()) > pos.x()) { continue; } if(target_id == -1 || distance > pos.norm()) { target_id = track.first; distance = pos.norm(); } } return target_id; } } ================================================ FILE: src/monocular_person_following/state/initial_training_state.cpp ================================================ #include #include #include namespace monocular_person_following { State* InitialTrainingState::update(ros::NodeHandle& nh, Context& context, const std::unordered_map& tracks) { auto found = tracks.find(target_id); if(found == tracks.end()) { 
================================================
FILE: src/monocular_person_following/state/initial_training_state.cpp
================================================
#include <monocular_person_following/state/initial_state.hpp>
#include <monocular_person_following/state/initial_training_state.hpp>
#include <monocular_person_following/state/tracking_state.hpp>

namespace monocular_person_following {

State* InitialTrainingState::update(ros::NodeHandle& nh, Context& context, const std::unordered_map<long, Tracklet::Ptr>& tracks) {
    auto found = tracks.find(target_id);
    if(found == tracks.end()) {
        ROS_INFO_STREAM("lost target during the initial training!!");
        return new InitialState();
    }

    for(const auto& track: tracks) {
        // the imprinted track is a positive sample, every other track is a negative sample
        double label = track.first == target_id ? 1.0 : -1.0;

        boost::optional<double> pred = context.predict(track.second);
        context.update_classifier(label, track.second);

        if(label > 0.0) {
            num_pos_samples++;
        }
    }

    if(num_pos_samples >= nh.param("initial_training_num_samples", 10)) {
        return new TrackingState(target_id);
    }

    return this;
}

}


================================================
FILE: src/monocular_person_following/state/reid_state.cpp
================================================
#include <monocular_person_following/state/reid_state.hpp>
#include <monocular_person_following/state/tracking_state.hpp>

namespace monocular_person_following {

State* ReidState::update(ros::NodeHandle& nh, Context& context, const std::unordered_map<long, Tracklet::Ptr>& tracks) {
    for(const auto& track: tracks) {
        boost::optional<double> pred = context.predict(track.second);
        if(!pred || *pred < nh.param("reid_confidence_thresh", 0.2)) {
            continue;
        }

        auto found = positive_count.find(track.first);
        if(found == positive_count.end()) {
            positive_count[track.first] = 0;
        }

        // count how many frames each track has exceeded the re-identification threshold
        positive_count[track.first]++;
        if(positive_count[track.first] >= nh.param("reid_positive_count", 5)) {
            return new TrackingState(track.first);
        }
    }

    return this;
}

}
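ReidState re-acquires the target purely from classifier confidences: every frame, each visible track is scored, a per-track counter is incremented whenever its confidence exceeds reid_confidence_thresh, and the first track whose counter reaches reid_positive_count becomes the target again. A rough Python sketch of that counting rule; the confidence stream is invented for illustration:

from collections import defaultdict

REID_CONFIDENCE_THRESH = 0.2
REID_POSITIVE_COUNT = 5

positive_count = defaultdict(int)

def reid_update(frame_confidences):
    # frame_confidences: {track_id: classifier confidence} for one frame
    for track_id, conf in frame_confidences.items():
        if conf < REID_CONFIDENCE_THRESH:
            continue
        positive_count[track_id] += 1
        if positive_count[track_id] >= REID_POSITIVE_COUNT:
            return track_id  # re-identified: go back to tracking this id
    return None              # keep searching

# invented stream: track 7 keeps scoring above the threshold, track 9 does not
for frame in range(5):
    target = reid_update({7: 0.4, 9: -0.1})
print(target)  # -> 7 after five positive frames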
================================================
FILE: src/monocular_person_following/state/tracking_state.cpp
================================================
#include <monocular_person_following/state/tracking_state.hpp>
#include <monocular_person_following/state/reid_state.hpp>

namespace monocular_person_following {

State* TrackingState::update(ros::NodeHandle& nh, Context& context, const std::unordered_map<long, Tracklet::Ptr>& tracks) {
    auto found = tracks.find(target_id);
    if(found == tracks.end()) {
        ROS_INFO_STREAM("target lost!!");
        return new ReidState();
    }

    boost::optional<double> pred = context.predict(found->second);
    if(pred && *pred < nh.param("id_switch_detection_thresh", -0.1)) {
        ROS_INFO_STREAM("ID switch detected!!");
        return new ReidState();
    }

    if(!pred || *pred < nh.param("min_target_cofidence", 0.1)) {
        ROS_INFO_STREAM("do not update");
        return this;
    }

    for(const auto& track: tracks) {
        // keep training online: the target is positive, all other tracks are negative
        double label = track.first == target_id ? 1.0 : -1.0;
        context.update_classifier(label, track.second);
    }

    return this;
}

}
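Taken together, the four State subclasses form a small state machine that is advanced once per synchronized frame: InitialState waits for a person to imprint, InitialTrainingState collects initial_training_num_samples positive samples, TrackingState follows and keeps training until the target disappears or an ID switch is suspected, and ReidState searches for the target again. A condensed Python sketch of just the transition logic; the thresholds are the defaults read via nh.param in the sources, and the classifier itself is stubbed out:

ID_SWITCH_THRESH = -0.1    # id_switch_detection_thresh
MIN_TARGET_CONF = 0.1      # min_target_cofidence (sic, parameter name as in the source)
NUM_INITIAL_SAMPLES = 10   # initial_training_num_samples

def next_state(state, target_visible, target_conf, num_pos_samples, reid_hit):
    if state == 'initial':
        # transition as soon as an imprinting candidate is selected
        return 'initial_training' if target_visible else 'initial'
    if state == 'initial_training':
        if not target_visible:
            return 'initial'
        return 'tracking' if num_pos_samples >= NUM_INITIAL_SAMPLES else 'initial_training'
    if state == 'tracking':
        if not target_visible or (target_conf is not None and target_conf < ID_SWITCH_THRESH):
            return 'reid'
        # note: when target_conf < MIN_TARGET_CONF the classifier is simply not updated
        return 'tracking'
    if state == 'reid':
        return 'tracking' if reid_hit else 'reid'

print(next_state('tracking', target_visible=True, target_conf=-0.5,
                 num_pos_samples=0, reid_hit=False))  # -> 'reid'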
================================================
FILE: src/monocular_person_following_node.cpp
================================================
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include

namespace monocular_person_following {

class MonocularPersonFollowingNode {
public:
    MonocularPersonFollowingNode()
        : nh(),
          private_nh("~"),
          target_pub(nh.advertise<Target>("/monocular_person_following/target", 1)),
          image_trans(nh),
          features_pub(image_trans.advertise("/monocular_person_following/features", 1)),
          image_sub(nh, "image", 10),
          tracks_sub(nh, "/monocular_people_tracking/tracks", 10),
          faces_sub(nh, "/face_detector/faces", 10),
          sync(image_sub, tracks_sub, 30),
          sync_w_face(image_sub, tracks_sub, faces_sub, 30),
          reset_sub(private_nh.subscribe("reset", 10, &MonocularPersonFollowingNode::reset_callback, this)),
          reset_service_server(private_nh.advertiseService("reset", &MonocularPersonFollowingNode::reset_service, this)),
          imprint_service_server(private_nh.advertiseService("imprint", &MonocularPersonFollowingNode::imprint_service, this))
    {
        state.reset(new InitialState());
        context.reset(new Context(private_nh));

        if(private_nh.param("use_face", true)) {
            sync_w_face.registerCallback(boost::bind(&MonocularPersonFollowingNode::callback, this, _1, _2, _3));
        } else {
            sync.registerCallback(boost::bind(&MonocularPersonFollowingNode::callback, this, _1, _2, nullptr));
        }
    }

    void callback(const sensor_msgs::ImageConstPtr& image_msg,
                  const monocular_people_tracking::TrackArrayConstPtr& tracks_msg,
                  const monocular_person_following::FaceDetectionArrayConstPtr& faces_msg)
    {
        auto cv_image = cv_bridge::toCvCopy(image_msg, "bgr8");

        std::unordered_map<long, Tracklet::Ptr> tracks;
        std::unordered_map<long, const FaceDetection*> face_msgs;
        if(faces_msg != nullptr) {
            for(const auto& face : faces_msg->faces) {
                face_msgs[face.track_id] = &face;
            }
        }

        for(const auto& track : tracks_msg->tracks) {
            tracks[track.id].reset(new Tracklet(tf_listener, tracks_msg->header, track));
            if(track.associated_neck_ankle.empty()) {
                continue;
            }

            cv::Rect person_region = calc_person_region(track, cv_image->image.size());
            tracks[track.id]->person_region = person_region;

            auto face = face_msgs.find(track.id);
            if(face != face_msgs.end() && !face->second->face_image.empty()) {
                auto face_image = cv_bridge::toCvCopy(face->second->face_image[0], "bgr8");
                tracks[track.id]->face_image = face_image->image;
            }
        }

        std::lock_guard<std::mutex> lock(context_mutex);
        context->extract_features(cv_image->image, tracks);

        State* next_state = state->update(private_nh, *context, tracks);
        if(next_state != state.get()) {
            state.reset(next_state);
        }

        if(target_pub.getNumSubscribers()) {
            Target target;
            target.header = image_msg->header;
            target.state.data = state->state_name();
            target.target_id = state->target();

            target.track_ids.reserve(tracks_msg->tracks.size());
            target.confidences.reserve(tracks_msg->tracks.size());
            target.classifier_confidences.reserve(tracks_msg->tracks.size() * 2);

            std::vector<std::string> classifier_names = context->classifier_names();
            for(const auto& name: classifier_names) {
                std_msgs::String classifier_name;
                classifier_name.data = name;
                target.classifier_names.push_back(classifier_name);
            }

            for(const auto& track : tracks) {
                if(track.second->confidence) {
                    target.track_ids.push_back(track.first);
                    target.confidences.push_back(*track.second->confidence);

                    if(track.second->classifier_confidences.size() != target.classifier_names.size()) {
                        ROS_ERROR_STREAM("num_classifiers did not match!!");
                        ROS_ERROR_STREAM(track.second->classifier_confidences.size() << " : " << target.classifier_names.size());
                    }

                    std::copy(track.second->classifier_confidences.begin(), track.second->classifier_confidences.end(), std::back_inserter(target.classifier_confidences));
                }
            }

            target_pub.publish(target);
        }

        if(features_pub.getNumSubscribers()) {
            cv::Mat features = context->visualize_body_features();
            if(features.data) {
                cv_bridge::CvImage cv_image(image_msg->header, "bgr8", features);
                features_pub.publish(cv_image.toImageMsg());
            }
        }
    }

    bool imprint_service(ImprintRequest& req, ImprintResponse& res) {
        reset(req.target_id);
        res.success = true;
        return true;
    }

    bool reset_service(std_srvs::EmptyRequest& req, std_srvs::EmptyResponse& res) {
        reset();
        return true;
    }

    void reset_callback(const std_msgs::EmptyConstPtr& empty_msg) {
        reset();
    }

private:
    cv::Rect2f calc_person_region(const monocular_people_tracking::Track& track, const cv::Size& image_size) {
        Eigen::Vector2f neck(track.associated_neck_ankle[0].x, track.associated_neck_ankle[0].y);
        Eigen::Vector2f ankle(track.associated_neck_ankle[1].x, track.associated_neck_ankle[1].y);

        Eigen::Vector2f center = (neck + ankle) / 2.0f;
        float height = (ankle.y() - neck.y()) * 1.25f;
        float width = height * 0.25f;

        cv::Rect rect(center.x() - width / 2.0f, center.y() - height / 2.0f, width, height);
        cv::Point tl = rect.tl();
        cv::Point br = rect.br();

        // clip the region to the image bounds
        tl.x = std::min(image_size.width, std::max(0, tl.x));
        tl.y = std::min(image_size.height, std::max(0, tl.y));
        br.x = std::min(image_size.width, std::max(0, br.x));
        br.y = std::min(image_size.height, std::max(0, br.y));

        return cv::Rect(tl, br);
    }

    void reset(long target_id = -1) {
        ROS_INFO_STREAM("reset identification!!");
        std::lock_guard<std::mutex> lock(context_mutex);
        if(target_id < 0) {
            state.reset(new InitialState());
        } else {
            state.reset(new InitialTrainingState(target_id));
        }
        context.reset(new Context(private_nh));
    }

private:
    ros::NodeHandle nh;
    ros::NodeHandle private_nh;

    tf::TransformListener tf_listener;

    ros::Publisher target_pub;
    image_transport::ImageTransport image_trans;
    image_transport::Publisher features_pub;

    message_filters::Subscriber<sensor_msgs::Image> image_sub;
    message_filters::Subscriber<monocular_people_tracking::TrackArray> tracks_sub;
    message_filters::Subscriber<monocular_person_following::FaceDetectionArray> faces_sub;
    message_filters::TimeSynchronizer<sensor_msgs::Image, monocular_people_tracking::TrackArray> sync;
    message_filters::TimeSynchronizer<sensor_msgs::Image, monocular_people_tracking::TrackArray, monocular_person_following::FaceDetectionArray> sync_w_face;

    // reset service callbacks
    ros::Subscriber reset_sub;
    ros::ServiceServer reset_service_server;
    ros::ServiceServer imprint_service_server;

    std::mutex context_mutex;
    std::shared_ptr<State> state;
    std::unique_ptr<Context> context;
};

}

int main(int argc, char** argv) {
    ros::init(argc, argv, "monocular_person_following");
    std::unique_ptr<monocular_person_following::MonocularPersonFollowingNode> node(new monocular_person_following::MonocularPersonFollowingNode());
    ros::spin();

    return 0;
}


================================================
FILE: srv/Imprint.srv
================================================
int64 target_id
---
bool success
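The node exposes three ways to (re)start identification: the private reset topic and reset service drop back to InitialState, while the imprint service (Imprint.srv above) jumps straight to InitialTrainingState for a given track id. Assuming the node runs under its default name, a minimal rospy client could look like the following sketch; the resolved service names and the example target_id of 3 are assumptions for illustration:

import rospy
from std_srvs.srv import Empty
from monocular_person_following.srv import Imprint

rospy.init_node('imprint_client')

# forget the current target and wait for a new person to be imprinted automatically
rospy.wait_for_service('/monocular_person_following/reset')
rospy.ServiceProxy('/monocular_person_following/reset', Empty)()

# or imprint a specific track id (e.g. one shown by the visualization node)
rospy.wait_for_service('/monocular_person_following/imprint')
imprint = rospy.ServiceProxy('/monocular_person_following/imprint', Imprint)
print(imprint(target_id=3).success)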