Repository: chili-epfl/attention-tracker
Branch: master
Commit: 3ecf4639c731
Files: 16
Total size: 36.1 KB

Directory structure:
gitextract_zm7yli8l/

├── .gitignore
├── CMakeLists.txt
├── README.md
├── calib/
│   └── logitech-c920_640x360.ini
├── launch/
│   └── attention_tracking.launch
├── package.xml
├── samples/
│   ├── head_pose_estimation_single_frame.cpp
│   └── head_pose_estimation_test.cpp
├── share/
│   └── targets.json
└── src/
    ├── estimate_focus.cpp
    ├── head_pose_estimation.cpp
    ├── head_pose_estimation.hpp
    ├── head_pose_estimation_ros.cpp
    ├── ros_head_pose_estimator.cpp
    ├── ros_head_pose_estimator.hpp
    └── withmeness.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
build*/


================================================
FILE: CMakeLists.txt
================================================
cmake_minimum_required(VERSION 2.8.3)
project(attention_tracker)

set(CMAKE_POSITION_INDEPENDENT_CODE True)
add_definitions(-std=c++11)

set(DLIB_PATH "" CACHE PATH "Path to DLIB")
include(${DLIB_PATH}/dlib/cmake)

option(DEBUG "Enable debug visualizations" ON)
option(WITH_TESTS "Compile sample test application" ON)
option(WITH_ROS "Build ROS nodes -- Requires OpenCV2!" OFF)

include_directories(${OpenCV_INCLUDE_DIRS})

if(WITH_ROS)

    find_package(catkin REQUIRED COMPONENTS 
        roscpp 
        tf
        std_msgs
        visualization_msgs
        sensor_msgs
        cv_bridge
        image_transport
        image_geometry
        )

    include_directories(${catkin_INCLUDE_DIRS})

    catkin_package(
    CATKIN_DEPENDS 
        tf
    DEPENDS OpenCV
    LIBRARIES 
    )
endif()

if(DEBUG)
    find_package(OpenCV COMPONENTS core imgproc calib3d highgui REQUIRED)
else()
    find_package(OpenCV COMPONENTS core imgproc calib3d REQUIRED)
endif()

message(STATUS "OpenCV version: ${OpenCV_VERSION}")
if(${OpenCV_VERSION} VERSION_GREATER 2.9.0)
    set(OPENCV3 TRUE)
    add_definitions(-DOPENCV3)
endif()

if(DEBUG)
    add_definitions(-DHEAD_POSE_ESTIMATION_DEBUG)
endif()

add_library(head_pose_estimation SHARED src/head_pose_estimation.cpp)
target_link_libraries(head_pose_estimation dlib ${OpenCV_LIBRARIES})

if(WITH_ROS)

    add_executable(estimate_focus src/estimate_focus.cpp)
    target_link_libraries(estimate_focus ${catkin_LIBRARIES} ${OpenCV_LIBRARIES})

    add_executable(estimate src/head_pose_estimation_ros.cpp src/ros_head_pose_estimator.cpp)
    target_link_libraries(estimate head_pose_estimation ${catkin_LIBRARIES})

    install(TARGETS estimate_focus head_pose_estimation estimate
    ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
    LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
    RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
    )

    install(PROGRAMS src/withmeness.py
    DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
    )

    install(FILES
        launch/attention_tracking.launch
        calib/logitech-c920_640x360.ini
        share/shape_predictor_68_face_landmarks.dat
        share/targets.json
        DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
    )
endif()

if(WITH_TESTS)
    if(OPENCV3)
        if (DEBUG)
            find_package(OpenCV COMPONENTS core imgproc calib3d highgui imgcodecs videoio REQUIRED)
        else()
            find_package(OpenCV COMPONENTS core imgproc calib3d imgcodecs videoio REQUIRED)
        endif()
    else()
        find_package(OpenCV COMPONENTS core imgproc calib3d highgui REQUIRED)
    endif()

    add_executable(head_pose_single_frame samples/head_pose_estimation_single_frame.cpp)
    target_link_libraries(head_pose_single_frame head_pose_estimation ${OpenCV_LIBRARIES})

    if(DEBUG)
	    add_executable(head_pose_test samples/head_pose_estimation_test.cpp)
	    target_link_libraries(head_pose_test head_pose_estimation ${OpenCV_LIBRARIES})
    endif()

endif()


================================================
FILE: README.md
================================================

⚠️️ **Attention**: This library is currently not maintained. Please use [the gazr fork](https://github.com/severin-lemaignan/gazr) instead. ⚠️️


Attention Tracker
=================


![Face tracking for head pose estimation](doc/screenshot.jpg)


Head pose estimation
--------------------

This library (`libhead_pose_estimation.so`) performs 3D head pose estimation
based on the fantastic [dlib](http://dlib.net/) face detector and a bit of
[OpenCV's
solvePnP](http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html#solvepnp) magic (it uses [adult male anthropometric](https://github.com/chili-epfl/attention-tracker/blob/5dcef870c96892d80ca17959528efba0b2d0ce1c/src/head_pose_estimation.hpp#L12) data to match a real 3D head to the projected image).

The library returns a 4x4 transformation matrix.

It supports detection and tracking of multiple faces at the same time, and runs
on-line, but it *does not* feature face identification.

Installation
------------

**Note**: The library has only been tested on Linux. We can only provide limited
support for other operating systems!

### Pre-requisites

Dlib: You need to [download](http://dlib.net/) and extract ``Dlib`` somewhere. This
application has been tested with ``dlib-18.16``.

OpenCV: You need to install [OpenCV](http://opencv.org/). If you're using Ubuntu, you could run:

```text
sudo apt-get install libopencv-dev
```

### Installation

The library uses a standard ``CMake`` workflow:

```
$ mkdir build && cd build
$ cmake -DDLIB_PATH=<path to dlib> ..
$ make
```

Note that the first time you compile the project, ``dlib`` will compile as well.
It takes a few minutes. This won't happen the next times.

To test the library, run:

```text
./head_pose_test ../share/shape_predictor_68_face_landmarks.dat
```

You should get something very similar to the picture above.

Finally, to install the library:

```
$ make install
```

ROS support
-----------

The [ROS](http://www.ros.org/) wrapper provides a convenient node that exposes
each detected face as a TF frame.

Enable the compilation of the ROS wrapper with:

```
cmake -DWITH_ROS=ON
```


================================================
FILE: calib/logitech-c920_640x360.ini
================================================
# Camera intrinsics
# for Logitech c920 webcam

[image]

width
640

height
360

[default]

camera matrix
455.69 0.00000 313.32 
0.00000 455.10 167.62 
0.00000 0.00000 1.00 

distortion
0.16 -0.7 0.0015 -0.0167 1.07 


rectification
1.00000 0.00000 0.00000 
0.00000 1.00000 0.00000 
0.00000 0.00000 1.00000 

projection
455.69 0.00000 313.32 0.0
0.00000 455.10 167.62 0.0
0.00000 0.00000 1.00 0.0


================================================
FILE: launch/attention_tracking.launch
================================================
<launch>

  <arg name="head_tracking_frame" default="head_tracking_camera"/>

  <arg name="device" default="/dev/video0"/>
  <arg name="calibration" default="package://attention_tracker/logitech-c920_640x360.ini"/>

  <node name="webcam_source" pkg="gscam" type="gscam" output="screen">
    <param name="camera_name" value="default"/>
    <param name="camera_info_url" value="$(arg calibration)"/>
    <param name="gscam_config" value="v4l2src device=$(arg device) ! video/x-raw-yuv,width=640,height=360 ! ffmpegcolorspace"/>
    <param name="frame_id" value="$(arg head_tracking_frame)"/>
    <param name="sync_sink" value="true"/>
  </node>


  <node name="tablet_transform" pkg="tf" type="static_transform_publisher" args="0.3 0 0 0 0 0 /base_footprint /tablet 100"/>
  <node name="selection_tablet_transform" pkg="tf" type="static_transform_publisher" args="0.3 -0.3 0 0 0 0 /base_footprint /selection_tablet 100"/>
  <node name="experimenter_transform" pkg="tf" type="static_transform_publisher" args="0.2 -0.6 0.6 0 0 0 /base_footprint /experimenter 100"/>
  <node name="observers_transform" pkg="tf" type="static_transform_publisher" args="-2.7 1 0.7 0 0 0 /base_footprint /observer 100"/>
  <node name="camera_transform" pkg="tf" type="static_transform_publisher" args="0.1 0 0 1.57 0 1. /base_footprint $(arg head_tracking_frame) 100"/>

  <node name="base_footprint_transform" pkg="tf" type="static_transform_publisher" args="0 0 0 0 0 0 /map /base_footprint 100"/>
  <node name="robot_transform" pkg="tf" type="static_transform_publisher" args="0 0 0.5 0 0 0 /base_footprint /robot_head 100"/>

  <node pkg="attention_tracker" type="estimate_focus" name="estimate_focus" />

  <node pkg="attention_tracker" type="estimate" name="head_pose_estimation">
    <param name="face_model" value="$(find attention_tracker)/shape_predictor_68_face_landmarks.dat" />
    <remap from="/image" to="/camera/image_raw"/>
  </node>

  <node pkg="attention_tracker" type="withmeness.py" name="withmeness">
      <param name="targets" value="$(find attention_tracker)/targets.json"/>
  </node>

</launch>


================================================
FILE: package.xml
================================================
<?xml version="1.0"?>
<package>
  <name>attention_tracker</name>
  <version>0.1.0</version>
  <description>Assess in real time the user's attention focus to compute a level of engagement</description>

  <maintainer email="severin.lemaignan@epfl.ch">Séverin Lemaignan</maintainer>
  <license>BSD</license>
  <author>Séverin Lemaignan</author>
  <author>Alexis Jacq</author>
  <author>Fernando Garcia</author>

  <buildtool_depend>catkin</buildtool_depend>
  <build_depend>std_msgs</build_depend>
  <build_depend>tf</build_depend>
  <build_depend>visualization_msgs</build_depend>
  <build_depend>sensor_msgs</build_depend>
  <build_depend>cv_bridge</build_depend>
  <build_depend>image_transport</build_depend>
  <build_depend>image_geometry</build_depend>

  <run_depend>tf</run_depend>

</package>


================================================
FILE: samples/head_pose_estimation_single_frame.cpp
================================================
#ifdef OPENCV3
#include <opencv2/imgcodecs.hpp>
#else
#include <opencv2/highgui/highgui.hpp>
#endif

#include <iostream>

#include "../src/head_pose_estimation.hpp"

using namespace std;
using namespace cv;

const static size_t NB_TESTS = 100; // number of time the detection is run, to get better average detection duration

int main(int argc, char **argv)
{
    Mat frame;

    if(argc < 3) {
        cerr << "Usage: " << endl <<
                "head_pose model.dat frame.{jpg|png}" << endl;
#ifdef HEAD_POSE_ESTIMATION_DEBUG
        cerr <<  "Output: a new frame 'head_pose_<frame>.png'" << endl;
#endif
        return 1;
    }


    auto estimator = HeadPoseEstimation(argv[1]);

    cout << "Estimating head pose on " << argv[2] << endl;
#ifdef OPENCV3
    Mat img = imread(argv[2],IMREAD_COLOR);
#else
    Mat img = imread(argv[2],CV_LOAD_IMAGE_COLOR);
#endif

    estimator.focalLength = 500;


    cout << "Running " << NB_TESTS << " loops to get a good performance estimate..." << endl;
#ifdef HEAD_POSE_ESTIMATION_DEBUG
    cerr <<  "ATTENTION! The benchmark is compiled in DEBUG mode: the performance is no going to be good!!" << endl;
#endif
    auto t_start = getTickCount();

    auto nbfaces = 0;

    for(size_t i = 0; i < NB_TESTS; i++) {
        estimator.update(img);
    }
    auto t_detection = getTickCount();

    for(size_t i = 0; i < NB_TESTS; i++) {
        auto poses = estimator.poses();
        nbfaces += poses.size(); // this is completly artifical: the only purpose is to make sure the compiler does not optimize away estimator.poses()
    }
    auto t_end = getTickCount();

    cout << "Found " << nbfaces/NB_TESTS << " faces" << endl;

    auto poses = estimator.poses();
    for(auto pose : poses) {
    cout << "Head pose: (" << pose(0,3) << ", " << pose(1,3) << ", " << pose(2,3) << ")" << endl;

    }

    cout << "Face feature detection: " <<((t_detection-t_start) / NB_TESTS) /getTickFrequency() * 1000. << "ms;";
    cout << "Pose estimation: " <<((t_end-t_detection) / NB_TESTS) /getTickFrequency() * 1000. << "ms;";
    cout << "Total time: " << ((t_end-t_start) / NB_TESTS) / getTickFrequency() * 1000. << "ms" << endl;

#ifdef HEAD_POSE_ESTIMATION_DEBUG
    imwrite("head_pose.png", estimator._debug);
#endif

}


================================================
FILE: samples/head_pose_estimation_test.cpp
================================================
#include <opencv2/highgui/highgui.hpp>
#include <iostream>

#include "../src/head_pose_estimation.hpp"

using namespace std;
using namespace cv;

int main(int argc, char **argv)
{
    Mat frame;

    namedWindow("headpose");

    if(argc < 2) {
        cerr << "Usage: " << endl <<
                "head_pose model.dat" << endl;
        return 1;
    }


    auto estimator = HeadPoseEstimation(argv[1]);

    // Configure the video capture
    // ===========================

    VideoCapture video_in(0);

    // adjust for your webcam!
    video_in.set(CV_CAP_PROP_FRAME_WIDTH, 640);
    video_in.set(CV_CAP_PROP_FRAME_HEIGHT, 480);
    estimator.focalLength = 500;
    estimator.opticalCenterX = 320;
    estimator.opticalCenterY = 240;

    if(!video_in.isOpened()) {
        cerr << "Couldn't open camera" << endl;
        return 1;
    }


    while(true) {
        video_in >> frame;

        auto t_start = getTickCount();
        estimator.update(frame);

        for(auto pose : estimator.poses()) {

        cout << "Head pose: (" << pose(0,3) << ", " << pose(1,3) << ", " << pose(2,3) << ")" << endl;
        auto t_end = getTickCount();
        cout << "Processing time for this frame: " << (t_end-t_start) / getTickFrequency() * 1000. << "ms" << endl;

        }

#ifdef HEAD_POSE_ESTIMATION_DEBUG
        imshow("headpose", estimator._debug);
        if (waitKey(10) >= 0) break;
#endif

    }
}


================================================
FILE: share/targets.json
================================================
{"WAITING_FOR_WORD": ["_/selection_tablet","_/experimentator"],"RESPONDING_TO_NEW_WORD": ["_/robot_head","_/tablet","_/experimentator"],"WAITING_FOR_LETTER_TO_FINISH": ["_/robot_head","_/tablet","_/experimentator"],"ASKING_FOR_FEEDBACK": "_/robot_head","PUBLISHING_WORD": ["_/robot_head","_/selection_tablet","_/experimentator"],"WAITING_FOR_FEEDBACK": ["_/tablet","_/experimentator","_/selection_tablet"],"WAITING_FOR_ROBOT_TO_CONNECT": ["_/tablet","_/experimentator","_/robot_head"],"WAITING_FOR_TABLET_TO_CONNECT": ["_/tablet","_/experimentator","_/robot_head"],"RESPONDING_TO_DEMONSTRATION_FULL_WORD": ["_/robot_head","_/tablet","_/experimentator"],"RESPONDING_TO_DEMONSTRATION": ["_/robot_head","_/tablet","_/experimentator"]}


================================================
FILE: src/estimate_focus.cpp
================================================
#include <string>
#include <vector>
#include <sstream>

#include <ros/ros.h>
#include <visualization_msgs/Marker.h>
#include <sensor_msgs/Range.h>
#include <std_msgs/ColorRGBA.h>
#include <std_msgs/String.h>
#include <tf/transform_listener.h>

using namespace std;

static const string HUMAN_FRAME_PREFIX = "face_";

static const double FOV = 30. / 180 * M_PI; // radians
static const float RANGE = 3; //m

std::vector<std_msgs::ColorRGBA> colors;
static std_msgs::ColorRGBA GREEN;
static std_msgs::ColorRGBA BLUE;
static std_msgs::ColorRGBA RED;


visualization_msgs::Marker makeMarker(int id, const string& frame, std_msgs::ColorRGBA color) {

    visualization_msgs::Marker marker;
    // Set the frame ID and timestamp.  See the TF tutorials for information on these.
    marker.header.frame_id = frame;
    marker.header.stamp = ros::Time::now();

    // Set the namespace and id for this marker.  This serves to create a unique ID
    // Any marker sent with the same namespace and id will overwrite the old one
    marker.ns = "focus_of_attention";
    marker.id = id;

    marker.type = visualization_msgs::Marker::SPHERE;

    // Set the marker action.  Options are ADD, DELETE, and new in ROS Indigo: 3 (DELETEALL)
    marker.action = visualization_msgs::Marker::ADD;

    // Set the pose of the marker.  This is a full 6DOF pose relative to the frame/time specified in the header
    marker.pose.position.x = 0;
    marker.pose.position.y = 0;
    marker.pose.position.z = 0;
    marker.pose.orientation.x = 0.0;
    marker.pose.orientation.y = 0.0;
    marker.pose.orientation.z = 0.0;
    marker.pose.orientation.w = 1.0;

    marker.scale.x = 0.04;
    marker.scale.y = 0.04;
    marker.scale.z = 0.04;

    marker.color = color;

    marker.lifetime = ros::Duration(0.5);

    return marker;
}

bool isInFieldOfView(const tf::TransformListener& listener, const string& target_frame, const string& observer_frame) {

    tf::StampedTransform transform;

    try{
        listener.lookupTransform(observer_frame, target_frame, ros::Time(0), transform);
    }
    catch (tf::TransformException ex){
        ROS_ERROR("%s",ex.what());
        ros::Duration(1.0).sleep();
    }

    // object behind the observer?
    if (transform.getOrigin().x() < 0) return false;

    // the field of view's main axis is the observer's X axis. So, distance to main axis is
    // simply sqrt(y^2 + z^2)
    double distance_to_main_axis = sqrt(transform.getOrigin().y() * transform.getOrigin().y() + transform.getOrigin().z() * transform.getOrigin().z());

    double fov_radius_at_x = tan(FOV/2) * transform.getOrigin().x();

    //ROS_INFO_STREAM(target_frame << ": distance_to_main_axis: " << distance_to_main_axis << ", fov_radius_at_x: " << fov_radius_at_x);
    if (distance_to_main_axis < fov_radius_at_x) return true;

    return false;

}

int main( int argc, char** argv )
{
    GREEN.r = 0.; GREEN.g = 1.; GREEN.b = 0.; GREEN.a = 1.;
    BLUE.r = 0.; BLUE.g = 0.; BLUE.b = 1.; BLUE.a = 1.;
    RED.r = 1.; RED.g = 0.; RED.b = 0.; RED.a = 1.;
    colors.push_back(GREEN);
    colors.push_back(BLUE);
    colors.push_back(RED);

    ros::init(argc, argv, "estimate_focus");
    ros::NodeHandle n;
    ros::Rate r(30);
    ros::Publisher marker_pub = n.advertise<visualization_msgs::Marker>("estimate_focus", 1);
    ros::Publisher fov_pub = n.advertise<sensor_msgs::Range>("face_0_field_of_view", 1);
    ros::Publisher frames_in_fov_pub = n.advertise<std_msgs::String>("actual_focus_of_attention", 1);

    tf::TransformListener listener;
    vector<string> frames;

    vector<string> monitored_frames = {"/robot_head", "/tablet", "/selection_tablet", "/experimenter"};


    // Prepare a range sensor msg to represent the fields of view
    // (cone for visualization)
    sensor_msgs::Range fov;

    fov.radiation_type = sensor_msgs::Range::INFRARED;
    fov.field_of_view = FOV;
    fov.min_range = 0;
    fov.max_range = 10;
    fov.range = RANGE;

  ROS_INFO("Waiting until a face becomes visible...");
  while (!listener.waitForTransform("base_footprint", "face_0", ros::Time::now(), ros::Duration(5.0))) {
        ROS_DEBUG("Still no face visible...");
        r.sleep();
  }

  ROS_INFO("Face detected! We can start estimating the focus of attention...");

  while (ros::ok())
  {
    
    // Publish the marker
    /*while (marker_pub.getNumSubscribers() < 1 && fov_pub.getNumSubscribers() < 1)
    {
      if (!ros::ok())
      {
        return 0;
      }
      ROS_WARN_ONCE("Please create a subscriber to the marker or field of view");
      sleep(1);
    }*/

    frames.clear();
    listener.getFrameStrings(frames);
    int nb_faces = 0;

    for(auto frame : frames) {
        if(frame.find(HUMAN_FRAME_PREFIX) == 0) {

            nb_faces++;

            int face_idx = stoi(frame.substr(HUMAN_FRAME_PREFIX.length(), 1));

            if (face_idx == 0) {
                
                stringstream ss;
                for(size_t i = 0 ; i < monitored_frames.size(); ++i) {
                    if(isInFieldOfView(listener, monitored_frames[i], frame)) {
                        ROS_DEBUG_STREAM(monitored_frames[i] << " is in the field of view of " << frame);
                        marker_pub.publish(makeMarker(i, monitored_frames[i], colors[i]));
                        // create new marker with special color for the observed frame ? 
                        if (ss.str().empty()) ss << "_";
                        ss << monitored_frames[i];
                        //ss is the frame that the observer is looking
                    }
                }
                frames_in_fov_pub.publish(ss.str());

                fov.range = RANGE;
                fov.header.stamp = ros::Time::now();
                fov.header.frame_id = frame;
                fov_pub.publish(fov); 
            }
        }
    }
    if (nb_faces == 0) {

        // hide the field of view
        fov.range = 0;

        fov.header.stamp = ros::Time::now();
        fov.header.frame_id = "face_0";
        fov_pub.publish(fov);
        
        //stringstream ss;
        //ss << "test";
        //frames_in_fov_pub.publish(ss.str());

    }
    r.sleep();
  }
}


================================================
FILE: src/head_pose_estimation.cpp
================================================
#include <cmath>
#include <ctime>

#include <opencv2/calib3d/calib3d.hpp>

#ifdef HEAD_POSE_ESTIMATION_DEBUG
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>
#endif

#include "head_pose_estimation.hpp"

using namespace dlib;
using namespace std;
using namespace cv;

inline Point2f toCv(const dlib::point& p)
{
    return Point2f(p.x(), p.y());
}


HeadPoseEstimation::HeadPoseEstimation(const string& face_detection_model, float focalLength) :
        focalLength(focalLength),
        opticalCenterX(-1),
        opticalCenterY(-1)
{

        // Load face detection and pose estimation models.
        detector = get_frontal_face_detector();
        deserialize(face_detection_model) >> pose_model;

}


void HeadPoseEstimation::update(cv::InputArray _image)
{

    Mat image = _image.getMat();

    if (opticalCenterX == -1) // not initialized yet
    {
        opticalCenterX = image.cols / 2;
        opticalCenterY = image.rows / 2;
#ifdef HEAD_POSE_ESTIMATION_DEBUG
        cerr << "Setting the optical center to (" << opticalCenterX << ", " << opticalCenterY << ")" << endl;
#endif
    }

    current_image = cv_image<bgr_pixel>(image);

    faces = detector(current_image);

    // Find the pose of each face.
    shapes.clear();
    for (auto face : faces){
        shapes.push_back(pose_model(current_image, face));
    }

#ifdef HEAD_POSE_ESTIMATION_DEBUG
    // Draws the contours of the face and face features onto the image
    
    _debug = image.clone();

    auto color = Scalar(0,128,128);

    for (unsigned long i = 0; i < shapes.size(); ++i)
    {
        const full_object_detection& d = shapes[i];

        for (unsigned long i = 1; i <= 16; ++i)
            line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);

        for (unsigned long i = 28; i <= 30; ++i)
            line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);

        for (unsigned long i = 18; i <= 21; ++i)
            line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);
        for (unsigned long i = 23; i <= 26; ++i)
            line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);
        for (unsigned long i = 31; i <= 35; ++i)
            line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);
        line(_debug, toCv(d.part(30)), toCv(d.part(35)), color, 2, CV_AA);

        for (unsigned long i = 37; i <= 41; ++i)
            line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);
        line(_debug, toCv(d.part(36)), toCv(d.part(41)), color, 2, CV_AA);

        for (unsigned long i = 43; i <= 47; ++i)
            line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);
        line(_debug, toCv(d.part(42)), toCv(d.part(47)), color, 2, CV_AA);

        for (unsigned long i = 49; i <= 59; ++i)
            line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);
        line(_debug, toCv(d.part(48)), toCv(d.part(59)), color, 2, CV_AA);

        for (unsigned long i = 61; i <= 67; ++i)
            line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);
        line(_debug, toCv(d.part(60)), toCv(d.part(67)), color, 2, CV_AA);

        for (auto i = 0; i < 68 ; i++) {
            putText(_debug, to_string(i), toCv(d.part(i)), FONT_HERSHEY_DUPLEX, 0.6, Scalar(255,255,255));
        }
    }
#endif
}

head_pose HeadPoseEstimation::pose(size_t face_idx) const
{

    cv::Mat projectionMat = cv::Mat::zeros(3,3,CV_32F);
    cv::Matx33f projection = projectionMat;
    projection(0,0) = focalLength;
    projection(1,1) = focalLength;
    projection(0,2) = opticalCenterX;
    projection(1,2) = opticalCenterY;
    projection(2,2) = 1;

    std::vector<Point3f> head_points;

    head_points.push_back(P3D_SELLION);
    head_points.push_back(P3D_RIGHT_EYE);
    head_points.push_back(P3D_LEFT_EYE);
    head_points.push_back(P3D_RIGHT_EAR);
    head_points.push_back(P3D_LEFT_EAR);
    head_points.push_back(P3D_MENTON);
    head_points.push_back(P3D_NOSE);
    head_points.push_back(P3D_STOMMION);

    std::vector<Point2f> detected_points;

    detected_points.push_back(coordsOf(face_idx, SELLION));
    detected_points.push_back(coordsOf(face_idx, RIGHT_EYE));
    detected_points.push_back(coordsOf(face_idx, LEFT_EYE));
    detected_points.push_back(coordsOf(face_idx, RIGHT_SIDE));
    detected_points.push_back(coordsOf(face_idx, LEFT_SIDE));
    detected_points.push_back(coordsOf(face_idx, MENTON));
    detected_points.push_back(coordsOf(face_idx, NOSE));

    auto stomion = (coordsOf(face_idx, MOUTH_CENTER_TOP) + coordsOf(face_idx, MOUTH_CENTER_BOTTOM)) * 0.5;
    detected_points.push_back(stomion);

    Mat rvec, tvec;

    // Find the 3D pose of our head
    solvePnP(head_points, detected_points,
            projection, noArray(),
            rvec, tvec, false,
#ifdef OPENCV3
            cv::SOLVEPNP_ITERATIVE);
#else
            cv::ITERATIVE);
#endif

    Matx33d rotation;
    Rodrigues(rvec, rotation);


    head_pose pose = {
        rotation(0,0),    rotation(0,1),    rotation(0,2),    tvec.at<double>(0)/1000,
        rotation(1,0),    rotation(1,1),    rotation(1,2),    tvec.at<double>(1)/1000,
        rotation(2,0),    rotation(2,1),    rotation(2,2),    tvec.at<double>(2)/1000,
                    0,                0,                0,                     1
    };

#ifdef HEAD_POSE_ESTIMATION_DEBUG

    std::vector<Point2f> reprojected_points;

    projectPoints(head_points, rvec, tvec, projection, noArray(), reprojected_points);

    for (auto point : reprojected_points) {
        circle(_debug, point,2, Scalar(0,255,255),2);
    }

    std::vector<Point3f> axes;
    axes.push_back(Point3f(0,0,0));
    axes.push_back(Point3f(50,0,0));
    axes.push_back(Point3f(0,50,0));
    axes.push_back(Point3f(0,0,50));
    std::vector<Point2f> projected_axes;

    projectPoints(axes, rvec, tvec, projection, noArray(), projected_axes);

    line(_debug, projected_axes[0], projected_axes[3], Scalar(255,0,0),2,CV_AA);
    line(_debug, projected_axes[0], projected_axes[2], Scalar(0,255,0),2,CV_AA);
    line(_debug, projected_axes[0], projected_axes[1], Scalar(0,0,255),2,CV_AA);

    putText(_debug, "(" + to_string(int(pose(0,3) * 100)) + "cm, " + to_string(int(pose(1,3) * 100)) + "cm, " + to_string(int(pose(2,3) * 100)) + "cm)", coordsOf(face_idx, SELLION), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,255),2);


#endif

    return pose;
}

std::vector<head_pose> HeadPoseEstimation::poses() const {

    std::vector<head_pose> res;

    for (auto i = 0; i < faces.size(); i++){
        res.push_back(pose(i));
    }

    return res;

}

Point2f HeadPoseEstimation::coordsOf(size_t face_idx, FACIAL_FEATURE feature) const
{
    return toCv(shapes[face_idx].part(feature));
}

// Finds the intersection of two lines, or returns false.
// The lines are defined by (o1, p1) and (o2, p2).
// taken from: http://stackoverflow.com/a/7448287/828379
bool HeadPoseEstimation::intersection(Point2f o1, Point2f p1, Point2f o2, Point2f p2,
                                      Point2f &r) const
{
    Point2f x = o2 - o1;
    Point2f d1 = p1 - o1;
    Point2f d2 = p2 - o2;

    float cross = d1.x*d2.y - d1.y*d2.x;
    if (abs(cross) < /*EPS*/1e-8)
        return false;

    double t1 = (x.x * d2.y - x.y * d2.x)/cross;
    r = o1 + d1 * t1;
    return true;
}


================================================
FILE: src/head_pose_estimation.hpp
================================================
#ifndef __HEAD_POSE_ESTIMATION
#define __HEAD_POSE_ESTIMATION

#include <opencv2/core/core.hpp>
#include <dlib/opencv.h>
#include <dlib/image_processing.h>
#include <dlib/image_processing/frontal_face_detector.h>

#include <vector>
#include <array>
#include <string>

// Anthropometric for male adult
// Relative position of various facial feature relative to sellion
// Values taken from https://en.wikipedia.org/wiki/Human_head
// X points forward
const static cv::Point3f P3D_SELLION(0., 0.,0.);
const static cv::Point3f P3D_RIGHT_EYE(-20., -65.5,-5.);
const static cv::Point3f P3D_LEFT_EYE(-20., 65.5,-5.);
const static cv::Point3f P3D_RIGHT_EAR(-100., -77.5,-6.);
const static cv::Point3f P3D_LEFT_EAR(-100., 77.5,-6.);
const static cv::Point3f P3D_NOSE(21.0, 0., -48.0);
const static cv::Point3f P3D_STOMMION(10.0, 0., -75.0);
const static cv::Point3f P3D_MENTON(0., 0.,-133.0);


static const int MAX_FEATURES_TO_TRACK=100;

// Interesting facial features with their landmark index
enum FACIAL_FEATURE {
    NOSE=30,
    RIGHT_EYE=36,
    LEFT_EYE=45,
    RIGHT_SIDE=0,
    LEFT_SIDE=16,
    EYEBROW_RIGHT=21,
    EYEBROW_LEFT=22,
    MOUTH_UP=51,
    MOUTH_DOWN=57,
    MOUTH_RIGHT=48,
    MOUTH_LEFT=54,
    SELLION=27,
    MOUTH_CENTER_TOP=62,
    MOUTH_CENTER_BOTTOM=66,
    MENTON=8
};


typedef cv::Matx44d head_pose;

class HeadPoseEstimation {

public:

    HeadPoseEstimation(const std::string& face_detection_model = "shape_predictor_68_face_landmarks.dat", float focalLength=455.);

    void update(cv::InputArray image);

    head_pose pose(size_t face_idx) const;

    std::vector<head_pose> poses() const;

    float focalLength;
    float opticalCenterX;
    float opticalCenterY;

#ifdef HEAD_POSE_ESTIMATION_DEBUG
    mutable cv::Mat _debug;
#endif

private:

    dlib::cv_image<dlib::bgr_pixel> current_image;

    dlib::frontal_face_detector detector;
    dlib::shape_predictor pose_model;

    std::vector<dlib::rectangle> faces;

    std::vector<dlib::full_object_detection> shapes;


    /** Return the point corresponding to the dictionary marker.
    */
    cv::Point2f coordsOf(size_t face_idx, FACIAL_FEATURE feature) const;

    /** Returns true if the lines intersect (and set r to the intersection
     *  coordinates), false otherwise.
     */
    bool intersection(cv::Point2f o1, cv::Point2f p1,
                      cv::Point2f o2, cv::Point2f p2,
                      cv::Point2f &r) const;

};

#endif // __HEAD_POSE_ESTIMATION


================================================
FILE: src/head_pose_estimation_ros.cpp
================================================
#include <string>
#include <ros/ros.h>

#include "ros_head_pose_estimator.hpp"

using namespace std;

int main(int argc, char* argv[])
{
    //ROS initialization
    ros::init(argc, argv, "ros_markers");
    ros::NodeHandle rosNode;
    ros::NodeHandle _private_node("~");

    // load parameters
    string modelFilename;
    _private_node.param<string>("face_model", modelFilename, "");

    if (modelFilename.empty()) {
        ROS_ERROR_STREAM("You must provide the face model with the parameter face_model.\n" <<
                         "For instance, _face_model:=shape_predictor_68_face_landmarks.dat");
        return(1);
    }

    // initialize the detector by subscribing to the camera video stream
    ROS_INFO_STREAM("Initializing the face detector with the model " << modelFilename <<"...");
    HeadPoseEstimator estimator(rosNode, modelFilename);
    ROS_INFO("head_pose_estimator is ready. TF frames of detected faces will be published when detected.");
    ros::spin();

    return 0;
}


================================================
FILE: src/ros_head_pose_estimator.cpp
================================================
#include "ros_head_pose_estimator.hpp"

using namespace std;
using namespace cv;

// how many second in the *future* the face transformation should be published?
// this allow to compensate for the 'slowness' of face detection, but introduce
// some lag in TF.
#define TRANSFORM_FUTURE_DATING 0

HeadPoseEstimator::HeadPoseEstimator(ros::NodeHandle& rosNode,
                                     const string& modelFilename) :
            rosNode(rosNode),
            it(rosNode),
            warnUncalibratedImage(true),
            estimator(modelFilename)

{
    sub = it.subscribeCamera("image", 1, &HeadPoseEstimator::detectFaces, this);

    nb_detected_faces_pub = rosNode.advertise<std_msgs::Char>("nb_detected_faces", 1);

#ifdef HEAD_POSE_ESTIMATION_DEBUG
    pub = it.advertise("attention_tracker/faces/image",1);
#endif
}

void HeadPoseEstimator::detectFaces(const sensor_msgs::ImageConstPtr& msg, 
                                    const sensor_msgs::CameraInfoConstPtr& camerainfo)
{
    // updating the camera model is cheap if not modified
    cameramodel.fromCameraInfo(camerainfo);
    // publishing uncalibrated images? -> return (according to CameraInfo message documentation,
    // K[0] == 0.0 <=> uncalibrated).
    if(cameramodel.intrinsicMatrix()(0,0) == 0.0) {
        if(warnUncalibratedImage) {
            warnUncalibratedImage = false;
            ROS_ERROR("Camera publishes uncalibrated images. Can not estimate face position.");
            ROS_WARN("Detection will start over again when camera info is available.");
        }
        return;
    }
    warnUncalibratedImage = true;
    
    estimator.focalLength = cameramodel.fx(); 
    estimator.opticalCenterX = cameramodel.cx();
    estimator.opticalCenterY = cameramodel.cy();

    // hopefully no copy here:
    //  - assignement operator of cv::Mat does not copy the data
    //  - toCvShare does no copy if the default (source) encoding is used.
    inputImage = cv_bridge::toCvShare(msg)->image; 

    /********************************************************************
    *                      Faces detection                           *
    ********************************************************************/

    estimator.update(inputImage);

    auto poses = estimator.poses();
    ROS_INFO_STREAM(poses.size() << " faces detected.");

    nb_detected_faces_pub.publish(poses.size());

    for(size_t face_idx = 0; face_idx < poses.size(); ++face_idx) {

        auto trans = poses[face_idx];

        tf::Transform face_pose;
        
        // Frame orientation of the camera follows the classical camera
        // convention (Z forward)

        auto z = -trans(2,3);

        if (z < 0) continue; // the head can not be *behind* the camera!

        face_pose.setOrigin( tf::Vector3( trans(0,3),
                                          trans(1,3),
                                          z) );

        tf::Quaternion qrot;
        tf::Matrix3x3 mrot(
                trans(0,0), trans(0,1), trans(0,2),
                trans(1,0), trans(1,1), trans(1,2),
                trans(2,0), trans(2,1), trans(2,2));
        mrot.getRotation(qrot);
        face_pose.setRotation(qrot);

        br.sendTransform(
                tf::StampedTransform(face_pose, 
                                     ros::Time::now() + ros::Duration(TRANSFORM_FUTURE_DATING), 
                                     cameramodel.tfFrame(),
                                     "face_" + to_string(face_idx)));

    }

#ifdef HEAD_POSE_ESTIMATION_DEBUG
    if(pub.getNumSubscribers() > 0) {
        ROS_INFO_ONCE("Starting to publish face tracking output for debug");
        auto debugmsg = cv_bridge::CvImage(msg->header, "bgr8", estimator._debug).toImageMsg();
        pub.publish(debugmsg);
    }
#endif
}


================================================
FILE: src/ros_head_pose_estimator.hpp
================================================
#include <string>
#include <set>

#include "head_pose_estimation.hpp"

// opencv2
#ifdef OPENCV3
#include <opencv2/core.hpp>
#else
#include <opencv2/core/core.hpp>
#endif

// ROS
#include <ros/ros.h>
#include <std_msgs/Char.h>
#include <tf/transform_broadcaster.h>
#include <image_transport/image_transport.h>

#include <image_geometry/pinhole_camera_model.h>

#include <cv_bridge/cv_bridge.h>


class HeadPoseEstimator
{
public:

    HeadPoseEstimator(ros::NodeHandle& rosNode,
                      const std::string& modelFilename = "");

private:

    ros::NodeHandle& rosNode;
    image_transport::ImageTransport it;
    image_transport::CameraSubscriber sub;
    image_transport::Publisher pub;

    ros::Publisher nb_detected_faces_pub;

    tf::TransformBroadcaster br;
    tf::Transform transform;

    image_geometry::PinholeCameraModel cameramodel;
    cv::Mat cameraMatrix, distCoeffs;

    cv::Mat inputImage;
    HeadPoseEstimation estimator;

    bool warnUncalibratedImage;

    void detectFaces(const sensor_msgs::ImageConstPtr& msg,
                     const sensor_msgs::CameraInfoConstPtr& camerainfo);
};


================================================
FILE: src/withmeness.py
================================================
#!/usr/bin/env python
#coding: utf-8

import sys
import time
import rospy
import json
from std_msgs.msg import String, Empty, Float64

pub_withmeness = rospy.Publisher('withmeness_topic', Float64, queue_size=1)

"""
task_targets = {
"WAITING_FOR_WORD": {"_/selection_tablet","_/experimentator"},
"RESPONDING_TO_NEW_WORD": {"_/robot_head","_/tablet","_/experimentator"},
"WAITING_FOR_LETTER_TO_FINISH": {"_/robot_head","_/tablet","_/experimentator"},
"ASKING_FOR_FEEDBACK": {"_/robot_head"},
"PUBLISHING_WORD": {"_/robot_head","_/selection_tablet","_/experimentator"},
"WAITING_FOR_FEEDBACK": {"_/tablet","_/experimentator","_/selection_tablet"},
"WAITING_FOR_ROBOT_TO_CONNECT": {"_/tablet","_/experimentator","_/robot_head"},
"WAITING_FOR_TABLET_TO_CONNECT": {"_/tablet","_/experimentator","_/robot_head"},
"RESPONDING_TO_DEMONSTRATION_FULL_WORD": {"_/robot_head","_/tablet","_/experimentator"},
"RESPONDING_TO_DEMONSTRATION": {"_/robot_head","_/tablet","_/experimentator"},
 }"""


current_task = "WAITING_FOR_WORD"
current_target = "_"
value = 0.5
mu = 0.1

def onChangeTask(msg):
    global current_task
    current_task = (str)(msg.data)

def onChangeTarget(msg):
    global current_target
    current_target = (str)(msg.data)

if __name__=='__main__':

    global withmeness_value

    rospy.init_node("withmeness")

    test = rospy.search_param("targets")
    target_list = rospy.get_param(test)
    with open(target_list) as target_list_json:
        task_targets = json.load(target_list_json)

    while(True):
        # get current task:
        rospy.Subscriber("state_activity", String, onChangeTask)

        # get current target:
        rospy.Subscriber("actual_focus_of_attention", String, onChangeTarget)

        # compute EMA of online-with_me_ness:
        if current_task in task_targets:
            if current_target in task_targets[current_task]:
                value = (1-mu)*value + mu
            else:
                if current_target!="" and current_target!="_":
                    value = (1-mu)*value

        msg = Float64()
        msg.data = value
        pub_withmeness.publish(msg)

        rospy.sleep(1.0)

    rospy.spin()