[
  {
    "path": ".gitignore",
    "content": "build*/\n"
  },
  {
    "path": "CMakeLists.txt",
    "content": "cmake_minimum_required(VERSION 2.8.3)\nproject(attention_tracker)\n\nset(CMAKE_POSITION_INDEPENDENT_CODE True)\nadd_definitions(-std=c++11)\n\nset(DLIB_PATH \"\" CACHE PATH \"Path to DLIB\")\ninclude(${DLIB_PATH}/dlib/cmake)\n\noption(DEBUG \"Enable debug visualizations\" ON)\noption(WITH_TESTS \"Compile sample test application\" ON)\noption(WITH_ROS \"Build ROS nodes -- Requires OpenCV2!\" OFF)\n\ninclude_directories(${OpenCV_INCLUDE_DIRS})\n\nif(WITH_ROS)\n\n    find_package(catkin REQUIRED COMPONENTS \n        roscpp \n        tf\n        std_msgs\n        visualization_msgs\n        sensor_msgs\n        cv_bridge\n        image_transport\n        image_geometry\n        )\n\n    include_directories(${catkin_INCLUDE_DIRS})\n\n    catkin_package(\n    CATKIN_DEPENDS \n        tf\n    DEPENDS OpenCV\n    LIBRARIES \n    )\nendif()\n\nif(DEBUG)\n    find_package(OpenCV COMPONENTS core imgproc calib3d highgui REQUIRED)\nelse()\n    find_package(OpenCV COMPONENTS core imgproc calib3d REQUIRED)\nendif()\n\nmessage(STATUS \"OpenCV version: ${OpenCV_VERSION}\")\nif(${OpenCV_VERSION} VERSION_GREATER 2.9.0)\n    set(OPENCV3 TRUE)\n    add_definitions(-DOPENCV3)\nendif()\n\nif(DEBUG)\n    add_definitions(-DHEAD_POSE_ESTIMATION_DEBUG)\nendif()\n\nadd_library(head_pose_estimation SHARED src/head_pose_estimation.cpp)\ntarget_link_libraries(head_pose_estimation dlib ${OpenCV_LIBRARIES})\n\nif(WITH_ROS)\n\n    add_executable(estimate_focus src/estimate_focus.cpp)\n    target_link_libraries(estimate_focus ${catkin_LIBRARIES} ${OpenCV_LIBRARIES})\n\n    add_executable(estimate src/head_pose_estimation_ros.cpp src/ros_head_pose_estimator.cpp)\n    target_link_libraries(estimate head_pose_estimation ${catkin_LIBRARIES})\n\n    install(TARGETS estimate_focus head_pose_estimation estimate\n    ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}\n    LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}\n    RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}\n    )\n\n    install(PROGRAMS src/withmeness.py\n    DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}\n    )\n\n    install(FILES\n        launch/attention_tracking.launch\n        calib/logitech-c920_640x360.ini\n        share/shape_predictor_68_face_landmarks.dat\n        share/targets.json\n        DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}\n    )\nendif()\n\nif(WITH_TESTS)\n    if(OPENCV3)\n        if (DEBUG)\n            find_package(OpenCV COMPONENTS core imgproc calib3d highgui imgcodecs videoio REQUIRED)\n        else()\n            find_package(OpenCV COMPONENTS core imgproc calib3d imgcodecs videoio REQUIRED)\n        endif()\n    else()\n        find_package(OpenCV COMPONENTS core imgproc calib3d highgui REQUIRED)\n    endif()\n\n    add_executable(head_pose_single_frame samples/head_pose_estimation_single_frame.cpp)\n    target_link_libraries(head_pose_single_frame head_pose_estimation ${OpenCV_LIBRARIES})\n\n    if(DEBUG)\n\t    add_executable(head_pose_test samples/head_pose_estimation_test.cpp)\n\t    target_link_libraries(head_pose_test head_pose_estimation ${OpenCV_LIBRARIES})\n    endif()\n\nendif()\n"
  },
  {
    "path": "README.md",
    "content": "\n⚠️️ **Attention**: This library is currently not maintained. Please use [the gazr fork](https://github.com/severin-lemaignan/gazr) instead. ⚠️️\n\n\nAttention Tracker\n=================\n\n\n![Face tracking for head pose estimation](doc/screenshot.jpg)\n\n\nHead pose estimation\n--------------------\n\nThis library (`libhead_pose_estimation.so`) performs 3D head pose estimation\nbased on the fantastic [dlib](http://dlib.net/) face detector and a bit of\n[OpenCV's\nsolvePnP](http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html#solvepnp) magic (it uses [adult male anthropometric](https://github.com/chili-epfl/attention-tracker/blob/5dcef870c96892d80ca17959528efba0b2d0ce1c/src/head_pose_estimation.hpp#L12) data to match a real 3D head to the projected image).\n\nThe library returns a 4x4 transformation matrix.\n\nIt supports detection and tracking of multiple faces at the same time, and runs\non-line, but it *does not* feature face identification.\n\nInstallation\n------------\n\n**Note**: The library has only been tested on Linux. We can only provide limited\nsupport for other operating systems!\n\n### Pre-requisites\n\nDlib: You need to [download](http://dlib.net/) and extract ``Dlib`` somewhere. This\napplication has been tested with ``dlib-18.16``.\n\nOpenCV: You need to install [OpenCV](http://opencv.org/). If you're using Ubuntu, you could run:\n\n```text\nsudo apt-get install libopencv-dev\n```\n\n### Installation\n\nThe library uses a standard ``CMake`` workflow:\n\n```\n$ mkdir build && cd build\n$ cmake -DDLIB_PATH=<path to dlib> ..\n$ make\n```\n\nNote that the first time you compile the project, ``dlib`` will compile as well.\nIt takes a few minutes. This won't happen the next times.\n\nTo test the library, run:\n\n```text\n./head_pose_test ../share/shape_predictor_68_face_landmarks.dat\n```\n\nYou should get something very similar to the picture above.\n\nFinally, to install the library:\n\n```\n$ make install\n```\n\nROS support\n-----------\n\nThe [ROS](http://www.ros.org/) wrapper provides a convenient node that exposes\neach detected face as a TF frame.\n\nEnable the compilation of the ROS wrapper with:\n\n```\ncmake -DWITH_ROS=ON\n```\n\n"
  },
  {
    "path": "calib/logitech-c920_640x360.ini",
    "content": "# Camera intrinsics\n# for Logitech c920 webcam\n\n[image]\n\nwidth\n640\n\nheight\n360\n\n[default]\n\ncamera matrix\n455.69 0.00000 313.32 \n0.00000 455.10 167.62 \n0.00000 0.00000 1.00 \n\ndistortion\n0.16 -0.7 0.0015 -0.0167 1.07 \n\n\nrectification\n1.00000 0.00000 0.00000 \n0.00000 1.00000 0.00000 \n0.00000 0.00000 1.00000 \n\nprojection\n455.69 0.00000 313.32 0.0\n0.00000 455.10 167.62 0.0\n0.00000 0.00000 1.00 0.0\n\n"
  },
  {
    "path": "launch/attention_tracking.launch",
    "content": "<launch>\n\n  <arg name=\"head_tracking_frame\" default=\"head_tracking_camera\"/>\n\n  <arg name=\"device\" default=\"/dev/video0\"/>\n  <arg name=\"calibration\" default=\"package://attention_tracker/logitech-c920_640x360.ini\"/>\n\n  <node name=\"webcam_source\" pkg=\"gscam\" type=\"gscam\" output=\"screen\">\n    <param name=\"camera_name\" value=\"default\"/>\n    <param name=\"camera_info_url\" value=\"$(arg calibration)\"/>\n    <param name=\"gscam_config\" value=\"v4l2src device=$(arg device) ! video/x-raw-yuv,width=640,height=360 ! ffmpegcolorspace\"/>\n    <param name=\"frame_id\" value=\"$(arg head_tracking_frame)\"/>\n    <param name=\"sync_sink\" value=\"true\"/>\n  </node>\n\n\n\n  <node name=\"tablet_transform\" pkg=\"tf\" type=\"static_transform_publisher\" args=\"0.3 0 0 0 0 0 /base_footprint /tablet 100\"/>\n  <node name=\"selection_tablet_transform\" pkg=\"tf\" type=\"static_transform_publisher\" args=\"0.3 -0.3 0 0 0 0 /base_footprint /selection_tablet 100\"/>\n  <node name=\"experimenter_transform\" pkg=\"tf\" type=\"static_transform_publisher\" args=\"0.2 -0.6 0.6 0 0 0 /base_footprint /experimenter 100\"/>\n  <node name=\"observers_transform\" pkg=\"tf\" type=\"static_transform_publisher\" args=\"-2.7 1 0.7 0 0 0 /base_footprint /observer 100\"/>\n  <node name=\"camera_transform\" pkg=\"tf\" type=\"static_transform_publisher\" args=\"0.1 0 0 1.57 0 1. /base_footprint $(arg head_tracking_frame) 100\"/>\n\n  <node name=\"base_footprint_transform\" pkg=\"tf\" type=\"static_transform_publisher\" args=\"0 0 0 0 0 0 /map /base_footprint 100\"/>\n  <node name=\"robot_transform\" pkg=\"tf\" type=\"static_transform_publisher\" args=\"0 0 0.5 0 0 0 /base_footprint /robot_head 100\"/>\n\n  <node pkg=\"attention_tracker\" type=\"estimate_focus\" name=\"estimate_focus\" />\n\n  <node pkg=\"attention_tracker\" type=\"estimate\" name=\"head_pose_estimation\">\n    <param name=\"face_model\" value=\"$(find attention_tracker)/shape_predictor_68_face_landmarks.dat\" />\n    <remap from=\"/image\" to=\"/camera/image_raw\"/>\n  </node>\n\n  <node pkg=\"attention_tracker\" type=\"withmeness.py\" name=\"withmeness\">\n      <param name=\"targets\" value=\"$(find attention_tracker)/targets.json\"/>\n  </node>\n\n</launch>\n"
  },
  {
    "path": "package.xml",
    "content": "<?xml version=\"1.0\"?>\n<package>\n  <name>attention_tracker</name>\n  <version>0.1.0</version>\n  <description>Assess in real time the user's attention focus to compute a level of engagement</description>\n\n  <maintainer email=\"severin.lemaignan@epfl.ch\">Séverin Lemaignan</maintainer>\n  <license>BSD</license>\n  <author>Séverin Lemaignan</author>\n  <author>Alexis Jacq</author>\n  <author>Fernando Garcia</author>\n\n  <buildtool_depend>catkin</buildtool_depend>\n  <build_depend>std_msgs</build_depend>\n  <build_depend>tf</build_depend>\n  <build_depend>visualization_msgs</build_depend>\n  <build_depend>sensor_msgs</build_depend>\n  <build_depend>cv_bridge</build_depend>\n  <build_depend>image_transport</build_depend>\n  <build_depend>image_geometry</build_depend>\n\n  <run_depend>tf</run_depend>\n\n</package>\n"
  },
  {
    "path": "samples/head_pose_estimation_single_frame.cpp",
    "content": "#ifdef OPENCV3\n#include <opencv2/imgcodecs.hpp>\n#else\n#include <opencv2/highgui/highgui.hpp>\n#endif\n\n#include <iostream>\n\n#include \"../src/head_pose_estimation.hpp\"\n\nusing namespace std;\nusing namespace cv;\n\nconst static size_t NB_TESTS = 100; // number of time the detection is run, to get better average detection duration\n\nint main(int argc, char **argv)\n{\n    Mat frame;\n\n    if(argc < 3) {\n        cerr << \"Usage: \" << endl <<\n                \"head_pose model.dat frame.{jpg|png}\" << endl;\n#ifdef HEAD_POSE_ESTIMATION_DEBUG\n        cerr <<  \"Output: a new frame 'head_pose_<frame>.png'\" << endl;\n#endif\n        return 1;\n    }\n\n\n    auto estimator = HeadPoseEstimation(argv[1]);\n\n    cout << \"Estimating head pose on \" << argv[2] << endl;\n#ifdef OPENCV3\n    Mat img = imread(argv[2],IMREAD_COLOR);\n#else\n    Mat img = imread(argv[2],CV_LOAD_IMAGE_COLOR);\n#endif\n\n    estimator.focalLength = 500;\n\n\n    cout << \"Running \" << NB_TESTS << \" loops to get a good performance estimate...\" << endl;\n#ifdef HEAD_POSE_ESTIMATION_DEBUG\n    cerr <<  \"ATTENTION! The benchmark is compiled in DEBUG mode: the performance is no going to be good!!\" << endl;\n#endif\n    auto t_start = getTickCount();\n\n    auto nbfaces = 0;\n\n    for(size_t i = 0; i < NB_TESTS; i++) {\n        estimator.update(img);\n    }\n    auto t_detection = getTickCount();\n\n    for(size_t i = 0; i < NB_TESTS; i++) {\n        auto poses = estimator.poses();\n        nbfaces += poses.size(); // this is completly artifical: the only purpose is to make sure the compiler does not optimize away estimator.poses()\n    }\n    auto t_end = getTickCount();\n\n    cout << \"Found \" << nbfaces/NB_TESTS << \" faces\" << endl;\n\n    auto poses = estimator.poses();\n    for(auto pose : poses) {\n    cout << \"Head pose: (\" << pose(0,3) << \", \" << pose(1,3) << \", \" << pose(2,3) << \")\" << endl;\n\n    }\n\n    cout << \"Face feature detection: \" <<((t_detection-t_start) / NB_TESTS) /getTickFrequency() * 1000. << \"ms;\";\n    cout << \"Pose estimation: \" <<((t_end-t_detection) / NB_TESTS) /getTickFrequency() * 1000. << \"ms;\";\n    cout << \"Total time: \" << ((t_end-t_start) / NB_TESTS) / getTickFrequency() * 1000. << \"ms\" << endl;\n\n#ifdef HEAD_POSE_ESTIMATION_DEBUG\n    imwrite(\"head_pose.png\", estimator._debug);\n#endif\n\n}\n\n\n"
  },
  {
    "path": "samples/head_pose_estimation_test.cpp",
    "content": "#include <opencv2/highgui/highgui.hpp>\n#include <iostream>\n\n#include \"../src/head_pose_estimation.hpp\"\n\nusing namespace std;\nusing namespace cv;\n\nint main(int argc, char **argv)\n{\n    Mat frame;\n\n    namedWindow(\"headpose\");\n\n    if(argc < 2) {\n        cerr << \"Usage: \" << endl <<\n                \"head_pose model.dat\" << endl;\n        return 1;\n    }\n\n\n    auto estimator = HeadPoseEstimation(argv[1]);\n\n    // Configure the video capture\n    // ===========================\n\n    VideoCapture video_in(0);\n\n    // adjust for your webcam!\n    video_in.set(CV_CAP_PROP_FRAME_WIDTH, 640);\n    video_in.set(CV_CAP_PROP_FRAME_HEIGHT, 480);\n    estimator.focalLength = 500;\n    estimator.opticalCenterX = 320;\n    estimator.opticalCenterY = 240;\n\n    if(!video_in.isOpened()) {\n        cerr << \"Couldn't open camera\" << endl;\n        return 1;\n    }\n\n\n    while(true) {\n        video_in >> frame;\n\n        auto t_start = getTickCount();\n        estimator.update(frame);\n\n        for(auto pose : estimator.poses()) {\n\n        cout << \"Head pose: (\" << pose(0,3) << \", \" << pose(1,3) << \", \" << pose(2,3) << \")\" << endl;\n        auto t_end = getTickCount();\n        cout << \"Processing time for this frame: \" << (t_end-t_start) / getTickFrequency() * 1000. << \"ms\" << endl;\n\n        }\n\n#ifdef HEAD_POSE_ESTIMATION_DEBUG\n        imshow(\"headpose\", estimator._debug);\n        if (waitKey(10) >= 0) break;\n#endif\n\n    }\n}\n\n\n\n"
  },
  {
    "path": "share/targets.json",
    "content": "{\"WAITING_FOR_WORD\": [\"_/selection_tablet\",\"_/experimentator\"],\"RESPONDING_TO_NEW_WORD\": [\"_/robot_head\",\"_/tablet\",\"_/experimentator\"],\"WAITING_FOR_LETTER_TO_FINISH\": [\"_/robot_head\",\"_/tablet\",\"_/experimentator\"],\"ASKING_FOR_FEEDBACK\": \"_/robot_head\",\"PUBLISHING_WORD\": [\"_/robot_head\",\"_/selection_tablet\",\"_/experimentator\"],\"WAITING_FOR_FEEDBACK\": [\"_/tablet\",\"_/experimentator\",\"_/selection_tablet\"],\"WAITING_FOR_ROBOT_TO_CONNECT\": [\"_/tablet\",\"_/experimentator\",\"_/robot_head\"],\"WAITING_FOR_TABLET_TO_CONNECT\": [\"_/tablet\",\"_/experimentator\",\"_/robot_head\"],\"RESPONDING_TO_DEMONSTRATION_FULL_WORD\": [\"_/robot_head\",\"_/tablet\",\"_/experimentator\"],\"RESPONDING_TO_DEMONSTRATION\": [\"_/robot_head\",\"_/tablet\",\"_/experimentator\"]}\n"
  },
  {
    "path": "src/estimate_focus.cpp",
    "content": "#include <string>\n#include <vector>\n#include <sstream>\n\n#include <ros/ros.h>\n#include <visualization_msgs/Marker.h>\n#include <sensor_msgs/Range.h>\n#include <std_msgs/ColorRGBA.h>\n#include <std_msgs/String.h>\n#include <tf/transform_listener.h>\n\nusing namespace std;\n\nstatic const string HUMAN_FRAME_PREFIX = \"face_\";\n\nstatic const double FOV = 30. / 180 * M_PI; // radians\nstatic const float RANGE = 3; //m\n\nstd::vector<std_msgs::ColorRGBA> colors;\nstatic std_msgs::ColorRGBA GREEN;\nstatic std_msgs::ColorRGBA BLUE;\nstatic std_msgs::ColorRGBA RED;\n\n\nvisualization_msgs::Marker makeMarker(int id, const string& frame, std_msgs::ColorRGBA color) {\n\n    visualization_msgs::Marker marker;\n    // Set the frame ID and timestamp.  See the TF tutorials for information on these.\n    marker.header.frame_id = frame;\n    marker.header.stamp = ros::Time::now();\n\n    // Set the namespace and id for this marker.  This serves to create a unique ID\n    // Any marker sent with the same namespace and id will overwrite the old one\n    marker.ns = \"focus_of_attention\";\n    marker.id = id;\n\n    marker.type = visualization_msgs::Marker::SPHERE;\n\n    // Set the marker action.  Options are ADD, DELETE, and new in ROS Indigo: 3 (DELETEALL)\n    marker.action = visualization_msgs::Marker::ADD;\n\n    // Set the pose of the marker.  This is a full 6DOF pose relative to the frame/time specified in the header\n    marker.pose.position.x = 0;\n    marker.pose.position.y = 0;\n    marker.pose.position.z = 0;\n    marker.pose.orientation.x = 0.0;\n    marker.pose.orientation.y = 0.0;\n    marker.pose.orientation.z = 0.0;\n    marker.pose.orientation.w = 1.0;\n\n    marker.scale.x = 0.04;\n    marker.scale.y = 0.04;\n    marker.scale.z = 0.04;\n\n    marker.color = color;\n\n    marker.lifetime = ros::Duration(0.5);\n\n    return marker;\n}\n\nbool isInFieldOfView(const tf::TransformListener& listener, const string& target_frame, const string& observer_frame) {\n\n    tf::StampedTransform transform;\n\n    try{\n        listener.lookupTransform(observer_frame, target_frame, ros::Time(0), transform);\n    }\n    catch (tf::TransformException ex){\n        ROS_ERROR(\"%s\",ex.what());\n        ros::Duration(1.0).sleep();\n    }\n\n    // object behind the observer?\n    if (transform.getOrigin().x() < 0) return false;\n\n    // the field of view's main axis is the observer's X axis. So, distance to main axis is\n    // simply sqrt(y^2 + z^2)\n    double distance_to_main_axis = sqrt(transform.getOrigin().y() * transform.getOrigin().y() + transform.getOrigin().z() * transform.getOrigin().z());\n\n    double fov_radius_at_x = tan(FOV/2) * transform.getOrigin().x();\n\n    //ROS_INFO_STREAM(target_frame << \": distance_to_main_axis: \" << distance_to_main_axis << \", fov_radius_at_x: \" << fov_radius_at_x);\n    if (distance_to_main_axis < fov_radius_at_x) return true;\n\n    return false;\n\n}\n\nint main( int argc, char** argv )\n{\n    GREEN.r = 0.; GREEN.g = 1.; GREEN.b = 0.; GREEN.a = 1.;\n    BLUE.r = 0.; BLUE.g = 0.; BLUE.b = 1.; BLUE.a = 1.;\n    RED.r = 1.; RED.g = 0.; RED.b = 0.; RED.a = 1.;\n    colors.push_back(GREEN);\n    colors.push_back(BLUE);\n    colors.push_back(RED);\n\n    ros::init(argc, argv, \"estimate_focus\");\n    ros::NodeHandle n;\n    ros::Rate r(30);\n    ros::Publisher marker_pub = n.advertise<visualization_msgs::Marker>(\"estimate_focus\", 1);\n    ros::Publisher fov_pub = n.advertise<sensor_msgs::Range>(\"face_0_field_of_view\", 1);\n    ros::Publisher frames_in_fov_pub = n.advertise<std_msgs::String>(\"actual_focus_of_attention\", 1);\n\n    tf::TransformListener listener;\n    vector<string> frames;\n\n    vector<string> monitored_frames = {\"/robot_head\", \"/tablet\", \"/selection_tablet\", \"/experimenter\"};\n\n\n    // Prepare a range sensor msg to represent the fields of view\n    // (cone for visualization)\n    sensor_msgs::Range fov;\n\n    fov.radiation_type = sensor_msgs::Range::INFRARED;\n    fov.field_of_view = FOV;\n    fov.min_range = 0;\n    fov.max_range = 10;\n    fov.range = RANGE;\n\n  ROS_INFO(\"Waiting until a face becomes visible...\");\n  while (!listener.waitForTransform(\"base_footprint\", \"face_0\", ros::Time::now(), ros::Duration(5.0))) {\n        ROS_DEBUG(\"Still no face visible...\");\n        r.sleep();\n  }\n\n  ROS_INFO(\"Face detected! We can start estimating the focus of attention...\");\n\n  while (ros::ok())\n  {\n    \n    // Publish the marker\n    /*while (marker_pub.getNumSubscribers() < 1 && fov_pub.getNumSubscribers() < 1)\n    {\n      if (!ros::ok())\n      {\n        return 0;\n      }\n      ROS_WARN_ONCE(\"Please create a subscriber to the marker or field of view\");\n      sleep(1);\n    }*/\n\n    frames.clear();\n    listener.getFrameStrings(frames);\n    int nb_faces = 0;\n\n    for(auto frame : frames) {\n        if(frame.find(HUMAN_FRAME_PREFIX) == 0) {\n\n            nb_faces++;\n\n            int face_idx = stoi(frame.substr(HUMAN_FRAME_PREFIX.length(), 1));\n\n            if (face_idx == 0) {\n                \n                stringstream ss;\n                for(size_t i = 0 ; i < monitored_frames.size(); ++i) {\n                    if(isInFieldOfView(listener, monitored_frames[i], frame)) {\n                        ROS_DEBUG_STREAM(monitored_frames[i] << \" is in the field of view of \" << frame);\n                        marker_pub.publish(makeMarker(i, monitored_frames[i], colors[i]));\n                        // create new marker with special color for the observed frame ? \n                        if (ss.str().empty()) ss << \"_\";\n                        ss << monitored_frames[i];\n                        //ss is the frame that the observer is looking\n                    }\n                }\n                frames_in_fov_pub.publish(ss.str());\n\n                fov.range = RANGE;\n                fov.header.stamp = ros::Time::now();\n                fov.header.frame_id = frame;\n                fov_pub.publish(fov); \n            }\n        }\n    }\n    if (nb_faces == 0) {\n\n        // hide the field of view\n        fov.range = 0;\n\n        fov.header.stamp = ros::Time::now();\n        fov.header.frame_id = \"face_0\";\n        fov_pub.publish(fov);\n        \n        //stringstream ss;\n        //ss << \"test\";\n        //frames_in_fov_pub.publish(ss.str());\n\n    }\n    r.sleep();\n  }\n}\n"
  },
  {
    "path": "src/head_pose_estimation.cpp",
    "content": "#include <cmath>\n#include <ctime>\n\n#include <opencv2/calib3d/calib3d.hpp>\n\n#ifdef HEAD_POSE_ESTIMATION_DEBUG\n#include <opencv2/imgproc/imgproc.hpp>\n#include <iostream>\n#endif\n\n#include \"head_pose_estimation.hpp\"\n\nusing namespace dlib;\nusing namespace std;\nusing namespace cv;\n\ninline Point2f toCv(const dlib::point& p)\n{\n    return Point2f(p.x(), p.y());\n}\n\n\nHeadPoseEstimation::HeadPoseEstimation(const string& face_detection_model, float focalLength) :\n        focalLength(focalLength),\n        opticalCenterX(-1),\n        opticalCenterY(-1)\n{\n\n        // Load face detection and pose estimation models.\n        detector = get_frontal_face_detector();\n        deserialize(face_detection_model) >> pose_model;\n\n}\n\n\nvoid HeadPoseEstimation::update(cv::InputArray _image)\n{\n\n    Mat image = _image.getMat();\n\n    if (opticalCenterX == -1) // not initialized yet\n    {\n        opticalCenterX = image.cols / 2;\n        opticalCenterY = image.rows / 2;\n#ifdef HEAD_POSE_ESTIMATION_DEBUG\n        cerr << \"Setting the optical center to (\" << opticalCenterX << \", \" << opticalCenterY << \")\" << endl;\n#endif\n    }\n\n    current_image = cv_image<bgr_pixel>(image);\n\n    faces = detector(current_image);\n\n    // Find the pose of each face.\n    shapes.clear();\n    for (auto face : faces){\n        shapes.push_back(pose_model(current_image, face));\n    }\n\n#ifdef HEAD_POSE_ESTIMATION_DEBUG\n    // Draws the contours of the face and face features onto the image\n    \n    _debug = image.clone();\n\n    auto color = Scalar(0,128,128);\n\n    for (unsigned long i = 0; i < shapes.size(); ++i)\n    {\n        const full_object_detection& d = shapes[i];\n\n        for (unsigned long i = 1; i <= 16; ++i)\n            line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);\n\n        for (unsigned long i = 28; i <= 30; ++i)\n            line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);\n\n        for (unsigned long i = 18; i <= 21; ++i)\n            line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);\n        for (unsigned long i = 23; i <= 26; ++i)\n            line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);\n        for (unsigned long i = 31; i <= 35; ++i)\n            line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);\n        line(_debug, toCv(d.part(30)), toCv(d.part(35)), color, 2, CV_AA);\n\n        for (unsigned long i = 37; i <= 41; ++i)\n            line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);\n        line(_debug, toCv(d.part(36)), toCv(d.part(41)), color, 2, CV_AA);\n\n        for (unsigned long i = 43; i <= 47; ++i)\n            line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);\n        line(_debug, toCv(d.part(42)), toCv(d.part(47)), color, 2, CV_AA);\n\n        for (unsigned long i = 49; i <= 59; ++i)\n            line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);\n        line(_debug, toCv(d.part(48)), toCv(d.part(59)), color, 2, CV_AA);\n\n        for (unsigned long i = 61; i <= 67; ++i)\n            line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);\n        line(_debug, toCv(d.part(60)), toCv(d.part(67)), color, 2, CV_AA);\n\n        for (auto i = 0; i < 68 ; i++) {\n            putText(_debug, to_string(i), toCv(d.part(i)), FONT_HERSHEY_DUPLEX, 0.6, Scalar(255,255,255));\n        }\n    }\n#endif\n}\n\nhead_pose HeadPoseEstimation::pose(size_t face_idx) const\n{\n\n    cv::Mat projectionMat = cv::Mat::zeros(3,3,CV_32F);\n    cv::Matx33f projection = projectionMat;\n    projection(0,0) = focalLength;\n    projection(1,1) = focalLength;\n    projection(0,2) = opticalCenterX;\n    projection(1,2) = opticalCenterY;\n    projection(2,2) = 1;\n\n    std::vector<Point3f> head_points;\n\n    head_points.push_back(P3D_SELLION);\n    head_points.push_back(P3D_RIGHT_EYE);\n    head_points.push_back(P3D_LEFT_EYE);\n    head_points.push_back(P3D_RIGHT_EAR);\n    head_points.push_back(P3D_LEFT_EAR);\n    head_points.push_back(P3D_MENTON);\n    head_points.push_back(P3D_NOSE);\n    head_points.push_back(P3D_STOMMION);\n\n    std::vector<Point2f> detected_points;\n\n    detected_points.push_back(coordsOf(face_idx, SELLION));\n    detected_points.push_back(coordsOf(face_idx, RIGHT_EYE));\n    detected_points.push_back(coordsOf(face_idx, LEFT_EYE));\n    detected_points.push_back(coordsOf(face_idx, RIGHT_SIDE));\n    detected_points.push_back(coordsOf(face_idx, LEFT_SIDE));\n    detected_points.push_back(coordsOf(face_idx, MENTON));\n    detected_points.push_back(coordsOf(face_idx, NOSE));\n\n    auto stomion = (coordsOf(face_idx, MOUTH_CENTER_TOP) + coordsOf(face_idx, MOUTH_CENTER_BOTTOM)) * 0.5;\n    detected_points.push_back(stomion);\n\n    Mat rvec, tvec;\n\n    // Find the 3D pose of our head\n    solvePnP(head_points, detected_points,\n            projection, noArray(),\n            rvec, tvec, false,\n#ifdef OPENCV3\n            cv::SOLVEPNP_ITERATIVE);\n#else\n            cv::ITERATIVE);\n#endif\n\n    Matx33d rotation;\n    Rodrigues(rvec, rotation);\n\n\n    head_pose pose = {\n        rotation(0,0),    rotation(0,1),    rotation(0,2),    tvec.at<double>(0)/1000,\n        rotation(1,0),    rotation(1,1),    rotation(1,2),    tvec.at<double>(1)/1000,\n        rotation(2,0),    rotation(2,1),    rotation(2,2),    tvec.at<double>(2)/1000,\n                    0,                0,                0,                     1\n    };\n\n#ifdef HEAD_POSE_ESTIMATION_DEBUG\n\n    std::vector<Point2f> reprojected_points;\n\n    projectPoints(head_points, rvec, tvec, projection, noArray(), reprojected_points);\n\n    for (auto point : reprojected_points) {\n        circle(_debug, point,2, Scalar(0,255,255),2);\n    }\n\n    std::vector<Point3f> axes;\n    axes.push_back(Point3f(0,0,0));\n    axes.push_back(Point3f(50,0,0));\n    axes.push_back(Point3f(0,50,0));\n    axes.push_back(Point3f(0,0,50));\n    std::vector<Point2f> projected_axes;\n\n    projectPoints(axes, rvec, tvec, projection, noArray(), projected_axes);\n\n    line(_debug, projected_axes[0], projected_axes[3], Scalar(255,0,0),2,CV_AA);\n    line(_debug, projected_axes[0], projected_axes[2], Scalar(0,255,0),2,CV_AA);\n    line(_debug, projected_axes[0], projected_axes[1], Scalar(0,0,255),2,CV_AA);\n\n    putText(_debug, \"(\" + to_string(int(pose(0,3) * 100)) + \"cm, \" + to_string(int(pose(1,3) * 100)) + \"cm, \" + to_string(int(pose(2,3) * 100)) + \"cm)\", coordsOf(face_idx, SELLION), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,255),2);\n\n\n#endif\n\n    return pose;\n}\n\nstd::vector<head_pose> HeadPoseEstimation::poses() const {\n\n    std::vector<head_pose> res;\n\n    for (auto i = 0; i < faces.size(); i++){\n        res.push_back(pose(i));\n    }\n\n    return res;\n\n}\n\nPoint2f HeadPoseEstimation::coordsOf(size_t face_idx, FACIAL_FEATURE feature) const\n{\n    return toCv(shapes[face_idx].part(feature));\n}\n\n// Finds the intersection of two lines, or returns false.\n// The lines are defined by (o1, p1) and (o2, p2).\n// taken from: http://stackoverflow.com/a/7448287/828379\nbool HeadPoseEstimation::intersection(Point2f o1, Point2f p1, Point2f o2, Point2f p2,\n                                      Point2f &r) const\n{\n    Point2f x = o2 - o1;\n    Point2f d1 = p1 - o1;\n    Point2f d2 = p2 - o2;\n\n    float cross = d1.x*d2.y - d1.y*d2.x;\n    if (abs(cross) < /*EPS*/1e-8)\n        return false;\n\n    double t1 = (x.x * d2.y - x.y * d2.x)/cross;\n    r = o1 + d1 * t1;\n    return true;\n}\n\n"
  },
  {
    "path": "src/head_pose_estimation.hpp",
    "content": "#ifndef __HEAD_POSE_ESTIMATION\n#define __HEAD_POSE_ESTIMATION\n\n#include <opencv2/core/core.hpp>\n#include <dlib/opencv.h>\n#include <dlib/image_processing.h>\n#include <dlib/image_processing/frontal_face_detector.h>\n\n#include <vector>\n#include <array>\n#include <string>\n\n// Anthropometric for male adult\n// Relative position of various facial feature relative to sellion\n// Values taken from https://en.wikipedia.org/wiki/Human_head\n// X points forward\nconst static cv::Point3f P3D_SELLION(0., 0.,0.);\nconst static cv::Point3f P3D_RIGHT_EYE(-20., -65.5,-5.);\nconst static cv::Point3f P3D_LEFT_EYE(-20., 65.5,-5.);\nconst static cv::Point3f P3D_RIGHT_EAR(-100., -77.5,-6.);\nconst static cv::Point3f P3D_LEFT_EAR(-100., 77.5,-6.);\nconst static cv::Point3f P3D_NOSE(21.0, 0., -48.0);\nconst static cv::Point3f P3D_STOMMION(10.0, 0., -75.0);\nconst static cv::Point3f P3D_MENTON(0., 0.,-133.0);\n\n\n\nstatic const int MAX_FEATURES_TO_TRACK=100;\n\n// Interesting facial features with their landmark index\nenum FACIAL_FEATURE {\n    NOSE=30,\n    RIGHT_EYE=36,\n    LEFT_EYE=45,\n    RIGHT_SIDE=0,\n    LEFT_SIDE=16,\n    EYEBROW_RIGHT=21,\n    EYEBROW_LEFT=22,\n    MOUTH_UP=51,\n    MOUTH_DOWN=57,\n    MOUTH_RIGHT=48,\n    MOUTH_LEFT=54,\n    SELLION=27,\n    MOUTH_CENTER_TOP=62,\n    MOUTH_CENTER_BOTTOM=66,\n    MENTON=8\n};\n\n\ntypedef cv::Matx44d head_pose;\n\nclass HeadPoseEstimation {\n\npublic:\n\n    HeadPoseEstimation(const std::string& face_detection_model = \"shape_predictor_68_face_landmarks.dat\", float focalLength=455.);\n\n    void update(cv::InputArray image);\n\n    head_pose pose(size_t face_idx) const;\n\n    std::vector<head_pose> poses() const;\n\n    float focalLength;\n    float opticalCenterX;\n    float opticalCenterY;\n\n#ifdef HEAD_POSE_ESTIMATION_DEBUG\n    mutable cv::Mat _debug;\n#endif\n\nprivate:\n\n    dlib::cv_image<dlib::bgr_pixel> current_image;\n\n    dlib::frontal_face_detector detector;\n    dlib::shape_predictor pose_model;\n\n    std::vector<dlib::rectangle> faces;\n\n    std::vector<dlib::full_object_detection> shapes;\n\n\n    /** Return the point corresponding to the dictionary marker.\n    */\n    cv::Point2f coordsOf(size_t face_idx, FACIAL_FEATURE feature) const;\n\n    /** Returns true if the lines intersect (and set r to the intersection\n     *  coordinates), false otherwise.\n     */\n    bool intersection(cv::Point2f o1, cv::Point2f p1,\n                      cv::Point2f o2, cv::Point2f p2,\n                      cv::Point2f &r) const;\n\n};\n\n#endif // __HEAD_POSE_ESTIMATION\n"
  },
  {
    "path": "src/head_pose_estimation_ros.cpp",
    "content": "#include <string>\n#include <ros/ros.h>\n\n#include \"ros_head_pose_estimator.hpp\"\n\nusing namespace std;\n\nint main(int argc, char* argv[])\n{\n    //ROS initialization\n    ros::init(argc, argv, \"ros_markers\");\n    ros::NodeHandle rosNode;\n    ros::NodeHandle _private_node(\"~\");\n\n    // load parameters\n    string modelFilename;\n    _private_node.param<string>(\"face_model\", modelFilename, \"\");\n\n    if (modelFilename.empty()) {\n        ROS_ERROR_STREAM(\"You must provide the face model with the parameter face_model.\\n\" <<\n                         \"For instance, _face_model:=shape_predictor_68_face_landmarks.dat\");\n        return(1);\n    }\n\n    // initialize the detector by subscribing to the camera video stream\n    ROS_INFO_STREAM(\"Initializing the face detector with the model \" << modelFilename <<\"...\");\n    HeadPoseEstimator estimator(rosNode, modelFilename);\n    ROS_INFO(\"head_pose_estimator is ready. TF frames of detected faces will be published when detected.\");\n    ros::spin();\n\n    return 0;\n}\n\n"
  },
  {
    "path": "src/ros_head_pose_estimator.cpp",
    "content": "#include \"ros_head_pose_estimator.hpp\"\n\nusing namespace std;\nusing namespace cv;\n\n// how many second in the *future* the face transformation should be published?\n// this allow to compensate for the 'slowness' of face detection, but introduce\n// some lag in TF.\n#define TRANSFORM_FUTURE_DATING 0\n\nHeadPoseEstimator::HeadPoseEstimator(ros::NodeHandle& rosNode,\n                                     const string& modelFilename) :\n            rosNode(rosNode),\n            it(rosNode),\n            warnUncalibratedImage(true),\n            estimator(modelFilename)\n\n{\n    sub = it.subscribeCamera(\"image\", 1, &HeadPoseEstimator::detectFaces, this);\n\n    nb_detected_faces_pub = rosNode.advertise<std_msgs::Char>(\"nb_detected_faces\", 1);\n\n#ifdef HEAD_POSE_ESTIMATION_DEBUG\n    pub = it.advertise(\"attention_tracker/faces/image\",1);\n#endif\n}\n\nvoid HeadPoseEstimator::detectFaces(const sensor_msgs::ImageConstPtr& msg, \n                                    const sensor_msgs::CameraInfoConstPtr& camerainfo)\n{\n    // updating the camera model is cheap if not modified\n    cameramodel.fromCameraInfo(camerainfo);\n    // publishing uncalibrated images? -> return (according to CameraInfo message documentation,\n    // K[0] == 0.0 <=> uncalibrated).\n    if(cameramodel.intrinsicMatrix()(0,0) == 0.0) {\n        if(warnUncalibratedImage) {\n            warnUncalibratedImage = false;\n            ROS_ERROR(\"Camera publishes uncalibrated images. Can not estimate face position.\");\n            ROS_WARN(\"Detection will start over again when camera info is available.\");\n        }\n        return;\n    }\n    warnUncalibratedImage = true;\n    \n    estimator.focalLength = cameramodel.fx(); \n    estimator.opticalCenterX = cameramodel.cx();\n    estimator.opticalCenterY = cameramodel.cy();\n\n    // hopefully no copy here:\n    //  - assignement operator of cv::Mat does not copy the data\n    //  - toCvShare does no copy if the default (source) encoding is used.\n    inputImage = cv_bridge::toCvShare(msg)->image; \n\n    /********************************************************************\n    *                      Faces detection                           *\n    ********************************************************************/\n\n    estimator.update(inputImage);\n\n    auto poses = estimator.poses();\n    ROS_INFO_STREAM(poses.size() << \" faces detected.\");\n\n    nb_detected_faces_pub.publish(poses.size());\n\n    for(size_t face_idx = 0; face_idx < poses.size(); ++face_idx) {\n\n        auto trans = poses[face_idx];\n\n        tf::Transform face_pose;\n        \n        // Frame orientation of the camera follows the classical camera\n        // convention (Z forward)\n\n        auto z = -trans(2,3);\n\n        if (z < 0) continue; // the head can not be *behind* the camera!\n\n        face_pose.setOrigin( tf::Vector3( trans(0,3),\n                                          trans(1,3),\n                                          z) );\n\n        tf::Quaternion qrot;\n        tf::Matrix3x3 mrot(\n                trans(0,0), trans(0,1), trans(0,2),\n                trans(1,0), trans(1,1), trans(1,2),\n                trans(2,0), trans(2,1), trans(2,2));\n        mrot.getRotation(qrot);\n        face_pose.setRotation(qrot);\n\n        br.sendTransform(\n                tf::StampedTransform(face_pose, \n                                     ros::Time::now() + ros::Duration(TRANSFORM_FUTURE_DATING), \n                                     cameramodel.tfFrame(),\n                                     \"face_\" + to_string(face_idx)));\n\n    }\n\n#ifdef HEAD_POSE_ESTIMATION_DEBUG\n    if(pub.getNumSubscribers() > 0) {\n        ROS_INFO_ONCE(\"Starting to publish face tracking output for debug\");\n        auto debugmsg = cv_bridge::CvImage(msg->header, \"bgr8\", estimator._debug).toImageMsg();\n        pub.publish(debugmsg);\n    }\n#endif\n}\n\n"
  },
  {
    "path": "src/ros_head_pose_estimator.hpp",
    "content": "#include <string>\n#include <set>\n\n#include \"head_pose_estimation.hpp\"\n\n// opencv2\n#ifdef OPENCV3\n#include <opencv2/core.hpp>\n#else\n#include <opencv2/core/core.hpp>\n#endif\n\n// ROS\n#include <ros/ros.h>\n#include <std_msgs/Char.h>\n#include <tf/transform_broadcaster.h>\n#include <image_transport/image_transport.h>\n\n#include <image_geometry/pinhole_camera_model.h>\n\n#include <cv_bridge/cv_bridge.h>\n\n\nclass HeadPoseEstimator\n{\npublic:\n\n    HeadPoseEstimator(ros::NodeHandle& rosNode,\n                      const std::string& modelFilename = \"\");\n\nprivate:\n\n    ros::NodeHandle& rosNode;\n    image_transport::ImageTransport it;\n    image_transport::CameraSubscriber sub;\n    image_transport::Publisher pub;\n\n    ros::Publisher nb_detected_faces_pub;\n\n    tf::TransformBroadcaster br;\n    tf::Transform transform;\n\n    image_geometry::PinholeCameraModel cameramodel;\n    cv::Mat cameraMatrix, distCoeffs;\n\n    cv::Mat inputImage;\n    HeadPoseEstimation estimator;\n\n    bool warnUncalibratedImage;\n\n    void detectFaces(const sensor_msgs::ImageConstPtr& msg,\n                     const sensor_msgs::CameraInfoConstPtr& camerainfo);\n};\n\n"
  },
  {
    "path": "src/withmeness.py",
    "content": "#!/usr/bin/env python\n#coding: utf-8\n\nimport sys\nimport time\nimport rospy\nimport json\nfrom std_msgs.msg import String, Empty, Float64\n\npub_withmeness = rospy.Publisher('withmeness_topic', Float64, queue_size=1)\n\n\"\"\"\ntask_targets = {\n\"WAITING_FOR_WORD\": {\"_/selection_tablet\",\"_/experimentator\"},\n\"RESPONDING_TO_NEW_WORD\": {\"_/robot_head\",\"_/tablet\",\"_/experimentator\"},\n\"WAITING_FOR_LETTER_TO_FINISH\": {\"_/robot_head\",\"_/tablet\",\"_/experimentator\"},\n\"ASKING_FOR_FEEDBACK\": {\"_/robot_head\"},\n\"PUBLISHING_WORD\": {\"_/robot_head\",\"_/selection_tablet\",\"_/experimentator\"},\n\"WAITING_FOR_FEEDBACK\": {\"_/tablet\",\"_/experimentator\",\"_/selection_tablet\"},\n\"WAITING_FOR_ROBOT_TO_CONNECT\": {\"_/tablet\",\"_/experimentator\",\"_/robot_head\"},\n\"WAITING_FOR_TABLET_TO_CONNECT\": {\"_/tablet\",\"_/experimentator\",\"_/robot_head\"},\n\"RESPONDING_TO_DEMONSTRATION_FULL_WORD\": {\"_/robot_head\",\"_/tablet\",\"_/experimentator\"},\n\"RESPONDING_TO_DEMONSTRATION\": {\"_/robot_head\",\"_/tablet\",\"_/experimentator\"},\n }\"\"\"\n\n\n\ncurrent_task = \"WAITING_FOR_WORD\"\ncurrent_target = \"_\"\nvalue = 0.5\nmu = 0.1\n\ndef onChangeTask(msg):\n    global current_task\n    current_task = (str)(msg.data)\n\ndef onChangeTarget(msg):\n    global current_target\n    current_target = (str)(msg.data)\n\nif __name__=='__main__':\n\n    global withmeness_value\n\n    rospy.init_node(\"withmeness\")\n\n    test = rospy.search_param(\"targets\")\n    target_list = rospy.get_param(test)\n    with open(target_list) as target_list_json:\n        task_targets = json.load(target_list_json)\n\n    while(True):\n        # get current task:\n        rospy.Subscriber(\"state_activity\", String, onChangeTask)\n\n        # get current target:\n        rospy.Subscriber(\"actual_focus_of_attention\", String, onChangeTarget)\n\n        # compute EMA of online-with_me_ness:\n        if current_task in task_targets:\n            if current_target in task_targets[current_task]:\n                value = (1-mu)*value + mu\n            else:\n                if current_target!=\"\" and current_target!=\"_\":\n                    value = (1-mu)*value\n\n        msg = Float64()\n        msg.data = value\n        pub_withmeness.publish(msg)\n\n        rospy.sleep(1.0)\n\n    rospy.spin()\n        \n\n\n"
  }
]