Repository: chili-epfl/attention-tracker
Branch: master
Commit: 3ecf4639c731
Files: 16
Total size: 36.1 KB
Directory structure:
gitextract_zm7yli8l/
├── .gitignore
├── CMakeLists.txt
├── README.md
├── calib/
│ └── logitech-c920_640x360.ini
├── launch/
│ └── attention_tracking.launch
├── package.xml
├── samples/
│ ├── head_pose_estimation_single_frame.cpp
│ └── head_pose_estimation_test.cpp
├── share/
│ └── targets.json
└── src/
├── estimate_focus.cpp
├── head_pose_estimation.cpp
├── head_pose_estimation.hpp
├── head_pose_estimation_ros.cpp
├── ros_head_pose_estimator.cpp
├── ros_head_pose_estimator.hpp
└── withmeness.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
build*/
================================================
FILE: CMakeLists.txt
================================================
cmake_minimum_required(VERSION 2.8.3)
project(attention_tracker)
set(CMAKE_POSITION_INDEPENDENT_CODE True)
add_definitions(-std=c++11)
set(DLIB_PATH "" CACHE PATH "Path to DLIB")
include(${DLIB_PATH}/dlib/cmake)
option(DEBUG "Enable debug visualizations" ON)
option(WITH_TESTS "Compile sample test application" ON)
option(WITH_ROS "Build ROS nodes -- Requires OpenCV2!" OFF)
include_directories(${OpenCV_INCLUDE_DIRS})
if(WITH_ROS)
find_package(catkin REQUIRED COMPONENTS
roscpp
tf
std_msgs
visualization_msgs
sensor_msgs
cv_bridge
image_transport
image_geometry
)
include_directories(${catkin_INCLUDE_DIRS})
catkin_package(
CATKIN_DEPENDS
tf
DEPENDS OpenCV
LIBRARIES
)
endif()
if(DEBUG)
find_package(OpenCV COMPONENTS core imgproc calib3d highgui REQUIRED)
else()
find_package(OpenCV COMPONENTS core imgproc calib3d REQUIRED)
endif()
message(STATUS "OpenCV version: ${OpenCV_VERSION}")
if(${OpenCV_VERSION} VERSION_GREATER 2.9.0)
set(OPENCV3 TRUE)
add_definitions(-DOPENCV3)
endif()
if(DEBUG)
add_definitions(-DHEAD_POSE_ESTIMATION_DEBUG)
endif()
add_library(head_pose_estimation SHARED src/head_pose_estimation.cpp)
target_link_libraries(head_pose_estimation dlib ${OpenCV_LIBRARIES})
if(WITH_ROS)
add_executable(estimate_focus src/estimate_focus.cpp)
target_link_libraries(estimate_focus ${catkin_LIBRARIES} ${OpenCV_LIBRARIES})
add_executable(estimate src/head_pose_estimation_ros.cpp src/ros_head_pose_estimator.cpp)
target_link_libraries(estimate head_pose_estimation ${catkin_LIBRARIES})
install(TARGETS estimate_focus head_pose_estimation estimate
ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
)
install(PROGRAMS src/withmeness.py
DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
)
install(FILES
launch/attention_tracking.launch
calib/logitech-c920_640x360.ini
share/shape_predictor_68_face_landmarks.dat
share/targets.json
DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
)
endif()
if(WITH_TESTS)
if(OPENCV3)
if (DEBUG)
find_package(OpenCV COMPONENTS core imgproc calib3d highgui imgcodecs videoio REQUIRED)
else()
find_package(OpenCV COMPONENTS core imgproc calib3d imgcodecs videoio REQUIRED)
endif()
else()
find_package(OpenCV COMPONENTS core imgproc calib3d highgui REQUIRED)
endif()
add_executable(head_pose_single_frame samples/head_pose_estimation_single_frame.cpp)
target_link_libraries(head_pose_single_frame head_pose_estimation ${OpenCV_LIBRARIES})
if(DEBUG)
add_executable(head_pose_test samples/head_pose_estimation_test.cpp)
target_link_libraries(head_pose_test head_pose_estimation ${OpenCV_LIBRARIES})
endif()
endif()
================================================
FILE: README.md
================================================
⚠️️ **Attention**: This library is currently not maintained. Please use [the gazr fork](https://github.com/severin-lemaignan/gazr) instead. ⚠️️
Attention Tracker
=================

Head pose estimation
--------------------
This library (`libhead_pose_estimation.so`) performs 3D head pose estimation
based on the fantastic [dlib](http://dlib.net/) face detector and a bit of
[OpenCV's
solvePnP](http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html#solvepnp) magic (it uses [adult male anthropometric](https://github.com/chili-epfl/attention-tracker/blob/5dcef870c96892d80ca17959528efba0b2d0ce1c/src/head_pose_estimation.hpp#L12) data to match a real 3D head to the projected image).
The library returns a 4x4 transformation matrix.
It supports detection and tracking of multiple faces at the same time, and runs
on-line, but it *does not* feature face identification.
Installation
------------
**Note**: The library has only been tested on Linux. We can only provide limited
support for other operating systems!
### Pre-requisites
Dlib: You need to [download](http://dlib.net/) and extract ``Dlib`` somewhere. This
application has been tested with ``dlib-18.16``.
OpenCV: You need to install [OpenCV](http://opencv.org/). If you're using Ubuntu, you could run:
```text
sudo apt-get install libopencv-dev
```
### Installation
The library uses a standard ``CMake`` workflow:
```
$ mkdir build && cd build
$ cmake -DDLIB_PATH=<path to dlib> ..
$ make
```
Note that the first time you compile the project, ``dlib`` will compile as well.
It takes a few minutes. This won't happen the next times.
To test the library, run:
```text
./head_pose_test ../share/shape_predictor_68_face_landmarks.dat
```
You should get something very similar to the picture above.
Finally, to install the library:
```
$ make install
```
ROS support
-----------
The [ROS](http://www.ros.org/) wrapper provides a convenient node that exposes
each detected face as a TF frame.
Enable the compilation of the ROS wrapper with:
```
cmake -DWITH_ROS=ON
```
================================================
FILE: calib/logitech-c920_640x360.ini
================================================
# Camera intrinsics
# for Logitech c920 webcam
[image]
width
640
height
360
[default]
camera matrix
455.69 0.00000 313.32
0.00000 455.10 167.62
0.00000 0.00000 1.00
distortion
0.16 -0.7 0.0015 -0.0167 1.07
rectification
1.00000 0.00000 0.00000
0.00000 1.00000 0.00000
0.00000 0.00000 1.00000
projection
455.69 0.00000 313.32 0.0
0.00000 455.10 167.62 0.0
0.00000 0.00000 1.00 0.0
================================================
FILE: launch/attention_tracking.launch
================================================
<launch>
<arg name="head_tracking_frame" default="head_tracking_camera"/>
<arg name="device" default="/dev/video0"/>
<arg name="calibration" default="package://attention_tracker/logitech-c920_640x360.ini"/>
<node name="webcam_source" pkg="gscam" type="gscam" output="screen">
<param name="camera_name" value="default"/>
<param name="camera_info_url" value="$(arg calibration)"/>
<param name="gscam_config" value="v4l2src device=$(arg device) ! video/x-raw-yuv,width=640,height=360 ! ffmpegcolorspace"/>
<param name="frame_id" value="$(arg head_tracking_frame)"/>
<param name="sync_sink" value="true"/>
</node>
<node name="tablet_transform" pkg="tf" type="static_transform_publisher" args="0.3 0 0 0 0 0 /base_footprint /tablet 100"/>
<node name="selection_tablet_transform" pkg="tf" type="static_transform_publisher" args="0.3 -0.3 0 0 0 0 /base_footprint /selection_tablet 100"/>
<node name="experimenter_transform" pkg="tf" type="static_transform_publisher" args="0.2 -0.6 0.6 0 0 0 /base_footprint /experimenter 100"/>
<node name="observers_transform" pkg="tf" type="static_transform_publisher" args="-2.7 1 0.7 0 0 0 /base_footprint /observer 100"/>
<node name="camera_transform" pkg="tf" type="static_transform_publisher" args="0.1 0 0 1.57 0 1. /base_footprint $(arg head_tracking_frame) 100"/>
<node name="base_footprint_transform" pkg="tf" type="static_transform_publisher" args="0 0 0 0 0 0 /map /base_footprint 100"/>
<node name="robot_transform" pkg="tf" type="static_transform_publisher" args="0 0 0.5 0 0 0 /base_footprint /robot_head 100"/>
<node pkg="attention_tracker" type="estimate_focus" name="estimate_focus" />
<node pkg="attention_tracker" type="estimate" name="head_pose_estimation">
<param name="face_model" value="$(find attention_tracker)/shape_predictor_68_face_landmarks.dat" />
<remap from="/image" to="/camera/image_raw"/>
</node>
<node pkg="attention_tracker" type="withmeness.py" name="withmeness">
<param name="targets" value="$(find attention_tracker)/targets.json"/>
</node>
</launch>
================================================
FILE: package.xml
================================================
<?xml version="1.0"?>
<package>
<name>attention_tracker</name>
<version>0.1.0</version>
<description>Assess in real time the user's attention focus to compute a level of engagement</description>
<maintainer email="severin.lemaignan@epfl.ch">Séverin Lemaignan</maintainer>
<license>BSD</license>
<author>Séverin Lemaignan</author>
<author>Alexis Jacq</author>
<author>Fernando Garcia</author>
<buildtool_depend>catkin</buildtool_depend>
<build_depend>std_msgs</build_depend>
<build_depend>tf</build_depend>
<build_depend>visualization_msgs</build_depend>
<build_depend>sensor_msgs</build_depend>
<build_depend>cv_bridge</build_depend>
<build_depend>image_transport</build_depend>
<build_depend>image_geometry</build_depend>
<run_depend>tf</run_depend>
</package>
================================================
FILE: samples/head_pose_estimation_single_frame.cpp
================================================
#ifdef OPENCV3
#include <opencv2/imgcodecs.hpp>
#else
#include <opencv2/highgui/highgui.hpp>
#endif
#include <iostream>
#include "../src/head_pose_estimation.hpp"
using namespace std;
using namespace cv;
const static size_t NB_TESTS = 100; // number of time the detection is run, to get better average detection duration
int main(int argc, char **argv)
{
Mat frame;
if(argc < 3) {
cerr << "Usage: " << endl <<
"head_pose model.dat frame.{jpg|png}" << endl;
#ifdef HEAD_POSE_ESTIMATION_DEBUG
cerr << "Output: a new frame 'head_pose_<frame>.png'" << endl;
#endif
return 1;
}
auto estimator = HeadPoseEstimation(argv[1]);
cout << "Estimating head pose on " << argv[2] << endl;
#ifdef OPENCV3
Mat img = imread(argv[2],IMREAD_COLOR);
#else
Mat img = imread(argv[2],CV_LOAD_IMAGE_COLOR);
#endif
estimator.focalLength = 500;
cout << "Running " << NB_TESTS << " loops to get a good performance estimate..." << endl;
#ifdef HEAD_POSE_ESTIMATION_DEBUG
cerr << "ATTENTION! The benchmark is compiled in DEBUG mode: the performance is no going to be good!!" << endl;
#endif
auto t_start = getTickCount();
auto nbfaces = 0;
for(size_t i = 0; i < NB_TESTS; i++) {
estimator.update(img);
}
auto t_detection = getTickCount();
for(size_t i = 0; i < NB_TESTS; i++) {
auto poses = estimator.poses();
nbfaces += poses.size(); // this is completly artifical: the only purpose is to make sure the compiler does not optimize away estimator.poses()
}
auto t_end = getTickCount();
cout << "Found " << nbfaces/NB_TESTS << " faces" << endl;
auto poses = estimator.poses();
for(auto pose : poses) {
cout << "Head pose: (" << pose(0,3) << ", " << pose(1,3) << ", " << pose(2,3) << ")" << endl;
}
cout << "Face feature detection: " <<((t_detection-t_start) / NB_TESTS) /getTickFrequency() * 1000. << "ms;";
cout << "Pose estimation: " <<((t_end-t_detection) / NB_TESTS) /getTickFrequency() * 1000. << "ms;";
cout << "Total time: " << ((t_end-t_start) / NB_TESTS) / getTickFrequency() * 1000. << "ms" << endl;
#ifdef HEAD_POSE_ESTIMATION_DEBUG
imwrite("head_pose.png", estimator._debug);
#endif
}
================================================
FILE: samples/head_pose_estimation_test.cpp
================================================
#include <opencv2/highgui/highgui.hpp>
#include <iostream>
#include "../src/head_pose_estimation.hpp"
using namespace std;
using namespace cv;
int main(int argc, char **argv)
{
Mat frame;
namedWindow("headpose");
if(argc < 2) {
cerr << "Usage: " << endl <<
"head_pose model.dat" << endl;
return 1;
}
auto estimator = HeadPoseEstimation(argv[1]);
// Configure the video capture
// ===========================
VideoCapture video_in(0);
// adjust for your webcam!
video_in.set(CV_CAP_PROP_FRAME_WIDTH, 640);
video_in.set(CV_CAP_PROP_FRAME_HEIGHT, 480);
estimator.focalLength = 500;
estimator.opticalCenterX = 320;
estimator.opticalCenterY = 240;
if(!video_in.isOpened()) {
cerr << "Couldn't open camera" << endl;
return 1;
}
while(true) {
video_in >> frame;
auto t_start = getTickCount();
estimator.update(frame);
for(auto pose : estimator.poses()) {
cout << "Head pose: (" << pose(0,3) << ", " << pose(1,3) << ", " << pose(2,3) << ")" << endl;
auto t_end = getTickCount();
cout << "Processing time for this frame: " << (t_end-t_start) / getTickFrequency() * 1000. << "ms" << endl;
}
#ifdef HEAD_POSE_ESTIMATION_DEBUG
imshow("headpose", estimator._debug);
if (waitKey(10) >= 0) break;
#endif
}
}
================================================
FILE: share/targets.json
================================================
{"WAITING_FOR_WORD": ["_/selection_tablet","_/experimentator"],"RESPONDING_TO_NEW_WORD": ["_/robot_head","_/tablet","_/experimentator"],"WAITING_FOR_LETTER_TO_FINISH": ["_/robot_head","_/tablet","_/experimentator"],"ASKING_FOR_FEEDBACK": "_/robot_head","PUBLISHING_WORD": ["_/robot_head","_/selection_tablet","_/experimentator"],"WAITING_FOR_FEEDBACK": ["_/tablet","_/experimentator","_/selection_tablet"],"WAITING_FOR_ROBOT_TO_CONNECT": ["_/tablet","_/experimentator","_/robot_head"],"WAITING_FOR_TABLET_TO_CONNECT": ["_/tablet","_/experimentator","_/robot_head"],"RESPONDING_TO_DEMONSTRATION_FULL_WORD": ["_/robot_head","_/tablet","_/experimentator"],"RESPONDING_TO_DEMONSTRATION": ["_/robot_head","_/tablet","_/experimentator"]}
================================================
FILE: src/estimate_focus.cpp
================================================
#include <string>
#include <vector>
#include <sstream>
#include <ros/ros.h>
#include <visualization_msgs/Marker.h>
#include <sensor_msgs/Range.h>
#include <std_msgs/ColorRGBA.h>
#include <std_msgs/String.h>
#include <tf/transform_listener.h>
using namespace std;
static const string HUMAN_FRAME_PREFIX = "face_";
static const double FOV = 30. / 180 * M_PI; // radians
static const float RANGE = 3; //m
std::vector<std_msgs::ColorRGBA> colors;
static std_msgs::ColorRGBA GREEN;
static std_msgs::ColorRGBA BLUE;
static std_msgs::ColorRGBA RED;
visualization_msgs::Marker makeMarker(int id, const string& frame, std_msgs::ColorRGBA color) {
visualization_msgs::Marker marker;
// Set the frame ID and timestamp. See the TF tutorials for information on these.
marker.header.frame_id = frame;
marker.header.stamp = ros::Time::now();
// Set the namespace and id for this marker. This serves to create a unique ID
// Any marker sent with the same namespace and id will overwrite the old one
marker.ns = "focus_of_attention";
marker.id = id;
marker.type = visualization_msgs::Marker::SPHERE;
// Set the marker action. Options are ADD, DELETE, and new in ROS Indigo: 3 (DELETEALL)
marker.action = visualization_msgs::Marker::ADD;
// Set the pose of the marker. This is a full 6DOF pose relative to the frame/time specified in the header
marker.pose.position.x = 0;
marker.pose.position.y = 0;
marker.pose.position.z = 0;
marker.pose.orientation.x = 0.0;
marker.pose.orientation.y = 0.0;
marker.pose.orientation.z = 0.0;
marker.pose.orientation.w = 1.0;
marker.scale.x = 0.04;
marker.scale.y = 0.04;
marker.scale.z = 0.04;
marker.color = color;
marker.lifetime = ros::Duration(0.5);
return marker;
}
bool isInFieldOfView(const tf::TransformListener& listener, const string& target_frame, const string& observer_frame) {
tf::StampedTransform transform;
try{
listener.lookupTransform(observer_frame, target_frame, ros::Time(0), transform);
}
catch (tf::TransformException ex){
ROS_ERROR("%s",ex.what());
ros::Duration(1.0).sleep();
}
// object behind the observer?
if (transform.getOrigin().x() < 0) return false;
// the field of view's main axis is the observer's X axis. So, distance to main axis is
// simply sqrt(y^2 + z^2)
double distance_to_main_axis = sqrt(transform.getOrigin().y() * transform.getOrigin().y() + transform.getOrigin().z() * transform.getOrigin().z());
double fov_radius_at_x = tan(FOV/2) * transform.getOrigin().x();
//ROS_INFO_STREAM(target_frame << ": distance_to_main_axis: " << distance_to_main_axis << ", fov_radius_at_x: " << fov_radius_at_x);
if (distance_to_main_axis < fov_radius_at_x) return true;
return false;
}
int main( int argc, char** argv )
{
GREEN.r = 0.; GREEN.g = 1.; GREEN.b = 0.; GREEN.a = 1.;
BLUE.r = 0.; BLUE.g = 0.; BLUE.b = 1.; BLUE.a = 1.;
RED.r = 1.; RED.g = 0.; RED.b = 0.; RED.a = 1.;
colors.push_back(GREEN);
colors.push_back(BLUE);
colors.push_back(RED);
ros::init(argc, argv, "estimate_focus");
ros::NodeHandle n;
ros::Rate r(30);
ros::Publisher marker_pub = n.advertise<visualization_msgs::Marker>("estimate_focus", 1);
ros::Publisher fov_pub = n.advertise<sensor_msgs::Range>("face_0_field_of_view", 1);
ros::Publisher frames_in_fov_pub = n.advertise<std_msgs::String>("actual_focus_of_attention", 1);
tf::TransformListener listener;
vector<string> frames;
vector<string> monitored_frames = {"/robot_head", "/tablet", "/selection_tablet", "/experimenter"};
// Prepare a range sensor msg to represent the fields of view
// (cone for visualization)
sensor_msgs::Range fov;
fov.radiation_type = sensor_msgs::Range::INFRARED;
fov.field_of_view = FOV;
fov.min_range = 0;
fov.max_range = 10;
fov.range = RANGE;
ROS_INFO("Waiting until a face becomes visible...");
while (!listener.waitForTransform("base_footprint", "face_0", ros::Time::now(), ros::Duration(5.0))) {
ROS_DEBUG("Still no face visible...");
r.sleep();
}
ROS_INFO("Face detected! We can start estimating the focus of attention...");
while (ros::ok())
{
// Publish the marker
/*while (marker_pub.getNumSubscribers() < 1 && fov_pub.getNumSubscribers() < 1)
{
if (!ros::ok())
{
return 0;
}
ROS_WARN_ONCE("Please create a subscriber to the marker or field of view");
sleep(1);
}*/
frames.clear();
listener.getFrameStrings(frames);
int nb_faces = 0;
for(auto frame : frames) {
if(frame.find(HUMAN_FRAME_PREFIX) == 0) {
nb_faces++;
int face_idx = stoi(frame.substr(HUMAN_FRAME_PREFIX.length(), 1));
if (face_idx == 0) {
stringstream ss;
for(size_t i = 0 ; i < monitored_frames.size(); ++i) {
if(isInFieldOfView(listener, monitored_frames[i], frame)) {
ROS_DEBUG_STREAM(monitored_frames[i] << " is in the field of view of " << frame);
marker_pub.publish(makeMarker(i, monitored_frames[i], colors[i]));
// create new marker with special color for the observed frame ?
if (ss.str().empty()) ss << "_";
ss << monitored_frames[i];
//ss is the frame that the observer is looking
}
}
frames_in_fov_pub.publish(ss.str());
fov.range = RANGE;
fov.header.stamp = ros::Time::now();
fov.header.frame_id = frame;
fov_pub.publish(fov);
}
}
}
if (nb_faces == 0) {
// hide the field of view
fov.range = 0;
fov.header.stamp = ros::Time::now();
fov.header.frame_id = "face_0";
fov_pub.publish(fov);
//stringstream ss;
//ss << "test";
//frames_in_fov_pub.publish(ss.str());
}
r.sleep();
}
}
================================================
FILE: src/head_pose_estimation.cpp
================================================
#include <cmath>
#include <ctime>
#include <opencv2/calib3d/calib3d.hpp>
#ifdef HEAD_POSE_ESTIMATION_DEBUG
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>
#endif
#include "head_pose_estimation.hpp"
using namespace dlib;
using namespace std;
using namespace cv;
inline Point2f toCv(const dlib::point& p)
{
return Point2f(p.x(), p.y());
}
HeadPoseEstimation::HeadPoseEstimation(const string& face_detection_model, float focalLength) :
focalLength(focalLength),
opticalCenterX(-1),
opticalCenterY(-1)
{
// Load face detection and pose estimation models.
detector = get_frontal_face_detector();
deserialize(face_detection_model) >> pose_model;
}
void HeadPoseEstimation::update(cv::InputArray _image)
{
Mat image = _image.getMat();
if (opticalCenterX == -1) // not initialized yet
{
opticalCenterX = image.cols / 2;
opticalCenterY = image.rows / 2;
#ifdef HEAD_POSE_ESTIMATION_DEBUG
cerr << "Setting the optical center to (" << opticalCenterX << ", " << opticalCenterY << ")" << endl;
#endif
}
current_image = cv_image<bgr_pixel>(image);
faces = detector(current_image);
// Find the pose of each face.
shapes.clear();
for (auto face : faces){
shapes.push_back(pose_model(current_image, face));
}
#ifdef HEAD_POSE_ESTIMATION_DEBUG
// Draws the contours of the face and face features onto the image
_debug = image.clone();
auto color = Scalar(0,128,128);
for (unsigned long i = 0; i < shapes.size(); ++i)
{
const full_object_detection& d = shapes[i];
for (unsigned long i = 1; i <= 16; ++i)
line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);
for (unsigned long i = 28; i <= 30; ++i)
line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);
for (unsigned long i = 18; i <= 21; ++i)
line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);
for (unsigned long i = 23; i <= 26; ++i)
line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);
for (unsigned long i = 31; i <= 35; ++i)
line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);
line(_debug, toCv(d.part(30)), toCv(d.part(35)), color, 2, CV_AA);
for (unsigned long i = 37; i <= 41; ++i)
line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);
line(_debug, toCv(d.part(36)), toCv(d.part(41)), color, 2, CV_AA);
for (unsigned long i = 43; i <= 47; ++i)
line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);
line(_debug, toCv(d.part(42)), toCv(d.part(47)), color, 2, CV_AA);
for (unsigned long i = 49; i <= 59; ++i)
line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);
line(_debug, toCv(d.part(48)), toCv(d.part(59)), color, 2, CV_AA);
for (unsigned long i = 61; i <= 67; ++i)
line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA);
line(_debug, toCv(d.part(60)), toCv(d.part(67)), color, 2, CV_AA);
for (auto i = 0; i < 68 ; i++) {
putText(_debug, to_string(i), toCv(d.part(i)), FONT_HERSHEY_DUPLEX, 0.6, Scalar(255,255,255));
}
}
#endif
}
head_pose HeadPoseEstimation::pose(size_t face_idx) const
{
cv::Mat projectionMat = cv::Mat::zeros(3,3,CV_32F);
cv::Matx33f projection = projectionMat;
projection(0,0) = focalLength;
projection(1,1) = focalLength;
projection(0,2) = opticalCenterX;
projection(1,2) = opticalCenterY;
projection(2,2) = 1;
std::vector<Point3f> head_points;
head_points.push_back(P3D_SELLION);
head_points.push_back(P3D_RIGHT_EYE);
head_points.push_back(P3D_LEFT_EYE);
head_points.push_back(P3D_RIGHT_EAR);
head_points.push_back(P3D_LEFT_EAR);
head_points.push_back(P3D_MENTON);
head_points.push_back(P3D_NOSE);
head_points.push_back(P3D_STOMMION);
std::vector<Point2f> detected_points;
detected_points.push_back(coordsOf(face_idx, SELLION));
detected_points.push_back(coordsOf(face_idx, RIGHT_EYE));
detected_points.push_back(coordsOf(face_idx, LEFT_EYE));
detected_points.push_back(coordsOf(face_idx, RIGHT_SIDE));
detected_points.push_back(coordsOf(face_idx, LEFT_SIDE));
detected_points.push_back(coordsOf(face_idx, MENTON));
detected_points.push_back(coordsOf(face_idx, NOSE));
auto stomion = (coordsOf(face_idx, MOUTH_CENTER_TOP) + coordsOf(face_idx, MOUTH_CENTER_BOTTOM)) * 0.5;
detected_points.push_back(stomion);
Mat rvec, tvec;
// Find the 3D pose of our head
solvePnP(head_points, detected_points,
projection, noArray(),
rvec, tvec, false,
#ifdef OPENCV3
cv::SOLVEPNP_ITERATIVE);
#else
cv::ITERATIVE);
#endif
Matx33d rotation;
Rodrigues(rvec, rotation);
head_pose pose = {
rotation(0,0), rotation(0,1), rotation(0,2), tvec.at<double>(0)/1000,
rotation(1,0), rotation(1,1), rotation(1,2), tvec.at<double>(1)/1000,
rotation(2,0), rotation(2,1), rotation(2,2), tvec.at<double>(2)/1000,
0, 0, 0, 1
};
#ifdef HEAD_POSE_ESTIMATION_DEBUG
std::vector<Point2f> reprojected_points;
projectPoints(head_points, rvec, tvec, projection, noArray(), reprojected_points);
for (auto point : reprojected_points) {
circle(_debug, point,2, Scalar(0,255,255),2);
}
std::vector<Point3f> axes;
axes.push_back(Point3f(0,0,0));
axes.push_back(Point3f(50,0,0));
axes.push_back(Point3f(0,50,0));
axes.push_back(Point3f(0,0,50));
std::vector<Point2f> projected_axes;
projectPoints(axes, rvec, tvec, projection, noArray(), projected_axes);
line(_debug, projected_axes[0], projected_axes[3], Scalar(255,0,0),2,CV_AA);
line(_debug, projected_axes[0], projected_axes[2], Scalar(0,255,0),2,CV_AA);
line(_debug, projected_axes[0], projected_axes[1], Scalar(0,0,255),2,CV_AA);
putText(_debug, "(" + to_string(int(pose(0,3) * 100)) + "cm, " + to_string(int(pose(1,3) * 100)) + "cm, " + to_string(int(pose(2,3) * 100)) + "cm)", coordsOf(face_idx, SELLION), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,255),2);
#endif
return pose;
}
std::vector<head_pose> HeadPoseEstimation::poses() const {
std::vector<head_pose> res;
for (auto i = 0; i < faces.size(); i++){
res.push_back(pose(i));
}
return res;
}
Point2f HeadPoseEstimation::coordsOf(size_t face_idx, FACIAL_FEATURE feature) const
{
return toCv(shapes[face_idx].part(feature));
}
// Finds the intersection of two lines, or returns false.
// The lines are defined by (o1, p1) and (o2, p2).
// taken from: http://stackoverflow.com/a/7448287/828379
bool HeadPoseEstimation::intersection(Point2f o1, Point2f p1, Point2f o2, Point2f p2,
Point2f &r) const
{
Point2f x = o2 - o1;
Point2f d1 = p1 - o1;
Point2f d2 = p2 - o2;
float cross = d1.x*d2.y - d1.y*d2.x;
if (abs(cross) < /*EPS*/1e-8)
return false;
double t1 = (x.x * d2.y - x.y * d2.x)/cross;
r = o1 + d1 * t1;
return true;
}
================================================
FILE: src/head_pose_estimation.hpp
================================================
#ifndef __HEAD_POSE_ESTIMATION
#define __HEAD_POSE_ESTIMATION
#include <opencv2/core/core.hpp>
#include <dlib/opencv.h>
#include <dlib/image_processing.h>
#include <dlib/image_processing/frontal_face_detector.h>
#include <vector>
#include <array>
#include <string>
// Anthropometric for male adult
// Relative position of various facial feature relative to sellion
// Values taken from https://en.wikipedia.org/wiki/Human_head
// X points forward
const static cv::Point3f P3D_SELLION(0., 0.,0.);
const static cv::Point3f P3D_RIGHT_EYE(-20., -65.5,-5.);
const static cv::Point3f P3D_LEFT_EYE(-20., 65.5,-5.);
const static cv::Point3f P3D_RIGHT_EAR(-100., -77.5,-6.);
const static cv::Point3f P3D_LEFT_EAR(-100., 77.5,-6.);
const static cv::Point3f P3D_NOSE(21.0, 0., -48.0);
const static cv::Point3f P3D_STOMMION(10.0, 0., -75.0);
const static cv::Point3f P3D_MENTON(0., 0.,-133.0);
static const int MAX_FEATURES_TO_TRACK=100;
// Interesting facial features with their landmark index
enum FACIAL_FEATURE {
NOSE=30,
RIGHT_EYE=36,
LEFT_EYE=45,
RIGHT_SIDE=0,
LEFT_SIDE=16,
EYEBROW_RIGHT=21,
EYEBROW_LEFT=22,
MOUTH_UP=51,
MOUTH_DOWN=57,
MOUTH_RIGHT=48,
MOUTH_LEFT=54,
SELLION=27,
MOUTH_CENTER_TOP=62,
MOUTH_CENTER_BOTTOM=66,
MENTON=8
};
typedef cv::Matx44d head_pose;
class HeadPoseEstimation {
public:
HeadPoseEstimation(const std::string& face_detection_model = "shape_predictor_68_face_landmarks.dat", float focalLength=455.);
void update(cv::InputArray image);
head_pose pose(size_t face_idx) const;
std::vector<head_pose> poses() const;
float focalLength;
float opticalCenterX;
float opticalCenterY;
#ifdef HEAD_POSE_ESTIMATION_DEBUG
mutable cv::Mat _debug;
#endif
private:
dlib::cv_image<dlib::bgr_pixel> current_image;
dlib::frontal_face_detector detector;
dlib::shape_predictor pose_model;
std::vector<dlib::rectangle> faces;
std::vector<dlib::full_object_detection> shapes;
/** Return the point corresponding to the dictionary marker.
*/
cv::Point2f coordsOf(size_t face_idx, FACIAL_FEATURE feature) const;
/** Returns true if the lines intersect (and set r to the intersection
* coordinates), false otherwise.
*/
bool intersection(cv::Point2f o1, cv::Point2f p1,
cv::Point2f o2, cv::Point2f p2,
cv::Point2f &r) const;
};
#endif // __HEAD_POSE_ESTIMATION
================================================
FILE: src/head_pose_estimation_ros.cpp
================================================
#include <string>
#include <ros/ros.h>
#include "ros_head_pose_estimator.hpp"
using namespace std;
int main(int argc, char* argv[])
{
//ROS initialization
ros::init(argc, argv, "ros_markers");
ros::NodeHandle rosNode;
ros::NodeHandle _private_node("~");
// load parameters
string modelFilename;
_private_node.param<string>("face_model", modelFilename, "");
if (modelFilename.empty()) {
ROS_ERROR_STREAM("You must provide the face model with the parameter face_model.\n" <<
"For instance, _face_model:=shape_predictor_68_face_landmarks.dat");
return(1);
}
// initialize the detector by subscribing to the camera video stream
ROS_INFO_STREAM("Initializing the face detector with the model " << modelFilename <<"...");
HeadPoseEstimator estimator(rosNode, modelFilename);
ROS_INFO("head_pose_estimator is ready. TF frames of detected faces will be published when detected.");
ros::spin();
return 0;
}
================================================
FILE: src/ros_head_pose_estimator.cpp
================================================
#include "ros_head_pose_estimator.hpp"
using namespace std;
using namespace cv;
// how many second in the *future* the face transformation should be published?
// this allow to compensate for the 'slowness' of face detection, but introduce
// some lag in TF.
#define TRANSFORM_FUTURE_DATING 0
HeadPoseEstimator::HeadPoseEstimator(ros::NodeHandle& rosNode,
const string& modelFilename) :
rosNode(rosNode),
it(rosNode),
warnUncalibratedImage(true),
estimator(modelFilename)
{
sub = it.subscribeCamera("image", 1, &HeadPoseEstimator::detectFaces, this);
nb_detected_faces_pub = rosNode.advertise<std_msgs::Char>("nb_detected_faces", 1);
#ifdef HEAD_POSE_ESTIMATION_DEBUG
pub = it.advertise("attention_tracker/faces/image",1);
#endif
}
void HeadPoseEstimator::detectFaces(const sensor_msgs::ImageConstPtr& msg,
const sensor_msgs::CameraInfoConstPtr& camerainfo)
{
// updating the camera model is cheap if not modified
cameramodel.fromCameraInfo(camerainfo);
// publishing uncalibrated images? -> return (according to CameraInfo message documentation,
// K[0] == 0.0 <=> uncalibrated).
if(cameramodel.intrinsicMatrix()(0,0) == 0.0) {
if(warnUncalibratedImage) {
warnUncalibratedImage = false;
ROS_ERROR("Camera publishes uncalibrated images. Can not estimate face position.");
ROS_WARN("Detection will start over again when camera info is available.");
}
return;
}
warnUncalibratedImage = true;
estimator.focalLength = cameramodel.fx();
estimator.opticalCenterX = cameramodel.cx();
estimator.opticalCenterY = cameramodel.cy();
// hopefully no copy here:
// - assignement operator of cv::Mat does not copy the data
// - toCvShare does no copy if the default (source) encoding is used.
inputImage = cv_bridge::toCvShare(msg)->image;
/********************************************************************
* Faces detection *
********************************************************************/
estimator.update(inputImage);
auto poses = estimator.poses();
ROS_INFO_STREAM(poses.size() << " faces detected.");
nb_detected_faces_pub.publish(poses.size());
for(size_t face_idx = 0; face_idx < poses.size(); ++face_idx) {
auto trans = poses[face_idx];
tf::Transform face_pose;
// Frame orientation of the camera follows the classical camera
// convention (Z forward)
auto z = -trans(2,3);
if (z < 0) continue; // the head can not be *behind* the camera!
face_pose.setOrigin( tf::Vector3( trans(0,3),
trans(1,3),
z) );
tf::Quaternion qrot;
tf::Matrix3x3 mrot(
trans(0,0), trans(0,1), trans(0,2),
trans(1,0), trans(1,1), trans(1,2),
trans(2,0), trans(2,1), trans(2,2));
mrot.getRotation(qrot);
face_pose.setRotation(qrot);
br.sendTransform(
tf::StampedTransform(face_pose,
ros::Time::now() + ros::Duration(TRANSFORM_FUTURE_DATING),
cameramodel.tfFrame(),
"face_" + to_string(face_idx)));
}
#ifdef HEAD_POSE_ESTIMATION_DEBUG
if(pub.getNumSubscribers() > 0) {
ROS_INFO_ONCE("Starting to publish face tracking output for debug");
auto debugmsg = cv_bridge::CvImage(msg->header, "bgr8", estimator._debug).toImageMsg();
pub.publish(debugmsg);
}
#endif
}
================================================
FILE: src/ros_head_pose_estimator.hpp
================================================
#include <string>
#include <set>
#include "head_pose_estimation.hpp"
// opencv2
#ifdef OPENCV3
#include <opencv2/core.hpp>
#else
#include <opencv2/core/core.hpp>
#endif
// ROS
#include <ros/ros.h>
#include <std_msgs/Char.h>
#include <tf/transform_broadcaster.h>
#include <image_transport/image_transport.h>
#include <image_geometry/pinhole_camera_model.h>
#include <cv_bridge/cv_bridge.h>
class HeadPoseEstimator
{
public:
HeadPoseEstimator(ros::NodeHandle& rosNode,
const std::string& modelFilename = "");
private:
ros::NodeHandle& rosNode;
image_transport::ImageTransport it;
image_transport::CameraSubscriber sub;
image_transport::Publisher pub;
ros::Publisher nb_detected_faces_pub;
tf::TransformBroadcaster br;
tf::Transform transform;
image_geometry::PinholeCameraModel cameramodel;
cv::Mat cameraMatrix, distCoeffs;
cv::Mat inputImage;
HeadPoseEstimation estimator;
bool warnUncalibratedImage;
void detectFaces(const sensor_msgs::ImageConstPtr& msg,
const sensor_msgs::CameraInfoConstPtr& camerainfo);
};
================================================
FILE: src/withmeness.py
================================================
#!/usr/bin/env python
#coding: utf-8
import sys
import time
import rospy
import json
from std_msgs.msg import String, Empty, Float64
pub_withmeness = rospy.Publisher('withmeness_topic', Float64, queue_size=1)
"""
task_targets = {
"WAITING_FOR_WORD": {"_/selection_tablet","_/experimentator"},
"RESPONDING_TO_NEW_WORD": {"_/robot_head","_/tablet","_/experimentator"},
"WAITING_FOR_LETTER_TO_FINISH": {"_/robot_head","_/tablet","_/experimentator"},
"ASKING_FOR_FEEDBACK": {"_/robot_head"},
"PUBLISHING_WORD": {"_/robot_head","_/selection_tablet","_/experimentator"},
"WAITING_FOR_FEEDBACK": {"_/tablet","_/experimentator","_/selection_tablet"},
"WAITING_FOR_ROBOT_TO_CONNECT": {"_/tablet","_/experimentator","_/robot_head"},
"WAITING_FOR_TABLET_TO_CONNECT": {"_/tablet","_/experimentator","_/robot_head"},
"RESPONDING_TO_DEMONSTRATION_FULL_WORD": {"_/robot_head","_/tablet","_/experimentator"},
"RESPONDING_TO_DEMONSTRATION": {"_/robot_head","_/tablet","_/experimentator"},
}"""
current_task = "WAITING_FOR_WORD"
current_target = "_"
value = 0.5
mu = 0.1
def onChangeTask(msg):
global current_task
current_task = (str)(msg.data)
def onChangeTarget(msg):
global current_target
current_target = (str)(msg.data)
if __name__=='__main__':
global withmeness_value
rospy.init_node("withmeness")
test = rospy.search_param("targets")
target_list = rospy.get_param(test)
with open(target_list) as target_list_json:
task_targets = json.load(target_list_json)
while(True):
# get current task:
rospy.Subscriber("state_activity", String, onChangeTask)
# get current target:
rospy.Subscriber("actual_focus_of_attention", String, onChangeTarget)
# compute EMA of online-with_me_ness:
if current_task in task_targets:
if current_target in task_targets[current_task]:
value = (1-mu)*value + mu
else:
if current_target!="" and current_target!="_":
value = (1-mu)*value
msg = Float64()
msg.data = value
pub_withmeness.publish(msg)
rospy.sleep(1.0)
rospy.spin()
gitextract_zm7yli8l/
├── .gitignore
├── CMakeLists.txt
├── README.md
├── calib/
│ └── logitech-c920_640x360.ini
├── launch/
│ └── attention_tracking.launch
├── package.xml
├── samples/
│ ├── head_pose_estimation_single_frame.cpp
│ └── head_pose_estimation_test.cpp
├── share/
│ └── targets.json
└── src/
├── estimate_focus.cpp
├── head_pose_estimation.cpp
├── head_pose_estimation.hpp
├── head_pose_estimation_ros.cpp
├── ros_head_pose_estimator.cpp
├── ros_head_pose_estimator.hpp
└── withmeness.py
SYMBOL INDEX (14 symbols across 8 files)
FILE: samples/head_pose_estimation_single_frame.cpp
function main (line 16) | int main(int argc, char **argv)
FILE: samples/head_pose_estimation_test.cpp
function main (line 9) | int main(int argc, char **argv)
FILE: src/estimate_focus.cpp
function makeMarker (line 25) | visualization_msgs::Marker makeMarker(int id, const string& frame, std_m...
function isInFieldOfView (line 62) | bool isInFieldOfView(const tf::TransformListener& listener, const string...
function main (line 90) | int main( int argc, char** argv )
FILE: src/head_pose_estimation.cpp
function Point2f (line 17) | inline Point2f toCv(const dlib::point& p)
function head_pose (line 108) | head_pose HeadPoseEstimation::pose(size_t face_idx) const
function Point2f (line 209) | Point2f HeadPoseEstimation::coordsOf(size_t face_idx, FACIAL_FEATURE fea...
FILE: src/head_pose_estimation.hpp
type FACIAL_FEATURE (line 31) | enum FACIAL_FEATURE {
class HeadPoseEstimation (line 52) | class HeadPoseEstimation {
FILE: src/head_pose_estimation_ros.cpp
function main (line 8) | int main(int argc, char* argv[])
FILE: src/ros_head_pose_estimator.hpp
class HeadPoseEstimator (line 24) | class HeadPoseEstimator
FILE: src/withmeness.py
function onChangeTask (line 33) | def onChangeTask(msg):
function onChangeTarget (line 37) | def onChangeTarget(msg):
Condensed preview — 16 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (40K chars).
[
{
"path": ".gitignore",
"chars": 8,
"preview": "build*/\n"
},
{
"path": "CMakeLists.txt",
"chars": 2999,
"preview": "cmake_minimum_required(VERSION 2.8.3)\nproject(attention_tracker)\n\nset(CMAKE_POSITION_INDEPENDENT_CODE True)\nadd_definiti"
},
{
"path": "README.md",
"chars": 2149,
"preview": "\n⚠️️ **Attention**: This library is currently not maintained. Please use [the gazr fork](https://github.com/severin-lema"
},
{
"path": "calib/logitech-c920_640x360.ini",
"chars": 397,
"preview": "# Camera intrinsics\n# for Logitech c920 webcam\n\n[image]\n\nwidth\n640\n\nheight\n360\n\n[default]\n\ncamera matrix\n455.69 0.00000 "
},
{
"path": "launch/attention_tracking.launch",
"chars": 2099,
"preview": "<launch>\n\n <arg name=\"head_tracking_frame\" default=\"head_tracking_camera\"/>\n\n <arg name=\"device\" default=\"/dev/video0\""
},
{
"path": "package.xml",
"chars": 800,
"preview": "<?xml version=\"1.0\"?>\n<package>\n <name>attention_tracker</name>\n <version>0.1.0</version>\n <description>Assess in rea"
},
{
"path": "samples/head_pose_estimation_single_frame.cpp",
"chars": 2262,
"preview": "#ifdef OPENCV3\n#include <opencv2/imgcodecs.hpp>\n#else\n#include <opencv2/highgui/highgui.hpp>\n#endif\n\n#include <iostream>"
},
{
"path": "samples/head_pose_estimation_test.cpp",
"chars": 1415,
"preview": "#include <opencv2/highgui/highgui.hpp>\n#include <iostream>\n\n#include \"../src/head_pose_estimation.hpp\"\n\nusing namespace "
},
{
"path": "share/targets.json",
"chars": 732,
"preview": "{\"WAITING_FOR_WORD\": [\"_/selection_tablet\",\"_/experimentator\"],\"RESPONDING_TO_NEW_WORD\": [\"_/robot_head\",\"_/tablet\",\"_/e"
},
{
"path": "src/estimate_focus.cpp",
"chars": 6179,
"preview": "#include <string>\n#include <vector>\n#include <sstream>\n\n#include <ros/ros.h>\n#include <visualization_msgs/Marker.h>\n#inc"
},
{
"path": "src/head_pose_estimation.cpp",
"chars": 7326,
"preview": "#include <cmath>\n#include <ctime>\n\n#include <opencv2/calib3d/calib3d.hpp>\n\n#ifdef HEAD_POSE_ESTIMATION_DEBUG\n#include <o"
},
{
"path": "src/head_pose_estimation.hpp",
"chars": 2473,
"preview": "#ifndef __HEAD_POSE_ESTIMATION\n#define __HEAD_POSE_ESTIMATION\n\n#include <opencv2/core/core.hpp>\n#include <dlib/opencv.h>"
},
{
"path": "src/head_pose_estimation_ros.cpp",
"chars": 1007,
"preview": "#include <string>\n#include <ros/ros.h>\n\n#include \"ros_head_pose_estimator.hpp\"\n\nusing namespace std;\n\nint main(int argc,"
},
{
"path": "src/ros_head_pose_estimator.cpp",
"chars": 3798,
"preview": "#include \"ros_head_pose_estimator.hpp\"\n\nusing namespace std;\nusing namespace cv;\n\n// how many second in the *future* the"
},
{
"path": "src/ros_head_pose_estimator.hpp",
"chars": 1128,
"preview": "#include <string>\n#include <set>\n\n#include \"head_pose_estimation.hpp\"\n\n// opencv2\n#ifdef OPENCV3\n#include <opencv2/core."
},
{
"path": "src/withmeness.py",
"chars": 2177,
"preview": "#!/usr/bin/env python\n#coding: utf-8\n\nimport sys\nimport time\nimport rospy\nimport json\nfrom std_msgs.msg import String, E"
}
]
About this extraction
This page contains the full source code of the chili-epfl/attention-tracker GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 16 files (36.1 KB), approximately 10.4k tokens, and a symbol index with 14 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.