Repository: chili-epfl/attention-tracker Branch: master Commit: 3ecf4639c731 Files: 16 Total size: 36.1 KB Directory structure: gitextract_zm7yli8l/ ├── .gitignore ├── CMakeLists.txt ├── README.md ├── calib/ │ └── logitech-c920_640x360.ini ├── launch/ │ └── attention_tracking.launch ├── package.xml ├── samples/ │ ├── head_pose_estimation_single_frame.cpp │ └── head_pose_estimation_test.cpp ├── share/ │ └── targets.json └── src/ ├── estimate_focus.cpp ├── head_pose_estimation.cpp ├── head_pose_estimation.hpp ├── head_pose_estimation_ros.cpp ├── ros_head_pose_estimator.cpp ├── ros_head_pose_estimator.hpp └── withmeness.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ build*/ ================================================ FILE: CMakeLists.txt ================================================ cmake_minimum_required(VERSION 2.8.3) project(attention_tracker) set(CMAKE_POSITION_INDEPENDENT_CODE True) add_definitions(-std=c++11) set(DLIB_PATH "" CACHE PATH "Path to DLIB") include(${DLIB_PATH}/dlib/cmake) option(DEBUG "Enable debug visualizations" ON) option(WITH_TESTS "Compile sample test application" ON) option(WITH_ROS "Build ROS nodes -- Requires OpenCV2!" OFF) include_directories(${OpenCV_INCLUDE_DIRS}) if(WITH_ROS) find_package(catkin REQUIRED COMPONENTS roscpp tf std_msgs visualization_msgs sensor_msgs cv_bridge image_transport image_geometry ) include_directories(${catkin_INCLUDE_DIRS}) catkin_package( CATKIN_DEPENDS tf DEPENDS OpenCV LIBRARIES ) endif() if(DEBUG) find_package(OpenCV COMPONENTS core imgproc calib3d highgui REQUIRED) else() find_package(OpenCV COMPONENTS core imgproc calib3d REQUIRED) endif() message(STATUS "OpenCV version: ${OpenCV_VERSION}") if(${OpenCV_VERSION} VERSION_GREATER 2.9.0) set(OPENCV3 TRUE) add_definitions(-DOPENCV3) endif() if(DEBUG) add_definitions(-DHEAD_POSE_ESTIMATION_DEBUG) endif() add_library(head_pose_estimation SHARED src/head_pose_estimation.cpp) target_link_libraries(head_pose_estimation dlib ${OpenCV_LIBRARIES}) if(WITH_ROS) add_executable(estimate_focus src/estimate_focus.cpp) target_link_libraries(estimate_focus ${catkin_LIBRARIES} ${OpenCV_LIBRARIES}) add_executable(estimate src/head_pose_estimation_ros.cpp src/ros_head_pose_estimator.cpp) target_link_libraries(estimate head_pose_estimation ${catkin_LIBRARIES}) install(TARGETS estimate_focus head_pose_estimation estimate ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} ) install(PROGRAMS src/withmeness.py DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} ) install(FILES launch/attention_tracking.launch calib/logitech-c920_640x360.ini share/shape_predictor_68_face_landmarks.dat share/targets.json DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} ) endif() if(WITH_TESTS) if(OPENCV3) if (DEBUG) find_package(OpenCV COMPONENTS core imgproc calib3d highgui imgcodecs videoio REQUIRED) else() find_package(OpenCV COMPONENTS core imgproc calib3d imgcodecs videoio REQUIRED) endif() else() find_package(OpenCV COMPONENTS core imgproc calib3d highgui REQUIRED) endif() add_executable(head_pose_single_frame samples/head_pose_estimation_single_frame.cpp) target_link_libraries(head_pose_single_frame head_pose_estimation ${OpenCV_LIBRARIES}) if(DEBUG) add_executable(head_pose_test samples/head_pose_estimation_test.cpp) target_link_libraries(head_pose_test head_pose_estimation ${OpenCV_LIBRARIES}) endif() endif() ================================================ FILE: README.md ================================================ ⚠️️ **Attention**: This library is currently not maintained. Please use [the gazr fork](https://github.com/severin-lemaignan/gazr) instead. ⚠️️ Attention Tracker ================= ![Face tracking for head pose estimation](doc/screenshot.jpg) Head pose estimation -------------------- This library (`libhead_pose_estimation.so`) performs 3D head pose estimation based on the fantastic [dlib](http://dlib.net/) face detector and a bit of [OpenCV's solvePnP](http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html#solvepnp) magic (it uses [adult male anthropometric](https://github.com/chili-epfl/attention-tracker/blob/5dcef870c96892d80ca17959528efba0b2d0ce1c/src/head_pose_estimation.hpp#L12) data to match a real 3D head to the projected image). The library returns a 4x4 transformation matrix. It supports detection and tracking of multiple faces at the same time, and runs on-line, but it *does not* feature face identification. Installation ------------ **Note**: The library has only been tested on Linux. We can only provide limited support for other operating systems! ### Pre-requisites Dlib: You need to [download](http://dlib.net/) and extract ``Dlib`` somewhere. This application has been tested with ``dlib-18.16``. OpenCV: You need to install [OpenCV](http://opencv.org/). If you're using Ubuntu, you could run: ```text sudo apt-get install libopencv-dev ``` ### Installation The library uses a standard ``CMake`` workflow: ``` $ mkdir build && cd build $ cmake -DDLIB_PATH= .. $ make ``` Note that the first time you compile the project, ``dlib`` will compile as well. It takes a few minutes. This won't happen the next times. To test the library, run: ```text ./head_pose_test ../share/shape_predictor_68_face_landmarks.dat ``` You should get something very similar to the picture above. Finally, to install the library: ``` $ make install ``` ROS support ----------- The [ROS](http://www.ros.org/) wrapper provides a convenient node that exposes each detected face as a TF frame. Enable the compilation of the ROS wrapper with: ``` cmake -DWITH_ROS=ON ``` ================================================ FILE: calib/logitech-c920_640x360.ini ================================================ # Camera intrinsics # for Logitech c920 webcam [image] width 640 height 360 [default] camera matrix 455.69 0.00000 313.32 0.00000 455.10 167.62 0.00000 0.00000 1.00 distortion 0.16 -0.7 0.0015 -0.0167 1.07 rectification 1.00000 0.00000 0.00000 0.00000 1.00000 0.00000 0.00000 0.00000 1.00000 projection 455.69 0.00000 313.32 0.0 0.00000 455.10 167.62 0.0 0.00000 0.00000 1.00 0.0 ================================================ FILE: launch/attention_tracking.launch ================================================ ================================================ FILE: package.xml ================================================ attention_tracker 0.1.0 Assess in real time the user's attention focus to compute a level of engagement Séverin Lemaignan BSD Séverin Lemaignan Alexis Jacq Fernando Garcia catkin std_msgs tf visualization_msgs sensor_msgs cv_bridge image_transport image_geometry tf ================================================ FILE: samples/head_pose_estimation_single_frame.cpp ================================================ #ifdef OPENCV3 #include #else #include #endif #include #include "../src/head_pose_estimation.hpp" using namespace std; using namespace cv; const static size_t NB_TESTS = 100; // number of time the detection is run, to get better average detection duration int main(int argc, char **argv) { Mat frame; if(argc < 3) { cerr << "Usage: " << endl << "head_pose model.dat frame.{jpg|png}" << endl; #ifdef HEAD_POSE_ESTIMATION_DEBUG cerr << "Output: a new frame 'head_pose_.png'" << endl; #endif return 1; } auto estimator = HeadPoseEstimation(argv[1]); cout << "Estimating head pose on " << argv[2] << endl; #ifdef OPENCV3 Mat img = imread(argv[2],IMREAD_COLOR); #else Mat img = imread(argv[2],CV_LOAD_IMAGE_COLOR); #endif estimator.focalLength = 500; cout << "Running " << NB_TESTS << " loops to get a good performance estimate..." << endl; #ifdef HEAD_POSE_ESTIMATION_DEBUG cerr << "ATTENTION! The benchmark is compiled in DEBUG mode: the performance is no going to be good!!" << endl; #endif auto t_start = getTickCount(); auto nbfaces = 0; for(size_t i = 0; i < NB_TESTS; i++) { estimator.update(img); } auto t_detection = getTickCount(); for(size_t i = 0; i < NB_TESTS; i++) { auto poses = estimator.poses(); nbfaces += poses.size(); // this is completly artifical: the only purpose is to make sure the compiler does not optimize away estimator.poses() } auto t_end = getTickCount(); cout << "Found " << nbfaces/NB_TESTS << " faces" << endl; auto poses = estimator.poses(); for(auto pose : poses) { cout << "Head pose: (" << pose(0,3) << ", " << pose(1,3) << ", " << pose(2,3) << ")" << endl; } cout << "Face feature detection: " <<((t_detection-t_start) / NB_TESTS) /getTickFrequency() * 1000. << "ms;"; cout << "Pose estimation: " <<((t_end-t_detection) / NB_TESTS) /getTickFrequency() * 1000. << "ms;"; cout << "Total time: " << ((t_end-t_start) / NB_TESTS) / getTickFrequency() * 1000. << "ms" << endl; #ifdef HEAD_POSE_ESTIMATION_DEBUG imwrite("head_pose.png", estimator._debug); #endif } ================================================ FILE: samples/head_pose_estimation_test.cpp ================================================ #include #include #include "../src/head_pose_estimation.hpp" using namespace std; using namespace cv; int main(int argc, char **argv) { Mat frame; namedWindow("headpose"); if(argc < 2) { cerr << "Usage: " << endl << "head_pose model.dat" << endl; return 1; } auto estimator = HeadPoseEstimation(argv[1]); // Configure the video capture // =========================== VideoCapture video_in(0); // adjust for your webcam! video_in.set(CV_CAP_PROP_FRAME_WIDTH, 640); video_in.set(CV_CAP_PROP_FRAME_HEIGHT, 480); estimator.focalLength = 500; estimator.opticalCenterX = 320; estimator.opticalCenterY = 240; if(!video_in.isOpened()) { cerr << "Couldn't open camera" << endl; return 1; } while(true) { video_in >> frame; auto t_start = getTickCount(); estimator.update(frame); for(auto pose : estimator.poses()) { cout << "Head pose: (" << pose(0,3) << ", " << pose(1,3) << ", " << pose(2,3) << ")" << endl; auto t_end = getTickCount(); cout << "Processing time for this frame: " << (t_end-t_start) / getTickFrequency() * 1000. << "ms" << endl; } #ifdef HEAD_POSE_ESTIMATION_DEBUG imshow("headpose", estimator._debug); if (waitKey(10) >= 0) break; #endif } } ================================================ FILE: share/targets.json ================================================ {"WAITING_FOR_WORD": ["_/selection_tablet","_/experimentator"],"RESPONDING_TO_NEW_WORD": ["_/robot_head","_/tablet","_/experimentator"],"WAITING_FOR_LETTER_TO_FINISH": ["_/robot_head","_/tablet","_/experimentator"],"ASKING_FOR_FEEDBACK": "_/robot_head","PUBLISHING_WORD": ["_/robot_head","_/selection_tablet","_/experimentator"],"WAITING_FOR_FEEDBACK": ["_/tablet","_/experimentator","_/selection_tablet"],"WAITING_FOR_ROBOT_TO_CONNECT": ["_/tablet","_/experimentator","_/robot_head"],"WAITING_FOR_TABLET_TO_CONNECT": ["_/tablet","_/experimentator","_/robot_head"],"RESPONDING_TO_DEMONSTRATION_FULL_WORD": ["_/robot_head","_/tablet","_/experimentator"],"RESPONDING_TO_DEMONSTRATION": ["_/robot_head","_/tablet","_/experimentator"]} ================================================ FILE: src/estimate_focus.cpp ================================================ #include #include #include #include #include #include #include #include #include using namespace std; static const string HUMAN_FRAME_PREFIX = "face_"; static const double FOV = 30. / 180 * M_PI; // radians static const float RANGE = 3; //m std::vector colors; static std_msgs::ColorRGBA GREEN; static std_msgs::ColorRGBA BLUE; static std_msgs::ColorRGBA RED; visualization_msgs::Marker makeMarker(int id, const string& frame, std_msgs::ColorRGBA color) { visualization_msgs::Marker marker; // Set the frame ID and timestamp. See the TF tutorials for information on these. marker.header.frame_id = frame; marker.header.stamp = ros::Time::now(); // Set the namespace and id for this marker. This serves to create a unique ID // Any marker sent with the same namespace and id will overwrite the old one marker.ns = "focus_of_attention"; marker.id = id; marker.type = visualization_msgs::Marker::SPHERE; // Set the marker action. Options are ADD, DELETE, and new in ROS Indigo: 3 (DELETEALL) marker.action = visualization_msgs::Marker::ADD; // Set the pose of the marker. This is a full 6DOF pose relative to the frame/time specified in the header marker.pose.position.x = 0; marker.pose.position.y = 0; marker.pose.position.z = 0; marker.pose.orientation.x = 0.0; marker.pose.orientation.y = 0.0; marker.pose.orientation.z = 0.0; marker.pose.orientation.w = 1.0; marker.scale.x = 0.04; marker.scale.y = 0.04; marker.scale.z = 0.04; marker.color = color; marker.lifetime = ros::Duration(0.5); return marker; } bool isInFieldOfView(const tf::TransformListener& listener, const string& target_frame, const string& observer_frame) { tf::StampedTransform transform; try{ listener.lookupTransform(observer_frame, target_frame, ros::Time(0), transform); } catch (tf::TransformException ex){ ROS_ERROR("%s",ex.what()); ros::Duration(1.0).sleep(); } // object behind the observer? if (transform.getOrigin().x() < 0) return false; // the field of view's main axis is the observer's X axis. So, distance to main axis is // simply sqrt(y^2 + z^2) double distance_to_main_axis = sqrt(transform.getOrigin().y() * transform.getOrigin().y() + transform.getOrigin().z() * transform.getOrigin().z()); double fov_radius_at_x = tan(FOV/2) * transform.getOrigin().x(); //ROS_INFO_STREAM(target_frame << ": distance_to_main_axis: " << distance_to_main_axis << ", fov_radius_at_x: " << fov_radius_at_x); if (distance_to_main_axis < fov_radius_at_x) return true; return false; } int main( int argc, char** argv ) { GREEN.r = 0.; GREEN.g = 1.; GREEN.b = 0.; GREEN.a = 1.; BLUE.r = 0.; BLUE.g = 0.; BLUE.b = 1.; BLUE.a = 1.; RED.r = 1.; RED.g = 0.; RED.b = 0.; RED.a = 1.; colors.push_back(GREEN); colors.push_back(BLUE); colors.push_back(RED); ros::init(argc, argv, "estimate_focus"); ros::NodeHandle n; ros::Rate r(30); ros::Publisher marker_pub = n.advertise("estimate_focus", 1); ros::Publisher fov_pub = n.advertise("face_0_field_of_view", 1); ros::Publisher frames_in_fov_pub = n.advertise("actual_focus_of_attention", 1); tf::TransformListener listener; vector frames; vector monitored_frames = {"/robot_head", "/tablet", "/selection_tablet", "/experimenter"}; // Prepare a range sensor msg to represent the fields of view // (cone for visualization) sensor_msgs::Range fov; fov.radiation_type = sensor_msgs::Range::INFRARED; fov.field_of_view = FOV; fov.min_range = 0; fov.max_range = 10; fov.range = RANGE; ROS_INFO("Waiting until a face becomes visible..."); while (!listener.waitForTransform("base_footprint", "face_0", ros::Time::now(), ros::Duration(5.0))) { ROS_DEBUG("Still no face visible..."); r.sleep(); } ROS_INFO("Face detected! We can start estimating the focus of attention..."); while (ros::ok()) { // Publish the marker /*while (marker_pub.getNumSubscribers() < 1 && fov_pub.getNumSubscribers() < 1) { if (!ros::ok()) { return 0; } ROS_WARN_ONCE("Please create a subscriber to the marker or field of view"); sleep(1); }*/ frames.clear(); listener.getFrameStrings(frames); int nb_faces = 0; for(auto frame : frames) { if(frame.find(HUMAN_FRAME_PREFIX) == 0) { nb_faces++; int face_idx = stoi(frame.substr(HUMAN_FRAME_PREFIX.length(), 1)); if (face_idx == 0) { stringstream ss; for(size_t i = 0 ; i < monitored_frames.size(); ++i) { if(isInFieldOfView(listener, monitored_frames[i], frame)) { ROS_DEBUG_STREAM(monitored_frames[i] << " is in the field of view of " << frame); marker_pub.publish(makeMarker(i, monitored_frames[i], colors[i])); // create new marker with special color for the observed frame ? if (ss.str().empty()) ss << "_"; ss << monitored_frames[i]; //ss is the frame that the observer is looking } } frames_in_fov_pub.publish(ss.str()); fov.range = RANGE; fov.header.stamp = ros::Time::now(); fov.header.frame_id = frame; fov_pub.publish(fov); } } } if (nb_faces == 0) { // hide the field of view fov.range = 0; fov.header.stamp = ros::Time::now(); fov.header.frame_id = "face_0"; fov_pub.publish(fov); //stringstream ss; //ss << "test"; //frames_in_fov_pub.publish(ss.str()); } r.sleep(); } } ================================================ FILE: src/head_pose_estimation.cpp ================================================ #include #include #include #ifdef HEAD_POSE_ESTIMATION_DEBUG #include #include #endif #include "head_pose_estimation.hpp" using namespace dlib; using namespace std; using namespace cv; inline Point2f toCv(const dlib::point& p) { return Point2f(p.x(), p.y()); } HeadPoseEstimation::HeadPoseEstimation(const string& face_detection_model, float focalLength) : focalLength(focalLength), opticalCenterX(-1), opticalCenterY(-1) { // Load face detection and pose estimation models. detector = get_frontal_face_detector(); deserialize(face_detection_model) >> pose_model; } void HeadPoseEstimation::update(cv::InputArray _image) { Mat image = _image.getMat(); if (opticalCenterX == -1) // not initialized yet { opticalCenterX = image.cols / 2; opticalCenterY = image.rows / 2; #ifdef HEAD_POSE_ESTIMATION_DEBUG cerr << "Setting the optical center to (" << opticalCenterX << ", " << opticalCenterY << ")" << endl; #endif } current_image = cv_image(image); faces = detector(current_image); // Find the pose of each face. shapes.clear(); for (auto face : faces){ shapes.push_back(pose_model(current_image, face)); } #ifdef HEAD_POSE_ESTIMATION_DEBUG // Draws the contours of the face and face features onto the image _debug = image.clone(); auto color = Scalar(0,128,128); for (unsigned long i = 0; i < shapes.size(); ++i) { const full_object_detection& d = shapes[i]; for (unsigned long i = 1; i <= 16; ++i) line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA); for (unsigned long i = 28; i <= 30; ++i) line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA); for (unsigned long i = 18; i <= 21; ++i) line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA); for (unsigned long i = 23; i <= 26; ++i) line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA); for (unsigned long i = 31; i <= 35; ++i) line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA); line(_debug, toCv(d.part(30)), toCv(d.part(35)), color, 2, CV_AA); for (unsigned long i = 37; i <= 41; ++i) line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA); line(_debug, toCv(d.part(36)), toCv(d.part(41)), color, 2, CV_AA); for (unsigned long i = 43; i <= 47; ++i) line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA); line(_debug, toCv(d.part(42)), toCv(d.part(47)), color, 2, CV_AA); for (unsigned long i = 49; i <= 59; ++i) line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA); line(_debug, toCv(d.part(48)), toCv(d.part(59)), color, 2, CV_AA); for (unsigned long i = 61; i <= 67; ++i) line(_debug, toCv(d.part(i)), toCv(d.part(i-1)), color, 2, CV_AA); line(_debug, toCv(d.part(60)), toCv(d.part(67)), color, 2, CV_AA); for (auto i = 0; i < 68 ; i++) { putText(_debug, to_string(i), toCv(d.part(i)), FONT_HERSHEY_DUPLEX, 0.6, Scalar(255,255,255)); } } #endif } head_pose HeadPoseEstimation::pose(size_t face_idx) const { cv::Mat projectionMat = cv::Mat::zeros(3,3,CV_32F); cv::Matx33f projection = projectionMat; projection(0,0) = focalLength; projection(1,1) = focalLength; projection(0,2) = opticalCenterX; projection(1,2) = opticalCenterY; projection(2,2) = 1; std::vector head_points; head_points.push_back(P3D_SELLION); head_points.push_back(P3D_RIGHT_EYE); head_points.push_back(P3D_LEFT_EYE); head_points.push_back(P3D_RIGHT_EAR); head_points.push_back(P3D_LEFT_EAR); head_points.push_back(P3D_MENTON); head_points.push_back(P3D_NOSE); head_points.push_back(P3D_STOMMION); std::vector detected_points; detected_points.push_back(coordsOf(face_idx, SELLION)); detected_points.push_back(coordsOf(face_idx, RIGHT_EYE)); detected_points.push_back(coordsOf(face_idx, LEFT_EYE)); detected_points.push_back(coordsOf(face_idx, RIGHT_SIDE)); detected_points.push_back(coordsOf(face_idx, LEFT_SIDE)); detected_points.push_back(coordsOf(face_idx, MENTON)); detected_points.push_back(coordsOf(face_idx, NOSE)); auto stomion = (coordsOf(face_idx, MOUTH_CENTER_TOP) + coordsOf(face_idx, MOUTH_CENTER_BOTTOM)) * 0.5; detected_points.push_back(stomion); Mat rvec, tvec; // Find the 3D pose of our head solvePnP(head_points, detected_points, projection, noArray(), rvec, tvec, false, #ifdef OPENCV3 cv::SOLVEPNP_ITERATIVE); #else cv::ITERATIVE); #endif Matx33d rotation; Rodrigues(rvec, rotation); head_pose pose = { rotation(0,0), rotation(0,1), rotation(0,2), tvec.at(0)/1000, rotation(1,0), rotation(1,1), rotation(1,2), tvec.at(1)/1000, rotation(2,0), rotation(2,1), rotation(2,2), tvec.at(2)/1000, 0, 0, 0, 1 }; #ifdef HEAD_POSE_ESTIMATION_DEBUG std::vector reprojected_points; projectPoints(head_points, rvec, tvec, projection, noArray(), reprojected_points); for (auto point : reprojected_points) { circle(_debug, point,2, Scalar(0,255,255),2); } std::vector axes; axes.push_back(Point3f(0,0,0)); axes.push_back(Point3f(50,0,0)); axes.push_back(Point3f(0,50,0)); axes.push_back(Point3f(0,0,50)); std::vector projected_axes; projectPoints(axes, rvec, tvec, projection, noArray(), projected_axes); line(_debug, projected_axes[0], projected_axes[3], Scalar(255,0,0),2,CV_AA); line(_debug, projected_axes[0], projected_axes[2], Scalar(0,255,0),2,CV_AA); line(_debug, projected_axes[0], projected_axes[1], Scalar(0,0,255),2,CV_AA); putText(_debug, "(" + to_string(int(pose(0,3) * 100)) + "cm, " + to_string(int(pose(1,3) * 100)) + "cm, " + to_string(int(pose(2,3) * 100)) + "cm)", coordsOf(face_idx, SELLION), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,255),2); #endif return pose; } std::vector HeadPoseEstimation::poses() const { std::vector res; for (auto i = 0; i < faces.size(); i++){ res.push_back(pose(i)); } return res; } Point2f HeadPoseEstimation::coordsOf(size_t face_idx, FACIAL_FEATURE feature) const { return toCv(shapes[face_idx].part(feature)); } // Finds the intersection of two lines, or returns false. // The lines are defined by (o1, p1) and (o2, p2). // taken from: http://stackoverflow.com/a/7448287/828379 bool HeadPoseEstimation::intersection(Point2f o1, Point2f p1, Point2f o2, Point2f p2, Point2f &r) const { Point2f x = o2 - o1; Point2f d1 = p1 - o1; Point2f d2 = p2 - o2; float cross = d1.x*d2.y - d1.y*d2.x; if (abs(cross) < /*EPS*/1e-8) return false; double t1 = (x.x * d2.y - x.y * d2.x)/cross; r = o1 + d1 * t1; return true; } ================================================ FILE: src/head_pose_estimation.hpp ================================================ #ifndef __HEAD_POSE_ESTIMATION #define __HEAD_POSE_ESTIMATION #include #include #include #include #include #include #include // Anthropometric for male adult // Relative position of various facial feature relative to sellion // Values taken from https://en.wikipedia.org/wiki/Human_head // X points forward const static cv::Point3f P3D_SELLION(0., 0.,0.); const static cv::Point3f P3D_RIGHT_EYE(-20., -65.5,-5.); const static cv::Point3f P3D_LEFT_EYE(-20., 65.5,-5.); const static cv::Point3f P3D_RIGHT_EAR(-100., -77.5,-6.); const static cv::Point3f P3D_LEFT_EAR(-100., 77.5,-6.); const static cv::Point3f P3D_NOSE(21.0, 0., -48.0); const static cv::Point3f P3D_STOMMION(10.0, 0., -75.0); const static cv::Point3f P3D_MENTON(0., 0.,-133.0); static const int MAX_FEATURES_TO_TRACK=100; // Interesting facial features with their landmark index enum FACIAL_FEATURE { NOSE=30, RIGHT_EYE=36, LEFT_EYE=45, RIGHT_SIDE=0, LEFT_SIDE=16, EYEBROW_RIGHT=21, EYEBROW_LEFT=22, MOUTH_UP=51, MOUTH_DOWN=57, MOUTH_RIGHT=48, MOUTH_LEFT=54, SELLION=27, MOUTH_CENTER_TOP=62, MOUTH_CENTER_BOTTOM=66, MENTON=8 }; typedef cv::Matx44d head_pose; class HeadPoseEstimation { public: HeadPoseEstimation(const std::string& face_detection_model = "shape_predictor_68_face_landmarks.dat", float focalLength=455.); void update(cv::InputArray image); head_pose pose(size_t face_idx) const; std::vector poses() const; float focalLength; float opticalCenterX; float opticalCenterY; #ifdef HEAD_POSE_ESTIMATION_DEBUG mutable cv::Mat _debug; #endif private: dlib::cv_image current_image; dlib::frontal_face_detector detector; dlib::shape_predictor pose_model; std::vector faces; std::vector shapes; /** Return the point corresponding to the dictionary marker. */ cv::Point2f coordsOf(size_t face_idx, FACIAL_FEATURE feature) const; /** Returns true if the lines intersect (and set r to the intersection * coordinates), false otherwise. */ bool intersection(cv::Point2f o1, cv::Point2f p1, cv::Point2f o2, cv::Point2f p2, cv::Point2f &r) const; }; #endif // __HEAD_POSE_ESTIMATION ================================================ FILE: src/head_pose_estimation_ros.cpp ================================================ #include #include #include "ros_head_pose_estimator.hpp" using namespace std; int main(int argc, char* argv[]) { //ROS initialization ros::init(argc, argv, "ros_markers"); ros::NodeHandle rosNode; ros::NodeHandle _private_node("~"); // load parameters string modelFilename; _private_node.param("face_model", modelFilename, ""); if (modelFilename.empty()) { ROS_ERROR_STREAM("You must provide the face model with the parameter face_model.\n" << "For instance, _face_model:=shape_predictor_68_face_landmarks.dat"); return(1); } // initialize the detector by subscribing to the camera video stream ROS_INFO_STREAM("Initializing the face detector with the model " << modelFilename <<"..."); HeadPoseEstimator estimator(rosNode, modelFilename); ROS_INFO("head_pose_estimator is ready. TF frames of detected faces will be published when detected."); ros::spin(); return 0; } ================================================ FILE: src/ros_head_pose_estimator.cpp ================================================ #include "ros_head_pose_estimator.hpp" using namespace std; using namespace cv; // how many second in the *future* the face transformation should be published? // this allow to compensate for the 'slowness' of face detection, but introduce // some lag in TF. #define TRANSFORM_FUTURE_DATING 0 HeadPoseEstimator::HeadPoseEstimator(ros::NodeHandle& rosNode, const string& modelFilename) : rosNode(rosNode), it(rosNode), warnUncalibratedImage(true), estimator(modelFilename) { sub = it.subscribeCamera("image", 1, &HeadPoseEstimator::detectFaces, this); nb_detected_faces_pub = rosNode.advertise("nb_detected_faces", 1); #ifdef HEAD_POSE_ESTIMATION_DEBUG pub = it.advertise("attention_tracker/faces/image",1); #endif } void HeadPoseEstimator::detectFaces(const sensor_msgs::ImageConstPtr& msg, const sensor_msgs::CameraInfoConstPtr& camerainfo) { // updating the camera model is cheap if not modified cameramodel.fromCameraInfo(camerainfo); // publishing uncalibrated images? -> return (according to CameraInfo message documentation, // K[0] == 0.0 <=> uncalibrated). if(cameramodel.intrinsicMatrix()(0,0) == 0.0) { if(warnUncalibratedImage) { warnUncalibratedImage = false; ROS_ERROR("Camera publishes uncalibrated images. Can not estimate face position."); ROS_WARN("Detection will start over again when camera info is available."); } return; } warnUncalibratedImage = true; estimator.focalLength = cameramodel.fx(); estimator.opticalCenterX = cameramodel.cx(); estimator.opticalCenterY = cameramodel.cy(); // hopefully no copy here: // - assignement operator of cv::Mat does not copy the data // - toCvShare does no copy if the default (source) encoding is used. inputImage = cv_bridge::toCvShare(msg)->image; /******************************************************************** * Faces detection * ********************************************************************/ estimator.update(inputImage); auto poses = estimator.poses(); ROS_INFO_STREAM(poses.size() << " faces detected."); nb_detected_faces_pub.publish(poses.size()); for(size_t face_idx = 0; face_idx < poses.size(); ++face_idx) { auto trans = poses[face_idx]; tf::Transform face_pose; // Frame orientation of the camera follows the classical camera // convention (Z forward) auto z = -trans(2,3); if (z < 0) continue; // the head can not be *behind* the camera! face_pose.setOrigin( tf::Vector3( trans(0,3), trans(1,3), z) ); tf::Quaternion qrot; tf::Matrix3x3 mrot( trans(0,0), trans(0,1), trans(0,2), trans(1,0), trans(1,1), trans(1,2), trans(2,0), trans(2,1), trans(2,2)); mrot.getRotation(qrot); face_pose.setRotation(qrot); br.sendTransform( tf::StampedTransform(face_pose, ros::Time::now() + ros::Duration(TRANSFORM_FUTURE_DATING), cameramodel.tfFrame(), "face_" + to_string(face_idx))); } #ifdef HEAD_POSE_ESTIMATION_DEBUG if(pub.getNumSubscribers() > 0) { ROS_INFO_ONCE("Starting to publish face tracking output for debug"); auto debugmsg = cv_bridge::CvImage(msg->header, "bgr8", estimator._debug).toImageMsg(); pub.publish(debugmsg); } #endif } ================================================ FILE: src/ros_head_pose_estimator.hpp ================================================ #include #include #include "head_pose_estimation.hpp" // opencv2 #ifdef OPENCV3 #include #else #include #endif // ROS #include #include #include #include #include #include class HeadPoseEstimator { public: HeadPoseEstimator(ros::NodeHandle& rosNode, const std::string& modelFilename = ""); private: ros::NodeHandle& rosNode; image_transport::ImageTransport it; image_transport::CameraSubscriber sub; image_transport::Publisher pub; ros::Publisher nb_detected_faces_pub; tf::TransformBroadcaster br; tf::Transform transform; image_geometry::PinholeCameraModel cameramodel; cv::Mat cameraMatrix, distCoeffs; cv::Mat inputImage; HeadPoseEstimation estimator; bool warnUncalibratedImage; void detectFaces(const sensor_msgs::ImageConstPtr& msg, const sensor_msgs::CameraInfoConstPtr& camerainfo); }; ================================================ FILE: src/withmeness.py ================================================ #!/usr/bin/env python #coding: utf-8 import sys import time import rospy import json from std_msgs.msg import String, Empty, Float64 pub_withmeness = rospy.Publisher('withmeness_topic', Float64, queue_size=1) """ task_targets = { "WAITING_FOR_WORD": {"_/selection_tablet","_/experimentator"}, "RESPONDING_TO_NEW_WORD": {"_/robot_head","_/tablet","_/experimentator"}, "WAITING_FOR_LETTER_TO_FINISH": {"_/robot_head","_/tablet","_/experimentator"}, "ASKING_FOR_FEEDBACK": {"_/robot_head"}, "PUBLISHING_WORD": {"_/robot_head","_/selection_tablet","_/experimentator"}, "WAITING_FOR_FEEDBACK": {"_/tablet","_/experimentator","_/selection_tablet"}, "WAITING_FOR_ROBOT_TO_CONNECT": {"_/tablet","_/experimentator","_/robot_head"}, "WAITING_FOR_TABLET_TO_CONNECT": {"_/tablet","_/experimentator","_/robot_head"}, "RESPONDING_TO_DEMONSTRATION_FULL_WORD": {"_/robot_head","_/tablet","_/experimentator"}, "RESPONDING_TO_DEMONSTRATION": {"_/robot_head","_/tablet","_/experimentator"}, }""" current_task = "WAITING_FOR_WORD" current_target = "_" value = 0.5 mu = 0.1 def onChangeTask(msg): global current_task current_task = (str)(msg.data) def onChangeTarget(msg): global current_target current_target = (str)(msg.data) if __name__=='__main__': global withmeness_value rospy.init_node("withmeness") test = rospy.search_param("targets") target_list = rospy.get_param(test) with open(target_list) as target_list_json: task_targets = json.load(target_list_json) while(True): # get current task: rospy.Subscriber("state_activity", String, onChangeTask) # get current target: rospy.Subscriber("actual_focus_of_attention", String, onChangeTarget) # compute EMA of online-with_me_ness: if current_task in task_targets: if current_target in task_targets[current_task]: value = (1-mu)*value + mu else: if current_target!="" and current_target!="_": value = (1-mu)*value msg = Float64() msg.data = value pub_withmeness.publish(msg) rospy.sleep(1.0) rospy.spin()