Repository: Asadullah-Dal17/Eyes-Position-Estimator-Mediapipe
Branch: master
Commit: 813780bdac9b
Files: 12
Total size: 67.2 KB
Directory structure:
gitextract_gedk2ntz/
├── .gitignore
├── Eye_Tracking_part1/
│ ├── main.py
│ └── utils.py
├── Eye_Tracking_part2/
│ ├── main.py
│ └── utils.py
├── Eye_Tracking_part3/
│ ├── main.py
│ └── utils.py
├── Eye_Tracking_part4/
│ ├── Create Sound/
│ │ └── create_sound.py
│ ├── main.py
│ └── utils.py
├── LICENSE
└── README.md
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
*.mp4
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
================================================
FILE: Eye_Tracking_part1/main.py
================================================
import cv2 as cv
import mediapipe as mp
import time
from numpy import greater
import utils
# Number of frames read so far; the main loop divides it by the elapsed time
# to get a running FPS average.
frame_counter =0
# Font face used for the on-frame text.
FONTS =cv.FONT_HERSHEY_COMPLEX
# The lists below hold positions in the landmark list returned by
# landmarksDetection(); each list traces the outline of one facial region.
# Face outline.
FACE_OVAL=[ 10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288, 397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136, 172, 58, 132, 93, 234, 127, 162, 21, 54, 103,67, 109]
# Lips: LIPS is LOWER_LIPS followed by UPPER_LIPS.
LIPS=[ 61, 146, 91, 181, 84, 17, 314, 405, 321, 375,291, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95,185, 40, 39, 37,0 ,267 ,269 ,270 ,409, 415, 310, 311, 312, 13, 82, 81, 42, 183, 78 ]
LOWER_LIPS =[61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95]
UPPER_LIPS=[ 185, 40, 39, 37,0 ,267 ,269 ,270 ,409, 415, 310, 311, 312, 13, 82, 81, 42, 183, 78]
# Left eye and left eyebrow.
LEFT_EYE =[ 362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385,384, 398 ]
LEFT_EYEBROW =[ 336, 296, 334, 293, 300, 276, 283, 282, 295, 285 ]
# Right eye and right eyebrow.
RIGHT_EYE=[ 33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161 , 246 ]
RIGHT_EYEBROW=[ 70, 63, 105, 66, 107, 55, 65, 52, 53, 46 ]
# Shortcut to the MediaPipe face-mesh solution module; FaceMesh is created from it below.
map_face_mesh = mp.solutions.face_mesh
# Video source: frames are read from this file by the main loop.
camera = cv.VideoCapture("VideoFile.mp4")
# landmark detection function
def landmarksDetection(img, results, draw=False):
    """
    Convert the landmarks of the first detected face to pixel coordinates.
    @param img: (mat) frame the detection ran on; its height and width scale the
        landmark coordinates, and it is drawn on when draw is true.
    @param results: FaceMesh result; multi_face_landmarks must hold at least one face.
    @param draw: (bool) when true, mark every landmark with a filled green circle on img.
    @return: list of tuples (x, y) in integer pixels, one per landmark.
    """
    height, width = img.shape[:2]
    first_face = results.multi_face_landmarks[0]
    pixel_points = [
        (int(landmark.x * width), int(landmark.y * height))
        for landmark in first_face.landmark
    ]
    if draw:
        for pixel_point in pixel_points:
            cv.circle(img, pixel_point, 2, utils.GREEN, -1)
    return pixel_points
# Main loop: run FaceMesh on every frame of the video, paint each facial
# region, and show the annotated frame until the video ends or 'q' is pressed.
# The try/finally guarantees the capture and the window are released even
# when a frame fails to process.
try:
    with map_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5) as face_mesh:
        # reference point for the running FPS average
        start_time = time.time()
        while True:
            frame_counter += 1  # frame counter
            ret, frame = camera.read()  # getting frame from camera
            if not ret:
                break  # no more frames

            # OpenCV delivers BGR frames; swap the channels to RGB for FaceMesh.
            rgb_frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
            results = face_mesh.process(rgb_frame)
            if results.multi_face_landmarks:
                mesh_coords = landmarksDetection(frame, results, False)

                # Translucent fill for each region, in drawing order.
                fills = (
                    (FACE_OVAL, utils.WHITE, 0.4),
                    (LEFT_EYE, utils.GREEN, 0.4),
                    (RIGHT_EYE, utils.GREEN, 0.4),
                    (LEFT_EYEBROW, utils.ORANGE, 0.4),
                    (RIGHT_EYEBROW, utils.ORANGE, 0.4),
                    (LIPS, utils.BLACK, 0.3),
                )
                for indices, color, opacity in fills:
                    frame = utils.fillPolyTrans(frame, [mesh_coords[p] for p in indices], color, opacity=opacity)

                # One small dot per landmark, drawn on top of the fills.
                dots = (
                    (LIPS, utils.GREEN),
                    (RIGHT_EYE, utils.BLACK),
                    (LEFT_EYE, utils.BLACK),
                    (RIGHT_EYEBROW, utils.BLACK),
                    (LEFT_EYEBROW, utils.BLACK),
                    (FACE_OVAL, utils.RED),
                )
                for indices, color in dots:
                    for p in indices:
                        cv.circle(frame, mesh_coords[p], 1, color, -1, cv.LINE_AA)

            # Running average FPS; guard against a zero elapsed time on
            # clocks with coarse resolution.
            end_time = time.time() - start_time
            fps = frame_counter / end_time if end_time > 0 else 0.0
            frame = utils.textWithBackground(frame, f'FPS: {round(fps,1)}', FONTS, 1.0, (20, 50), bgOpacity=0.9, textThickness=2)

            cv.imshow('frame', frame)
            key = cv.waitKey(1)
            if key == ord('q') or key == ord('Q'):
                break
finally:
    cv.destroyAllWindows()
    camera.release()
================================================
FILE: Eye_Tracking_part1/utils.py
================================================
'''
Author: Asadullah Dal
Youtube Channel: https://www.youtube.com/c/aiphile
'''
import cv2 as cv
import numpy as np
# Named colors.
# Each value is a (blue, green, red) tuple: OpenCV takes BGR values, not RGB.
BLACK = (0,0,0)
WHITE = (255,255,255)
BLUE = (255,0,0)
RED = (0,0,255)
CYAN = (255,255,0)
YELLOW =(0,255,255)
MAGENTA = (255,0,255)
GRAY = (128,128,128)
GREEN = (0,255,0)
PURPLE = (128,0,128)
ORANGE = (0,165,255)
PINK = (147,20,255)
# Vertices of the sample polygon that main() fills with fillPolyTrans.
points_list =[(200, 300), (150, 150), (400, 200)]
def drawColor(img, colors):
    """
    Draw one filled swatch per color in a single row near the top of the image.
    @param img: (mat) image to draw on; it is modified in place.
    @param colors: iterable of tuple(BGR) colors, one swatch each.
    @return: None
    """
    swatch_w, swatch_h = 20, 30
    top = 10
    border_color = (10, 50, 10)
    for slot, color in enumerate(colors, start=1):
        # every swatch sits one swatch width plus 5 pixels after the previous one
        left = slot * (swatch_w + 5)
        # dark backing rectangle first, then the swatch itself on top of it
        cv.rectangle(img, (left - 6, top - 5), (left + swatch_w + 5, top + swatch_h + 5), border_color, -1)
        cv.rectangle(img, (left, top), (left + swatch_w, top + swatch_h), color, -1)
def textWithBackground(img, text, font, fontScale, textPos, textThickness=1, textColor=(0,255,0), bgColor=(0,0,0), pad_x=3, pad_y=3, bgOpacity=0.5):
    """
    Draw text on a filled rectangle that is blended into the image.
    @param img: (mat) image to annotate; it is left untouched, the result is a new image.
    @param text: (string) text to draw
    @param font: font face, like FONT_HERSHEY_COMPLEX, FONT_HERSHEY_PLAIN etc.
    @param fontScale: (double) size of the text
    @param textPos: tuple(x,y) position of the text
    @param textThickness: (int) weight of the text strokes
    @param textColor: tuple(BGR), values 0 to 255 each
    @param bgColor: tuple(BGR), values 0 to 255 each
    @param pad_x: int(pixels) padding around the text in x direction
    @param pad_y: int(pixels) padding around the text in y direction
    @param bgOpacity: (float) weight of the rectangle layer in the blend; the
        input image gets 1 - bgOpacity
    @return: img(mat) new image with the background and the text drawn
    """
    (text_w, text_h), _baseline = cv.getTextSize(text, font, fontScale, textThickness)
    left, text_y = textPos
    corner_a = (left - pad_x, text_y + pad_y)
    corner_b = (left + text_w + pad_x, text_y - text_h - pad_y)
    # paint the rectangle on a copy so it can be blended with the untouched input
    canvas = img.copy()
    cv.rectangle(canvas, corner_a, corner_b, bgColor, -1)
    blended = cv.addWeighted(canvas, bgOpacity, img, 1 - bgOpacity, 0)
    # the text goes on after blending, so it stays fully opaque
    cv.putText(blended, text, textPos, font, fontScale, textColor, textThickness)
    return blended
def textBlurBackground(img, text, font, fontScale, textPos, textThickness=1, textColor=(0,255,0), kneral=(33,33), pad_x=3, pad_y=3):
    """
    Draw text on a blurred patch of the image.
    @param img: (mat) image to draw on; it is modified in place
    @param text: (string) text to draw
    @param font: font face, like FONT_HERSHEY_COMPLEX, FONT_HERSHEY_PLAIN etc.
    @param fontScale: (double) size of the text
    @param textPos: tuple(x,y) position of the text
    @param textThickness: (int) weight of the text strokes
    @param textColor: tuple(BGR), values 0 to 255 each
    @param kneral: tuple(int, int) blur kernel size; larger values blur more
    @param pad_x: int(pixels) padding around the text in x direction
    @param pad_y: int(pixels) padding around the text in y direction
    @return: img(mat) the same image, with the patch blurred and the text drawn
    call the function:
    img =textBlurBackground(img, 'Blured Background Text', cv2.FONT_HERSHEY_COMPLEX, 0.9, (20, 60),2, (0,255, 0), (49,49), 13, 13 )
    """
    (t_w, t_h), _ = cv.getTextSize(text, font, fontScale, textThickness)  # getting the text size
    x, y = textPos
    img_h, img_w = img.shape[:2]
    # Clamp the patch to the image: a negative slice start would wrap around
    # to the far edge and select the wrong (usually empty) region.
    top = max(y - pad_y - t_h, 0)
    bottom = min(y + pad_y, img_h)
    left = max(x - pad_x, 0)
    right = min(x + t_w + pad_x, img_w)
    # Only blur when some part of the patch is inside the image.
    if top < bottom and left < right:
        img[top:bottom, left:right] = cv.blur(img[top:bottom, left:right], kneral)
    cv.putText(img, text, textPos, font, fontScale, textColor, textThickness)
    return img
def fillPolyTrans(img, points, color, opacity):
    """
    Fill a polygon with a blended color and outline it.
    @param img: (mat) input image; it is left untouched, the result is a new image.
    @param points: list of tuples (int, int), the vertices of the polygon
    @param color: tuple (int, int, int) of BGR values
    @param opacity: (float) weight of the filled layer in the blend; the input
        image gets 1 - opacity
    @return: img(mat) new image with the polygon drawn.
    """
    polygon = np.array(points, dtype=np.int32)
    # fill on a copy, then blend the copy with the untouched input
    filled = img.copy()
    cv.fillPoly(filled, [polygon], color)
    blended = cv.addWeighted(filled, opacity, img, 1 - opacity, 0)
    # the closed outline is drawn after blending, at full strength
    cv.polylines(blended, [polygon], True, color, 1, cv.LINE_AA)
    return blended
def rectTrans(img, pt1, pt2, color, thickness, opacity):
    """
    Draw a rectangle that is blended into the image.
    @param img: (mat) input image; it is left untouched, the result is a new image.
    @param pt1: tuple(int,int) first point handed to cv.rectangle
    @param pt2: tuple(int,int) second point handed to cv.rectangle
    @param color: tuple (int, int, int) of BGR values
    @param thickness: (int) line thickness of the rectangle; -1 fills it with color.
    @param opacity: (float) weight of the rectangle layer in the blend; the
        input image gets 1 - opacity
    @return: img(mat) new image with the rectangle drawn.
    """
    layer = img.copy()
    cv.rectangle(layer, pt1, pt2, color, thickness)
    # blend the painted copy with the untouched input
    return cv.addWeighted(layer, opacity, img, 1 - opacity, 0)
def main():
    """
    Demo of the drawing helpers on the frames of 'Girl.mp4'.
    Every annotated frame is shown in a window and written to
    'image/image_<n>.png'. The loop stops at the end of the video or when
    'q' is pressed.
    """
    import os  # local import: only this demo needs it

    cap = cv.VideoCapture('Girl.mp4')
    # make sure the output directory exists before frames are written to it
    os.makedirs('image', exist_ok=True)
    counter = 0
    try:
        while True:
            success, img = cap.read()
            if not success:
                break  # end of video, or the source could not be read
            img = rectTrans(img, pt1=(30, 320), pt2=(160, 260), color=(0,255,255), thickness=-1, opacity=0.6)
            img = fillPolyTrans(img=img, points=points_list, color=(0,255,0), opacity=.5)
            drawColor(img, [BLACK, WHITE, BLUE, RED, CYAN, YELLOW, MAGENTA, GRAY, GREEN, PURPLE, ORANGE, PINK])
            textBlurBackground(img, 'Blured Background Text', cv.FONT_HERSHEY_COMPLEX, 0.8, (60, 140), 2, YELLOW, (71,71), 13, 13)
            img = textWithBackground(img, 'Colored Background Texts', cv.FONT_HERSHEY_SIMPLEX, 0.8, (60,80), textThickness=2, bgColor=GREEN, textColor=BLACK, bgOpacity=0.7, pad_x=6, pad_y=6)
            counter += 1
            cv.imshow('img', img)
            cv.imwrite(f'image/image_{counter}.png', img)
            if cv.waitKey(1) == ord('q'):
                break
    finally:
        # release the video and close the window however the loop ended
        cap.release()
        cv.destroyAllWindows()


if __name__ == "__main__":
    main()
================================================
FILE: Eye_Tracking_part2/main.py
================================================
import cv2 as cv
import mediapipe as mp
import time
import utils, math
import numpy as np
# Mutable state updated by the main loop.
# Number of frames read so far (for the running FPS average).
frame_counter =0
# Number of frames counted with the blink ratio above the threshold.
CEF_COUNTER =0
# Number of blinks counted so far.
TOTAL_BLINKS =0
# Constants.
# CEF_COUNTER must exceed this value for a closure to count as a blink.
CLOSED_EYES_FRAME =3
# Font face used for the on-frame text.
FONTS =cv.FONT_HERSHEY_COMPLEX
# The lists below hold positions in the landmark list returned by
# landmarksDetection(); each list traces the outline of one facial region.
# Face outline.
FACE_OVAL=[ 10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288, 397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136, 172, 58, 132, 93, 234, 127, 162, 21, 54, 103,67, 109]
# Lips: LIPS is LOWER_LIPS followed by UPPER_LIPS.
LIPS=[ 61, 146, 91, 181, 84, 17, 314, 405, 321, 375,291, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95,185, 40, 39, 37,0 ,267 ,269 ,270 ,409, 415, 310, 311, 312, 13, 82, 81, 42, 183, 78 ]
LOWER_LIPS =[61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95]
UPPER_LIPS=[ 185, 40, 39, 37,0 ,267 ,269 ,270 ,409, 415, 310, 311, 312, 13, 82, 81, 42, 183, 78]
# Left eye and eyebrow. blinkRatio() reads positions 0 and 8 of an eye list as
# the ends of the horizontal line and positions 12 and 4 as the vertical line.
LEFT_EYE =[ 362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385,384, 398 ]
LEFT_EYEBROW =[ 336, 296, 334, 293, 300, 276, 283, 282, 295, 285 ]
# Right eye and eyebrow (same list layout as the left eye).
RIGHT_EYE=[ 33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161 , 246 ]
RIGHT_EYEBROW=[ 70, 63, 105, 66, 107, 55, 65, 52, 53, 46 ]
# Shortcut to the MediaPipe face-mesh solution module; FaceMesh is created from it below.
map_face_mesh = mp.solutions.face_mesh
# Video source: capture device with index 1.
camera = cv.VideoCapture(1)
# landmark detection function
def landmarksDetection(img, results, draw=False):
    """
    Convert the landmarks of the first detected face to pixel coordinates.
    @param img: (mat) frame the detection ran on; its height and width scale the
        landmark coordinates, and it is drawn on when draw is true.
    @param results: FaceMesh result; multi_face_landmarks must hold at least one face.
    @param draw: (bool) when true, mark every landmark with a filled green circle on img.
    @return: list of tuples (x, y) in integer pixels, one per landmark.
    """
    height, width = img.shape[:2]
    first_face = results.multi_face_landmarks[0]
    pixel_points = [
        (int(landmark.x * width), int(landmark.y * height))
        for landmark in first_face.landmark
    ]
    if draw:
        for pixel_point in pixel_points:
            cv.circle(img, pixel_point, 2, (0,255,0), -1)
    return pixel_points
# Euclidean distance
def euclaideanDistance(point, point1):
    """
    Return the Euclidean distance between two 2-D points.
    @param point: tuple(x, y) first point
    @param point1: tuple(x, y) second point
    @return: (float) straight-line distance between the points
    """
    x, y = point
    x1, y1 = point1
    return math.sqrt((x1 - x)**2 + (y1 - y)**2)


def _eyeRatio(landmarks, indices):
    """Return width divided by height of one eye; inf when the height is zero."""
    # positions 0 and 8 of the index list are the two ends of the horizontal line
    horizontal = euclaideanDistance(landmarks[indices[0]], landmarks[indices[8]])
    # positions 12 and 4 are the top and the bottom of the vertical line
    vertical = euclaideanDistance(landmarks[indices[12]], landmarks[indices[4]])
    if vertical == 0:
        # With integer pixel coordinates the top and bottom points can land on
        # the same pixel; report an unbounded ratio instead of dividing by zero.
        return float('inf')
    return horizontal / vertical


# Blinking Ratio
def blinkRatio(img, landmarks, right_indices, left_indices):
    """
    Return the mean width-to-height ratio of both eyes.
    The ratio grows as the eyes close, because the vertical distance shrinks.
    @param img: (mat) current frame; not used by the computation
    @param landmarks: list of tuples (x, y), as returned by landmarksDetection
    @param right_indices: landmark indices of the right eye outline
    @param left_indices: landmark indices of the left eye outline
    @return: (float) average of the two eye ratios; inf when the top and bottom
        points of either eye coincide
    """
    reRatio = _eyeRatio(landmarks, right_indices)
    leRatio = _eyeRatio(landmarks, left_indices)
    return (reRatio + leRatio) / 2
# Main loop: detect the face mesh on every camera frame, count blinks from
# the eye ratio, and show the annotated frame until 'q' is pressed.
# The try/finally guarantees the capture and the window are released even
# when a frame fails to process.
try:
    with map_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5) as face_mesh:
        # reference point for the running FPS average
        start_time = time.time()
        while True:
            frame_counter += 1  # frame counter
            ret, frame = camera.read()  # getting frame from camera
            if not ret:
                break  # no more frames

            # resizing frame
            frame = cv.resize(frame, None, fx=1.5, fy=1.5, interpolation=cv.INTER_CUBIC)
            frame_height, frame_width = frame.shape[:2]
            # OpenCV delivers BGR frames; swap the channels to RGB for FaceMesh.
            rgb_frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
            results = face_mesh.process(rgb_frame)
            if results.multi_face_landmarks:
                mesh_coords = landmarksDetection(frame, results, False)
                ratio = blinkRatio(frame, mesh_coords, RIGHT_EYE, LEFT_EYE)
                utils.colorBackgroundText(frame, f'Ratio : {round(ratio,2)}', FONTS, 0.7, (30,100), 2, utils.PINK, utils.YELLOW)

                if ratio > 5.5:
                    # eyes closed on this frame
                    CEF_COUNTER += 1
                    # the x position is derived from the frame width (textPos is (x, y))
                    utils.colorBackgroundText(frame, 'Blink', FONTS, 1.7, (int(frame_width/2), 100), 2, utils.YELLOW, pad_x=6, pad_y=6)
                else:
                    # eyes open again: a closure counts as a blink only when it
                    # lasted more than CLOSED_EYES_FRAME frames
                    if CEF_COUNTER > CLOSED_EYES_FRAME:
                        TOTAL_BLINKS += 1
                    # always start over, so that separate short closures
                    # cannot add up to a blink
                    CEF_COUNTER = 0

                utils.colorBackgroundText(frame, f'Total Blinks: {TOTAL_BLINKS}', FONTS, 0.7, (30,150), 2)
                cv.polylines(frame, [np.array([mesh_coords[p] for p in LEFT_EYE], dtype=np.int32)], True, utils.GREEN, 1, cv.LINE_AA)
                cv.polylines(frame, [np.array([mesh_coords[p] for p in RIGHT_EYE], dtype=np.int32)], True, utils.GREEN, 1, cv.LINE_AA)

            # Running average FPS; guard against a zero elapsed time on
            # clocks with coarse resolution.
            end_time = time.time() - start_time
            fps = frame_counter / end_time if end_time > 0 else 0.0
            frame = utils.textWithBackground(frame, f'FPS: {round(fps,1)}', FONTS, 1.0, (30, 50), bgOpacity=0.9, textThickness=2)

            cv.imshow('frame', frame)
            key = cv.waitKey(2)
            if key == ord('q') or key == ord('Q'):
                break
finally:
    cv.destroyAllWindows()
    camera.release()
================================================
FILE: Eye_Tracking_part2/utils.py
================================================
'''
Author: Asadullah Dal
Youtube Channel: https://www.youtube.com/c/aiphile
'''
import cv2 as cv
import numpy as np
# Named colors.
# Each value is a (blue, green, red) tuple: OpenCV takes BGR values, not RGB.
BLACK = (0,0,0)
WHITE = (255,255,255)
BLUE = (255,0,0)
RED = (0,0,255)
CYAN = (255,255,0)
YELLOW =(0,255,255)
MAGENTA = (255,0,255)
GRAY = (128,128,128)
GREEN = (0,255,0)
PURPLE = (128,0,128)
ORANGE = (0,165,255)
PINK = (147,20,255)
# Vertices of the sample polygon that main() fills with fillPolyTrans.
points_list =[(200, 300), (150, 150), (400, 200)]
def drawColor(img, colors):
    """
    Draw one filled swatch per color in a single row near the top of the image.
    @param img: (mat) image to draw on; it is modified in place.
    @param colors: iterable of tuple(BGR) colors, one swatch each.
    @return: None
    """
    swatch_w, swatch_h = 20, 30
    top = 10
    border_color = (10, 50, 10)
    for slot, color in enumerate(colors, start=1):
        # every swatch sits one swatch width plus 5 pixels after the previous one
        left = slot * (swatch_w + 5)
        # dark backing rectangle first, then the swatch itself on top of it
        cv.rectangle(img, (left - 6, top - 5), (left + swatch_w + 5, top + swatch_h + 5), border_color, -1)
        cv.rectangle(img, (left, top), (left + swatch_w, top + swatch_h), color, -1)
def colorBackgroundText(img, text, font, fontScale, textPos, textThickness=1, textColor=(0,255,0), bgColor=(0,0,0), pad_x=3, pad_y=3):
    """
    Draw text on an opaque filled rectangle.
    @param img: (mat) image to draw on; it is modified in place
    @param text: (string) text to draw
    @param font: font face, like FONT_HERSHEY_COMPLEX, FONT_HERSHEY_PLAIN etc.
    @param fontScale: (double) size of the text
    @param textPos: tuple(x,y) position of the text
    @param textThickness: (int) weight of the text strokes
    @param textColor: tuple(BGR), values 0 to 255 each
    @param bgColor: tuple(BGR), values 0 to 255 each
    @param pad_x: int(pixels) padding around the text in x direction
    @param pad_y: int(pixels) padding around the text in y direction
    @return: img(mat) the same image, with the background and the text drawn
    """
    (text_w, text_h), _baseline = cv.getTextSize(text, font, fontScale, textThickness)
    left, text_y = textPos
    corner_a = (left - pad_x, text_y + pad_y)
    corner_b = (left + text_w + pad_x, text_y - text_h - pad_y)
    # background first, text on top of it
    cv.rectangle(img, corner_a, corner_b, bgColor, -1)
    cv.putText(img, text, textPos, font, fontScale, textColor, textThickness)
    return img
def textWithBackground(img, text, font, fontScale, textPos, textThickness=1, textColor=(0,255,0), bgColor=(0,0,0), pad_x=3, pad_y=3, bgOpacity=0.5):
    """
    Draw text on a filled rectangle that is blended into the image.
    @param img: (mat) image to annotate; it is left untouched, the result is a new image.
    @param text: (string) text to draw
    @param font: font face, like FONT_HERSHEY_COMPLEX, FONT_HERSHEY_PLAIN etc.
    @param fontScale: (double) size of the text
    @param textPos: tuple(x,y) position of the text
    @param textThickness: (int) weight of the text strokes
    @param textColor: tuple(BGR), values 0 to 255 each
    @param bgColor: tuple(BGR), values 0 to 255 each
    @param pad_x: int(pixels) padding around the text in x direction
    @param pad_y: int(pixels) padding around the text in y direction
    @param bgOpacity: (float) weight of the rectangle layer in the blend; the
        input image gets 1 - bgOpacity
    @return: img(mat) new image with the background and the text drawn
    """
    (text_w, text_h), _baseline = cv.getTextSize(text, font, fontScale, textThickness)
    left, text_y = textPos
    corner_a = (left - pad_x, text_y + pad_y)
    corner_b = (left + text_w + pad_x, text_y - text_h - pad_y)
    # paint the rectangle on a copy so it can be blended with the untouched input
    canvas = img.copy()
    cv.rectangle(canvas, corner_a, corner_b, bgColor, -1)
    blended = cv.addWeighted(canvas, bgOpacity, img, 1 - bgOpacity, 0)
    # the text goes on after blending, so it stays fully opaque
    cv.putText(blended, text, textPos, font, fontScale, textColor, textThickness)
    return blended
def textBlurBackground(img, text, font, fontScale, textPos, textThickness=1, textColor=(0,255,0), kneral=(33,33), pad_x=3, pad_y=3):
    """
    Draw text on a blurred patch of the image.
    @param img: (mat) image to draw on; it is modified in place
    @param text: (string) text to draw
    @param font: font face, like FONT_HERSHEY_COMPLEX, FONT_HERSHEY_PLAIN etc.
    @param fontScale: (double) size of the text
    @param textPos: tuple(x,y) position of the text
    @param textThickness: (int) weight of the text strokes
    @param textColor: tuple(BGR), values 0 to 255 each
    @param kneral: tuple(int, int) blur kernel size; larger values blur more
    @param pad_x: int(pixels) padding around the text in x direction
    @param pad_y: int(pixels) padding around the text in y direction
    @return: img(mat) the same image, with the patch blurred and the text drawn
    call the function:
    img =textBlurBackground(img, 'Blured Background Text', cv2.FONT_HERSHEY_COMPLEX, 0.9, (20, 60),2, (0,255, 0), (49,49), 13, 13 )
    """
    (t_w, t_h), _ = cv.getTextSize(text, font, fontScale, textThickness)  # getting the text size
    x, y = textPos
    img_h, img_w = img.shape[:2]
    # Clamp the patch to the image: a negative slice start would wrap around
    # to the far edge and select the wrong (usually empty) region.
    top = max(y - pad_y - t_h, 0)
    bottom = min(y + pad_y, img_h)
    left = max(x - pad_x, 0)
    right = min(x + t_w + pad_x, img_w)
    # Only blur when some part of the patch is inside the image.
    if top < bottom and left < right:
        img[top:bottom, left:right] = cv.blur(img[top:bottom, left:right], kneral)
    cv.putText(img, text, textPos, font, fontScale, textColor, textThickness)
    return img
def fillPolyTrans(img, points, color, opacity):
    """
    Fill a polygon with a blended color and outline it.
    @param img: (mat) input image; it is left untouched, the result is a new image.
    @param points: list of tuples (int, int), the vertices of the polygon
    @param color: tuple (int, int, int) of BGR values
    @param opacity: (float) weight of the filled layer in the blend; the input
        image gets 1 - opacity
    @return: img(mat) new image with the polygon drawn.
    """
    polygon = np.array(points, dtype=np.int32)
    # fill on a copy, then blend the copy with the untouched input
    filled = img.copy()
    cv.fillPoly(filled, [polygon], color)
    blended = cv.addWeighted(filled, opacity, img, 1 - opacity, 0)
    # the closed outline is drawn after blending, at full strength
    cv.polylines(blended, [polygon], True, color, 1, cv.LINE_AA)
    return blended
# def pollyLines(img, points, color):
# list_to_np_array = np.array(points, dtype=np.int32)
# cv.polylines(img, [list_to_np_array], True, color,1, cv.LINE_AA)
# return img
def rectTrans(img, pt1, pt2, color, thickness, opacity):
    """
    Draw a rectangle that is blended into the image.
    @param img: (mat) input image; it is left untouched, the result is a new image.
    @param pt1: tuple(int,int) first point handed to cv.rectangle
    @param pt2: tuple(int,int) second point handed to cv.rectangle
    @param color: tuple (int, int, int) of BGR values
    @param thickness: (int) line thickness of the rectangle; -1 fills it with color.
    @param opacity: (float) weight of the rectangle layer in the blend; the
        input image gets 1 - opacity
    @return: img(mat) new image with the rectangle drawn.
    """
    layer = img.copy()
    cv.rectangle(layer, pt1, pt2, color, thickness)
    # blend the painted copy with the untouched input
    return cv.addWeighted(layer, opacity, img, 1 - opacity, 0)
def main():
    """
    Demo of the drawing helpers on the frames of 'Girl.mp4'.
    Every annotated frame is shown in a window and written to
    'image/image_<n>.png'. The loop stops at the end of the video or when
    'q' is pressed.
    """
    import os  # local import: only this demo needs it

    cap = cv.VideoCapture('Girl.mp4')
    # make sure the output directory exists before frames are written to it
    os.makedirs('image', exist_ok=True)
    counter = 0
    try:
        while True:
            success, img = cap.read()
            if not success:
                break  # end of video, or the source could not be read
            img = rectTrans(img, pt1=(30, 320), pt2=(160, 260), color=(0,255,255), thickness=-1, opacity=0.6)
            img = fillPolyTrans(img=img, points=points_list, color=(0,255,0), opacity=.5)
            drawColor(img, [BLACK, WHITE, BLUE, RED, CYAN, YELLOW, MAGENTA, GRAY, GREEN, PURPLE, ORANGE, PINK])
            textBlurBackground(img, 'Blured Background Text', cv.FONT_HERSHEY_COMPLEX, 0.8, (60, 140), 2, YELLOW, (71,71), 13, 13)
            img = textWithBackground(img, 'Colored Background Texts', cv.FONT_HERSHEY_SIMPLEX, 0.8, (60,80), textThickness=2, bgColor=GREEN, textColor=BLACK, bgOpacity=0.7, pad_x=6, pad_y=6)
            counter += 1
            cv.imshow('img', img)
            cv.imwrite(f'image/image_{counter}.png', img)
            if cv.waitKey(1) == ord('q'):
                break
    finally:
        # release the video and close the window however the loop ended
        cap.release()
        cv.destroyAllWindows()


if __name__ == "__main__":
    main()
================================================
FILE: Eye_Tracking_part3/main.py
================================================
import cv2 as cv
import mediapipe as mp
import time
import utils, math
import numpy as np
# Mutable state updated by the main loop.
# Number of frames read so far (for the running FPS average).
frame_counter =0
# Number of frames counted with the blink ratio above the threshold.
CEF_COUNTER =0
# Number of blinks counted so far.
TOTAL_BLINKS =0
# Constants.
# CEF_COUNTER must exceed this value for a closure to count as a blink.
CLOSED_EYES_FRAME =3
# Font face used for the on-frame text.
FONTS =cv.FONT_HERSHEY_COMPLEX
# The lists below hold positions in the landmark list returned by
# landmarksDetection(); each list traces the outline of one facial region.
# Face outline.
FACE_OVAL=[ 10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288, 397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136, 172, 58, 132, 93, 234, 127, 162, 21, 54, 103,67, 109]
# Lips: LIPS is LOWER_LIPS followed by UPPER_LIPS.
LIPS=[ 61, 146, 91, 181, 84, 17, 314, 405, 321, 375,291, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95,185, 40, 39, 37,0 ,267 ,269 ,270 ,409, 415, 310, 311, 312, 13, 82, 81, 42, 183, 78 ]
LOWER_LIPS =[61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95]
UPPER_LIPS=[ 185, 40, 39, 37,0 ,267 ,269 ,270 ,409, 415, 310, 311, 312, 13, 82, 81, 42, 183, 78]
# Left eye and eyebrow. blinkRatio() reads positions 0 and 8 of an eye list as
# the ends of the horizontal line and positions 12 and 4 as the vertical line.
LEFT_EYE =[ 362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385,384, 398 ]
LEFT_EYEBROW =[ 336, 296, 334, 293, 300, 276, 283, 282, 295, 285 ]
# Right eye and eyebrow (same list layout as the left eye).
RIGHT_EYE=[ 33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161 , 246 ]
RIGHT_EYEBROW=[ 70, 63, 105, 66, 107, 55, 65, 52, 53, 46 ]
# Shortcut to the MediaPipe face-mesh solution module; FaceMesh is created from it below.
map_face_mesh = mp.solutions.face_mesh
# Video source: capture device with index 0.
camera = cv.VideoCapture(0)
# landmark detection function
def landmarksDetection(img, results, draw=False):
    """
    Convert the landmarks of the first detected face to pixel coordinates.
    @param img: (mat) frame the detection ran on; its height and width scale the
        landmark coordinates, and it is drawn on when draw is true.
    @param results: FaceMesh result; multi_face_landmarks must hold at least one face.
    @param draw: (bool) when true, mark every landmark with a filled green circle on img.
    @return: list of tuples (x, y) in integer pixels, one per landmark.
    """
    height, width = img.shape[:2]
    first_face = results.multi_face_landmarks[0]
    pixel_points = [
        (int(landmark.x * width), int(landmark.y * height))
        for landmark in first_face.landmark
    ]
    if draw:
        for pixel_point in pixel_points:
            cv.circle(img, pixel_point, 2, (0,255,0), -1)
    return pixel_points
# Euclidean distance
def euclaideanDistance(point, point1):
    """
    Return the Euclidean distance between two 2-D points.
    @param point: tuple(x, y) first point
    @param point1: tuple(x, y) second point
    @return: (float) straight-line distance between the points
    """
    x, y = point
    x1, y1 = point1
    return math.sqrt((x1 - x)**2 + (y1 - y)**2)


def _eyeRatio(landmarks, indices):
    """Return width divided by height of one eye; inf when the height is zero."""
    # positions 0 and 8 of the index list are the two ends of the horizontal line
    horizontal = euclaideanDistance(landmarks[indices[0]], landmarks[indices[8]])
    # positions 12 and 4 are the top and the bottom of the vertical line
    vertical = euclaideanDistance(landmarks[indices[12]], landmarks[indices[4]])
    if vertical == 0:
        # With integer pixel coordinates the top and bottom points can land on
        # the same pixel; report an unbounded ratio instead of dividing by zero.
        return float('inf')
    return horizontal / vertical


# Blinking Ratio
def blinkRatio(img, landmarks, right_indices, left_indices):
    """
    Return the mean width-to-height ratio of both eyes.
    The ratio grows as the eyes close, because the vertical distance shrinks.
    @param img: (mat) current frame; not used by the computation
    @param landmarks: list of tuples (x, y), as returned by landmarksDetection
    @param right_indices: landmark indices of the right eye outline
    @param left_indices: landmark indices of the left eye outline
    @return: (float) average of the two eye ratios; inf when the top and bottom
        points of either eye coincide
    """
    reRatio = _eyeRatio(landmarks, right_indices)
    leRatio = _eyeRatio(landmarks, left_indices)
    return (reRatio + leRatio) / 2
# Eyes extractor function
def _cropToBounds(image, coords):
    """Crop image to the bounding box of coords, clamped to the image origin."""
    xs = [x for x, _ in coords]
    ys = [y for _, y in coords]
    # Negative slice bounds would wrap around to the far edge of the array,
    # so coordinates that fall outside the frame are clamped to 0.
    min_x, max_x = max(min(xs), 0), max(max(xs), 0)
    min_y, max_y = max(min(ys), 0), max(max(ys), 0)
    return image[min_y:max_y, min_x:max_x]


def eyesExtractor(img, right_eye_coords, left_eye_coords):
    """
    Cut both eyes out of a frame as grayscale crops.
    Pixels outside the two eye polygons are set to 155.
    @param img: (mat) BGR frame
    @param right_eye_coords: list of tuples (x, y), outline of the right eye
    @param left_eye_coords: list of tuples (x, y), outline of the left eye
    @return: tuple (cropped_right, cropped_left) of grayscale images, each cut
        to the bounding box of its outline; a crop is empty when its box has
        no area inside the frame
    """
    # converting color image to gray scale image
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    # mask with the two eye shapes in white on black
    mask = np.zeros(gray.shape, dtype=np.uint8)
    cv.fillPoly(mask, [np.array(right_eye_coords, dtype=np.int32)], 255)
    cv.fillPoly(mask, [np.array(left_eye_coords, dtype=np.int32)], 255)
    # keep the gray values inside the eye shapes only
    eyes = cv.bitwise_and(gray, gray, mask=mask)
    # everything that is not an eye becomes a constant gray
    eyes[mask == 0] = 155
    cropped_right = _cropToBounds(eyes, right_eye_coords)
    cropped_left = _cropToBounds(eyes, left_eye_coords)
    return cropped_right, cropped_left
# Eyes position estimator
def positionEstimator(cropped_eye):
    """
    Estimate which third of the eye crop holds the most dark pixels.
    @param cropped_eye: (mat) grayscale crop of one eye
    @return: tuple (eye_position, color): the position label and the list of
        two BGR colors used to display it; ("Closed", ...) for an empty crop
    """
    # getting height and width of eye
    h, w = cropped_eye.shape
    if h == 0 or w == 0:
        # nothing to analyse; the blur calls below cannot take an empty image
        return "Closed", [utils.GRAY, utils.YELLOW]

    # remove the noise from the image
    gaussain_blur = cv.GaussianBlur(cropped_eye, (9,9), 0)
    median_blur = cv.medianBlur(gaussain_blur, 3)

    # binary image: values above 130 become 255, the rest 0
    _ret, threshed_eye = cv.threshold(median_blur, 130, 255, cv.THRESH_BINARY)

    # slicing the eye into three vertical strips; the last one takes the
    # columns left over by the integer division
    piece = int(w/3)
    right_piece = threshed_eye[0:h, 0:piece]
    center_piece = threshed_eye[0:h, piece:piece+piece]
    left_piece = threshed_eye[0:h, piece+piece:w]

    # the strip with the most black pixels decides the position
    eye_position, color = pixelCounter(right_piece, center_piece, left_piece)
    return eye_position, color
# creating pixel counter function
def pixelCounter(first_piece, second_piece, third_piece):
    """
    Pick the eye position from the number of black pixels in each strip.
    @param first_piece: (mat) binary image of the first strip, labelled RIGHT
    @param second_piece: (mat) binary image of the middle strip, labelled CENTER
    @param third_piece: (mat) binary image of the last strip, labelled LEFT
    @return: tuple (pos_eye, color): the label of the strip with the most black
        pixels (the first one wins a tie) and the list of two BGR colors used
        to display it; "Closed" when no strip holds any black pixel
    """
    # counting black pixels in each part
    eye_parts = [
        np.sum(first_piece == 0),
        np.sum(second_piece == 0),
        np.sum(third_piece == 0),
    ]
    most_black = max(eye_parts)
    if most_black == 0:
        # No dark pixel anywhere, so no strip stands out. Without this check
        # the tie at zero would be reported as "RIGHT".
        return "Closed", [utils.GRAY, utils.YELLOW]

    # label and colors for each strip, in the same order as eye_parts
    labels = (
        ("RIGHT", [utils.BLACK, utils.GREEN]),
        ('CENTER', [utils.YELLOW, utils.PINK]),
        ('LEFT', [utils.GRAY, utils.YELLOW]),
    )
    pos_eye, color = labels[eye_parts.index(most_black)]
    return pos_eye, color
# Main loop: detect the face mesh on every camera frame, count blinks, estimate
# the position of each eye, and show the annotated frame until 'q' is pressed.
# The try/finally guarantees the capture and the window are released even
# when a frame fails to process.
try:
    with map_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5) as face_mesh:
        # reference point for the running FPS average
        start_time = time.time()
        while True:
            frame_counter += 1  # frame counter
            ret, frame = camera.read()  # getting frame from camera
            if not ret:
                break  # no more frames

            # resizing frame
            frame = cv.resize(frame, None, fx=1.5, fy=1.5, interpolation=cv.INTER_CUBIC)
            frame_height, frame_width = frame.shape[:2]
            # OpenCV delivers BGR frames; swap the channels to RGB for FaceMesh.
            rgb_frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
            results = face_mesh.process(rgb_frame)
            if results.multi_face_landmarks:
                mesh_coords = landmarksDetection(frame, results, False)
                ratio = blinkRatio(frame, mesh_coords, RIGHT_EYE, LEFT_EYE)
                utils.colorBackgroundText(frame, f'Ratio : {round(ratio,2)}', FONTS, 0.7, (30,100), 2, utils.PINK, utils.YELLOW)

                if ratio > 5.5:
                    # eyes closed on this frame
                    CEF_COUNTER += 1
                    # the x position is derived from the frame width (textPos is (x, y))
                    utils.colorBackgroundText(frame, 'Blink', FONTS, 1.7, (int(frame_width/2), 100), 2, utils.YELLOW, pad_x=6, pad_y=6)
                else:
                    # eyes open again: a closure counts as a blink only when it
                    # lasted more than CLOSED_EYES_FRAME frames
                    if CEF_COUNTER > CLOSED_EYES_FRAME:
                        TOTAL_BLINKS += 1
                    # always start over, so that separate short closures
                    # cannot add up to a blink
                    CEF_COUNTER = 0

                utils.colorBackgroundText(frame, f'Total Blinks: {TOTAL_BLINKS}', FONTS, 0.7, (30,150), 2)
                cv.polylines(frame, [np.array([mesh_coords[p] for p in LEFT_EYE], dtype=np.int32)], True, utils.GREEN, 1, cv.LINE_AA)
                cv.polylines(frame, [np.array([mesh_coords[p] for p in RIGHT_EYE], dtype=np.int32)], True, utils.GREEN, 1, cv.LINE_AA)

                # Eye position: crop both eyes and label each one.
                right_coords = [mesh_coords[p] for p in RIGHT_EYE]
                left_coords = [mesh_coords[p] for p in LEFT_EYE]
                crop_right, crop_left = eyesExtractor(frame, right_coords, left_coords)
                eye_position, color = positionEstimator(crop_right)
                utils.colorBackgroundText(frame, f'R: {eye_position}', FONTS, 1.0, (40, 220), 2, color[0], color[1], 8, 8)
                eye_position_left, color = positionEstimator(crop_left)
                utils.colorBackgroundText(frame, f'L: {eye_position_left}', FONTS, 1.0, (40, 320), 2, color[0], color[1], 8, 8)

            # Running average FPS; guard against a zero elapsed time on
            # clocks with coarse resolution.
            end_time = time.time() - start_time
            fps = frame_counter / end_time if end_time > 0 else 0.0
            frame = utils.textWithBackground(frame, f'FPS: {round(fps,1)}', FONTS, 1.0, (30, 50), bgOpacity=0.9, textThickness=2)

            cv.imshow('frame', frame)
            key = cv.waitKey(2)
            if key == ord('q') or key == ord('Q'):
                break
finally:
    cv.destroyAllWindows()
    camera.release()
================================================
FILE: Eye_Tracking_part3/utils.py
================================================
'''
Author: Asadullah Dal
Youtube Channel: https://www.youtube.com/c/aiphile
'''
import cv2 as cv
import numpy as np
# Colour constants.
# Each value is a (blue, green, red) tuple: OpenCV drawing calls take BGR, not RGB.
BLACK = (0,0,0)
WHITE = (255,255,255)
BLUE = (255,0,0)
RED = (0,0,255)
CYAN = (255,255,0)
YELLOW =(0,255,255)
MAGENTA = (255,0,255)
GRAY = (128,128,128)
GREEN = (0,255,0)
PURPLE = (128,0,128)
ORANGE = (0,165,255)
PINK = (147,20,255)
# Polygon vertices (x, y) that the main() demo below passes to fillPolyTrans.
points_list =[(200, 300), (150, 150), (400, 200)]
def drawColor(img, colors):
    """
    Draws one filled swatch per colour in a horizontal row near the top of the image.
    Every swatch is painted on top of a slightly larger dark-green backing rectangle.
    The image is modified in place; nothing is returned.
    @param img:(mat) image to draw on
    @param colors: iterable of tuple(BGR) colours, one swatch is drawn for each
    """
    swatch_w, swatch_h = 20, 30
    top = 10
    step = swatch_w + 5  # horizontal distance between two neighbouring swatches
    for slot, color in enumerate(colors, start=1):
        left = slot * step
        # backing rectangle first, then the swatch itself on top of it
        cv.rectangle(img, (left - 6, top - 5), (left + swatch_w + 5, top + swatch_h + 5), (10, 50, 10), -1)
        cv.rectangle(img, (left, top), (left + swatch_w, top + swatch_h), color, -1)
def colorBackgroundText(img, text, font, fontScale, textPos, textThickness=1, textColor=(0,255,0), bgColor=(0,0,0), pad_x=3, pad_y=3):
    """
    Draws text on top of a solid (opaque) filled rectangle, directly on the given image.
    @param img:(mat) image on which the text is drawn (modified in place)
    @param text: (string) text to draw
    @param font: font face, like FONT_HERSHEY_COMPLEX, FONT_HERSHEY_PLAIN etc.
    @param fontScale: (double) scale factor of the text
    @param textPos: tuple(x,y) origin handed to cv.putText
    @param textThickness:(int) stroke thickness of the text
    @param textColor: tuple(BGR), values -->0 to 255 each
    @param bgColor: tuple(BGR), values -->0 to 255 each
    @param pad_x: int(pixels) padding added left and right of the text
    @param pad_y: int(pixels) padding added above and below the text
    @return: img(mat) the same image object, with background and text drawn
    """
    text_size, _baseline = cv.getTextSize(text, font, fontScale, textThickness)
    text_w, text_h = text_size
    org_x, org_y = textPos
    # the two opposite corners of the padded background box
    lower_left = (org_x - pad_x, org_y + pad_y)
    upper_right = (org_x + text_w + pad_x, org_y - text_h - pad_y)
    cv.rectangle(img, lower_left, upper_right, bgColor, -1)
    cv.putText(img, text, textPos, font, fontScale, textColor, textThickness)
    return img
def textWithBackground(img, text, font, fontScale, textPos, textThickness=1, textColor=(0,255,0), bgColor=(0,0,0), pad_x=3, pad_y=3, bgOpacity=0.5):
    """
    Draws text on top of a semi-transparent filled rectangle.
    The input image is not modified; a new blended image is returned.
    @param img:(mat) source image
    @param text: (string) text to draw
    @param font: font face, like FONT_HERSHEY_COMPLEX, FONT_HERSHEY_PLAIN etc.
    @param fontScale: (double) scale factor of the text
    @param textPos: tuple(x,y) origin handed to cv.putText
    @param textThickness:(int) stroke thickness of the text
    @param textColor: tuple(BGR), values -->0 to 255 each
    @param bgColor: tuple(BGR), values -->0 to 255 each
    @param pad_x: int(pixels) padding added left and right of the text
    @param pad_y: int(pixels) padding added above and below the text
    @param bgOpacity: (float) weight of the background layer in the blend; the original image gets 1 - bgOpacity
    @return: img(mat) new image with the blended background and the text
    """
    text_size, _baseline = cv.getTextSize(text, font, fontScale, textThickness)
    text_w, text_h = text_size
    org_x, org_y = textPos
    # paint the box on a copy so it can be blended back with the untouched original
    layer = img.copy()
    cv.rectangle(layer, (org_x - pad_x, org_y + pad_y), (org_x + text_w + pad_x, org_y - text_h - pad_y), bgColor, -1)
    blended = cv.addWeighted(layer, bgOpacity, img, 1 - bgOpacity, 0)
    # the text goes on after blending, so it is drawn at full strength
    cv.putText(blended, text, textPos, font, fontScale, textColor, textThickness)
    return blended
def textBlurBackground(img, text, font, fontScale, textPos, textThickness=1, textColor=(0,255,0), kneral=(33,33), pad_x=3, pad_y=3):
    """
    Draws text over a blurred patch of the image; the image is modified in place.
    The patch is the text box grown by the padding and clamped to the image
    borders, so text placed near an edge no longer produces a wrapped-around
    (empty) slice. When nothing of the patch lies inside the image the blur is
    skipped and only the text is drawn.
    @param img:(mat) image on which the text is drawn (modified in place)
    @param text: (string) text to draw
    @param font: font face, like FONT_HERSHEY_COMPLEX, FONT_HERSHEY_PLAIN etc.
    @param fontScale: (double) scale factor of the text
    @param textPos: tuple(x,y) origin handed to cv.putText
    @param textThickness:(int) stroke thickness of the text
    @param textColor: tuple(BGR), values -->0 to 255 each
    @param kneral: tuple(int, int) kernel size handed to cv.blur; larger values blur more
    @param pad_x: int(pixels) padding added left and right of the text
    @param pad_y: int(pixels) padding added above and below the text
    @return: img(mat) the same image object, with blurred background and text
    call the function:
    img =textBlurBackground(img, 'Blured Background Text', cv2.FONT_HERSHEY_COMPLEX, 0.9, (20, 60),2, (0,255, 0), (49,49), 13, 13 )
    """
    (t_w, t_h), _ = cv.getTextSize(text, font, fontScale, textThickness)
    x, y = textPos
    img_h, img_w = img.shape[:2]
    # Clamp the patch to the image: a negative start index would otherwise be
    # interpreted as "count from the end" and select the wrong (usually empty) region.
    top = max(y - pad_y - t_h, 0)
    bottom = min(y + pad_y, img_h)
    left = max(x - pad_x, 0)
    right = min(x + t_w + pad_x, img_w)
    if top < bottom and left < right:
        img[top:bottom, left:right] = cv.blur(img[top:bottom, left:right], kneral)
    cv.putText(img, text, textPos, font, fontScale, textColor, textThickness)
    return img
def fillPolyTrans(img, points, color, opacity):
    """
    Draws a semi-transparent filled polygon with an anti-aliased outline of the same colour.
    The input image is not modified; a new image is returned.
    @param img: (mat) input image
    @param points: list of tuples(int, int), the vertices of the polygon
    @param color: tuple(BGR) used for both the fill and the outline
    @param opacity: (float) weight of the filled layer in the blend; the original image gets 1 - opacity
    @return: img(mat) new image with the polygon drawn
    """
    polygon = np.array(points, dtype=np.int32)
    # fill on a copy, then blend the copy with the untouched original
    layer = img.copy()
    cv.fillPoly(layer, [polygon], color)
    blended = cv.addWeighted(layer, opacity, img, 1 - opacity, 0)
    # the outline is drawn after blending, so it is not faded
    cv.polylines(blended, [polygon], True, color, 1, cv.LINE_AA)
    return blended
# def pollyLines(img, points, color):
# list_to_np_array = np.array(points, dtype=np.int32)
# cv.polylines(img, [list_to_np_array], True, color,1, cv.LINE_AA)
# return img
def rectTrans(img, pt1, pt2, color, thickness, opacity):
    """
    Draws a semi-transparent rectangle. The input image is not modified; a new image is returned.
    @param img: (mat) input image
    @param pt1: tuple(int,int) first corner, passed to cv.rectangle
    @param pt2: tuple(int,int) second corner, passed to cv.rectangle
    @param color: tuple(BGR) colour of the rectangle
    @param thickness: (int) thickness passed to cv.rectangle; main() passes -1 for a filled rectangle
    @param opacity: (float) weight of the rectangle layer in the blend; the original image gets 1 - opacity
    @return: img(mat) new image with the rectangle blended in
    """
    layer = img.copy()
    cv.rectangle(layer, pt1, pt2, color, thickness)
    return cv.addWeighted(layer, opacity, img, 1 - opacity, 0)
def main():
    """
    Demo of the drawing helpers in this module on the video 'Girl.mp4'.
    Every frame gets a transparent rectangle, a transparent polygon, the colour
    swatches and two text labels; it is then shown in a window and written to
    'image/image_<n>.png'. The loop stops when the video yields no more frames
    or when 'q' is pressed; the capture and the window are released in all cases.
    """
    cap = cv.VideoCapture('Girl.mp4')
    counter = 0
    try:
        while True:
            success, img = cap.read()
            if not success:
                # no frame was returned (end of video or file could not be opened)
                break
            img = rectTrans(img, pt1=(30, 320), pt2=(160, 260), color=(0,255,255), thickness=-1, opacity=0.6)
            img = fillPolyTrans(img=img, points=points_list, color=(0,255,0), opacity=.5)
            drawColor(img, [BLACK, WHITE, BLUE, RED, CYAN, YELLOW, MAGENTA, GRAY, GREEN, PURPLE, ORANGE, PINK])
            # textBlurBackground edits img in place, so its return value is not needed
            textBlurBackground(img, 'Blured Background Text', cv.FONT_HERSHEY_COMPLEX, 0.8, (60, 140), 2, YELLOW, (71,71), 13, 13)
            img = textWithBackground(img, 'Colored Background Texts', cv.FONT_HERSHEY_SIMPLEX, 0.8, (60,80), textThickness=2, bgColor=GREEN, textColor=BLACK, bgOpacity=0.7, pad_x=6, pad_y=6)
            counter += 1
            cv.imshow('img', img)
            # NOTE(review): presumably the 'image/' directory must already exist for this write to succeed - confirm
            cv.imwrite(f'image/image_{counter}.png', img)
            if cv.waitKey(1) == ord('q'):
                break
    finally:
        cap.release()
        cv.destroyAllWindows()


if __name__ == "__main__":
    main()
================================================
FILE: Eye_Tracking_part4/Create Sound/create_sound.py
================================================
import pyttsx3
# Speaks one phrase aloud with pyttsx3 and then saves the same phrase to 'center.wav'.
engine = pyttsx3.init() # create the text-to-speech engine
text = 'Looking at center!'
""" RATE"""
engine.setProperty('rate', 120) # set the speaking rate
rate = engine.getProperty('rate') # read the rate back
print (rate) # show the rate now in effect
"""VOLUME"""
volume = engine.getProperty('volume') # read the current volume level (min=0 and max=1)
print (volume) # show the volume before it is changed below
engine.setProperty('volume',1.0) # set the volume level, between 0 and 1
"""VOICE"""
voices = engine.getProperty('voices') # list of voices the engine reports
#engine.setProperty('voice', voices[0].id) # index 0: male voice
engine.setProperty('voice', voices[1].id) # index 1: female voice; NOTE(review): assumes at least two voices are reported - confirm
engine.say(text)
engine.runAndWait()
engine.stop()
"""Saving Voice to a file"""
# On linux make sure that 'espeak' and 'ffmpeg' are installed
engine.save_to_file(text, 'center.wav')
engine.runAndWait()
================================================
FILE: Eye_Tracking_part4/main.py
================================================
import cv2 as cv
import mediapipe as mp
import time
import utils, math
import numpy as np
import pygame
from pygame import mixer
# variables (module-level state updated by the video loop further down)
frame_counter =0  # incremented once per loop iteration; used to compute FPS
CEF_COUNTER =0  # incremented on every frame whose blink ratio is above 5.5
TOTAL_BLINKS =0  # incremented when the eyes reopen with CEF_COUNTER above CLOSED_EYES_FRAME
start_voice= False  # not referenced by the rest of this script
# A direction's voice cue is only played while its counter is below 2;
# playing a cue resets the counters of the other two directions.
counter_right=0
counter_left =0
counter_center =0
# constants
CLOSED_EYES_FRAME =3  # CEF_COUNTER must exceed this for a blink to be counted
FONTS =cv.FONT_HERSHEY_COMPLEX
# initialize mixer
mixer.init()
# loading in the voices/sounds (paths are relative to the working directory)
voice_left = mixer.Sound('Voice/left.wav')
voice_right = mixer.Sound('Voice/Right.wav')
voice_center = mixer.Sound('Voice/center.wav')
# Landmark index tables. Each number is an index into the per-face landmark list
# returned by landmarksDetection(). Only LEFT_EYE and RIGHT_EYE are used by this script.
# face bounder indices
FACE_OVAL=[ 10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288, 397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136, 172, 58, 132, 93, 234, 127, 162, 21, 54, 103,67, 109]
# lips indices for Landmarks (LIPS is LOWER_LIPS followed by UPPER_LIPS)
LIPS=[ 61, 146, 91, 181, 84, 17, 314, 405, 321, 375,291, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95,185, 40, 39, 37,0 ,267 ,269 ,270 ,409, 415, 310, 311, 312, 13, 82, 81, 42, 183, 78 ]
LOWER_LIPS =[61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95]
UPPER_LIPS=[ 185, 40, 39, 37,0 ,267 ,269 ,270 ,409, 415, 310, 311, 312, 13, 82, 81, 42, 183, 78]
# Left eyes indices
# Order matters: blinkRatio() reads positions 0 and 8 as the horizontal pair
# and positions 12 and 4 as the vertical pair of each eye list.
LEFT_EYE =[ 362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385,384, 398 ]
LEFT_EYEBROW =[ 336, 296, 334, 293, 300, 276, 283, 282, 295, 285 ]
# right eyes indices (same positional convention as LEFT_EYE)
RIGHT_EYE=[ 33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161 , 246 ]
RIGHT_EYEBROW=[ 70, 63, 105, 66, 107, 55, 65, 52, 53, 46 ]
map_face_mesh = mp.solutions.face_mesh
# camera object
camera = cv.VideoCapture(1)
# Grab one frame up front only to learn the size of the (resized) frames,
# which the video writer below needs.
_, frame = camera.read()
if frame is None:
    # Fail with a clear message instead of an opaque error from cv.resize.
    camera.release()
    raise RuntimeError('Could not read a frame from camera index 1')
img = cv.resize(frame, None, fx=1.5, fy=1.5, interpolation=cv.INTER_CUBIC)
img_hieght, img_width = img.shape[:2]
print(img_hieght, img_width)
# video Recording setup: the writer takes the frame size as (width, height)
fourcc = cv.VideoWriter_fourcc(*'XVID')
out = cv.VideoWriter('output21.mp4', fourcc, 30.0, (img_width, img_hieght))
# landmark detection function
def landmarksDetection(img, results, draw=False):
    """
    Converts the landmarks of the first detected face to pixel coordinates.
    Each landmark's x and y are scaled by the image width and height and
    truncated to int.
    @param img: (mat) image whose shape supplies the scale; drawn on when draw is True
    @param results: object exposing multi_face_landmarks[0].landmark, each item having .x and .y
    @param draw: (bool) when True, a small filled green circle is drawn at every landmark
    @return: list of tuples (x, y), one per landmark, in landmark order
    """
    height, width = img.shape[:2]
    face_points = results.multi_face_landmarks[0].landmark
    mesh_coord = [(int(lm.x * width), int(lm.y * height)) for lm in face_points]
    if draw:
        for pixel in mesh_coord:
            cv.circle(img, pixel, 2, (0,255,0), -1)
    return mesh_coord
# Euclaidean distance
def euclaideanDistance(point, point1):
    """
    Returns the straight-line distance between two 2D points.
    @param point: (x, y) first point
    @param point1: (x, y) second point
    @return: (float) distance between the two points
    """
    (x0, y0), (x1, y1) = point, point1
    return math.sqrt((x1 - x0)**2 + (y1 - y0)**2)
# Blinking Ratio
def _eyeOpeningRatio(landmarks, indices):
    """Return width / height of one eye, or math.inf when the height is zero."""
    corner_a = landmarks[indices[0]]
    corner_b = landmarks[indices[8]]
    lid_top = landmarks[indices[12]]
    lid_bottom = landmarks[indices[4]]
    width = math.hypot(corner_b[0] - corner_a[0], corner_b[1] - corner_a[1])
    height = math.hypot(lid_bottom[0] - lid_top[0], lid_bottom[1] - lid_top[1])
    if height == 0:
        # Landmarks are integer pixels, so on a fully closed eye the two lid
        # points can coincide; report "infinitely closed" instead of raising
        # ZeroDivisionError.
        return math.inf
    return width / height


def blinkRatio(img, landmarks, right_indices, left_indices):
    """
    Returns the mean width-to-height ratio of both eyes; larger means more closed.
    For each eye the horizontal pair is taken from positions 0 and 8 of its
    index list and the vertical pair from positions 12 and 4.
    @param img: (mat) current frame; not used by the computation
    @param landmarks: sequence of (x, y) pixel coordinates, indexed by landmark number
    @param right_indices: list of landmark numbers of the right eye
    @param left_indices: list of landmark numbers of the left eye
    @return: (float) average of the two ratios; math.inf when an eye has zero height
    """
    reRatio = _eyeOpeningRatio(landmarks, right_indices)
    leRatio = _eyeOpeningRatio(landmarks, left_indices)
    return (reRatio + leRatio) / 2
# Eyes Extrctor function,
def _cropEyeRegion(image, coords):
    """Return the bounding-box crop of coords from image, clamped to the image borders."""
    height, width = image.shape[:2]
    xs = [pt[0] for pt in coords]
    ys = [pt[1] for pt in coords]
    # Clamp every bound into [0, size]: a negative bound would otherwise be read
    # as "count from the end" by the slice and select the wrong region.
    min_x = max(0, min(min(xs), width))
    max_x = max(0, min(max(xs), width))
    min_y = max(0, min(min(ys), height))
    max_y = max(0, min(max(ys), height))
    return image[min_y:max_y, min_x:max_x]


def eyesExtractor(img, right_eye_coords, left_eye_coords):
    """
    Cuts both eyes out of a frame as grayscale images.
    The eye polygons are filled on a mask, every pixel outside them is set to
    155, and each eye is cropped to the bounding box of its coordinates. The
    bounding boxes are clamped to the frame, so landmarks that fall outside the
    frame give a smaller (possibly empty) crop rather than a wrapped-around slice.
    @param img: (mat) BGR frame
    @param right_eye_coords: list of (x, y) pixel coordinates outlining the right eye
    @param left_eye_coords: list of (x, y) pixel coordinates outlining the left eye
    @return: tuple (cropped_right, cropped_left) of grayscale crops
    """
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    # mask is white (255) inside the two eye polygons and black elsewhere
    mask = np.zeros(gray.shape, dtype=np.uint8)
    cv.fillPoly(mask, [np.array(right_eye_coords, dtype=np.int32)], 255)
    cv.fillPoly(mask, [np.array(left_eye_coords, dtype=np.int32)], 255)
    eyes = cv.bitwise_and(gray, gray, mask=mask)
    # everything outside the eyes becomes mid-gray (155) instead of black
    eyes[mask == 0] = 155
    cropped_right = _cropEyeRegion(eyes, right_eye_coords)
    cropped_left = _cropEyeRegion(eyes, left_eye_coords)
    return cropped_right, cropped_left
# Eyes Postion Estimator
def positionEstimator(cropped_eye):
    """
    Estimates where the eye is looking from a grayscale eye crop.
    The crop is blurred, thresholded to black/white, split into three vertical
    strips of equal width (the last strip also takes the remainder columns) and
    handed to pixelCounter.
    @param cropped_eye: (mat) 2D grayscale crop of one eye
    @return: tuple (position string, list of two BGR colours); an empty crop
             yields ("Closed", [utils.GRAY, utils.YELLOW]) without any processing
    """
    h, w = cropped_eye.shape
    if h == 0 or w == 0:
        # Nothing to analyse (degenerate or out-of-frame eye); the OpenCV
        # filters below would fail on an empty image.
        return "Closed", [utils.GRAY, utils.YELLOW]
    # remove noise before thresholding
    gaussain_blur = cv.GaussianBlur(cropped_eye, (9,9), 0)
    median_blur = cv.medianBlur(gaussain_blur, 3)
    # binary image: thresholded at 130 with an output maximum of 255
    _, threshed_eye = cv.threshold(median_blur, 130, 255, cv.THRESH_BINARY)
    piece = w // 3
    right_piece = threshed_eye[0:h, 0:piece]
    center_piece = threshed_eye[0:h, piece:2 * piece]
    left_piece = threshed_eye[0:h, 2 * piece:w]
    return pixelCounter(right_piece, center_piece, left_piece)
# creating pixel counter function
def pixelCounter(first_piece, second_piece, third_piece):
    """
    Picks the strip that contains the most black (zero) pixels.
    On a tie the earliest strip wins.
    @param first_piece: (mat) strip reported as "RIGHT"
    @param second_piece: (mat) strip reported as 'CENTER'
    @param third_piece: (mat) strip reported as 'LEFT'
    @return: tuple (position string, list of two BGR colours from utils)
    """
    black_counts = [np.sum(strip == 0) for strip in (first_piece, second_piece, third_piece)]
    darkest = black_counts.index(max(black_counts))
    # strip index -> (label, colour pair)
    outcomes = {
        0: ("RIGHT", [utils.BLACK, utils.GREEN]),
        1: ('CENTER', [utils.YELLOW, utils.PINK]),
        2: ('LEFT', [utils.GRAY, utils.YELLOW]),
    }
    # the fallback mirrors the original last branch; with three strips the index is always 0, 1 or 2
    pos_eye, color = outcomes.get(darkest, ("Closed", [utils.GRAY, utils.YELLOW]))
    return pos_eye, color
# Main video loop: detect the face mesh on every camera frame, count blinks,
# estimate the position of both eyes, play a voice cue for the right eye's
# position, record the annotated frame and show it. Press 'q' or 'Q' to quit.
with map_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5) as face_mesh:
    # starting time here (reference point for the FPS figure)
    start_time = time.time()
    try:
        while True:
            frame_counter += 1  # frame counter
            ret, frame = camera.read()  # getting frame from camera
            if not ret:
                break  # no more frames break
            # resizing frame
            frame = cv.resize(frame, None, fx=1.5, fy=1.5, interpolation=cv.INTER_CUBIC)
            frame_height, frame_width = frame.shape[:2]
            # OpenCV frames are BGR; the mesh is run on the channel-swapped copy
            rgb_frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
            results = face_mesh.process(rgb_frame)
            if results.multi_face_landmarks:
                mesh_coords = landmarksDetection(frame, results, False)
                ratio = blinkRatio(frame, mesh_coords, RIGHT_EYE, LEFT_EYE)
                utils.colorBackgroundText(frame, f'Ratio : {round(ratio,2)}', FONTS, 0.7, (30,100), 2, utils.PINK, utils.YELLOW)
                if ratio > 5.5:
                    # eyes are considered closed on this frame
                    CEF_COUNTER += 1
                    # NOTE(review): the x position is derived from frame_height, not frame_width - confirm intent
                    utils.colorBackgroundText(frame, 'Blink', FONTS, 1.7, (int(frame_height/2), 100), 2, utils.YELLOW, pad_x=6, pad_y=6)
                else:
                    # eyes are open again: count a blink if they stayed closed long enough
                    if CEF_COUNTER > CLOSED_EYES_FRAME:
                        TOTAL_BLINKS += 1
                    # reset on every open frame so the counter only ever holds
                    # consecutive closed frames
                    CEF_COUNTER = 0
                utils.colorBackgroundText(frame, f'Total Blinks: {TOTAL_BLINKS}', FONTS, 0.7, (30,150), 2)
                cv.polylines(frame, [np.array([mesh_coords[p] for p in LEFT_EYE], dtype=np.int32)], True, utils.GREEN, 1, cv.LINE_AA)
                cv.polylines(frame, [np.array([mesh_coords[p] for p in RIGHT_EYE], dtype=np.int32)], True, utils.GREEN, 1, cv.LINE_AA)
                # Blink Detector Counter Completed
                right_coords = [mesh_coords[p] for p in RIGHT_EYE]
                left_coords = [mesh_coords[p] for p in LEFT_EYE]
                crop_right, crop_left = eyesExtractor(frame, right_coords, left_coords)
                eye_position_right, color = positionEstimator(crop_right)
                utils.colorBackgroundText(frame, f'R: {eye_position_right}', FONTS, 1.0, (40, 220), 2, color[0], color[1], 8, 8)
                eye_position_left, color = positionEstimator(crop_left)
                utils.colorBackgroundText(frame, f'L: {eye_position_left}', FONTS, 1.0, (40, 320), 2, color[0], color[1], 8, 8)
                # Starting Voice Indicator: driven by the right eye only. A cue is
                # played only when the mixer is idle and the same cue has been
                # played fewer than two times since another cue last played.
                if eye_position_right == "RIGHT" and pygame.mixer.get_busy() == 0 and counter_right < 2:
                    counter_right += 1
                    # resetting counters
                    counter_center = 0
                    counter_left = 0
                    # playing voice
                    voice_right.play()
                elif eye_position_right == "CENTER" and pygame.mixer.get_busy() == 0 and counter_center < 2:
                    counter_center += 1
                    # resetting counters
                    counter_right = 0
                    counter_left = 0
                    # playing voice
                    voice_center.play()
                elif eye_position_right == "LEFT" and pygame.mixer.get_busy() == 0 and counter_left < 2:
                    counter_left += 1
                    # resetting counters
                    counter_center = 0
                    counter_right = 0
                    # playing voice
                    voice_left.play()
            # calculating frame per seconds FPS (average since start_time)
            end_time = time.time() - start_time
            fps = frame_counter / end_time
            frame = utils.textWithBackground(frame, f'FPS: {round(fps,1)}', FONTS, 1.0, (30, 50), bgOpacity=0.9, textThickness=2)
            # writing the video for demo purpose
            out.write(frame)
            cv.imshow('frame', frame)
            key = cv.waitKey(2)
            if key == ord('q') or key == ord('Q'):
                break
    finally:
        # Release the writer as well as the camera and windows, even when the
        # loop ends with an exception.
        out.release()
        cv.destroyAllWindows()
        camera.release()
================================================
FILE: Eye_Tracking_part4/utils.py
================================================
'''
Author: Asadullah Dal
Youtube Channel: https://www.youtube.com/c/aiphile
'''
import cv2 as cv
import numpy as np
# Colour constants.
# Each value is a (blue, green, red) tuple: OpenCV drawing calls take BGR, not RGB.
BLACK = (0,0,0)
WHITE = (255,255,255)
BLUE = (255,0,0)
RED = (0,0,255)
CYAN = (255,255,0)
YELLOW =(0,255,255)
MAGENTA = (255,0,255)
GRAY = (128,128,128)
GREEN = (0,255,0)
PURPLE = (128,0,128)
ORANGE = (0,165,255)
PINK = (147,20,255)
# Polygon vertices (x, y) that the main() demo below passes to fillPolyTrans.
points_list =[(200, 300), (150, 150), (400, 200)]
def drawColor(img, colors):
    """
    Draws one filled swatch per colour in a horizontal row near the top of the image.
    Every swatch is painted on top of a slightly larger dark-green backing rectangle.
    The image is modified in place; nothing is returned.
    @param img:(mat) image to draw on
    @param colors: iterable of tuple(BGR) colours, one swatch is drawn for each
    """
    swatch_w, swatch_h = 20, 30
    top = 10
    step = swatch_w + 5  # horizontal distance between two neighbouring swatches
    for slot, color in enumerate(colors, start=1):
        left = slot * step
        # backing rectangle first, then the swatch itself on top of it
        cv.rectangle(img, (left - 6, top - 5), (left + swatch_w + 5, top + swatch_h + 5), (10, 50, 10), -1)
        cv.rectangle(img, (left, top), (left + swatch_w, top + swatch_h), color, -1)
def colorBackgroundText(img, text, font, fontScale, textPos, textThickness=1, textColor=(0,255,0), bgColor=(0,0,0), pad_x=3, pad_y=3):
    """
    Draws text on top of a solid (opaque) filled rectangle, directly on the given image.
    @param img:(mat) image on which the text is drawn (modified in place)
    @param text: (string) text to draw
    @param font: font face, like FONT_HERSHEY_COMPLEX, FONT_HERSHEY_PLAIN etc.
    @param fontScale: (double) scale factor of the text
    @param textPos: tuple(x,y) origin handed to cv.putText
    @param textThickness:(int) stroke thickness of the text
    @param textColor: tuple(BGR), values -->0 to 255 each
    @param bgColor: tuple(BGR), values -->0 to 255 each
    @param pad_x: int(pixels) padding added left and right of the text
    @param pad_y: int(pixels) padding added above and below the text
    @return: img(mat) the same image object, with background and text drawn
    """
    text_size, _baseline = cv.getTextSize(text, font, fontScale, textThickness)
    text_w, text_h = text_size
    org_x, org_y = textPos
    # the two opposite corners of the padded background box
    lower_left = (org_x - pad_x, org_y + pad_y)
    upper_right = (org_x + text_w + pad_x, org_y - text_h - pad_y)
    cv.rectangle(img, lower_left, upper_right, bgColor, -1)
    cv.putText(img, text, textPos, font, fontScale, textColor, textThickness)
    return img
def textWithBackground(img, text, font, fontScale, textPos, textThickness=1, textColor=(0,255,0), bgColor=(0,0,0), pad_x=3, pad_y=3, bgOpacity=0.5):
    """
    Draws text on top of a semi-transparent filled rectangle.
    The input image is not modified; a new blended image is returned.
    @param img:(mat) source image
    @param text: (string) text to draw
    @param font: font face, like FONT_HERSHEY_COMPLEX, FONT_HERSHEY_PLAIN etc.
    @param fontScale: (double) scale factor of the text
    @param textPos: tuple(x,y) origin handed to cv.putText
    @param textThickness:(int) stroke thickness of the text
    @param textColor: tuple(BGR), values -->0 to 255 each
    @param bgColor: tuple(BGR), values -->0 to 255 each
    @param pad_x: int(pixels) padding added left and right of the text
    @param pad_y: int(pixels) padding added above and below the text
    @param bgOpacity: (float) weight of the background layer in the blend; the original image gets 1 - bgOpacity
    @return: img(mat) new image with the blended background and the text
    """
    text_size, _baseline = cv.getTextSize(text, font, fontScale, textThickness)
    text_w, text_h = text_size
    org_x, org_y = textPos
    # paint the box on a copy so it can be blended back with the untouched original
    layer = img.copy()
    cv.rectangle(layer, (org_x - pad_x, org_y + pad_y), (org_x + text_w + pad_x, org_y - text_h - pad_y), bgColor, -1)
    blended = cv.addWeighted(layer, bgOpacity, img, 1 - bgOpacity, 0)
    # the text goes on after blending, so it is drawn at full strength
    cv.putText(blended, text, textPos, font, fontScale, textColor, textThickness)
    return blended
def textBlurBackground(img, text, font, fontScale, textPos, textThickness=1, textColor=(0,255,0), kneral=(33,33), pad_x=3, pad_y=3):
    """
    Draws text over a blurred patch of the image; the image is modified in place.
    The patch is the text box grown by the padding and clamped to the image
    borders, so text placed near an edge no longer produces a wrapped-around
    (empty) slice. When nothing of the patch lies inside the image the blur is
    skipped and only the text is drawn.
    @param img:(mat) image on which the text is drawn (modified in place)
    @param text: (string) text to draw
    @param font: font face, like FONT_HERSHEY_COMPLEX, FONT_HERSHEY_PLAIN etc.
    @param fontScale: (double) scale factor of the text
    @param textPos: tuple(x,y) origin handed to cv.putText
    @param textThickness:(int) stroke thickness of the text
    @param textColor: tuple(BGR), values -->0 to 255 each
    @param kneral: tuple(int, int) kernel size handed to cv.blur; larger values blur more
    @param pad_x: int(pixels) padding added left and right of the text
    @param pad_y: int(pixels) padding added above and below the text
    @return: img(mat) the same image object, with blurred background and text
    call the function:
    img =textBlurBackground(img, 'Blured Background Text', cv2.FONT_HERSHEY_COMPLEX, 0.9, (20, 60),2, (0,255, 0), (49,49), 13, 13 )
    """
    (t_w, t_h), _ = cv.getTextSize(text, font, fontScale, textThickness)
    x, y = textPos
    img_h, img_w = img.shape[:2]
    # Clamp the patch to the image: a negative start index would otherwise be
    # interpreted as "count from the end" and select the wrong (usually empty) region.
    top = max(y - pad_y - t_h, 0)
    bottom = min(y + pad_y, img_h)
    left = max(x - pad_x, 0)
    right = min(x + t_w + pad_x, img_w)
    if top < bottom and left < right:
        img[top:bottom, left:right] = cv.blur(img[top:bottom, left:right], kneral)
    cv.putText(img, text, textPos, font, fontScale, textColor, textThickness)
    return img
def fillPolyTrans(img, points, color, opacity):
    """
    Draws a semi-transparent filled polygon with an anti-aliased outline of the same colour.
    The input image is not modified; a new image is returned.
    @param img: (mat) input image
    @param points: list of tuples(int, int), the vertices of the polygon
    @param color: tuple(BGR) used for both the fill and the outline
    @param opacity: (float) weight of the filled layer in the blend; the original image gets 1 - opacity
    @return: img(mat) new image with the polygon drawn
    """
    polygon = np.array(points, dtype=np.int32)
    # fill on a copy, then blend the copy with the untouched original
    layer = img.copy()
    cv.fillPoly(layer, [polygon], color)
    blended = cv.addWeighted(layer, opacity, img, 1 - opacity, 0)
    # the outline is drawn after blending, so it is not faded
    cv.polylines(blended, [polygon], True, color, 1, cv.LINE_AA)
    return blended
# def pollyLines(img, points, color):
# list_to_np_array = np.array(points, dtype=np.int32)
# cv.polylines(img, [list_to_np_array], True, color,1, cv.LINE_AA)
# return img
def rectTrans(img, pt1, pt2, color, thickness, opacity):
    """
    Draws a semi-transparent rectangle. The input image is not modified; a new image is returned.
    @param img: (mat) input image
    @param pt1: tuple(int,int) first corner, passed to cv.rectangle
    @param pt2: tuple(int,int) second corner, passed to cv.rectangle
    @param color: tuple(BGR) colour of the rectangle
    @param thickness: (int) thickness passed to cv.rectangle; main() passes -1 for a filled rectangle
    @param opacity: (float) weight of the rectangle layer in the blend; the original image gets 1 - opacity
    @return: img(mat) new image with the rectangle blended in
    """
    layer = img.copy()
    cv.rectangle(layer, pt1, pt2, color, thickness)
    return cv.addWeighted(layer, opacity, img, 1 - opacity, 0)
def main():
    """
    Demo of the drawing helpers in this module on the video 'Girl.mp4'.
    Every frame gets a transparent rectangle, a transparent polygon, the colour
    swatches and two text labels; it is then shown in a window and written to
    'image/image_<n>.png'. The loop stops when the video yields no more frames
    or when 'q' is pressed; the capture and the window are released in all cases.
    """
    cap = cv.VideoCapture('Girl.mp4')
    counter = 0
    try:
        while True:
            success, img = cap.read()
            if not success:
                # no frame was returned (end of video or file could not be opened)
                break
            img = rectTrans(img, pt1=(30, 320), pt2=(160, 260), color=(0,255,255), thickness=-1, opacity=0.6)
            img = fillPolyTrans(img=img, points=points_list, color=(0,255,0), opacity=.5)
            drawColor(img, [BLACK, WHITE, BLUE, RED, CYAN, YELLOW, MAGENTA, GRAY, GREEN, PURPLE, ORANGE, PINK])
            # textBlurBackground edits img in place, so its return value is not needed
            textBlurBackground(img, 'Blured Background Text', cv.FONT_HERSHEY_COMPLEX, 0.8, (60, 140), 2, YELLOW, (71,71), 13, 13)
            img = textWithBackground(img, 'Colored Background Texts', cv.FONT_HERSHEY_SIMPLEX, 0.8, (60,80), textThickness=2, bgColor=GREEN, textColor=BLACK, bgOpacity=0.7, pad_x=6, pad_y=6)
            counter += 1
            cv.imshow('img', img)
            # NOTE(review): presumably the 'image/' directory must already exist for this write to succeed - confirm
            cv.imwrite(f'image/image_{counter}.png', img)
            if cv.waitKey(1) == ord('q'):
                break
    finally:
        cap.release()
        cv.destroyAllWindows()


if __name__ == "__main__":
    main()
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2021 Asadullah Dal
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
# Eyes-Position-Estimator-Mediapipe
  
> *This is an Eyes :eye: :eye: Tracking project. Here we use computer vision techniques to extract the eyes, and the MediaPipe Python module provides the face landmarks.*
## Eyes Position Estimation Demo without Voice
https://user-images.githubusercontent.com/66181793/134312531-d8f6b068-13d9-4590-a1d2-232ea4cf5681.mp4
## Iris Tracking Mediapipe Opencv python [Video Tutorial 📹](https://youtu.be/DNKAvDeqH_Y)
https://user-images.githubusercontent.com/66181793/150673670-7b12506f-67d6-4540-96f7-ea6233c01bd6.mp4
## TODO
##### Eyes Tracking Part 1 [Video Tutorial](https://youtu.be/-jFobb6ARc4)
- [x] Detecting face landmarks with Mediapipe 👨💻
- [x] Detecting Eyes Landmarks 👁️🗨️ 👁️🗨️
- [x] Draw Eyes,Eyebrows, lips and Face Oval with transparent shapes.
##### Eyes Tracking Part 2 [Video Tutorial](https://youtu.be/XIJD43rbI-4)
- [x] Detecting the blinks
- [x] Counting Blinks
##### Eyes Tracking Part 3 [Video Tutorial](https://youtu.be/Y-mCtkv41rk)
- [x] Extracting Eyes from the frame using Masking techniques
- [x] Thresholding Eyes to get black ⚫ and white ⚪ pixels separated
- [x] Dividing Each Eye into three 3️⃣ pieces, right_piece, center_piece, left_piece
- [x] Counting the black pixels in each piece and estimating the eye position.
##### Eyes Tracking Part 4 [Video Tutorial](https://youtu.be/oAgu20kuRQw)
- [x] Voice position indicator using pygame.
#### [**Blog Post:** ](https://aiphile.blogspot.com/2021/08/eyes-tracking-mediapipe-part1.html)
### Face Mesh Points
Face Mesh Points
Face Mesh Map Points

## 📫 Let's Connect