An OpenCV-Based Gesture-Recognition Mouse Control System
Introduction
As computer vision technology has matured, gesture recognition has become a new mode of interaction, widely used in smart homes, virtual reality, and other fields. This article presents a gesture-recognition mouse control system: a webcam captures hand motion, which is translated into pointer movement and click operations. We cover the code structure, the feature implementation, and the key concepts in detail.
Technology Stack
- OpenCV: video capture and image processing.
- MediaPipe: hand-pose detection.
- PyAutoGUI: simulating mouse and keyboard input.
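If you want to reproduce the project, all three libraries are assumed to be installable from PyPI in the usual way (package names inferred from the imports, with opencv-python providing cv2): pip install opencv-python mediapipe pyautogui.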
Code Structure
The system's code is divided into several parts:
- The HandsDetector class: hand-pose detection.
- The GestureControl class: video capture, gesture recognition, and mouse control.
- The main program: initializes and runs the gesture control system.
The HandsDetector Class
First, we need a detector class to handle hand-pose estimation. We assume this class is already implemented and provides a detect_hands_landmarks method that detects the hand keypoints.
# utils/hands_detector.py
import cv2
import mediapipe as mp


class HandsDetector:
    def __init__(self, static_image_mode=False, max_num_hands=2, model_complexity=1,
                 min_detection_confidence=0.5, min_tracking_confidence=0.5):
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(static_image_mode, max_num_hands, model_complexity,
                                         min_detection_confidence, min_tracking_confidence)
        self.mp_drawing = mp.solutions.drawing_utils

    def detect_hands_landmarks(self, img, show_hand_connections=True, show_landmarks=True, show_landmarks_id=True):
        # MediaPipe expects RGB input, while OpenCV captures BGR frames
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = self.hands.process(img_rgb)
        hands_landmarks_dict = {}
        if results.multi_hand_landmarks:
            for hand_idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
                hands_landmarks_dict[hand_idx] = []
                h, w, c = img.shape
                for idx, lm in enumerate(hand_landmarks.landmark):
                    # Landmarks are normalized to [0, 1]; convert to pixel coordinates
                    cx, cy = int(lm.x * w), int(lm.y * h)
                    hands_landmarks_dict[hand_idx].append((cx, cy))
                    if show_landmarks:
                        cv2.circle(img, (cx, cy), 5, (0, 255, 0), cv2.FILLED)
                    if show_landmarks_id:
                        cv2.putText(img, str(idx), (cx + 5, cy + 5), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255), 1)
                if show_hand_connections:
                    self.mp_drawing.draw_landmarks(img, hand_landmarks, self.mp_hands.HAND_CONNECTIONS)
        return hands_landmarks_dict
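Before wiring the detector into the control loop, it can be exercised on its own. The following is a minimal sketch under the assumption that a webcam is available at index 0; the window name and the printed landmark are arbitrary choices:

import cv2
from utils.hands_detector import HandsDetector

detector = HandsDetector(max_num_hands=1)
cap = cv2.VideoCapture(0)
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    landmarks = detector.detect_hands_landmarks(frame)
    if landmarks:
        # Landmark 8 is the index fingertip in MediaPipe's hand model
        print('index fingertip:', landmarks[0][8])
    cv2.imshow('hands', frame)
    if cv2.waitKey(1) & 0xff == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()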
The GestureControl Class
Next, we implement the GestureControl class, which is responsible for video capture, gesture recognition, and mouse control.
import math
import time
from typing import List

import cv2
import numpy as np
import pyautogui

from utils.hands_detector import HandsDetector


class GestureControl:
    """
    Gesture-driven mouse control
    """
    def __init__(self):
        # Camera capture setup
        self.camera_width, self.camera_height = (800, 480)
        self.cap = cv2.VideoCapture(0)
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, self.camera_width)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, self.camera_height)
        self.cap.set(cv2.CAP_PROP_BRIGHTNESS, 200)
        # Hand-pose detector, tracking a single hand
        self.hands_detector = HandsDetector(
            static_image_mode=False,
            max_num_hands=1,
            model_complexity=1,
            min_detection_confidence=0.8,
            min_tracking_confidence=0.7
        )
        self.hands_detector_dict = dict()
        # Fingertip keypoints, refreshed every frame
        self.thumb_tip = list()
        self.index_finger_tip = list()
        self.middle_finger_tip = list()
        self.index_point = list()
        # Pointer marker colors for the unselected / selected (pinched) states
        self.index_point_color_unselected = (255, 255, 0)
        self.index_point_color_selected = (255, 0, 255)
        self.index_point_color = self.index_point_color_unselected
        self.thumb_first_joint_len = 0
        # Mouse control setup
        pyautogui.PAUSE = 0
        self.screen_width, self.screen_height = pyautogui.size()
        self.mouse_smoothing = 5
        self.last_mouse_point = [self.screen_width // 2, self.screen_height // 2]
        self.camera_padding = 100
        # Button state machines (1 = up, 2 = down)
        self.mouse_left_status_up = 1
        self.mouse_left_status_down = 2
        self.mouse_left_status = self.mouse_left_status_up
        self.mouse_right_status_up = 1
        self.mouse_right_status_down = 2
        self.mouse_right_status = self.mouse_right_status_up
    @staticmethod
    def two_point_distance(pt1: List[int], pt2: List[int]):
        """
        Compute the distance between two points
        :param pt1: coordinates of the first point
        :param pt2: coordinates of the second point
        :return: the distance between the two points
        """
        return math.hypot(pt1[0] - pt2[0], pt1[1] - pt2[1])
    def extract_gesture_finger_keypoint(self):
        """
        Extract the gesture finger keypoints
        :return:
        """
        if self.hands_detector_dict:
            # MediaPipe landmark indices: 4 = thumb tip, 8 = index fingertip,
            # 12 = middle fingertip, 3 = thumb IP joint
            self.thumb_tip = self.hands_detector_dict[0][4]
            self.index_finger_tip = self.hands_detector_dict[0][8]
            self.middle_finger_tip = self.hands_detector_dict[0][12]
            thumb_ip = self.hands_detector_dict[0][3]
            # The midpoint between thumb tip and index fingertip drives the pointer
            self.index_point = [
                (self.index_finger_tip[0] + self.thumb_tip[0]) / 2,
                (self.index_finger_tip[1] + self.thumb_tip[1]) / 2
            ]
            # Thumb-joint length serves as a scale reference for the pinch thresholds
            self.thumb_first_joint_len = self.two_point_distance(self.thumb_tip, thumb_ip)
    def move_mouse_point(self):
        """
        Move the mouse pointer
        :return:
        """
        if not self.hands_detector_dict:
            return
        screen_index_point = self.camera_to_screen_point(camera_point=self.index_point)
        # Exponential smoothing: step 1/mouse_smoothing of the way toward the target
        last_x, last_y = self.last_mouse_point
        current_x, current_y = screen_index_point
        smoothing_screen_index_point_x = last_x + (current_x - last_x) / self.mouse_smoothing
        smoothing_screen_index_point_y = last_y + (current_y - last_y) / self.mouse_smoothing
        screen_index_point = [smoothing_screen_index_point_x, smoothing_screen_index_point_y]
        pyautogui.moveTo(x=screen_index_point[0], y=screen_index_point[1], duration=0.02)
        self.last_mouse_point = screen_index_point
    def camera_to_screen_point(self, camera_point: List[int]):
        """
        Convert camera coordinates to screen coordinates
        :param camera_point: a point in camera coordinates
        :return: the corresponding point in screen coordinates
        """
        safe_px = 10
        # Linearly map the active camera rectangle onto the screen,
        # clamping to a 10 px safe margin at the screen edges
        screen_x = np.interp(
            x=camera_point[0],
            xp=[self.camera_padding, self.camera_width - self.camera_padding],
            fp=[safe_px, self.screen_width - safe_px],
            left=safe_px,
            right=self.screen_width - safe_px
        )
        screen_y = np.interp(
            x=camera_point[1],
            xp=[self.camera_padding, self.camera_height - self.camera_padding],
            fp=[safe_px, self.screen_height - safe_px],
            left=safe_px,
            right=self.screen_height - safe_px
        )
        return [screen_x, screen_y]
    def control_mouse_button(self):
        """
        Control the mouse buttons
        :return:
        """
        if self.hands_detector_dict:
            thumb_index_tip_distance = self.two_point_distance(self.thumb_tip, self.index_finger_tip)
            thumb_middle_tip_distance = self.two_point_distance(self.thumb_tip, self.middle_finger_tip)
            # Two thresholds form a hysteresis band that stops the state flickering
            threshold_len_min = self.thumb_first_joint_len * 0.5
            threshold_len_max = self.thumb_first_joint_len * 0.6
            if thumb_index_tip_distance >= threshold_len_max and thumb_middle_tip_distance >= threshold_len_max:
                # Both fingers apart: release any held buttons
                if self.mouse_left_status == self.mouse_left_status_down:
                    pyautogui.mouseUp(button='left')
                    self.mouse_left_status = self.mouse_left_status_up
                if self.mouse_right_status == self.mouse_right_status_down:
                    pyautogui.mouseUp(button='right')
                    self.mouse_right_status = self.mouse_right_status_up
                self.index_point_color = self.index_point_color_unselected
            elif thumb_index_tip_distance < threshold_len_min and thumb_middle_tip_distance >= threshold_len_max:
                # Thumb-index pinch only: hold the left button
                if self.mouse_left_status == self.mouse_left_status_up:
                    pyautogui.mouseDown(button='left')
                    self.mouse_left_status = self.mouse_left_status_down
                if self.mouse_right_status == self.mouse_right_status_down:
                    pyautogui.mouseUp(button='right')
                    self.mouse_right_status = self.mouse_right_status_up
                self.index_point_color = self.index_point_color_selected
            elif thumb_index_tip_distance < threshold_len_min and thumb_middle_tip_distance < threshold_len_min:
                # Both fingers pinched: hold both buttons
                if self.mouse_left_status == self.mouse_left_status_up:
                    pyautogui.mouseDown(button='left')
                    self.mouse_left_status = self.mouse_left_status_down
                if self.mouse_right_status == self.mouse_right_status_up:
                    pyautogui.mouseDown(button='right')
                    self.mouse_right_status = self.mouse_right_status_down
            else:
                # Distances inside the hysteresis band: keep the previous state
                pass
    def run(self):
        last_time = time.time()
        pyautogui.moveTo(x=self.last_mouse_point[0], y=self.last_mouse_point[1])
        while self.cap.isOpened():
            success, img = self.cap.read()
            if not success:
                break
            # Mirror the frame so movement feels natural, then normalize its size
            img = cv2.flip(src=img, flipCode=1)
            img = cv2.resize(src=img, dsize=(self.camera_width, self.camera_height))
            self.hands_detector_dict = self.hands_detector.detect_hands_landmarks(
                img=img,
                show_hand_connections=True,
                show_landmarks=False,
                show_landmarks_id=False
            )
            self.extract_gesture_finger_keypoint()
            self.move_mouse_point()
            self.control_mouse_button()
            current_time = time.time()
            fps = round(1.0 / (current_time - last_time), 2)
            last_time = current_time
            if self.hands_detector_dict:
                # Visualize the pinch segment and the pointer midpoint
                cv2.line(img, np.int32(self.index_finger_tip), np.int32(self.thumb_tip), (255, 0, 0), 1)
                cv2.circle(img, np.int32(self.index_point), 3, self.index_point_color, cv2.FILLED)
            cv2.putText(img, f'fps: {fps}', (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
            cv2.rectangle(img, (self.camera_padding, self.camera_padding),
                          (self.camera_width - self.camera_padding, self.camera_height - self.camera_padding),
                          (255, 0, 0), 1)
            cv2.imshow('img', img)
            if cv2.waitKey(1) & 0xff == ord('q'):
                break
        self.cap.release()
        cv2.destroyAllWindows()
if __name__ == '__main__':
    gc = GestureControl()
    gc.run()
Code Walkthrough
Initialization
def __init__(self):
    self.camera_width, self.camera_height = (800, 480)
    self.cap = cv2.VideoCapture(0)
    self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, self.camera_width)
    self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, self.camera_height)
    self.cap.set(cv2.CAP_PROP_BRIGHTNESS, 200)
    self.hands_detector = HandsDetector(
        static_image_mode=False,
        max_num_hands=1,
        model_complexity=1,
        min_detection_confidence=0.8,
        min_tracking_confidence=0.7
    )
    self.hands_detector_dict = dict()
    self.thumb_tip = list()
    self.index_finger_tip = list()
    self.middle_finger_tip = list()
    self.index_point = list()
    self.index_point_color_unselected = (255, 255, 0)
    self.index_point_color_selected = (255, 0, 255)
    self.index_point_color = self.index_point_color_unselected
    self.thumb_first_joint_len = 0
    pyautogui.PAUSE = 0
    self.screen_width, self.screen_height = pyautogui.size()
    self.mouse_smoothing = 5
    self.last_mouse_point = [self.screen_width // 2, self.screen_height // 2]
    self.camera_padding = 100
    self.mouse_left_status_up = 1
    self.mouse_left_status_down = 2
    self.mouse_left_status = self.mouse_left_status_up
    self.mouse_right_status_up = 1
    self.mouse_right_status_down = 2
    self.mouse_right_status = self.mouse_right_status_up
- self.cap = cv2.VideoCapture(0): opens the video capture device; the argument 0 selects the first connected camera.
- self.hands_detector = HandsDetector(...): initializes the hand-pose detector.
- self.index_point_color_unselected and self.index_point_color_selected: the colors of the pointer marker in its unselected and selected (pinched) states.
- self.mouse_smoothing: the smoothing factor used to damp jitter while the pointer moves.
- self.last_mouse_point: the pointer's initial position on the screen.
- self.camera_padding: the margin inside the camera frame; the inner rectangle is the active gesture area.
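One initialization detail deserves emphasis: pyautogui.PAUSE = 0 removes the default 0.1-second delay that PyAutoGUI inserts after every call, without which the per-frame moveTo calls would throttle the whole loop. A related aside, sketched below: PyAutoGUI also ships a fail-safe, worth knowing about before letting a program drive your mouse.

import pyautogui

pyautogui.PAUSE = 0       # default is 0.1 s after every call; too slow for per-frame moves
# The fail-safe aborts with FailSafeException when the pointer reaches a
# screen corner, giving you an emergency escape if the controller runs away.
pyautogui.FAILSAFE = True
print(pyautogui.size())   # e.g. Size(width=1920, height=1080)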
Computing the Distance Between Two Points
@staticmethod
def two_point_distance(pt1: List[int], pt2: List[int]):
    return math.hypot(pt1[0] - pt2[0], pt1[1] - pt2[1])
- math.hypot: computes the Euclidean distance between the two points.
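As a quick numeric check:

import math

# The points (100, 100) and (103, 104) differ by (3, 4), so they are 5 px apart
print(math.hypot(3, 4))  # 5.0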
Extracting the Gesture Finger Keypoints
def extract_gesture_finger_keypoint(self):
    if self.hands_detector_dict:
        self.thumb_tip = self.hands_detector_dict[0][4]
        self.index_finger_tip = self.hands_detector_dict[0][8]
        self.middle_finger_tip = self.hands_detector_dict[0][12]
        thumb_ip = self.hands_detector_dict[0][3]
        self.index_point = [
            (self.index_finger_tip[0] + self.thumb_tip[0]) / 2,
            (self.index_finger_tip[1] + self.thumb_tip[1]) / 2
        ]
        self.thumb_first_joint_len = self.two_point_distance(self.thumb_tip, thumb_ip)
- Extracts the fingertip coordinates of the thumb, index finger, and middle finger.
- Computes the midpoint of the segment joining the thumb tip and the index fingertip.
- Computes the distance from the thumb tip to the first joint at the distal end of the thumb (the IP joint).
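The indices 3, 4, 8, and 12 come from MediaPipe's 21-point hand model. If you prefer named constants over magic numbers, MediaPipe exposes the same indices as an enum:

import mediapipe as mp

hl = mp.solutions.hands.HandLandmark
print(int(hl.THUMB_IP))           # 3
print(int(hl.THUMB_TIP))          # 4
print(int(hl.INDEX_FINGER_TIP))   # 8
print(int(hl.MIDDLE_FINGER_TIP))  # 12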
Moving the Mouse Pointer
def move_mouse_point(self):
    if not self.hands_detector_dict:
        return
    screen_index_point = self.camera_to_screen_point(camera_point=self.index_point)
    last_x, last_y = self.last_mouse_point
    current_x, current_y = screen_index_point
    smoothing_screen_index_point_x = last_x + (current_x - last_x) / self.mouse_smoothing
    smoothing_screen_index_point_y = last_y + (current_y - last_y) / self.mouse_smoothing
    screen_index_point = [smoothing_screen_index_point_x, smoothing_screen_index_point_y]
    pyautogui.moveTo(x=screen_index_point[0], y=screen_index_point[1], duration=0.02)
    self.last_mouse_point = screen_index_point
- Converts the camera coordinates to screen coordinates.
- Applies a smoothing step to damp pointer jitter.
- Moves the pointer with pyautogui.moveTo.
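The smoothing step is an exponential filter: with mouse_smoothing = 5, each frame closes 20% of the remaining gap to the target, so a sudden jump in the detected point decays geometrically instead of teleporting the pointer. A standalone sketch with illustrative numbers:

# Exponential smoothing: converge from 0 toward a target of 100
smoothing = 5
pos, target = 0.0, 100.0
for frame in range(5):
    pos = pos + (target - pos) / smoothing
    print(f'frame {frame}: {pos:.1f}')  # 20.0, 36.0, 48.8, 59.0, 67.2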
Converting Camera Coordinates to Screen Coordinates
def camera_to_screen_point(self, camera_point: List[int]):
    safe_px = 10
    screen_x = np.interp(
        x=camera_point[0],
        xp=[self.camera_padding, self.camera_width - self.camera_padding],
        fp=[safe_px, self.screen_width - safe_px],
        left=safe_px,
        right=self.screen_width - safe_px
    )
    screen_y = np.interp(
        x=camera_point[1],
        xp=[self.camera_padding, self.camera_height - self.camera_padding],
        fp=[safe_px, self.screen_height - safe_px],
        left=safe_px,
        right=self.screen_height - safe_px
    )
    return [screen_x, screen_y]
- np.interp performs linear interpolation, mapping a coordinate inside the camera's active area onto the screen; the left and right arguments clamp out-of-range inputs to a 10-pixel safe margin at the screen edges.
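np.interp maps x linearly from the source range xp onto the target range fp, and the left/right arguments pin out-of-range inputs. A self-contained example using the camera geometry above (800 px wide frame, 100 px padding) and an assumed 1920 px wide screen:

import numpy as np

# Camera x in [100, 700] maps onto screen x in [10, 1910], clamped at the edges
print(np.interp(400, [100, 700], [10, 1910]))              # 960.0 (midpoint)
print(np.interp(50, [100, 700], [10, 1910], left=10))      # 10.0 (clamped low)
print(np.interp(900, [100, 700], [10, 1910], right=1910))  # 1910.0 (clamped high)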
Controlling the Mouse Buttons
def control_mouse_button(self):
    if self.hands_detector_dict:
        thumb_index_tip_distance = self.two_point_distance(self.thumb_tip, self.index_finger_tip)
        thumb_middle_tip_distance = self.two_point_distance(self.thumb_tip, self.middle_finger_tip)
        threshold_len_min = self.thumb_first_joint_len * 0.5
        threshold_len_max = self.thumb_first_joint_len * 0.6
        if thumb_index_tip_distance >= threshold_len_max and thumb_middle_tip_distance >= threshold_len_max:
            if self.mouse_left_status == self.mouse_left_status_down:
                pyautogui.mouseUp(button='left')
                self.mouse_left_status = self.mouse_left_status_up
            if self.mouse_right_status == self.mouse_right_status_down:
                pyautogui.mouseUp(button='right')
                self.mouse_right_status = self.mouse_right_status_up
            self.index_point_color = self.index_point_color_unselected
        elif thumb_index_tip_distance < threshold_len_min and thumb_middle_tip_distance >= threshold_len_max:
            if self.mouse_left_status == self.mouse_left_status_up:
                pyautogui.mouseDown(button='left')
                self.mouse_left_status = self.mouse_left_status_down
            if self.mouse_right_status == self.mouse_right_status_down:
                pyautogui.mouseUp(button='right')
                self.mouse_right_status = self.mouse_right_status_up
            self.index_point_color = self.index_point_color_selected
        elif thumb_index_tip_distance < threshold_len_min and thumb_middle_tip_distance < threshold_len_min:
            if self.mouse_left_status == self.mouse_left_status_up:
                pyautogui.mouseDown(button='left')
                self.mouse_left_status = self.mouse_left_status_down
            if self.mouse_right_status == self.mouse_right_status_up:
                pyautogui.mouseDown(button='right')
                self.mouse_right_status = self.mouse_right_status_down
        else:
            pass
- Computes the distances from the thumb tip to the index and middle fingertips.
- Classifies the gesture from these distances and presses or releases the left and right mouse buttons accordingly: both fingers apart releases everything, a thumb-index pinch holds the left button, and pinching both fingers holds both buttons. Because the thresholds scale with thumb_first_joint_len, the gesture works at any distance from the camera.
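Note that two thresholds are used (0.5x and 0.6x the thumb-joint length) rather than one. The gap between them acts as a hysteresis band: a pinch must close below the lower threshold to press a button and open beyond the upper threshold to release it, so the button state cannot flicker while the measured distance hovers around a single cutoff. A stripped-down sketch of the same idea:

def update_state(pressed, distance, low=0.5, high=0.6):
    # Hysteresis: the state only changes when the distance leaves the band
    if distance < low:
        return True      # pinch closed: press (or stay pressed)
    if distance >= high:
        return False     # pinch open: release (or stay released)
    return pressed       # inside the band: keep the previous state

state = False
for d in [0.7, 0.55, 0.45, 0.55, 0.65]:
    state = update_state(state, d)
    print(d, state)      # False, False, True, True, False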
The Main Loop
def run(self):
    last_time = time.time()
    pyautogui.moveTo(x=self.last_mouse_point[0], y=self.last_mouse_point[1])
    while self.cap.isOpened():
        success, img = self.cap.read()
        if not success:
            break
        img = cv2.flip(src=img, flipCode=1)
        img = cv2.resize(src=img, dsize=(self.camera_width, self.camera_height))
        self.hands_detector_dict = self.hands_detector.detect_hands_landmarks(
            img=img,
            show_hand_connections=True,
            show_landmarks=False,
            show_landmarks_id=False
        )
        self.extract_gesture_finger_keypoint()
        self.move_mouse_point()
        self.control_mouse_button()
        current_time = time.time()
        fps = round(1.0 / (current_time - last_time), 2)
        last_time = current_time
        if self.hands_detector_dict:
            cv2.line(img, np.int32(self.index_finger_tip), np.int32(self.thumb_tip), (255, 0, 0), 1)
            cv2.circle(img, np.int32(self.index_point), 3, self.index_point_color, cv2.FILLED)
        cv2.putText(img, f'fps: {fps}', (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
        cv2.rectangle(img, (self.camera_padding, self.camera_padding),
                      (self.camera_width - self.camera_padding, self.camera_height - self.camera_padding),
                      (255, 0, 0), 1)
        cv2.imshow('img', img)
        if cv2.waitKey(1) & 0xff == ord('q'):
            break
    self.cap.release()
    cv2.destroyAllWindows()
- last_time = time.time(): records the timestamp of the previous frame for the FPS calculation.
- while self.cap.isOpened(): runs the main loop until the video capture device is closed.
- success, img = self.cap.read(): reads one frame; success indicates whether the read succeeded, and img is the captured image.
- img = cv2.flip(src=img, flipCode=1): flips the image horizontally so the user sees a mirror view.
- img = cv2.resize(src=img, dsize=(self.camera_width, self.camera_height)): resizes the frame to the working resolution.
- self.hands_detector_dict = self.hands_detector.detect_hands_landmarks(...): detects the hand keypoints.
- self.extract_gesture_finger_keypoint(): extracts the gesture finger keypoints.
- self.move_mouse_point(): moves the mouse pointer.
- self.control_mouse_button(): presses or releases the mouse buttons.
- cv2.putText(...): overlays the frame rate on the image.
- cv2.rectangle(...): draws the rectangle marking the active gesture area.
- cv2.imshow('img', img): displays the image.
- if cv2.waitKey(1) & 0xff == ord('q'): waits for key input and exits the loop when the q key is pressed.
- self.cap.release(): releases the video capture device, followed by cv2.destroyAllWindows() to close the preview window.
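One caveat on the FPS readout: it is computed from a single frame interval, so the displayed number jitters. If a steadier readout is wanted, the same exponential-smoothing trick used for the pointer applies; a small sketch, where the sleep stands in for one frame of real work:

import time

alpha = 0.1               # weight of the newest sample
fps_smooth = 0.0
last_time = time.time()
for _ in range(100):
    time.sleep(0.01)      # stand-in for one frame of processing
    now = time.time()
    fps = 1.0 / (now - last_time)
    last_time = now
    fps_smooth = (1 - alpha) * fps_smooth + alpha * fps
print(f'smoothed fps: {fps_smooth:.1f}')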
Summary
This article has walked through building a gesture-recognition mouse control system with OpenCV, MediaPipe, and PyAutoGUI. It should leave you with a grasp of the basic principles of gesture recognition and the ability to apply these techniques in your own projects.