An OpenCV-Based Gesture-Recognition Mouse Control System
Introduction
As computer vision technology has matured, gesture recognition has become a new mode of interaction, widely used in smart homes, virtual reality, and other fields. This article presents a gesture-recognition mouse control system: a webcam captures hand motion, which is translated into pointer movement and click operations. We cover the code structure, the feature implementation, and the key concepts in detail.
Technology Stack
- OpenCV: video capture and image processing.
- MediaPipe: hand-pose detection.
- PyAutoGUI: simulating mouse and keyboard input.
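If you want to reproduce the project, all three libraries are assumed to be installable from PyPI in the usual way (package names inferred from the imports, with opencv-python providing cv2): pip install opencv-python mediapipe pyautogui.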
Code Structure
The system's code is divided into several parts:
- The HandsDetector class: hand-pose detection.
- The GestureControl class: video capture, gesture recognition, and mouse control.
- The main program: initializes and runs the gesture control system.
The HandsDetector Class
First, we need a detector class to handle hand-pose estimation. We assume this class is already implemented and provides a detect_hands_landmarks method that detects the hand keypoints.
# utils/hands_detector.py
import cv2
import mediapipe as mp


class HandsDetector:
    def __init__(self, static_image_mode=False, max_num_hands=2, model_complexity=1,
                 min_detection_confidence=0.5, min_tracking_confidence=0.5):
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(static_image_mode, max_num_hands, model_complexity,
                                         min_detection_confidence, min_tracking_confidence)
        self.mp_drawing = mp.solutions.drawing_utils

    def detect_hands_landmarks(self, img, show_hand_connections=True, show_landmarks=True, show_landmarks_id=True):
        # MediaPipe expects RGB input, while OpenCV captures BGR frames
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = self.hands.process(img_rgb)
        hands_landmarks_dict = {}
        if results.multi_hand_landmarks:
            for hand_idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
                hands_landmarks_dict[hand_idx] = []
                h, w, c = img.shape
                for idx, lm in enumerate(hand_landmarks.landmark):
                    # Landmarks are normalized to [0, 1]; convert to pixel coordinates
                    cx, cy = int(lm.x * w), int(lm.y * h)
                    hands_landmarks_dict[hand_idx].append((cx, cy))
                    if show_landmarks:
                        cv2.circle(img, (cx, cy), 5, (0, 255, 0), cv2.FILLED)
                    if show_landmarks_id:
                        cv2.putText(img, str(idx), (cx + 5, cy + 5), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255), 1)
                if show_hand_connections:
                    self.mp_drawing.draw_landmarks(img, hand_landmarks, self.mp_hands.HAND_CONNECTIONS)
        return hands_landmarks_dict
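Before wiring the detector into the control loop, it can be exercised on its own. The following is a minimal sketch under the assumption that a webcam is available at index 0; the window name and the printed landmark are arbitrary choices:

import cv2
from utils.hands_detector import HandsDetector

detector = HandsDetector(max_num_hands=1)
cap = cv2.VideoCapture(0)
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    landmarks = detector.detect_hands_landmarks(frame)
    if landmarks:
        # Landmark 8 is the index fingertip in MediaPipe's hand model
        print('index fingertip:', landmarks[0][8])
    cv2.imshow('hands', frame)
    if cv2.waitKey(1) & 0xff == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()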
The GestureControl Class
Next, we implement the GestureControl class, which is responsible for video capture, gesture recognition, and mouse control.
import math
import time
from typing import List

import cv2
import numpy as np
import pyautogui

from utils.hands_detector import HandsDetector


class GestureControl:
    """
    Gesture-driven mouse control
    """
    def __init__(self):
        # Camera capture setup
        self.camera_width, self.camera_height = (800, 480)
        self.cap = cv2.VideoCapture(0)
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, self.camera_width)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, self.camera_height)
        self.cap.set(cv2.CAP_PROP_BRIGHTNESS, 200)
        # Hand-pose detector, tracking a single hand
        self.hands_detector = HandsDetector(
            static_image_mode=False,
            max_num_hands=1,
            model_complexity=1,
            min_detection_confidence=0.8,
            min_tracking_confidence=0.7
        )
        self.hands_detector_dict = dict()
        # Fingertip keypoints, refreshed every frame
        self.thumb_tip = list()
        self.index_finger_tip = list()
        self.middle_finger_tip = list()
        self.index_point = list()
        # Pointer marker colors for the unselected / selected (pinched) states
        self.index_point_color_unselected = (255, 255, 0)
        self.index_point_color_selected = (255, 0, 255)
        self.index_point_color = self.index_point_color_unselected
        self.thumb_first_joint_len = 0
        # Mouse control setup
        pyautogui.PAUSE = 0
        self.screen_width, self.screen_height = pyautogui.size()
        self.mouse_smoothing = 5
        self.last_mouse_point = [self.screen_width // 2, self.screen_height // 2]
        self.camera_padding = 100
        # Button state machines (1 = up, 2 = down)
        self.mouse_left_status_up = 1
        self.mouse_left_status_down = 2
        self.mouse_left_status = self.mouse_left_status_up
        self.mouse_right_status_up = 1
        self.mouse_right_status_down = 2
        self.mouse_right_status = self.mouse_right_status_up
    @staticmethod
    def two_point_distance(pt1: List[int], pt2: List[int]):
        """
        Compute the distance between two points
        :param pt1: coordinates of the first point
        :param pt2: coordinates of the second point
        :return: the distance between the two points
        """
        return math.hypot(pt1[0] - pt2[0], pt1[1] - pt2[1])
    def extract_gesture_finger_keypoint(self):
        """
        Extract the gesture finger keypoints
        :return:
        """
        if self.hands_detector_dict:
            # MediaPipe landmark indices: 4 = thumb tip, 8 = index fingertip,
            # 12 = middle fingertip, 3 = thumb IP joint
            self.thumb_tip = self.hands_detector_dict[0][4]
            self.index_finger_tip = self.hands_detector_dict[0][8]
            self.middle_finger_tip = self.hands_detector_dict[0][12]
            thumb_ip = self.hands_detector_dict[0][3]
            # The midpoint between thumb tip and index fingertip drives the pointer
            self.index_point = [
                (self.index_finger_tip[0] + self.thumb_tip[0]) / 2,
                (self.index_finger_tip[1] + self.thumb_tip[1]) / 2
            ]
            # Thumb-joint length serves as a scale reference for the pinch thresholds
            self.thumb_first_joint_len = self.two_point_distance(self.thumb_tip, thumb_ip)
    def move_mouse_point(self):
        """
        Move the mouse pointer
        :return:
        """
        if not self.hands_detector_dict:
            return
        screen_index_point = self.camera_to_screen_point(camera_point=self.index_point)
        # Exponential smoothing: step 1/mouse_smoothing of the way toward the target
        last_x, last_y = self.last_mouse_point
        current_x, current_y = screen_index_point
        smoothing_screen_index_point_x = last_x + (current_x - last_x) / self.mouse_smoothing
        smoothing_screen_index_point_y = last_y + (current_y - last_y) / self.mouse_smoothing
        screen_index_point = [smoothing_screen_index_point_x, smoothing_screen_index_point_y]
        pyautogui.moveTo(x=screen_index_point[0], y=screen_index_point[1], duration=0.02)
        self.last_mouse_point = screen_index_point
    def camera_to_screen_point(self, camera_point: List[int]):
        """
        Convert camera coordinates to screen coordinates
        :param camera_point: a point in camera coordinates
        :return: the corresponding point in screen coordinates
        """
        safe_px = 10
        # Linearly map the active camera rectangle onto the screen,
        # clamping to a 10 px safe margin at the screen edges
        screen_x = np.interp(
            x=camera_point[0],
            xp=[self.camera_padding, self.camera_width - self.camera_padding],
            fp=[safe_px, self.screen_width - safe_px],
            left=safe_px,
            right=self.screen_width - safe_px
        )
        screen_y = np.interp(
            x=camera_point[1],
            xp=[self.camera_padding, self.camera_height - self.camera_padding],
            fp=[safe_px, self.screen_height - safe_px],
            left=safe_px,
            right=self.screen_height - safe_px
        )
        return [screen_x, screen_y]
    def control_mouse_button(self):
        """
        Control the mouse buttons
        :return:
        """
        if self.hands_detector_dict:
            thumb_index_tip_distance = self.two_point_distance(self.thumb_tip, self.index_finger_tip)
            thumb_middle_tip_distance = self.two_point_distance(self.thumb_tip, self.middle_finger_tip)
            # Two thresholds form a hysteresis band that stops the state flickering
            threshold_len_min = self.thumb_first_joint_len * 0.5
            threshold_len_max = self.thumb_first_joint_len * 0.6
            if thumb_index_tip_distance >= threshold_len_max and thumb_middle_tip_distance >= threshold_len_max:
                # Both fingers apart: release any held buttons
                if self.mouse_left_status == self.mouse_left_status_down:
                    pyautogui.mouseUp(button='left')
                    self.mouse_left_status = self.mouse_left_status_up
                if self.mouse_right_status == self.mouse_right_status_down:
                    pyautogui.mouseUp(button='right')
                    self.mouse_right_status = self.mouse_right_status_up
                self.index_point_color = self.index_point_color_unselected
            elif thumb_index_tip_distance < threshold_len_min and thumb_middle_tip_distance >= threshold_len_max:
                # Thumb-index pinch only: hold the left button
                if self.mouse_left_status == self.mouse_left_status_up:
                    pyautogui.mouseDown(button='left')
                    self.mouse_left_status = self.mouse_left_status_down
                if self.mouse_right_status == self.mouse_right_status_down:
                    pyautogui.mouseUp(button='right')
                    self.mouse_right_status = self.mouse_right_status_up
                self.index_point_color = self.index_point_color_selected
            elif thumb_index_tip_distance < threshold_len_min and thumb_middle_tip_distance < threshold_len_min:
                # Both fingers pinched: hold both buttons
                if self.mouse_left_status == self.mouse_left_status_up:
                    pyautogui.mouseDown(button='left')
                    self.mouse_left_status = self.mouse_left_status_down
                if self.mouse_right_status == self.mouse_right_status_up:
                    pyautogui.mouseDown(button='right')
                    self.mouse_right_status = self.mouse_right_status_down
            else:
                # Distances inside the hysteresis band: keep the previous state
                pass
    def run(self):
        last_time = time.time()
        pyautogui.moveTo(x=self.last_mouse_point[0], y=self.last_mouse_point[1])
        while self.cap.isOpened():
            success, img = self.cap.read()
            if not success:
                break
            # Mirror the frame so movement feels natural, then normalize its size
            img = cv2.flip(src=img, flipCode=1)
            img = cv2.resize(src=img, dsize=(self.camera_width, self.camera_height))
            self.hands_detector_dict = self.hands_detector.detect_hands_landmarks(
                img=img,
                show_hand_connections=True,
                show_landmarks=False,
                show_landmarks_id=False
            )
            self.extract_gesture_finger_keypoint()
            self.move_mouse_point()
            self.control_mouse_button()
            current_time = time.time()
            fps = round(1.0 / (current_time - last_time), 2)
            last_time = current_time
            if self.hands_detector_dict:
                # Visualize the pinch segment and the pointer midpoint
                cv2.line(img, np.int32(self.index_finger_tip), np.int32(self.thumb_tip), (255, 0, 0), 1)
                cv2.circle(img, np.int32(self.index_point), 3, self.index_point_color, cv2.FILLED)
            cv2.putText(img, f'fps: {fps}', (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
            cv2.rectangle(img, (self.camera_padding, self.camera_padding),
                          (self.camera_width - self.camera_padding, self.camera_height - self.camera_padding),
                          (255, 0, 0), 1)
            cv2.imshow('img', img)
            if cv2.waitKey(1) & 0xff == ord('q'):
                break
        self.cap.release()
        cv2.destroyAllWindows()
if __name__ == '__main__':
    gc = GestureControl()
    gc.run()
Code Walkthrough
Initialization
def __init__(self):
    self.camera_width, self.camera_height = (800, 480)
    self.cap = cv2.VideoCapture(0)
    self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, self.camera_width)
    self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, self.camera_height)
    self.cap.set(cv2.CAP_PROP_BRIGHTNESS, 200)
    self.hands_detector = HandsDetector(
        static_image_mode=False,
        max_num_hands=1,
        model_complexity=1,
        min_detection_confidence=0.8,
        min_tracking_confidence=0.7
    )
    self.hands_detector_dict = dict()
    self.thumb_tip = list()
    self.index_finger_tip = list()
    self.middle_finger_tip = list()
    self.index_point = list()
    self.index_point_color_unselected = (255, 255, 0)
    self.index_point_color_selected = (255, 0, 255)
    self.index_point_color = self.index_point_color_unselected
    self.thumb_first_joint_len = 0
    pyautogui.PAUSE = 0
    self.screen_width, self.screen_height = pyautogui.size()
    self.mouse_smoothing = 5
    self.last_mouse_point = [self.screen_width // 2, self.screen_height // 2]
    self.camera_padding = 100
    self.mouse_left_status_up = 1
    self.mouse_left_status_down = 2
    self.mouse_left_status = self.mouse_left_status_up
    self.mouse_right_status_up = 1
    self.mouse_right_status_down = 2
    self.mouse_right_status = self.mouse_right_status_up
- self.cap = cv2.VideoCapture(0): opens the video capture device; the argument 0 selects the first connected camera.
- self.hands_detector = HandsDetector(...): initializes the hand-pose detector.
- self.index_point_color_unselected and self.index_point_color_selected: the colors of the pointer marker in its unselected and selected (pinched) states.
- self.mouse_smoothing: the smoothing factor used to damp jitter while the pointer moves.
- self.last_mouse_point: the pointer's initial position on the screen.
- self.camera_padding: the margin inside the camera frame; the inner rectangle is the active gesture area.
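One initialization detail deserves emphasis: pyautogui.PAUSE = 0 removes the default 0.1-second delay that PyAutoGUI inserts after every call, without which the per-frame moveTo calls would throttle the whole loop. A related aside, sketched below: PyAutoGUI also ships a fail-safe, worth knowing about before letting a program drive your mouse.

import pyautogui

pyautogui.PAUSE = 0       # default is 0.1 s after every call; too slow for per-frame moves
# The fail-safe aborts with FailSafeException when the pointer reaches a
# screen corner, giving you an emergency escape if the controller runs away.
pyautogui.FAILSAFE = True
print(pyautogui.size())   # e.g. Size(width=1920, height=1080)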
Computing the Distance Between Two Points
@staticmethod
def two_point_distance(pt1: List[int], pt2: List[int]):
    return math.hypot(pt1[0] - pt2[0], pt1[1] - pt2[1])
- math.hypot: computes the Euclidean distance between the two points.
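As a quick numeric check:

import math

# The points (100, 100) and (103, 104) differ by (3, 4), so they are 5 px apart
print(math.hypot(3, 4))  # 5.0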
Extracting the Gesture Finger Keypoints
def extract_gesture_finger_keypoint(self):
    if self.hands_detector_dict:
        self.thumb_tip = self.hands_detector_dict[0][4]
        self.index_finger_tip = self.hands_detector_dict[0][8]
        self.middle_finger_tip = self.hands_detector_dict[0][12]
        thumb_ip = self.hands_detector_dict[0][3]
        self.index_point = [
            (self.index_finger_tip[0] + self.thumb_tip[0]) / 2,
            (self.index_finger_tip[1] + self.thumb_tip[1]) / 2
        ]
        self.thumb_first_joint_len = self.two_point_distance(self.thumb_tip, thumb_ip)
- Extracts the fingertip coordinates of the thumb, index finger, and middle finger.
- Computes the midpoint of the segment joining the thumb tip and the index fingertip.
- Computes the distance from the thumb tip to the first joint at the distal end of the thumb (the IP joint).
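The indices 3, 4, 8, and 12 come from MediaPipe's 21-point hand model. If you prefer named constants over magic numbers, MediaPipe exposes the same indices as an enum:

import mediapipe as mp

hl = mp.solutions.hands.HandLandmark
print(int(hl.THUMB_IP))           # 3
print(int(hl.THUMB_TIP))          # 4
print(int(hl.INDEX_FINGER_TIP))   # 8
print(int(hl.MIDDLE_FINGER_TIP))  # 12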
Moving the Mouse Pointer
def move_mouse_point(self):
    if not self.hands_detector_dict:
        return
    screen_index_point = self.camera_to_screen_point(camera_point=self.index_point)
    last_x, last_y = self.last_mouse_point
    current_x, current_y = screen_index_point
    smoothing_screen_index_point_x = last_x + (current_x - last_x) / self.mouse_smoothing
    smoothing_screen_index_point_y = last_y + (current_y - last_y) / self.mouse_smoothing
    screen_index_point = [smoothing_screen_index_point_x, smoothing_screen_index_point_y]
    pyautogui.moveTo(x=screen_index_point[0], y=screen_index_point[1], duration=0.02)
    self.last_mouse_point = screen_index_point
- Converts the camera coordinates to screen coordinates.
- Applies a smoothing step to damp pointer jitter.
- Moves the pointer with pyautogui.moveTo.
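The smoothing step is an exponential filter: with mouse_smoothing = 5, each frame closes 20% of the remaining gap to the target, so a sudden jump in the detected point decays geometrically instead of teleporting the pointer. A standalone sketch with illustrative numbers:

# Exponential smoothing: converge from 0 toward a target of 100
smoothing = 5
pos, target = 0.0, 100.0
for frame in range(5):
    pos = pos + (target - pos) / smoothing
    print(f'frame {frame}: {pos:.1f}')  # 20.0, 36.0, 48.8, 59.0, 67.2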
Converting Camera Coordinates to Screen Coordinates
def camera_to_screen_point(self, camera_point: List[int]):
    safe_px = 10
    screen_x = np.interp(
        x=camera_point[0],
        xp=[self.camera_padding, self.camera_width - self.camera_padding],
        fp=[safe_px, self.screen_width - safe_px],
        left=safe_px,
        right=self.screen_width - safe_px
    )
    screen_y = np.interp(
        x=camera_point[1],
        xp=[self.camera_padding, self.camera_height - self.camera_padding],
        fp=[safe_px, self.screen_height - safe_px],
        left=safe_px,
        right=self.screen_height - safe_px
    )
    return [screen_x, screen_y]
- np.interp performs linear interpolation, mapping a coordinate inside the camera's active area onto the screen; the left and right arguments clamp out-of-range inputs to a 10-pixel safe margin at the screen edges.
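np.interp maps x linearly from the source range xp onto the target range fp, and the left/right arguments pin out-of-range inputs. A self-contained example using the camera geometry above (800 px wide frame, 100 px padding) and an assumed 1920 px wide screen:

import numpy as np

# Camera x in [100, 700] maps onto screen x in [10, 1910], clamped at the edges
print(np.interp(400, [100, 700], [10, 1910]))              # 960.0 (midpoint)
print(np.interp(50, [100, 700], [10, 1910], left=10))      # 10.0 (clamped low)
print(np.interp(900, [100, 700], [10, 1910], right=1910))  # 1910.0 (clamped high)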
Controlling the Mouse Buttons
def control_mouse_button(self):
    if self.hands_detector_dict:
        thumb_index_tip_distance = self.two_point_distance(self.thumb_tip, self.index_finger_tip)
        thumb_middle_tip_distance = self.two_point_distance(self.thumb_tip, self.middle_finger_tip)
        threshold_len_min = self.thumb_first_joint_len * 0.5
        threshold_len_max = self.thumb_first_joint_len * 0.6
        if thumb_index_tip_distance >= threshold_len_max and thumb_middle_tip_distance >= threshold_len_max:
            if self.mouse_left_status == self.mouse_left_status_down:
                pyautogui.mouseUp(button='left')
                self.mouse_left_status = self.mouse_left_status_up
            if self.mouse_right_status == self.mouse_right_status_down:
                pyautogui.mouseUp(button='right')
                self.mouse_right_status = self.mouse_right_status_up
            self.index_point_color = self.index_point_color_unselected
        elif thumb_index_tip_distance < threshold_len_min and thumb_middle_tip_distance >= threshold_len_max:
            if self.mouse_left_status == self.mouse_left_status_up:
                pyautogui.mouseDown(button='left')
                self.mouse_left_status = self.mouse_left_status_down
            if self.mouse_right_status == self.mouse_right_status_down:
                pyautogui.mouseUp(button='right')
                self.mouse_right_status = self.mouse_right_status_up
            self.index_point_color = self.index_point_color_selected
        elif thumb_index_tip_distance < threshold_len_min and thumb_middle_tip_distance < threshold_len_min:
            if self.mouse_left_status == self.mouse_left_status_up:
                pyautogui.mouseDown(button='left')
                self.mouse_left_status = self.mouse_left_status_down
            if self.mouse_right_status == self.mouse_right_status_up:
                pyautogui.mouseDown(button='right')
                self.mouse_right_status = self.mouse_right_status_down
        else:
            pass
- Computes the distances from the thumb tip to the index and middle fingertips.
- Classifies the gesture from these distances and presses or releases the left and right mouse buttons accordingly: both fingers apart releases everything, a thumb-index pinch holds the left button, and pinching both fingers holds both buttons. Because the thresholds scale with thumb_first_joint_len, the gesture works at any distance from the camera.
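Note that two thresholds are used (0.5x and 0.6x the thumb-joint length) rather than one. The gap between them acts as a hysteresis band: a pinch must close below the lower threshold to press a button and open beyond the upper threshold to release it, so the button state cannot flicker while the measured distance hovers around a single cutoff. A stripped-down sketch of the same idea:

def update_state(pressed, distance, low=0.5, high=0.6):
    # Hysteresis: the state only changes when the distance leaves the band
    if distance < low:
        return True      # pinch closed: press (or stay pressed)
    if distance >= high:
        return False     # pinch open: release (or stay released)
    return pressed       # inside the band: keep the previous state

state = False
for d in [0.7, 0.55, 0.45, 0.55, 0.65]:
    state = update_state(state, d)
    print(d, state)      # False, False, True, True, False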
The Main Loop
def run(self):
    last_time = time.time()
    pyautogui.moveTo(x=self.last_mouse_point[0], y=self.last_mouse_point[1])
    while self.cap.isOpened():
        success, img = self.cap.read()
        if not success:
            break
        img = cv2.flip(src=img, flipCode=1)
        img = cv2.resize(src=img, dsize=(self.camera_width, self.camera_height))
        self.hands_detector_dict = self.hands_detector.detect_hands_landmarks(
            img=img,
            show_hand_connections=True,
            show_landmarks=False,
            show_landmarks_id=False
        )
        self.extract_gesture_finger_keypoint()
        self.move_mouse_point()
        self.control_mouse_button()
        current_time = time.time()
        fps = round(1.0 / (current_time - last_time), 2)
        last_time = current_time
        if self.hands_detector_dict:
            cv2.line(img, np.int32(self.index_finger_tip), np.int32(self.thumb_tip), (255, 0, 0), 1)
            cv2.circle(img, np.int32(self.index_point), 3, self.index_point_color, cv2.FILLED)
        cv2.putText(img, f'fps: {fps}', (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
        cv2.rectangle(img, (self.camera_padding, self.camera_padding),
                      (self.camera_width - self.camera_padding, self.camera_height - self.camera_padding),
                      (255, 0, 0), 1)
        cv2.imshow('img', img)
        if cv2.waitKey(1) & 0xff == ord('q'):
            break
    self.cap.release()
    cv2.destroyAllWindows()
- last_time = time.time(): records the timestamp of the previous frame for the FPS calculation.
- while self.cap.isOpened(): runs the main loop until the video capture device is closed.
- success, img = self.cap.read(): reads one frame; success indicates whether the read succeeded, and img is the captured image.
- img = cv2.flip(src=img, flipCode=1): flips the image horizontally so the user sees a mirror view.
- img = cv2.resize(src=img, dsize=(self.camera_width, self.camera_height)): resizes the frame to the working resolution.
- self.hands_detector_dict = self.hands_detector.detect_hands_landmarks(...): detects the hand keypoints.
- self.extract_gesture_finger_keypoint(): extracts the gesture finger keypoints.
- self.move_mouse_point(): moves the mouse pointer.
- self.control_mouse_button(): presses or releases the mouse buttons.
- cv2.putText(...): overlays the frame rate on the image.
- cv2.rectangle(...): draws the rectangle marking the active gesture area.
- cv2.imshow('img', img): displays the image.
- if cv2.waitKey(1) & 0xff == ord('q'): waits for key input and exits the loop when the q key is pressed.
- self.cap.release(): releases the video capture device, followed by cv2.destroyAllWindows() to close the preview window.
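One caveat on the FPS readout: it is computed from a single frame interval, so the displayed number jitters. If a steadier readout is wanted, the same exponential-smoothing trick used for the pointer applies; a small sketch, where the sleep stands in for one frame of real work:

import time

alpha = 0.1               # weight of the newest sample
fps_smooth = 0.0
last_time = time.time()
for _ in range(100):
    time.sleep(0.01)      # stand-in for one frame of processing
    now = time.time()
    fps = 1.0 / (now - last_time)
    last_time = now
    fps_smooth = (1 - alpha) * fps_smooth + alpha * fps
print(f'smoothed fps: {fps_smooth:.1f}')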
Summary
This article has walked through building a gesture-recognition mouse control system with OpenCV, MediaPipe, and PyAutoGUI. It should leave you with a grasp of the basic principles of gesture recognition and the ability to apply these techniques in your own projects.