深入解析基于OpenCV年龄与性别识别系统
在这篇博客中,我们将详细解析一个使用OpenCV进行年龄和性别识别的Python脚本。这个脚本展示了如何利用深度学习模型,从视频或图像中检测人脸并预测每个人脸的年龄和性别。
1. 导入必要的模块
import cv2 as cv
import math
import time
import argparse
首先,脚本开始于导入必需的Python模块。这里cv2
是OpenCV库的Python接口,主要用于图像处理和计算机视觉任务。argparse
用于处理命令行参数,time
用于测量执行时间。
2. 人脸检测功能
def getFaceBox(net, frame, conf_threshold=0.7):
frameOpencvDnn = frame.copy()
frameHeight = frameOpencvDnn.shape[0]
frameWidth = frameOpencvDnn.shape[1]
blob = cv.dnn.blobFromImage(frameOpencvDnn, 1.0, (300, 300), [104, 117, 123], True, False)
net.setInput(blob)
detections = net.forward()
bboxes = []
for i in range(detections.shape[2]):
confidence = detections[0, 0, i, 2]
if confidence > conf_threshold:
x1 = int(detections[0, 0, i, 3] * frameWidth)
y1 = int(detections[0, 0, i, 4] * frameHeight)
x2 = int(detections[0, 0, i, 5] * frameWidth)
y2 = int(detections[0, 0, i, 6] * frameHeight)
bboxes.append([x1, y1, x2, y2])
cv.rectangle(frameOpencvDnn, (x1, y1), (x2, y2), (0, 255, 0), int(round(frameHeight/150)), 8)
return frameOpencvDnn, bboxes
getFaceBox`是脚本的核心,用于从给定的帧中检测人脸。它首先创建一个图像的blob(一个经过预处理的图像数组),然后通过预训练的神经网络进行前向传播,检测出图像中的人脸。对于每个检测到的人脸,如果其置信度高于阈值,它计算出人脸的边界框,并在图像上绘制矩形。
3. 解析命令行参数和模型加载
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Use this script to run age and gender recognition using OpenCV.')
parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.')
parser.add_argument("--device", default="cpu", help="Device to inference on")
args = parser.parse_args()
在脚本的主部分,它首先定义了命令行参数解析器,允许用户指定输入源(图像或视频文件,或者摄像头流)和推理设备(CPU或GPU)。
faceNet = cv.dnn.readNet(faceModel, faceProto)
ageNet = cv.dnn.readNet(ageModel, ageProto)
genderNet = cv.dnn.readNet(genderModel, genderProto)
这里,脚本加载了三个预训练模型:人脸检测、年龄预测和性别预测。每个模型都通过OpenCV的深度神经网络模块读取。
4. 主循环
while cv.waitKey(1) < 0:
hasFrame, frame = cap.read()
if notFrame:
cv.waitKey()
break
frameFace, bboxes = getFaceBox(faceNet, frame)
if not bboxes:
print("No face Detected, Checking next frame")
continue
在主循环中,脚本从视频捕获设备或图像文件中读取帧。然后它调用getFaceBox
函数来获取每帧中的人脸边界框。
5. 性别和年龄识别
for bbox in bboxes:
face = frame[max(0,bbox[1]-padding):min(bbox[3]+padding,frame.shape[0]-1),max(0,bbox[0]-padding):min(bbox[2]+padding, frame.shape[1]-1)]
blob = cv.dnn.blobFromImage(face, 1.0, (227, 227), MODEL_MEAN_VALUES, swapRB=False)
genderNet.setInput(blob)
genderPreds = genderNet.forward()
gender = genderList[genderPreds[0].argmax()]
ageNet.setInput(blob)
agePreds = ageNet.forward()
age = ageList[agePreds[0].argmax()]
label = "{},{}".format(gender, age)
cv.putText(frameFace, label, (bbox[0], bbox[1]-10), cv.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2, cv.LINE_AA)
cv.imshow("Age Gender Demo", frameFace)
对于每个检测到的人脸,脚本提取人脸图像并生成一个blob,然后将其输入到性别和年龄识别模型中。通过模型的输出,它确定每个人脸的性别和年龄,并在原始图像上标记出这些信息。
完整代码
# Import required modules
import cv2 as cv
import math
import time
import argparse
def getFaceBox(net, frame, conf_threshold=0.7):
frameOpencvDnn = frame.copy()
frameHeight = frameOpencvDnn.shape[0]
frameWidth = frameOpencvDnn.shape[1]
blob = cv.dnn.blobFromImage(frameOpencvDnn, 1.0, (300, 300), [104, 117, 123], True, False)
net.setInput(blob)
detections = net.forward()
bboxes = []
for i in range(detections.shape[2]):
confidence = detections[0, 0, i, 2]
if confidence > conf_threshold:
x1 = int(detections[0, 0, i, 3] * frameWidth)
y1 = int(detections[0, 0, i, 4] * frameHeight)
x2 = int(detections[0, 0, i, 5] * frameWidth)
y2 = int(detections[0, 0, i, 6] * frameHeight)
bboxes.append([x1, y1, x2, y2])
cv.rectangle(frameOpencvDnn, (x1, y1), (x2, y2), (0, 255, 0), int(round(frameHeight/150)), 8)
return frameOpencvDnn, bboxes
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Use this script to run age and gender recognition using OpenCV.')
parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.')
parser.add_argument("--device", default="cpu", help="Device to inference on")
args = parser.parse_args()
# args = parser.parse_args()
faceProto = "opencv_face_detector.pbtxt"
faceModel = "opencv_face_detector_uint8.pb"
ageProto = "age_deploy.prototxt"
ageModel = "age_net.caffemodel"
genderProto = "gender_deploy.prototxt"
genderModel = "gender_net.caffemodel"
MODEL_MEAN_VALUES = (78.4263377603, 87.7689143744, 114.895847746)
ageList = ['(0-2)', '(4-6)', '(8-12)', '(15-20)', '(25-32)', '(38-43)', '(48-53)', '(60-100)']
genderList = ['Male', 'Female']
# Load network
ageNet = cv.dnn.readNet(ageModel, ageProto)
genderNet = cv.dnn.readNet(genderModel, genderProto)
faceNet = cv.dnn.readNet(faceModel, faceProto)
if args.device == "cpu":
ageNet.setPreferableBackend(cv.dnn.DNN_TARGET_CPU)
genderNet.setPreferableBackend(cv.dnn.DNN_TARGET_CPU)
faceNet.setPreferableBackend(cv.dnn.DNN_TARGET_CPU)
print("Using CPU device")
elif args.device == "gpu":
ageNet.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA)
ageNet.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA)
genderNet.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA)
genderNet.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA)
genderNet.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA)
genderNet.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA)
print("Using GPU device")
#
# Open a video file or an image file or a camera stream
cap = cv.VideoCapture(args.input if args.input else 0)
padding = 20
while cv.waitKey(1) < 0:
# Read frame
t = time.time()
hasFrame, frame = cap.read()
if not hasFrame:
cv.waitKey()
break
frameFace, bboxes = getFaceBox(faceNet, frame)
if not bboxes:
print("No face Detected, Checking next frame")
continue
for bbox in bboxes:
# print(bbox)
face = frame[max(0,bbox[1]-padding):min(bbox[3]+padding,frame.shape[0]-1),max(0,bbox[0]-padding):min(bbox[2]+padding, frame.shape[1]-1)]
blob = cv.dnn.blobFromImage(face, 1.0, (227, 227), MODEL_MEAN_VALUES, swapRB=False)
genderNet.setInput(blob)
genderPreds = genderNet.forward()
gender = genderList[genderPreds[0].argmax()]
# print("Gender Output : {}".format(genderPreds))
print("Gender : {}, conf = {:.3f}".format(gender, genderPreds[0].max()))
ageNet.setInput(blob)
agePreds = ageNet.forward()
age = ageList[agePreds[0].argmax()]
print("Age Output : {}".format(agePreds))
print("Age : {}, conf = {:.3f}".format(age, agePreds[0].max()))
label = "{},{}".format(gender, age)
cv.putText(frameFace, label, (bbox[0], bbox[1]-10), cv.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2, cv.LINE_AA)
cv.imshow("Age Gender Demo", frameFace)
# cv.imwrite("age-gender-out-{}".format(args.input),frameFace)
print("time : {:.3f}".format(time.time() - t))
# cmake -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_INSTALL_PREFIX=~/opencv_gpu -DINSTALL_PYTHON_EXAMPLES=OFF -DINSTALL_C_EXAMPLES=OFF -DOPENCV_ENABLE_NONFREE=ON -DOPENCV_EXTRA_MODULES_PATH=~/cv2_gpu/opencv_contrib/modules -DPYTHON_EXECUTABLE=~/env/bin/python3 -DBUILD_EXAMPLES=ON -DWITH_CUDA=ON -DWITH_CUDNN=ON -DOPENCV_DNN_CUDA=ON -DENABLE_FAST_MATH=ON -DCUDA_FAST_MATH=ON -DWITH_CUBLAS=ON -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.2 -DOpenCL_LIBRARY=/usr/local/cuda-10.2/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=/usr/local/cuda-10.2/include/ ..