A Walkthrough of YOLOv8 ONNX Inference Code
This article walks through Python code for YOLOv8 inference with ONNX Runtime, covering inference on a local camera, single images, and video files. The code relies on OpenCV, ONNX Runtime, NumPy, and PyYAML.
1. Importing the Required Libraries
import onnxruntime
import cv2
import numpy as np
import time
import yaml
onnxruntime loads and runs the ONNX model, cv2 handles image I/O, drawing, and video capture, numpy provides the array operations, time enforces the NMS time limit, and yaml parses the label file. These dependencies can typically be installed with pip install onnxruntime opencv-python numpy pyyaml (substitute onnxruntime-gpu for CUDA support).
2. Image Preprocessing Functions
The resize_image_cv2 function
Scales the input image to the target size while preserving its aspect ratio, padding the remaining area with gray (letterboxing) so the content is not distorted.
def resize_image_cv2(image, size):
    ih, iw, ic = image.shape   # original height, width, channels
    w, h = size                # target width, height
    scale = min(w / iw, h / ih)
    nw = int(iw * scale)
    nh = int(ih * scale)
    image = cv2.resize(image, (nw, nh))
    # gray (128) canvas in (height, width, channels) order
    new_image = np.ones((h, w, 3), dtype='uint8') * 128
    start_h = (h - nh) // 2
    start_w = (w - nw) // 2
    new_image[start_h:start_h + nh, start_w:start_w + nw] = image
    return new_image, nw, nh
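As a quick sanity check (using a hypothetical 1280x720 frame, not part of the original article), the letterbox keeps the aspect ratio and pads the remainder:

frame = np.zeros((720, 1280, 3), dtype=np.uint8)   # placeholder BGR frame
padded, nw, nh = resize_image_cv2(frame, (640, 640))
print(padded.shape, nw, nh)  # (640, 640, 3) 640 360 -> 140 px gray bands top and bottom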
The data_process_cv2 function
Preprocesses the image: letterbox resizing, normalization to [0, 1], and conversion to the NCHW float32 layout the model expects.
def data_process_cv2(frame, input_shape):
    '''
    Preprocess an input frame for inference.
    :param frame: BGR image as an (H, W, 3) ndarray
    :param input_shape: model input size, e.g. [640, 640]
    :return: (NCHW float32 tensor, letterboxed uint8 image for drawing)
    '''
    image_data, nw, nh = resize_image_cv2(frame, (input_shape[1], input_shape[0]))
    org_data = image_data.copy()
    np_data = np.array(image_data, np.float32) / 255.  # normalize to [0, 1]
    image_data = np.expand_dims(np.transpose(np_data, (2, 0, 1)), 0)  # HWC -> NCHW
    image_data = np.ascontiguousarray(image_data)
    return image_data, org_data
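A minimal check of the resulting tensor layout, again with an assumed 1280x720 input:

frame = np.zeros((720, 1280, 3), dtype=np.uint8)
blob, letterboxed = data_process_cv2(frame, (640, 640))
print(blob.shape, blob.dtype)  # (1, 3, 640, 640) float32, values in [0, 1]
print(letterboxed.shape)       # (640, 640, 3), kept for drawing the results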
3. Non-Maximum Suppression (NMS)
The non_max_suppression function
Performs non-maximum suppression to remove overlapping detection boxes. It expects YOLOv5-style rows of (x, y, w, h, conf, class scores...), which is why detect_objects later inserts a confidence column into the raw YOLOv8 output.
def non_max_suppression(prediction,
conf_thres=0.25,
iou_thres=0.35,
classes=None,
agnostic=False,
multi_label=False,
labels=(),
max_det=300,
nm=0 # number of masks
):
"""
Perform Non-Maximum Suppression (NMS) on the boxes to filter out overlapping boxes.
Parameters:
prediction (ndarray): Predictions from the model.
conf_thres (float): Confidence threshold to filter boxes.
iou_thres (float): Intersection over Union (IoU) threshold for NMS.
classes (list): Filter boxes by classes.
agnostic (bool): If True, perform class-agnostic NMS.
multi_label (bool): If True, perform multi-label NMS.
labels (list): Labels for auto-labelling.
max_det (int): Maximum number of detections.
nm (int): Number of masks.
Returns:
list: A list of filtered boxes.
"""
bs = prediction.shape[0] # batch size
nc = prediction.shape[2] - nm - 5 # number of classes
xc = prediction[..., 4] > conf_thres # candidates
max_wh = 7680 # (pixels) maximum box width and height
max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
time_limit = 0.5 + 0.05 * bs # seconds to quit after
# redundant = True # require redundant detections
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
# merge = False # use merge-NMS
t = time.time()
mi = 5 + nc # mask start index
output = [np.zeros((0, 6 + nm))] * bs
for xi, x in enumerate(prediction): # image index, image inference
# Apply constraints
x = x[xc[xi]] # confidence
# Cat apriori labels if autolabelling
if labels and len(labels[xi]):
lb = labels[xi]
v = np.zeros((len(lb), nc + nm + 5))
v[:, :4] = lb[:, 1:5] # box
v[:, 4] = 1.0 # conf
v[np.arange(len(lb)), lb[:, 0].astype(int) + 5] = 1.0 # cls
x = np.concatenate((x, v), 0)
# If none remain process next image
if not x.shape[0]:
continue
# Compute conf
x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
# Box/Mask
        box = xywh2xyxy(x[:, :4])  # (center_x, center_y, width, height) to (x1, y1, x2, y2)
mask = x[:, mi:] # zero columns if no masks
# Detections matrix nx6 (xyxy, conf, cls)
if multi_label:
i, j = np.nonzero(x[:, 5:mi] > conf_thres)
x = np.concatenate((box[i], x[i, 5 + j][:, None], j[:, None].astype(float), mask[i]), 1)
else: # best class only
conf = x[:, 5:mi].max(1, keepdims=True)
            j = x[:, 5:mi].argmax(1, keepdims=True)
x = np.concatenate((box, conf, j.astype(float), mask), 1)[conf[:, 0] > conf_thres]
# Filter by class
if classes is not None:
x = x[(x[:, 5:6] == np.array(classes)[:, None]).any(1)]
# Check shape
n = x.shape[0] # number of boxes
if not n: # no boxes
continue
sorted_indices = np.argsort(x[:, 4])[::-1]
x = x[sorted_indices][:max_nms] # sort by confidence and remove excess boxes
# Batched NMS
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
i = nms(boxes, scores, iou_thres) # NMS
i = i[:max_det] # limit detections
output[xi] = x[i]
if (time.time() - t) > time_limit:
# LOGGER.warning(f'WARNING NMS time limit {time_limit:.3f}s exceeded')
break # time limit exceeded
return output
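A small smoke test (fabricated boxes with two classes, not part of the original code) illustrates the expected input layout of (x, y, w, h, obj_conf, class scores...):

dummy = np.array([[[100, 100, 50, 50, 0.90, 0.80, 0.10],   # box A, class 0
                   [102, 102, 50, 50, 0.80, 0.70, 0.20],   # overlaps A, gets suppressed
                   [300, 300, 40, 40, 0.90, 0.10, 0.90]]], # box B, class 1
                 dtype=np.float32)
dets = non_max_suppression(dummy, conf_thres=0.25, iou_thres=0.45)
print(dets[0])  # two rows of (x1, y1, x2, y2, conf, cls): one per cluster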
The nms function
A NumPy implementation of greedy NMS: repeatedly keep the highest-scoring box and drop every remaining box whose IoU with it exceeds the threshold.
def nms(boxes, scores, iou_threshold):
"""
Perform Non-Maximum Suppression (NMS) on the given boxes with scores using numpy.
Parameters:
boxes (ndarray): The bounding boxes, shaped (N, 4).
scores (ndarray): The confidence scores for each box, shaped (N,).
iou_threshold (float): The IoU threshold for suppressing overlapping boxes.
Returns:
ndarray: The indices of the selected boxes after NMS.
"""
if len(boxes) == 0:
return []
# Sort boxes by their scores
indices = np.argsort(scores)[::-1]
selected_indices = []
while len(indices) > 0:
# Select the box with the highest score
current_index = indices[0]
selected_indices.append(current_index)
# Compute IoU between the current box and all other boxes
current_box = boxes[current_index]
other_boxes = boxes[indices[1:]]
iou = calculate_iou(current_box, other_boxes)
# Remove boxes with IoU higher than the threshold
indices = indices[1:][iou <= iou_threshold]
return np.array(selected_indices)
The calculate_iou function
Computes the Intersection over Union (IoU) between one box and a set of boxes.
def calculate_iou(box, boxes):
"""
Calculate the Intersection over Union (IoU) between a given box and a set of boxes.
Parameters:
box (ndarray): The coordinates of the first box, shaped (4,).
boxes (ndarray): The coordinates of the other boxes, shaped (N, 4).
Returns:
ndarray: The IoU between the given box and each box in the set, shaped (N,).
"""
# Calculate intersection coordinates
x1 = np.maximum(box[0], boxes[:, 0])
y1 = np.maximum(box[1], boxes[:, 1])
x2 = np.minimum(box[2], boxes[:, 2])
y2 = np.minimum(box[3], boxes[:, 3])
# Calculate intersection area
intersection_area = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
# Calculate areas of both bounding boxes
box_area = (box[2] - box[0]) * (box[3] - box[1])
boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
# Calculate IoU
iou = intersection_area / (box_area + boxes_area - intersection_area)
return iou
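For intuition, two 10x10 squares offset by 5 pixels overlap in a 5x5 region:

a = np.array([0, 0, 10, 10], dtype=float)
b = np.array([[5, 5, 15, 15]], dtype=float)
print(calculate_iou(a, b))  # 25 / (100 + 100 - 25) = [0.14285714]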
4. Helper Functions
The xywh2xyxy function
Converts boxes from center format (center_x, center_y, width, height) to corner format (x1, y1, x2, y2).
def xywh2xyxy(x):
"""
Convert bounding boxes from (center_x, center_y, width, height) to (x1, y1, x2, y2) format.
Parameters:
x (ndarray): Bounding boxes in (center_x, center_y, width, height) format, shaped (N, 4).
Returns:
ndarray: Bounding boxes in (x1, y1, x2, y2) format, shaped (N, 4).
"""
y = x.copy()
y[:, 0] = x[:, 0] - x[:, 2] / 2
y[:, 1] = x[:, 1] - x[:, 3] / 2
y[:, 2] = x[:, 0] + x[:, 2] / 2
y[:, 3] = x[:, 1] + x[:, 3] / 2
return y
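For example, a 100x60 box centered at (320, 240):

print(xywh2xyxy(np.array([[320.0, 240.0, 100.0, 60.0]])))  # [[270. 210. 370. 270.]]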
The post_process_yolo function
Draws the detection results onto the image.
def post_process_yolo(det, im, label_path='coco128.yaml'):
    if len(det):
        # im is the letterboxed image here, so this rescale is effectively a no-op;
        # it matters when drawing on the original-resolution image instead
        det[:, :4] = scale_boxes(im.shape[:2], det[:, :4], im.shape).round()
        names = yaml_load(label_path)['names']
        colors = Colors()
        for *xyxy, conf, cls in reversed(det):
            c = int(cls)
            label = names[c]
            box_label(im, xyxy, label, color=colors(c, True))
    return im
The scale_boxes function
Maps box coordinates from the resized (letterboxed) image back to the original image.
def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
if ratio_pad is None:
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
else:
gain = ratio_pad[0][0]
pad = ratio_pad[1]
boxes[..., [0, 2]] -= pad[0]
boxes[..., [1, 3]] -= pad[1]
boxes[..., :4] /= gain
clip_boxes(boxes, img0_shape)
return boxes
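A worked example, assuming the same 1280x720 original letterboxed to 640x640 (so gain = 0.5 with 140 px of vertical padding):

boxes = np.array([[480.0, 340.0, 560.0, 420.0]])    # box in 640x640 letterbox space
print(scale_boxes((640, 640), boxes, (720, 1280)))  # [[ 960. 400. 1120. 560.]]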
The clip_boxes function
Clamps box coordinates to the image boundaries.
def clip_boxes(boxes, shape):
boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])
boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])
The yaml_load function
Loads the label file, a YAML mapping from class index to class name.
def yaml_load(file='coco128.yaml'):
with open(file, errors='ignore') as f:
return yaml.safe_load(f)
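If the Ultralytics coco128.yaml is not at hand, a minimal stand-in (hypothetical file name) shows the expected structure, a names mapping from class index to label:

with open('labels.yaml', 'w') as f:
    f.write('names:\n  0: person\n  1: bicycle\n')
print(yaml_load('labels.yaml')['names'])  # {0: 'person', 1: 'bicycle'}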
The Colors class
Generates a distinct, stable color for each class index.
class Colors:
def __init__(self):
hexs = (
"FF3838",
"FF9D97",
"FF701F",
"FFB21D",
"CFD231",
"48F90A",
"92CC17",
"3DDB86",
"1A9334",
"00D4BB",
"2C99A8",
"00C2FF",
"344593",
"6473FF",
"0018EC",
"8438FF",
"520085",
"CB38FF",
"FF95C8",
"FF37C7",
)
self.palette = [self.hex2rgb(f"#{c}") for c in hexs]
self.n = len(self.palette)
def __call__(self, i, bgr=False):
c = self.palette[int(i) % self.n]
return (c[2], c[1], c[0]) if bgr else c
@staticmethod
def hex2rgb(h):
return tuple(int(h[1 + i: 1 + i + 2], 16) for i in (0, 2, 4))
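Usage: index by class id, and pass bgr=True when drawing with OpenCV:

colors = Colors()
print(colors(0))        # (255, 56, 56)  RGB for "FF3838"
print(colors(0, True))  # (56, 56, 255)  same color in BGR order for cv2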
The box_label function
Draws a detection box and its label on the image.
def box_label(im, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255)):
lw = 2
p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
cv2.rectangle(im, p1, p2, color, thickness=lw, lineType=cv2.LINE_AA)
if label:
tf = max(lw - 1, 1)
w, h = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0]
outside = p1[1] - h >= 3
p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
cv2.rectangle(im, p1, p2, color, -1, cv2.LINE_AA)
cv2.putText(im, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA)
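A quick drawing demo on a blank canvas (hypothetical box and label text):

canvas = np.full((200, 200, 3), 255, dtype=np.uint8)
box_label(canvas, (30, 40, 150, 160), 'person 0.87', color=(56, 56, 255))
# cv2.imshow('demo', canvas); cv2.waitKey(0)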
5. Loading the ONNX Model
The onnx_load function
Creates an ONNX Runtime inference session and collects the model's output names.
def onnx_load(w):
    # list CUDA first so ONNX Runtime prefers the GPU when it is available,
    # falling back to the CPU provider otherwise
    providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
    session = onnxruntime.InferenceSession(w, providers=providers)
    output_names = [x.name for x in session.get_outputs()]
    print('-------', output_names)
    return session, output_names
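If unsure whether the GPU provider is present in your build, you can check before constructing the session:

print(onnxruntime.get_available_providers())
# e.g. ['CUDAExecutionProvider', 'CPUExecutionProvider'] with onnxruntime-gpu installed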
6. The YOLOV8Detector Class
The YOLOV8Detector class
Implements the main inference logic.
class YOLOV8Detector:
def __init__(self, model_path='yolov8s.onnx'):
self.model_path = model_path
self.session, self.output_names = onnx_load(self.model_path)
self.imgsz = [640, 640]
    def detect_objects(self, image):
        im, org_data = data_process_cv2(image, self.imgsz)
        y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})[0]
        # YOLOv8 outputs (1, 4 + nc, num_anchors); transpose to (1, num_anchors, 4 + nc)
        pred = y.transpose([0, 2, 1])
        # YOLOv8 has no objectness score: insert the best class score at index 4 so the
        # rows match the (x, y, w, h, conf, class_scores...) layout the NMS code expects
        pred_class = pred[..., 4:]
        pred_conf = np.max(pred_class, axis=-1)
        pred = np.insert(pred, 4, pred_conf, axis=-1)
        pred = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45, max_det=1000)
        res_img = post_process_yolo(pred[0], org_data)
        return res_img
def detect_video(self, video_path, output_path):
cap = cv2.VideoCapture(video_path)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break
result_frame = self.detect_objects(frame)
out.write(result_frame)
cap.release()
out.release()
def detect_camera(self):
cap = cv2.VideoCapture(0)
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break
result_frame = self.detect_objects(frame)
            cv2.imshow('YOLOv8 Detection', result_frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
__init__: initializes the detector and loads the ONNX model.
detect_objects: runs inference on a single image and returns it with the detections drawn.
detect_video: runs inference on a video file and writes the annotated frames to a new video file.
detect_camera: runs real-time inference on the local camera and displays the detections.
7. Main Function
Run the main function to start camera inference.
if __name__ == '__main__':
detector = YOLOV8Detector()
detector.detect_camera()
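The other entry points work the same way; for example (hypothetical file paths):

detector = YOLOV8Detector(model_path='yolov8s.onnx')
img = cv2.imread('test.jpg')                       # single image inference
cv2.imwrite('result.jpg', detector.detect_objects(img))
detector.detect_video('input.mp4', 'output.mp4')   # video file inference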
Complete Code
import onnxruntime
import cv2
import numpy as np
import time
import yaml
def resize_image_cv2(image, size):
    ih, iw, ic = image.shape   # original height, width, channels
    w, h = size                # target width, height
    scale = min(w / iw, h / ih)
    nw = int(iw * scale)
    nh = int(ih * scale)
    image = cv2.resize(image, (nw, nh))
    # gray (128) canvas in (height, width, channels) order
    new_image = np.ones((h, w, 3), dtype='uint8') * 128
    start_h = (h - nh) // 2
    start_w = (w - nw) // 2
    new_image[start_h:start_h + nh, start_w:start_w + nw] = image
    return new_image, nw, nh
def data_process_cv2(frame, input_shape):
    '''
    Preprocess an input frame for inference.
    :param frame: BGR image as an (H, W, 3) ndarray
    :param input_shape: model input size, e.g. [640, 640]
    :return: (NCHW float32 tensor, letterboxed uint8 image for drawing)
    '''
    image_data, nw, nh = resize_image_cv2(frame, (input_shape[1], input_shape[0]))
    org_data = image_data.copy()
    np_data = np.array(image_data, np.float32) / 255.  # normalize to [0, 1]
    image_data = np.expand_dims(np.transpose(np_data, (2, 0, 1)), 0)  # HWC -> NCHW
    image_data = np.ascontiguousarray(image_data)
    return image_data, org_data
def non_max_suppression(prediction,
conf_thres=0.25,
iou_thres=0.35,
classes=None,
agnostic=False,
multi_label=False,
labels=(),
max_det=300,
nm=0 # number of masks
):
"""
Perform Non-Maximum Suppression (NMS) on the boxes to filter out overlapping boxes.
Parameters:
prediction (ndarray): Predictions from the model.
conf_thres (float): Confidence threshold to filter boxes.
iou_thres (float): Intersection over Union (IoU) threshold for NMS.
classes (list): Filter boxes by classes.
agnostic (bool): If True, perform class-agnostic NMS.
multi_label (bool): If True, perform multi-label NMS.
labels (list): Labels for auto-labelling.
max_det (int): Maximum number of detections.
nm (int): Number of masks.
Returns:
list: A list of filtered boxes.
"""
bs = prediction.shape[0] # batch size
nc = prediction.shape[2] - nm - 5 # number of classes
xc = prediction[..., 4] > conf_thres # candidates
max_wh = 7680 # (pixels) maximum box width and height
max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
time_limit = 0.5 + 0.05 * bs # seconds to quit after
# redundant = True # require redundant detections
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
# merge = False # use merge-NMS
t = time.time()
mi = 5 + nc # mask start index
output = [np.zeros((0, 6 + nm))] * bs
for xi, x in enumerate(prediction): # image index, image inference
# Apply constraints
x = x[xc[xi]] # confidence
# Cat apriori labels if autolabelling
if labels and len(labels[xi]):
lb = labels[xi]
v = np.zeros((len(lb), nc + nm + 5))
v[:, :4] = lb[:, 1:5] # box
v[:, 4] = 1.0 # conf
v[np.arange(len(lb)), lb[:, 0].astype(int) + 5] = 1.0 # cls
x = np.concatenate((x, v), 0)
# If none remain process next image
if not x.shape[0]:
continue
# Compute conf
x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
# Box/Mask
        box = xywh2xyxy(x[:, :4])  # (center_x, center_y, width, height) to (x1, y1, x2, y2)
mask = x[:, mi:] # zero columns if no masks
# Detections matrix nx6 (xyxy, conf, cls)
if multi_label:
i, j = np.nonzero(x[:, 5:mi] > conf_thres)
x = np.concatenate((box[i], x[i, 5 + j][:, None], j[:, None].astype(float), mask[i]), 1)
else: # best class only
conf = x[:, 5:mi].max(1, keepdims=True)
            j = x[:, 5:mi].argmax(1, keepdims=True)
x = np.concatenate((box, conf, j.astype(float), mask), 1)[conf[:, 0] > conf_thres]
# Filter by class
if classes is not None:
x = x[(x[:, 5:6] == np.array(classes)[:, None]).any(1)]
# Check shape
n = x.shape[0] # number of boxes
if not n: # no boxes
continue
sorted_indices = np.argsort(x[:, 4])[::-1]
x = x[sorted_indices][:max_nms] # sort by confidence and remove excess boxes
# Batched NMS
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
i = nms(boxes, scores, iou_thres) # NMS
i = i[:max_det] # limit detections
output[xi] = x[i]
if (time.time() - t) > time_limit:
# LOGGER.warning(f'WARNING NMS time limit {time_limit:.3f}s exceeded')
break # time limit exceeded
return output
# Define the function for NMS using numpy
def nms(boxes, scores, iou_threshold):
"""
Perform Non-Maximum Suppression (NMS) on the given boxes with scores using numpy.
Parameters:
boxes (ndarray): The bounding boxes, shaped (N, 4).
scores (ndarray): The confidence scores for each box, shaped (N,).
iou_threshold (float): The IoU threshold for suppressing overlapping boxes.
Returns:
ndarray: The indices of the selected boxes after NMS.
"""
if len(boxes) == 0:
return []
# Sort boxes by their scores
indices = np.argsort(scores)[::-1]
selected_indices = []
while len(indices) > 0:
# Select the box with the highest score
current_index = indices[0]
selected_indices.append(current_index)
# Compute IoU between the current box and all other boxes
current_box = boxes[current_index]
other_boxes = boxes[indices[1:]]
iou = calculate_iou(current_box, other_boxes)
# Remove boxes with IoU higher than the threshold
indices = indices[1:][iou <= iou_threshold]
return np.array(selected_indices)
def calculate_iou(box, boxes):
"""
Calculate the Intersection over Union (IoU) between a given box and a set of boxes.
Parameters:
box (ndarray): The coordinates of the first box, shaped (4,).
boxes (ndarray): The coordinates of the other boxes, shaped (N, 4).
Returns:
ndarray: The IoU between the given box and each box in the set, shaped (N,).
"""
# Calculate intersection coordinates
x1 = np.maximum(box[0], boxes[:, 0])
y1 = np.maximum(box[1], boxes[:, 1])
x2 = np.minimum(box[2], boxes[:, 2])
y2 = np.minimum(box[3], boxes[:, 3])
# Calculate intersection area
intersection_area = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
# Calculate areas of both bounding boxes
box_area = (box[2] - box[0]) * (box[3] - box[1])
boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
# Calculate IoU
iou = intersection_area / (box_area + boxes_area - intersection_area)
return iou
# Define xywh2xyxy function for converting bounding box format
def xywh2xyxy(x):
"""
Convert bounding boxes from (center_x, center_y, width, height) to (x1, y1, x2, y2) format.
Parameters:
x (ndarray): Bounding boxes in (center_x, center_y, width, height) format, shaped (N, 4).
Returns:
ndarray: Bounding boxes in (x1, y1, x2, y2) format, shaped (N, 4).
"""
y = x.copy()
y[:, 0] = x[:, 0] - x[:, 2] / 2
y[:, 1] = x[:, 1] - x[:, 3] / 2
y[:, 2] = x[:, 0] + x[:, 2] / 2
y[:, 3] = x[:, 1] + x[:, 3] / 2
return y
def post_process_yolo(det, im, label_path='coco128.yaml'):
if len(det):
det[:, :4] = scale_boxes(im.shape[:2], det[:, :4], im.shape).round()
names = yaml_load(label_path)['names']
colors = Colors()
for *xyxy, conf, cls in reversed(det):
c = int(cls)
label = names[c]
box_label(im, xyxy, label, color=colors(c, True))
return im
def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
if ratio_pad is None:
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
else:
gain = ratio_pad[0][0]
pad = ratio_pad[1]
boxes[..., [0, 2]] -= pad[0]
boxes[..., [1, 3]] -= pad[1]
boxes[..., :4] /= gain
clip_boxes(boxes, img0_shape)
return boxes
def clip_boxes(boxes, shape):
boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])
boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])
def yaml_load(file='coco128.yaml'):
with open(file, errors='ignore') as f:
return yaml.safe_load(f)
class Colors:
    # Ultralytics color palette https://ultralytics.com/
def __init__(self):
"""
Initializes the Colors class with a palette derived from Ultralytics color scheme, converting hex codes to RGB.
Colors derived from `hex = matplotlib.colors.TABLEAU_COLORS.values()`.
"""
hexs = (
"FF3838",
"FF9D97",
"FF701F",
"FFB21D",
"CFD231",
"48F90A",
"92CC17",
"3DDB86",
"1A9334",
"00D4BB",
"2C99A8",
"00C2FF",
"344593",
"6473FF",
"0018EC",
"8438FF",
"520085",
"CB38FF",
"FF95C8",
"FF37C7",
)
self.palette = [self.hex2rgb(f"#{c}") for c in hexs]
self.n = len(self.palette)
def __call__(self, i, bgr=False):
"""Returns color from palette by index `i`, in BGR format if `bgr=True`, else RGB; `i` is an integer index."""
c = self.palette[int(i) % self.n]
return (c[2], c[1], c[0]) if bgr else c
@staticmethod
def hex2rgb(h):
"""Converts hex color codes to RGB values (i.e. default PIL order)."""
return tuple(int(h[1 + i: 1 + i + 2], 16) for i in (0, 2, 4))
def box_label(im, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255)):
lw = 2
p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
cv2.rectangle(im, p1, p2, color, thickness=lw, lineType=cv2.LINE_AA)
if label:
tf = max(lw - 1, 1)
w, h = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0]
outside = p1[1] - h >= 3
p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
cv2.rectangle(im, p1, p2, color, -1, cv2.LINE_AA)
cv2.putText(im, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
0, lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA)
def onnx_load(w):
    # list CUDA first so ONNX Runtime prefers the GPU when it is available,
    # falling back to the CPU provider otherwise
    providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
    session = onnxruntime.InferenceSession(w, providers=providers)
    output_names = [x.name for x in session.get_outputs()]
    print('-------', output_names)
    return session, output_names
class YOLOV8Detector:
def __init__(self, model_path='yolov8s.onnx'):
self.model_path = model_path
self.session, self.output_names = onnx_load(self.model_path)
self.imgsz = [640, 640]
# conf_thres = 0.25, iou_thres = 0.45, max_det = 1000
def detect_objects(self, image):
im, org_data = data_process_cv2(image, self.imgsz)
y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})[0]
        # YOLOv8 outputs (1, 4 + nc, num_anchors); transpose to (1, num_anchors, 4 + nc)
        pred = y.transpose([0, 2, 1])
        # YOLOv8 has no objectness score: insert the best class score at index 4 so the
        # rows match the (x, y, w, h, conf, class_scores...) layout the NMS code expects
        pred_class = pred[..., 4:]
        pred_conf = np.max(pred_class, axis=-1)
        pred = np.insert(pred, 4, pred_conf, axis=-1)
pred = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45, max_det=1000)
res_img = post_process_yolo(pred[0], org_data)
return res_img
def detect_video(self, video_path, output_path):
cap = cv2.VideoCapture(video_path)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break
result_frame = self.detect_objects(frame)
out.write(result_frame)
cap.release()
out.release()
def detect_camera(self):
        cap = cv2.VideoCapture(0)  # 0 selects the first camera; change the index to use another device
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break
result_frame = self.detect_objects(frame)
            cv2.imshow('YOLOv8 Detection', result_frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
if __name__ == '__main__':
detector = YOLOV8Detector()
detector.detect_camera()
With the code above, we can run YOLOv8 inference locally on images, videos, and a live camera. Hopefully this article helps you understand and implement local YOLOv8 inference.