如何开发一个AI应用
以下是针对零基础用户、使用YOLOv5实现车流检测统计并部署到RK3588的详细指南:
一、整体流程图
[摄像头输入] → [YOLOv5实时检测] → [车辆计数逻辑] → [Tkinter图形界面显示]
↑ ↓
[RK3588 NPU加速] [统计结果存储]
二、详细步骤与代码
步骤1:环境准备
硬件要求:
RK3588开发板(搭载Debian系统)
USB摄像头(或MIPI摄像头)
显示器(HDMI接口)
软件安装(在RK3588上执行):
# Install system dependencies. python3-tk and python3-pil.imagetk are needed
# by the Tkinter GUI in car_counter.py (tkinter and PIL.ImageTk).
sudo apt-get update
sudo apt-get install -y python3-pip libopencv-dev python3-tk python3-pil python3-pil.imagetk
# Install Python libraries. car_counter.py imports the on-board runtime module
# `rknnlite`, which is provided by RKNN Toolkit Lite2.
# NOTE(review): if pip cannot find a matching wheel for your Python version,
# install the rknn_toolkit_lite2 wheel from Rockchip's rknn-toolkit2 repository.
pip3 install numpy opencv-python rknn-toolkit-lite2 torch==1.10.0 torchvision==0.11.1 --extra-index-url https://download.pytorch.org/whl/cpu
步骤2:获取YOLOv5模型
下载官方代码与权重:
# Fetch the official YOLOv5 code, then download the v6.0 yolov5s weights into it
git clone https://github.com/ultralytics/yolov5
cd yolov5
wget https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5s.pt # small model
步骤3:模型转换(PC端操作,需先在PC上安装 torch 与 rknn-toolkit2)
转换为ONNX格式:
# export_onnx.py
"""Export the pretrained YOLOv5s network to ONNX with a fixed 1x3x640x640 input."""
import torch

# autoshape=False returns the bare detection network. By default the hub entry
# point wraps the model in AutoShape, a convenience layer for running inference
# on images/paths that is not meant to be traced with a tensor dummy input.
model = torch.hub.load(
    'ultralytics/yolov5', 'yolov5s', pretrained=True, autoshape=False
)
model.eval()

# The dummy input only fixes the exported input shape; its values are irrelevant.
dummy_input = torch.randn(1, 3, 640, 640)
torch.onnx.export(model, dummy_input, "yolov5s.onnx", opset_version=12)
转换为RKNN格式:
# convert_rknn.py
"""Convert yolov5s.onnx into a quantized RKNN model for the RK3588 NPU."""
from pathlib import Path

from rknn.api import RKNN

CALIB_DIR = Path('./calib_images')   # put about 100 representative images here
DATASET_FILE = Path('./dataset.txt')
IMAGE_SUFFIXES = {'.jpg', '.jpeg', '.png', '.bmp'}

# build() expects `dataset` to be a text file with one image path per line,
# not a directory, so generate that list from the calibration folder.
calib_images = sorted(
    str(p) for p in CALIB_DIR.iterdir() if p.suffix.lower() in IMAGE_SUFFIXES
)
if not calib_images:
    raise SystemExit(f'No calibration images found in {CALIB_DIR}')
DATASET_FILE.write_text('\n'.join(calib_images) + '\n', encoding='utf-8')

rknn = RKNN()
try:
    # Fold the 0-255 -> 0-1 scaling into the model so the on-board program can
    # feed raw uint8 RGB frames directly.
    rknn.config(
        mean_values=[[0, 0, 0]],
        std_values=[[255, 255, 255]],
        target_platform='rk3588',
    )
    # The toolkit reports failures through non-zero return codes.
    if rknn.load_onnx(model='yolov5s.onnx') != 0:
        raise SystemExit('Failed to load yolov5s.onnx')
    if rknn.build(do_quantization=True, dataset=str(DATASET_FILE)) != 0:
        raise SystemExit('Failed to build the RKNN model')
    if rknn.export_rknn('./yolov5s.rknn') != 0:
        raise SystemExit('Failed to export yolov5s.rknn')
finally:
    rknn.release()
步骤4:编写车流检测程序
完整代码(保存为 car_counter.py):
import tkinter as tk
from collections import deque
from threading import Thread
from tkinter import ttk

import cv2
import numpy as np
from PIL import Image, ImageTk

try:
    # Import path used by RKNN Toolkit Lite2.
    from rknnlite.api import RKNNLite
except ImportError:
    from rknnlite import RKNNLite
# ================== Initialise the RKNN model ==================
# Module-level runtime handle shared by the detection code below.
rknn = RKNNLite()
# Check the return codes so a missing model file or an unavailable NPU fails
# here, at start-up, rather than later inside the first inference call.
if rknn.load_rknn('yolov5s.rknn') != 0:
    raise RuntimeError('Failed to load yolov5s.rknn')
if rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0) != 0:
    raise RuntimeError('Failed to initialise the RKNN runtime')
# ================== 车辆计数类 ==================
class VehicleCounter:
    """Running vehicle count based on frame-to-frame box matching.

    A detection is counted as a new vehicle when it does not overlap any box
    of the previous frame with an IoU above ``iou_threshold``. This is a
    deliberately simple heuristic, not a tracker: a vehicle that is missed for
    one frame and detected again is counted twice.

    Boxes are ``(x, y, w, h)`` tuples with ``(x, y)`` the top-left corner.
    """

    def __init__(self, iou_threshold=0.3):
        """Create an empty counter.

        Args:
            iou_threshold: Minimum IoU with a previous-frame box for a
                detection to be treated as an already-counted vehicle.
        """
        self.count = 0
        self.prev_boxes = []
        self.iou_threshold = iou_threshold

    def update(self, current_boxes):
        """Register the boxes of a new frame and return the cumulative count.

        Args:
            current_boxes: Iterable of ``(x, y, w, h)`` boxes for this frame.

        Returns:
            The total number of vehicles counted so far.
        """
        # Copy so later mutation of the caller's list cannot change our state.
        current_boxes = list(current_boxes)
        new_count = sum(
            1
            for curr_box in current_boxes
            if not any(
                self.calculate_iou(curr_box, prev_box) > self.iou_threshold
                for prev_box in self.prev_boxes
            )
        )
        self.count += new_count
        self.prev_boxes = current_boxes
        return self.count

    @staticmethod
    def calculate_iou(box1, box2):
        """Return the intersection-over-union of two ``(x, y, w, h)`` boxes.

        Returns 0.0 when the boxes do not overlap with positive area, which
        also covers degenerate zero-size boxes (no division by zero).
        """
        x1, y1, w1, h1 = box1
        x2, y2, w2, h2 = box2
        inter_w = min(x1 + w1, x2 + w2) - max(x1, x2)
        inter_h = min(y1 + h1, y2 + h2) - max(y1, y2)
        if inter_w <= 0 or inter_h <= 0:
            return 0.0
        area_inter = inter_w * inter_h
        # Positive intersection implies a positive union, so this is safe.
        area_union = w1 * h1 + w2 * h2 - area_inter
        return area_inter / area_union
# ================== GUI界面类 ==================
class App:
    """Tkinter window showing the annotated camera feed and the vehicle count.

    Capture and NPU inference run on a background thread. That thread never
    touches Tk widgets: it hands the newest annotated frame to the main thread
    through a one-slot deque, and the main thread polls that slot with
    ``window.after``.
    """

    INPUT_SIZE = 640           # side of the square model input (matches the export)
    DISPLAY_SIZE = (640, 480)  # (width, height) of the preview image
    # Class ids to count; 2 is 'car' in the COCO labels of the stock yolov5s
    # weights. Add e.g. 5 (bus) and 7 (truck) to count those as well.
    VEHICLE_CLASS_IDS = (2,)
    REFRESH_MS = 30            # polling period of the GUI refresh

    def __init__(self, window):
        """Build the widgets inside ``window`` and start the refresh polling."""
        self.window = window
        self.window.title("实时车流统计")
        # Video display area
        self.video_label = ttk.Label(window)
        self.video_label.grid(row=0, column=0, padx=10, pady=10)
        # Statistics
        self.count_label = ttk.Label(window, text="当前车辆总数:0", font=('Arial', 14))
        self.count_label.grid(row=1, column=0)
        # Control buttons
        self.btn_start = ttk.Button(window, text="开始检测", command=self.start_detection)
        self.btn_start.grid(row=2, column=0)
        self.btn_stop = ttk.Button(window, text="停止检测", command=self.stop_detection)
        self.btn_stop.grid(row=3, column=0)
        self.cap = None
        self.is_running = False
        self.counter = VehicleCounter()
        self._worker = None
        # Holds at most the newest (rgb_frame, count) pair; older ones are dropped.
        self._latest = deque(maxlen=1)
        # Stop the worker and free the camera when the window is closed.
        self.window.protocol("WM_DELETE_WINDOW", self._on_close)
        self.window.after(self.REFRESH_MS, self._refresh)

    def start_detection(self):
        """Open the default camera and start the detection thread (idempotent)."""
        if self.is_running or (self._worker is not None and self._worker.is_alive()):
            return
        cap = cv2.VideoCapture(0)  # 0 selects the default camera
        if not cap.isOpened():
            cap.release()
            self.count_label.config(text="无法打开摄像头")
            return
        self.cap = cap
        self.is_running = True
        # Daemon thread: it must never keep the process alive after the GUI exits.
        self._worker = Thread(target=self.detect_frame, daemon=True)
        self._worker.start()

    def stop_detection(self):
        """Ask the detection thread to finish after its current frame."""
        self.is_running = False

    def detect_frame(self):
        """Worker loop: capture, infer, count and publish annotated frames.

        Runs until ``is_running`` is cleared or the camera stops delivering
        frames; the camera is released on exit in every case.
        """
        cap = self.cap
        disp_w, disp_h = self.DISPLAY_SIZE
        # postprocess() returns boxes in model-input pixels; the preview has a
        # different size, so rescale before counting and drawing.
        scale_x = disp_w / self.INPUT_SIZE
        scale_y = disp_h / self.INPUT_SIZE
        try:
            while self.is_running:
                ret, frame = cap.read()
                if not ret:
                    break
                # Pre-processing: RGB, model size, batch axis. The layout stays
                # NHWC, which is what RKNNLite.inference expects by default.
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img = cv2.resize(img, (self.INPUT_SIZE, self.INPUT_SIZE))
                img = np.expand_dims(img, 0)
                # NPU inference (`rknn` is the module-level runtime handle)
                outputs = rknn.inference(inputs=[img])
                # Post-processing
                boxes = [
                    (int(x * scale_x), int(y * scale_y), int(w * scale_x), int(h * scale_y))
                    for (x, y, w, h) in self.postprocess(outputs)
                ]
                current_count = self.counter.update(boxes)
                # Draw on the preview and publish it for the GUI thread
                display_img = cv2.resize(frame, self.DISPLAY_SIZE)
                for (x, y, w, h) in boxes:
                    cv2.rectangle(display_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
                rgb = cv2.cvtColor(display_img, cv2.COLOR_BGR2RGB)
                self._latest.append((rgb, current_count))
        finally:
            cap.release()
            self.is_running = False

    def postprocess(self, outputs, conf_thresh=0.5, iou_thresh=0.45):
        """Turn raw model output into vehicle boxes.

        Assumes ``outputs[0]`` holds decoded YOLOv5 predictions of shape
        ``(1, N, 5 + num_classes)`` with rows
        ``[cx, cy, w, h, objectness, class scores...]`` in model-input pixels
        -- verify against your exported model.

        Args:
            outputs: Sequence of arrays returned by the runtime (or ``None``).
            conf_thresh: Minimum ``objectness * class score`` to keep a box.
            iou_thresh: IoU above which a lower-scored overlapping box is
                dropped by non-maximum suppression.

        Returns:
            List of ``(x, y, w, h)`` int tuples, ``(x, y)`` being the top-left
            corner, for the classes in ``VEHICLE_CLASS_IDS``.
        """
        if outputs is None or len(outputs) == 0:
            return []
        predictions = np.asarray(outputs[0], dtype=np.float32)
        if predictions.ndim < 2 or predictions.shape[-1] <= 5:
            return []
        predictions = predictions.reshape(-1, predictions.shape[-1])
        if predictions.shape[0] == 0:
            return []

        class_scores = predictions[:, 5:]
        class_ids = class_scores.argmax(axis=1)
        scores = predictions[:, 4] * class_scores[np.arange(len(predictions)), class_ids]
        keep = (scores > conf_thresh) & np.isin(class_ids, self.VEHICLE_CLASS_IDS)
        if not keep.any():
            return []

        selected = predictions[keep]
        scores = scores[keep]
        # Centre format -> top-left format, as expected by the IoU and drawing code.
        xywh = np.column_stack((
            selected[:, 0] - selected[:, 2] / 2,
            selected[:, 1] - selected[:, 3] / 2,
            selected[:, 2],
            selected[:, 3],
        ))
        return [
            (int(x), int(y), int(w), int(h))
            for (x, y, w, h) in xywh[self._nms(xywh, scores, iou_thresh)]
        ]

    @staticmethod
    def _nms(boxes, scores, iou_thresh):
        """Greedy non-maximum suppression; returns kept row indices, best first."""
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = x1 + boxes[:, 2]
        y2 = y1 + boxes[:, 3]
        areas = boxes[:, 2] * boxes[:, 3]
        order = scores.argsort()[::-1]
        kept = []
        while order.size:
            best, rest = order[0], order[1:]
            kept.append(int(best))
            inter_w = np.maximum(0.0, np.minimum(x2[best], x2[rest]) - np.maximum(x1[best], x1[rest]))
            inter_h = np.maximum(0.0, np.minimum(y2[best], y2[rest]) - np.maximum(y1[best], y1[rest]))
            inter = inter_w * inter_h
            union = areas[best] + areas[rest] - inter
            # Guard the division for degenerate zero-area boxes.
            iou = inter / np.maximum(union, 1e-9)
            order = rest[iou <= iou_thresh]
        return kept

    def _refresh(self):
        """Main-thread poll: show the newest published frame and count."""
        try:
            rgb, current_count = self._latest.pop()
        except IndexError:
            pass  # nothing new since the last poll
        else:
            photo = ImageTk.PhotoImage(image=Image.fromarray(rgb))
            self.video_label.configure(image=photo)
            # Keep a reference, otherwise the image is garbage-collected.
            self.video_label.image = photo
            self.count_label.config(text=f"当前车辆总数:{current_count}")
        self.window.after(self.REFRESH_MS, self._refresh)

    def _on_close(self):
        """Stop detection, wait briefly for the worker, then close the window."""
        self.stop_detection()
        if self._worker is not None:
            self._worker.join(timeout=2.0)
        self.window.destroy()
# ================== Program entry point ==================
if __name__ == "__main__":
    root = tk.Tk()
    app = App(root)
    try:
        root.mainloop()
    finally:
        # Free the NPU runtime even if the GUI loop exits with an exception.
        rknn.release()
步骤5:部署与运行
将以下文件拷贝到RK3588:
yolov5s.rknn(转换后的模型)
car_counter.py(主程序)
在终端执行:
python3 car_counter.py
点击界面上的"开始检测"按钮
三、关键问题说明
1. 车辆识别精度优化:
修改 postprocess 中的 conf_thresh(置信度阈值)
在YOLOv5训练时加入更多本地车辆数据
2. 性能优化技巧:
# Enable more than one NPU core when initialising RKNN
# (replaces the init_runtime call in car_counter.py)
rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0 | RKNNLite.NPU_CORE_1)
# Request a camera resolution of 640x480 (add right after the VideoCapture call)
self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
3. 常见错误处理:
问题: 无法打开摄像头 解决: 检查 /dev/video0 权限:sudo chmod 666 /dev/video0
问题: 推理速度慢 解决: 在RK3588上执行:
# With `sudo echo ... > file` the redirect is performed by the unprivileged
# shell and fails with "Permission denied"; pipe into `sudo tee` instead.
# The glob applies the setting to every CPU frequency policy present.
echo performance | sudo tee /sys/devices/system/cpu/cpufreq/policy*/scaling_governor
四、效果展示
将呈现一个包含以下元素的界面:
实时视频画面(带检测框)
醒目的车辆计数显示
开始/停止控制按钮
(注:此处应为实际界面截图)
通过这个方案,即使是编程新手也可以在2小时内完成基础部署,后续可根据实际道路场景调整检测参数。