14. Running YOLOv5 on the TX2
I. Download yolov5, tensorrtx, and yolov5s.pt
Required base environment:
python3
torch 1.7 or higher
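Both prerequisites can be checked from a terminal, for example:
python3 -c "import torch; print(torch.__version__)"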
1. Generate the weight file (this can be done either on a PC or on the TX2)
1) In a terminal on the TX2, run the following commands:
git clone https://github.com/wang-xinyu/tensorrtx.git
git clone https://github.com/ultralytics/yolov5.git
2) Download yolov5s.pt:
https://github.com/ultralytics/yolov5/releases/download/v4.0/yolov5s.pt
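For example, it can be fetched directly on the TX2 with wget:
wget https://github.com/ultralytics/yolov5/releases/download/v4.0/yolov5s.pt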
3) Move files
①
cp tensorrtx/yolov5/gen_wts.py yolov5
②
Put yolov5s.pt (the pretrained model) into the weights folder of yolov5. Any other location works too; see the Python script below for the path to adjust.
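For example (assuming yolov5s.pt was downloaded to the current directory and the weights folder does not exist yet):
mkdir -p yolov5/weights
mv yolov5s.pt yolov5/weights/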
4) Generate the weight file
python gen_wts.py
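For reference, here is a minimal sketch of what gen_wts.py does (the real script ships with tensorrtx, and its paths and arguments differ between versions, so treat this only as an outline): it loads the checkpoint with torch and dumps every weight tensor as hex text into yolov5s.wts, the format the C++ side parses.
# Minimal sketch of gen_wts.py, not the shipped script.
import struct
import torch

# checkpoint path; adjust if you stored yolov5s.pt elsewhere
model = torch.load('weights/yolov5s.pt', map_location='cpu')['model'].float()
with open('yolov5s.wts', 'w') as f:
    f.write('{}\n'.format(len(model.state_dict().keys())))
    for k, v in model.state_dict().items():
        vr = v.reshape(-1).cpu().numpy()
        f.write('{} {} '.format(k, len(vr)))
        f.write(' '.join(struct.pack('>f', float(x)).hex() for x in vr))
        f.write('\n')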
Running this script can fail in a few ways; the common ones are listed below (the list is not exhaustive, but the fix pattern is the same).
① Some dependency packages are missing from the environment.
For example, if the tqdm progress-bar package is missing, install it with conda or pip:
pip install tqdm
Install any other missing package the same way.
② Incompatible versions of urllib3 (1.22) or chardet (2.2.1) in the Python environment.
The fix is as follows:
pip uninstall urllib3
pip uninstall chardet
pip install requests
2. Generate the deployment engine (this must be compiled on the deployment hardware, here the TX2)
Move the yolov5s.wts file into the tensorrtx/yolov5 folder:
mv yolov5s.wts ~/tensorrtx/yolov5
In yolov5.cpp you can also choose between fp16 and fp32 (the Nano does not support int8), the device (which GPU to use), nms_thresh (the NMS threshold), conf_thresh (the confidence threshold), and batch_size; the defaults are fine here.
Open yololayer.h and change the total number of classes (CLASS_NUM) to match the number of classes of the model you trained (if you trained on your own dataset).
Besides that, you can also change the input image size, but it must be a multiple of 32. A smaller input size speeds up inference to some extent.
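Before the deserialization step below can work, the C++ demo has to be compiled and the engine serialized once on the TX2. With the tensorrtx release used here the steps are roughly as follows (the exact arguments of -s differ between tensorrtx releases, so check tensorrtx/yolov5/README.md):
cd ~/tensorrtx/yolov5
mkdir build
cd build
cp ../yolov5s.wts .
cmake ..
make
sudo ./yolov5 -s yolov5s.wts yolov5s.engine s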
Then deserialize the engine and run it on the sample images:
sudo ./yolov5 -d yolov5s.engine ../samples
II. Video detection
The code is as follows:
import time
import cv2
import pycuda.autoinit # This is needed for initializing CUDA driver
import numpy as np
import ctypes
import tensorrt as trt
import pycuda.driver as cuda
import threading
import random
INPUT_W = 608
INPUT_H = 608
CONF_THRESH = 0.2
IOU_THRESHOLD = 0.4
categories = ['vehicle', 'bicycle', 'pedestrian', 'road_sign']  # class names; must match the trained model
def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """
    description: Plots one bounding box on image img,
                 this function comes from the YOLOv5 project.
    param:
        x: a box like [x1,y1,x2,y2]
        img: an opencv image object
        color: color to draw rectangle, such as (0,255,0)
        label: str
        line_thickness: int
    return:
        no return
    """
    tl = (
        line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
    )  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(
            img,
            label,
            (c1[0], c1[1] - 2),
            0,
            tl / 3,
            [225, 255, 255],
            thickness=tf,
            lineType=cv2.LINE_AA,
        )
def draw_boxes(image_raw, result_boxes, result_scores, result_classid):
    for i in range(len(result_boxes)):
        box = result_boxes[i]
        plot_one_box(
            box,
            image_raw,
            label="{}:{:.2f}".format(
                categories[int(result_classid[i])], result_scores[i]
            ),
        )
    return image_raw
class YoLov5TRT(object):
    """
    description: A YOLOv5 class that wraps TensorRT ops, preprocess and postprocess ops.
    """

    def __init__(self, engine_file_path):
        # Create a Context on this device.
        self.cfx = cuda.Device(0).make_context()
        stream = cuda.Stream()
        TRT_LOGGER = trt.Logger(trt.Logger.INFO)
        runtime = trt.Runtime(TRT_LOGGER)
        # Deserialize the engine from file
        with open(engine_file_path, "rb") as f:
            engine = runtime.deserialize_cuda_engine(f.read())
        context = engine.create_execution_context()
        host_inputs = []
        cuda_inputs = []
        host_outputs = []
        cuda_outputs = []
        bindings = []
        for binding in engine:
            size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            cuda_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer to device bindings.
            bindings.append(int(cuda_mem))
            # Append to the appropriate list.
            if engine.binding_is_input(binding):
                host_inputs.append(host_mem)
                cuda_inputs.append(cuda_mem)
            else:
                host_outputs.append(host_mem)
                cuda_outputs.append(cuda_mem)
        # Store
        self.stream = stream
        self.context = context
        self.engine = engine
        self.host_inputs = host_inputs
        self.cuda_inputs = cuda_inputs
        self.host_outputs = host_outputs
        self.cuda_outputs = cuda_outputs
        self.bindings = bindings

    # Release the engine, the GPU memory and the CUDA stream
    def __del__(self):
        print("delete object to release memory")
    def infer(self, image_raw):
        threading.Thread.__init__(self)  # left over from a threaded variant of this demo; harmless here
        # Make self the active context, pushing it on top of the context stack.
        self.cfx.push()
        # Restore
        stream = self.stream
        context = self.context
        engine = self.engine
        host_inputs = self.host_inputs
        cuda_inputs = self.cuda_inputs
        host_outputs = self.host_outputs
        cuda_outputs = self.cuda_outputs
        bindings = self.bindings
        # Do image preprocess
        input_image, image_raw, origin_h, origin_w = self.preprocess_image(
            image_raw
        )
        # Copy input image to host buffer
        np.copyto(host_inputs[0], input_image.ravel())
        # Transfer input data to the GPU.
        cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
        # Run inference.
        context.execute_async(bindings=bindings, stream_handle=stream.handle)
        # Transfer predictions back from the GPU.
        cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream)
        # Synchronize the stream
        stream.synchronize()
        # Remove any context from the top of the context stack, deactivating it.
        self.cfx.pop()
        # Here we use the first row of output in that batch_size = 1
        output = host_outputs[0]
        # Do postprocess
        result_boxes, result_scores, result_classid = self.post_process(
            output, origin_h, origin_w
        )
        return image_raw, result_boxes, result_scores, result_classid

    def destroy(self):
        # Remove any context from the top of the context stack, deactivating it.
        self.cfx.pop()
    def preprocess_image(self, image_raw):
        """
        description: Convert an opencv BGR image to RGB,
                     resize and pad it to target size, normalize to [0,1],
                     transform to NCHW format.
        param:
            image_raw: an opencv image object (BGR)
        return:
            image: the processed image
            image_raw: the original image
            h: original height
            w: original width
        """
        h, w, c = image_raw.shape
        image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
        # Calculate width and height and paddings
        r_w = INPUT_W / w
        r_h = INPUT_H / h
        if r_h > r_w:
            tw = INPUT_W
            th = int(r_w * h)
            tx1 = tx2 = 0
            ty1 = int((INPUT_H - th) / 2)
            ty2 = INPUT_H - th - ty1
        else:
            tw = int(r_h * w)
            th = INPUT_H
            tx1 = int((INPUT_W - tw) / 2)
            tx2 = INPUT_W - tw - tx1
            ty1 = ty2 = 0
        # Resize along the long side while maintaining the aspect ratio
        image = cv2.resize(image, (tw, th))
        # Pad the short side with (128,128,128)
        image = cv2.copyMakeBorder(
            image, ty1, ty2, tx1, tx2, cv2.BORDER_CONSTANT, value=(128, 128, 128)
        )
        image = image.astype(np.float32)
        # Normalize to [0,1]
        image /= 255.0
        # HWC to CHW format:
        image = np.transpose(image, [2, 0, 1])
        # CHW to NCHW format
        image = np.expand_dims(image, axis=0)
        # Convert the image to row-major order, also known as "C order":
        image = np.ascontiguousarray(image)
        return image, image_raw, h, w
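    # Worked example of the letterboxing above (illustrative numbers): for a
    # 1920x1080 frame and a 608x608 input, r_w ≈ 0.317 < r_h ≈ 0.563, so the
    # frame is resized to 608x342 and padded with 133 gray rows on the top and
    # bottom; xywh2xyxy below undoes exactly this resize-and-pad transform.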
    def xywh2xyxy(self, origin_h, origin_w, x):
        """
        description: Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
        param:
            origin_h: height of original image
            origin_w: width of original image
            x: A boxes tensor, each row is a box [center_x, center_y, w, h]
        return:
            y: A boxes tensor, each row is a box [x1, y1, x2, y2]
        """
        # y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
        y = np.zeros_like(x)
        r_w = INPUT_W / origin_w
        r_h = INPUT_H / origin_h
        if r_h > r_w:
            y[:, 0] = x[:, 0] - x[:, 2] / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2 - (INPUT_H - r_w * origin_h) / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2 - (INPUT_H - r_w * origin_h) / 2
            y /= r_w
        else:
            y[:, 0] = x[:, 0] - x[:, 2] / 2 - (INPUT_W - r_h * origin_w) / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2 - (INPUT_W - r_h * origin_w) / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2
            y /= r_h
        return y
    def nms(self, boxes, scores, iou_threshold=IOU_THRESHOLD):
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        areas = (y2 - y1 + 1) * (x2 - x1 + 1)
        keep = []
        index = scores.argsort()[::-1]
        while index.size > 0:
            i = index[0]  # the first is always the highest-scoring box, keep it directly
            keep.append(i)
            x11 = np.maximum(x1[i], x1[index[1:]])  # corners of the overlap region
            y11 = np.maximum(y1[i], y1[index[1:]])
            x22 = np.minimum(x2[i], x2[index[1:]])
            y22 = np.minimum(y2[i], y2[index[1:]])
            w = np.maximum(0, x22 - x11 + 1)  # width of the overlap
            h = np.maximum(0, y22 - y11 + 1)  # height of the overlap
            overlaps = w * h
            ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
            idx = np.where(ious <= iou_threshold)[0]
            index = index[idx + 1]  # +1 because idx indexes into index[1:]
        return keep
    def post_process(self, output, origin_h, origin_w):
        """
        description: postprocess the prediction
        param:
            output: A tensor like [num_boxes, cx,cy,w,h,conf,cls_id, cx,cy,w,h,conf,cls_id, ...]
            origin_h: height of original image
            origin_w: width of original image
        return:
            result_boxes: final boxes, a boxes tensor, each row is a box [x1, y1, x2, y2]
            result_scores: final scores, a tensor, each element is the score corresponding to a box
            result_classid: final classid, a tensor, each element is the classid corresponding to a box
        """
        # Get the number of boxes detected
        num = int(output[0])
        # Reshape to a two-dimensional ndarray
        pred = np.reshape(output[1:], (-1, 6))[:num, :]
        # to a torch Tensor
        # pred = torch.Tensor(pred).cuda()
        # Get the boxes
        boxes = pred[:, :4]
        # Get the scores
        scores = pred[:, 4]
        # Get the classid
        classid = pred[:, 5]
        # Choose those boxes whose score > CONF_THRESH
        si = scores > CONF_THRESH
        boxes = boxes[si, :]
        scores = scores[si]
        classid = classid[si]
        # Transform bbox from [center_x, center_y, w, h] to [x1, y1, x2, y2]
        boxes = self.xywh2xyxy(origin_h, origin_w, boxes)
        # Do nms with the pure-numpy version above; a torchvision alternative:
        # indices = torchvision.ops.nms(boxes, scores, iou_threshold=IOU_THRESHOLD).cpu()
        indices = self.nms(boxes, scores, IOU_THRESHOLD)
        result_boxes = boxes[indices, :]
        result_scores = scores[indices]
        result_classid = classid[indices]
        return result_boxes, result_scores, result_classid
def detect_one(img, yolov5_wrapper):
    full_scrn = False
    tic = time.time()  # time.clock() was removed in Python 3.8
    # Run detection and write the result to result.jpg
    img, result_boxes, result_scores, result_classid = yolov5_wrapper.infer(img)
    toc = time.time()
    curr_time = toc - tic
    print("boxes: " + str(result_boxes))
    print("clss: " + str(result_classid))
    print("confs: " + str(result_scores))
    img = draw_boxes(img, result_boxes, result_scores, result_classid)
    cv2.imwrite("result.jpg", img)
    print("time: " + str(curr_time) + "(sec)")
def main_one():
    filename = "1.jpg"
    img = cv2.imread(filename)
    # load custom plugins
    # PLUGIN_LIBRARY = "yolov5s/libmyplugins.so"
    PLUGIN_LIBRARY = "/home/nvidia/tensorrtx/yolov5/build/libmyplugins.so"
    ctypes.CDLL(PLUGIN_LIBRARY)
    engine_file_path = "/home/nvidia/tensorrtx/yolov5/build/yolov5s.engine"
    # a YoLov5TRT instance
    yolov5_wrapper = YoLov5TRT(engine_file_path)
    print("start detection!")
    detect_one(img, yolov5_wrapper)
    cv2.destroyAllWindows()
    print("finish!")

from IPython.display import Image

main_one()
Image("result.jpg")
def detect_video(video, yolov5_wrapper):
    full_scrn = False
    fps = 0.0
    tic = time.time()
    frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = video.get(cv2.CAP_PROP_FPS)
    # Define the output codec
    fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', 'V')
    videoWriter = cv2.VideoWriter('result.AVI', fourcc, fps, (frame_width, frame_height))
    # Loop over the frames and write the detections to result.AVI
    while True:
        ret, img = video.read()
        if img is not None:
            img, result_boxes, result_scores, result_classid = yolov5_wrapper.infer(img)
            img = draw_boxes(img, result_boxes, result_scores, result_classid)
            videoWriter.write(img)
            toc = time.time()
            curr_fps = 1.0 / (toc - tic)
            fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
            tic = toc
            print("\rfps: " + str(fps), end="")
        else:
            break
def main_loop():
    filename = "video2.mp4"
    video = cv2.VideoCapture(filename)
    # load custom plugins
    PLUGIN_LIBRARY = "yolov5x/libmyplugins.so"
    ctypes.CDLL(PLUGIN_LIBRARY)
    engine_file_path = "yolov5x/yolov5x.engine"
    # a YoLov5TRT instance
    yolov5_wrapper = YoLov5TRT(engine_file_path)
    print("start detection!")
    detect_video(video, yolov5_wrapper)
    video.release()
    cv2.destroyAllWindows()
    print("\nfinish!")

main_loop()

from IPython.display import Video
Video("result-ffmpeg4.mp4")
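The notebook plays back result-ffmpeg4.mp4 rather than the result.AVI written above, so the AVI was presumably re-encoded first; a typical conversion (output name assumed to match the playback cell) is:
ffmpeg -i result.AVI result-ffmpeg4.mp4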
Errors encountered:
① The cv2 module is missing.
What actually happened is that the default conda environment was active, and cv2 is not installed in it, so that environment should be deactivated (e.g. with conda deactivate).
② A file cannot be found.
At first this looked like the library file could not be found under /usr/lib, so a symlink was tried:
sudo find / -iname "name-of-the-missing-library"
ln -s ~/tensorrtx/yolov5/build /usr/lib
sudo ldconfig
That did not solve it, so the error message was traced further.
Opening yolov5_trt.py showed that a file path in it was written wrong.
After fixing it, however, a new problem appeared.
To be continued --------