RV1109上测试rknn_yolov5_demo以及分析
一、代码执行流程分析
1.1 加载模型
/* Create the neural network */
// Read the .rknn model file into a heap buffer (allocated by load_model),
// then create the inference context from that in-memory image.
// model_data must stay valid until rknn_init returns, and is freed in the
// release section at the end.
// NOTE(review): "Loading mode..." looks like a typo for "Loading model..."
// in the log string; left as-is because it is runtime output.
printf("Loading mode...\n");
int model_data_size = 0;
unsigned char* model_data = load_model(model_name, &model_data_size);
ret = rknn_init(&ctx, model_data, model_data_size, 0);
1.2 获取输入输出张量属性
先查询模型输入输出张量的个数,再逐个查询每个张量的属性(维度、格式、量化参数等)
// Query how many input/output tensors the model exposes, then fetch each
// tensor's attributes (dims, format, data type, quantization parameters).
rknn_input_output_num io_num;
ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
if (ret < 0) {
  // Fixed diagnostic: the failing call here is rknn_query, not rknn_init.
  printf("rknn_query error ret=%d\n", ret);
  return -1;
}
printf("model input num: %d, output num: %d\n", io_num.n_input, io_num.n_output);

// Per-input attributes (VLA sized by the queried input count).
rknn_tensor_attr input_attrs[io_num.n_input];
memset(input_attrs, 0, sizeof(input_attrs));
for (int i = 0; i < io_num.n_input; i++) {
  input_attrs[i].index = i;
  ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
  if (ret < 0) {
    // Fixed diagnostic: was "rknn_init error".
    printf("rknn_query error ret=%d\n", ret);
    return -1;
  }
  dump_tensor_attr(&(input_attrs[i]));
}

// Per-output attributes. The demo's CPU post-processing only understands
// affine-asymmetric u8 quantized outputs, so anything else is rejected.
rknn_tensor_attr output_attrs[io_num.n_output];
memset(output_attrs, 0, sizeof(output_attrs));
for (int i = 0; i < io_num.n_output; i++) {
  output_attrs[i].index = i;
  ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]), sizeof(rknn_tensor_attr));
  if (ret < 0) {
    // Added check: the original never inspected the output-attr query result,
    // so a failed query would fall through with a zeroed attribute struct.
    printf("rknn_query error ret=%d\n", ret);
    return -1;
  }
  dump_tensor_attr(&(output_attrs[i]));
  if (output_attrs[i].qnt_type != RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC || output_attrs[i].type != RKNN_TENSOR_UINT8) {
    fprintf(stderr,
            "The Demo required for a Affine asymmetric u8 quantized rknn model, but output quant type is %s, output "
            "data type is %s\n",
            get_qnt_type_string(output_attrs[i].qnt_type), get_type_string(output_attrs[i].type));
    return -1;
  }
}
1.3 加载源图像
根据上一步查询到的模型输入张量格式(NCHW/NHWC)来确定模型输入的宽和高
// Derive the model's expected input width/height from the first input
// tensor's attributes. NOTE(review): with this SDK the dims appear to be
// stored innermost-first (so for NCHW, dims[0]=W and dims[1]=H, matching
// the printed dims=[1, 3, 640, 640] and height=640/width=640 output below)
// -- confirm against the rknn_tensor_attr docs for the runtime in use.
int channel = 3;
int width = 0;
int height = 0;
if (input_attrs[0].fmt == RKNN_TENSOR_NCHW) {
printf("model is NCHW input fmt\n");
width = input_attrs[0].dims[0];
height = input_attrs[0].dims[1];
} else {
printf("model is NHWC input fmt\n");
width = input_attrs[0].dims[1];
height = input_attrs[0].dims[2];
}
printf("model input height=%d, width=%d, channel=%d\n", height, width, channel);
// Load image
// img is used later only for drawing results; the raw pixel buffer for
// inference comes from load_image() (freed with stbi_image_free at the end).
CImg<unsigned char> img(image_name);
unsigned char* input_data = NULL;
input_data = load_image(image_name, &img_height, &img_width, &img_channel, &input_attrs[0]);
if (!input_data) {
return -1;
}
// Describe the single input tensor: u8 NHWC pixels, driver-side layout
// conversion enabled (pass_through = 0). The buffer itself is attached
// after the RGA resize below.
rknn_input inputs[1];
memset(inputs, 0, sizeof(inputs));
inputs[0].index = 0;
inputs[0].type = RKNN_TENSOR_UINT8;
inputs[0].size = width * height * channel;
inputs[0].fmt = RKNN_TENSOR_NHWC;
inputs[0].pass_through = 0;
1.4 分配buffer
// DRM alloc buffer
// Allocate a DRM buffer at the *source image* resolution and copy the
// decoded pixels into it so the RGA hardware can read them.
// channel * 8: bits per pixel for an 8-bit-per-channel RGB image.
drm_fd = drm_init(&drm_ctx);
drm_buf = drm_buf_alloc(&drm_ctx, drm_fd, img_width, img_height, channel * 8, &buf_fd, &handle, &actual_size);
memcpy(drm_buf, input_data, img_width * img_height * channel);
// Destination buffer for the RGA resize, sized to the model input.
// NOTE(review): neither drm_buf nor this malloc result is null-checked.
void* resize_buf = malloc(height * width * channel);
1.5 初始化rga context
// init rga context
// Use RGA (hardware 2D engine) to scale the source image from
// (img_width, img_height) to the model input size (width, height),
// then attach the resized buffer as the network input.
RGA_init(&rga_ctx);
img_resize_slow(&rga_ctx, drm_buf, img_width, img_height, resize_buf, width, height);
inputs[0].buf = resize_buf;
// Timing starts here, so the reported time also covers rknn_inputs_set
// and rknn_outputs_get, not just rknn_run.
gettimeofday(&start_time, NULL);
rknn_inputs_set(ctx, io_num.n_input, inputs);
// want_float = 0: keep outputs as quantized u8; the post-processing
// dequantizes with the zp/scale values collected later.
rknn_output outputs[io_num.n_output];
memset(outputs, 0, sizeof(outputs));
for (int i = 0; i < io_num.n_output; i++) {
outputs[i].want_float = 0;
}
1.6 运行模型
// Run one inference and fetch all output tensors.
// NOTE(review): ret is not checked after either call; a failed run would
// proceed into post-processing with undefined output buffers.
ret = rknn_run(ctx, NULL);
ret = rknn_outputs_get(ctx, io_num.n_output, outputs, NULL);
gettimeofday(&stop_time, NULL);
// Elapsed time in ms for inputs_set + run + outputs_get (see timer start).
printf("once run use %f ms\n", (__get_us(stop_time) - __get_us(start_time)) / 1000);
1.7 后处理
// Ratios from model-input space back to the original image, used by
// post_process to map detected boxes onto the source image coordinates.
float scale_w = (float)width / img_width;
float scale_h = (float)height / img_height;
detect_result_group_t detect_result_group;
// Collect per-output dequantization parameters (zero point and scale)
// gathered from the output tensor attributes queried earlier.
std::vector<float> out_scales;
std::vector<uint32_t> out_zps;
for (int i = 0; i < io_num.n_output; ++i) {
out_scales.push_back(output_attrs[i].scale);
out_zps.push_back(output_attrs[i].zp);
}
// Decode the three quantized YOLOv5 heads (80x80, 40x40, 20x20 grids per
// the dumped attrs), apply confidence threshold and NMS on the CPU.
post_process((uint8_t*)outputs[0].buf, (uint8_t*)outputs[1].buf, (uint8_t*)outputs[2].buf, height, width,
box_conf_threshold, nms_threshold, scale_w, scale_h, out_zps, out_scales, &detect_result_group);
1.8 画框
// Draw Objects
// For each detection: print it to stdout, draw the bounding box in blue,
// and render a "<class> <confidence>" label in white above the box.
char text[256];
const unsigned char blue[] = {0, 0, 255};
const unsigned char white[] = {255, 255, 255};
for (int i = 0; i < detect_result_group.count; i++) {
  detect_result_t* det_result = &(detect_result_group.results[i]);
  // snprintf (was sprintf): bound the label to the buffer so an unusually
  // long class name cannot overflow text[].
  snprintf(text, sizeof(text), "%s %.2f", det_result->name, det_result->prop);
  printf("%s @ (%d %d %d %d) %f\n", det_result->name, det_result->box.left, det_result->box.top,
         det_result->box.right, det_result->box.bottom, det_result->prop);
  int x1 = det_result->box.left;
  int y1 = det_result->box.top;
  int x2 = det_result->box.right;
  int y2 = det_result->box.bottom;
  // draw box
  img.draw_rectangle(x1, y1, x2, y2, blue, 1, ~0U);
  img.draw_text(x1, y1 - 12, text, white);
}
1.9 释放资源
// Return the output buffers to the runtime before tearing the context down.
ret = rknn_outputs_release(ctx, io_num.n_output, outputs);
// release
// Teardown order: rknn context, DRM buffer + device, RGA context, then the
// host-side buffers (model image, resize scratch, decoded source pixels).
ret = rknn_destroy(ctx);
drm_buf_destroy(&drm_ctx, drm_fd, buf_fd, handle, drm_buf, actual_size);
drm_deinit(&drm_ctx, drm_fd);
RGA_deinit(&rga_ctx);
if (model_data) {
free(model_data);
}
if (resize_buf) {
free(resize_buf);
}
// input_data came from load_image (stb-based loader), so it is released
// with stbi_image_free rather than free().
stbi_image_free(input_data);
二、板端打印
[root@RV1126_RV1109:/userdata/fuhang/rknn_yolov5_demo/install/rknn_yolov5_demo]# ./rknn_yolov5_demo model/rv1109_rv1126/yolov5s_relu_rv1109_rv1126_out_opt.rknn mode
l/bus.bmp
post process config: box_conf_threshold = 0.50, nms_threshold = 0.60
Loading mode...
sdk version: librknn_runtime version 1.6.1 (fa099c6 build: 2021-04-25 10:56:29 base: 1126) driver version: 6.4.3.5.293908
model input num: 1, output num: 3
index=0, name=images_165, n_dims=4, dims=[1, 3, 640, 640], n_elems=1228800, size=1228800, fmt=NCHW, type=UINT8, qnt_type=AFFINE, zp=0, scale=0.003922
index=0, name=Conv_Conv_159/out0_0, n_dims=4, dims=[1, 255, 80, 80], n_elems=1632000, size=1632000, fmt=NCHW, type=UINT8, qnt_type=AFFINE, zp=187, scale=0.127843
index=1, name=Conv_Conv_160/out0_1, n_dims=4, dims=[1, 255, 40, 40], n_elems=408000, size=408000, fmt=NCHW, type=UINT8, qnt_type=AFFINE, zp=182, scale=0.113217
index=2, name=Conv_Conv_161/out0_2, n_dims=4, dims=[1, 255, 20, 20], n_elems=102000, size=102000, fmt=NCHW, type=UINT8, qnt_type=AFFINE, zp=172, scale=0.103272
model is NCHW input fmt
model input height=640, width=640, channel=3
Rga built version:1.04 356036d+2022-06-20 16:58:30
once run use 88.869000 ms
loadLabelName ./model/coco_80_labels_list.txt
person @ (478 248 559 520) 0.999287
person @ (105 239 224 532) 0.998786
person @ (82 338 119 512) 0.992262
person @ (212 242 285 514) 0.991466
bus @ (104 131 557 441) 0.960888
三、注意事项:
使用rknn-toolkit版本大于等于1.7.0。
本Demo只支持8比特非对称量化的rknn模型推理。
切换成自己训练的模型时,请注意对齐anchor等后处理参数,否则会导致后处理解析出错。
官网和rk预训练模型都是检测80类的目标,如果自己训练的模型,自行更改include/postprocess.h中的OBJ_CLASS_NUM以及NMS_THRESH,BOX_THRESH后处理参数后再编译。
由于硬件限制,该demo的模型默认把 yolov5 模型的后处理部分,移至cpu实现。本demo附带的模型均使用relu为激活函数,相比silu激活函数精度略微下降,推理速度更快。
关于加载时间:model目录下均是预编译rknn模型,加载速度比非预编译rknn模型快。convert_rknn_demo目录下的转换脚本生成非预编译rknn模型,如需重新生成预编译rknn模型,请参考rknn-toolkit的User Guide.
版权归原作者 月光下的麦克 所有, 如有侵权,请联系我们删除。