

rknn_yolov5执行流程

    RV1109上测试rknn_yolov5_demo以及分析

    

一、代码执行流程分析

1.1 加载模型

  /* Create the neural network */
  printf("Loading mode...\n");
  int            model_data_size = 0;
  unsigned char* model_data      = load_model(model_name, &model_data_size);
  ret                            = rknn_init(&ctx, model_data, model_data_size, 0);

1.2 获取输入输出张量属性

    先获取输入输出张量的个数,再逐个查询各张量的属性
rknn_input_output_num io_num;
  ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
  if (ret < 0) {
    printf("rknn_init error ret=%d\n", ret);
    return -1;
  }
  printf("model input num: %d, output num: %d\n", io_num.n_input, io_num.n_output);

  rknn_tensor_attr input_attrs[io_num.n_input];
  memset(input_attrs, 0, sizeof(input_attrs));
  for (int i = 0; i < io_num.n_input; i++) {
    input_attrs[i].index = i;
    ret                  = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
    if (ret < 0) {
      printf("rknn_init error ret=%d\n", ret);
      return -1;
    }
    dump_tensor_attr(&(input_attrs[i]));
  }

  rknn_tensor_attr output_attrs[io_num.n_output];
  memset(output_attrs, 0, sizeof(output_attrs));
  for (int i = 0; i < io_num.n_output; i++) {
    output_attrs[i].index = i;
    ret                   = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]), sizeof(rknn_tensor_attr));
    dump_tensor_attr(&(output_attrs[i]));
    if (output_attrs[i].qnt_type != RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC || output_attrs[i].type != RKNN_TENSOR_UINT8) {
      fprintf(stderr,
              "The Demo required for a Affine asymmetric u8 quantized rknn model, but output quant type is %s, output "
              "data type is %s\n",
              get_qnt_type_string(output_attrs[i].qnt_type), get_type_string(output_attrs[i].type));
      return -1;
    }
  }

1.3 加载源图像

    根据上一步查询到的输入张量布局(NCHW 或 NHWC)解析出模型输入的宽高
  int channel = 3;
  int width   = 0;
  int height  = 0;
  if (input_attrs[0].fmt == RKNN_TENSOR_NCHW) {
    printf("model is NCHW input fmt\n");
    width  = input_attrs[0].dims[0];
    height = input_attrs[0].dims[1];
  } else {
    printf("model is NHWC input fmt\n");
    width  = input_attrs[0].dims[1];
    height = input_attrs[0].dims[2];
  }

  printf("model input height=%d, width=%d, channel=%d\n", height, width, channel);

  // Load image
  CImg<unsigned char> img(image_name);
  unsigned char*      input_data = NULL;
  input_data                     = load_image(image_name, &img_height, &img_width, &img_channel, &input_attrs[0]);
  if (!input_data) {
    return -1;
  }

  rknn_input inputs[1];
  memset(inputs, 0, sizeof(inputs));
  inputs[0].index        = 0;
  inputs[0].type         = RKNN_TENSOR_UINT8;
  inputs[0].size         = width * height * channel;
  inputs[0].fmt          = RKNN_TENSOR_NHWC;
  inputs[0].pass_through = 0;

1.4 分配buffer

  // DRM alloc buffer
  drm_fd  = drm_init(&drm_ctx);
  drm_buf = drm_buf_alloc(&drm_ctx, drm_fd, img_width, img_height, channel * 8, &buf_fd, &handle, &actual_size);
  memcpy(drm_buf, input_data, img_width * img_height * channel);
  void* resize_buf = malloc(height * width * channel);

1.5 初始化RGA context,缩放图像并设置模型输入

  // init rga context
  RGA_init(&rga_ctx);
  img_resize_slow(&rga_ctx, drm_buf, img_width, img_height, resize_buf, width, height);
  inputs[0].buf = resize_buf;
  gettimeofday(&start_time, NULL);
  rknn_inputs_set(ctx, io_num.n_input, inputs);

  rknn_output outputs[io_num.n_output];
  memset(outputs, 0, sizeof(outputs));
  for (int i = 0; i < io_num.n_output; i++) {
    outputs[i].want_float = 0;
  }

1.6 运行模型

  ret = rknn_run(ctx, NULL);
  ret = rknn_outputs_get(ctx, io_num.n_output, outputs, NULL);
  gettimeofday(&stop_time, NULL);
  printf("once run use %f ms\n", (__get_us(stop_time) - __get_us(start_time)) / 1000);

1.7 后处理

  float scale_w = (float)width / img_width;
  float scale_h = (float)height / img_height;

  detect_result_group_t detect_result_group;
  std::vector<float>    out_scales;
  std::vector<uint32_t> out_zps;
  for (int i = 0; i < io_num.n_output; ++i) {
    out_scales.push_back(output_attrs[i].scale);
    out_zps.push_back(output_attrs[i].zp);
  }
  post_process((uint8_t*)outputs[0].buf, (uint8_t*)outputs[1].buf, (uint8_t*)outputs[2].buf, height, width,
               box_conf_threshold, nms_threshold, scale_w, scale_h, out_zps, out_scales, &detect_result_group);

1.8 画框

  // Draw Objects
  char                text[256];
  const unsigned char blue[]  = {0, 0, 255};
  const unsigned char white[] = {255, 255, 255};
  for (int i = 0; i < detect_result_group.count; i++) {
    detect_result_t* det_result = &(detect_result_group.results[i]);
    sprintf(text, "%s %.2f", det_result->name, det_result->prop);
    printf("%s @ (%d %d %d %d) %f\n", det_result->name, det_result->box.left, det_result->box.top,
           det_result->box.right, det_result->box.bottom, det_result->prop);
    int x1 = det_result->box.left;
    int y1 = det_result->box.top;
    int x2 = det_result->box.right;
    int y2 = det_result->box.bottom;
    // draw box
    img.draw_rectangle(x1, y1, x2, y2, blue, 1, ~0U);
    img.draw_text(x1, y1 - 12, text, white);
  }

1.9 释放资源

ret = rknn_outputs_release(ctx, io_num.n_output, outputs);

  // release
  ret = rknn_destroy(ctx);
  drm_buf_destroy(&drm_ctx, drm_fd, buf_fd, handle, drm_buf, actual_size);

  drm_deinit(&drm_ctx, drm_fd);
  RGA_deinit(&rga_ctx);
  if (model_data) {
    free(model_data);
  }

  if (resize_buf) {
    free(resize_buf);
  }
  stbi_image_free(input_data);

二、板端打印

[root@RV1126_RV1109:/userdata/fuhang/rknn_yolov5_demo/install/rknn_yolov5_demo]# ./rknn_yolov5_demo model/rv1109_rv1126/yolov5s_relu_rv1109_rv1126_out_opt.rknn model/bus.bmp
post process config: box_conf_threshold = 0.50, nms_threshold = 0.60
Loading mode...
sdk version: librknn_runtime version 1.6.1 (fa099c6 build: 2021-04-25 10:56:29 base: 1126) driver version: 6.4.3.5.293908
model input num: 1, output num: 3
  index=0, name=images_165, n_dims=4, dims=[1, 3, 640, 640], n_elems=1228800, size=1228800, fmt=NCHW, type=UINT8, qnt_type=AFFINE, zp=0, scale=0.003922
  index=0, name=Conv_Conv_159/out0_0, n_dims=4, dims=[1, 255, 80, 80], n_elems=1632000, size=1632000, fmt=NCHW, type=UINT8, qnt_type=AFFINE, zp=187, scale=0.127843
  index=1, name=Conv_Conv_160/out0_1, n_dims=4, dims=[1, 255, 40, 40], n_elems=408000, size=408000, fmt=NCHW, type=UINT8, qnt_type=AFFINE, zp=182, scale=0.113217
  index=2, name=Conv_Conv_161/out0_2, n_dims=4, dims=[1, 255, 20, 20], n_elems=102000, size=102000, fmt=NCHW, type=UINT8, qnt_type=AFFINE, zp=172, scale=0.103272
model is NCHW input fmt
model input height=640, width=640, channel=3
Rga built version:1.04 356036d+2022-06-20 16:58:30
once run use 88.869000 ms
loadLabelName ./model/coco_80_labels_list.txt
person @ (478 248 559 520) 0.999287
person @ (105 239 224 532) 0.998786
person @ (82 338 119 512) 0.992262
person @ (212 242 285 514) 0.991466
bus @ (104 131 557 441) 0.960888

三、注意事项:

  1. 使用rknn-toolkit版本大于等于1.7.0。

  2. 本Demo只支持8比特非对称量化的rknn模型推理。

  3. 切换成自己训练的模型时,请注意对齐anchor等后处理参数,否则会导致后处理解析出错。

  4. 官网和rk预训练模型都是检测80类的目标,如果自己训练的模型,自行更改include/postprocess.h中的OBJ_CLASS_NUM以及NMS_THRESH,BOX_THRESH后处理参数后再编译。

  5. 由于硬件限制,该demo的模型默认把 yolov5 模型的后处理部分,移至cpu实现。本demo附带的模型均使用relu为激活函数,相比silu激活函数精度略微下降,推理速度更快。

  6. 关于加载时间:model目录下均是预编译rknn模型,加载速度比非预编译rknn模型快。convert_rknn_demo目录下的转换脚本生成非预编译rknn模型,如需重新生成预编译rknn模型,请参考rknn-toolkit的User Guide.


本文转载自: https://blog.csdn.net/fuhanga123/article/details/128133713
版权归原作者 月光下的麦克 所有, 如有侵权,请联系我们删除。

“rknn_yolov5执行流程”的评论:

还没有评论