1、下载模型转换项目 rknn_model_zoo

github链接:https://github.com/airockchip/rknn_model_zoo/tree/main
2、在板子(瑞芯微)上配置对应的运行环境

**rknn-toolkit2:** https://github.com/airockchip/rknn-toolkit2
OpenCV
Boost
Eigen3
3、使用该项目下对应的后处理(postprocess)代码(很重要!)

这里是以yolo的姿态检测为例:
进入路径 /rknn_model_zoo/examples/yolov8_pose/cpp/
该路径下的代码都是在瑞芯微板子中运行目标姿态检测的代码
示例代码如下:
1
// 1、rknn初始化使用
2
#include <stdio.h>
3
#include <stdlib.h>
4
#include <string.h>
5
#include <math.h>
6

7
#include "yolov8-pose.h"
8
#include "common.h"
9
#include "file_utils.h"
10
#include "image_utils.h"
11

12
#include <sys/time.h>
13

14
/**
 * Returns the current wall-clock time in microseconds, as reported by
 * gettimeofday().
 *
 * The seconds field is widened to int64_t before it is scaled so that the
 * multiplication cannot overflow on targets where time_t is 32 bits wide.
 */
static inline int64_t getCurrentTimeUs()
{
  struct timeval tv;
  gettimeofday(&tv, NULL);
  return (int64_t)tv.tv_sec * 1000000 + (int64_t)tv.tv_usec;
}
20

21
/**
 * Prints a one-line summary of a tensor attribute to stdout: index, name,
 * dimension count, the first four dimensions, element count, byte size,
 * format/type/quantization names, zero point and scale.
 *
 * @param attr tensor attribute to print; must not be NULL.
 */
static void dump_tensor_attr(rknn_tensor_attr *attr)
{
    const char *fmt_name = get_format_string(attr->fmt);
    const char *type_name = get_type_string(attr->type);
    const char *qnt_name = get_qnt_type_string(attr->qnt_type);

    printf("  index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], "
           "n_elems=%d, size=%d, fmt=%s, type=%s, qnt_type=%s, zp=%d, scale=%f\n",
           attr->index, attr->name, attr->n_dims,
           attr->dims[0], attr->dims[1], attr->dims[2], attr->dims[3],
           attr->n_elems, attr->size,
           fmt_name, type_name, qnt_name,
           attr->zp, attr->scale);
}
29

30
int init_yolov8_pose_model(const char *model_path, rknn_app_context_t *app_ctx)
31
{
32
    int ret;
33
    rknn_context ctx = 0;
34

35
    ret = rknn_init(&ctx, (char *)model_path, 0, 0, NULL);
36
    if (ret < 0)
37
    {
38
        printf("rknn_init fail! ret=%d\n", ret);
39
        return -1;
40
    }
41

42
    // Get Model Input Output Number
43
    rknn_input_output_num io_num;
44
    ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
45
    if (ret != RKNN_SUCC)
46
    {
47
        printf("rknn_query fail! ret=%d\n", ret);
48
        return -1;
49
    }
50
    printf("model input num: %d, output num: %d\n", io_num.n_input, io_num.n_output);
51

52
    // Get Model Input Info
53
    printf("input tensors:\n");
54
    rknn_tensor_attr input_attrs[io_num.n_input];
55
    memset(input_attrs, 0, sizeof(input_attrs));
56
    for (int i = 0; i < io_num.n_input; i++)
57
    {
58
        input_attrs[i].index = i;
59
        ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
60
        if (ret != RKNN_SUCC)
61
        {
62
            printf("rknn_query fail! ret=%d\n", ret);
63
            return -1;
64
        }
65
        dump_tensor_attr(&(input_attrs[i]));
66
    }
67

68
    // Get Model Output Info
69
    printf("output tensors:\n");
70
    rknn_tensor_attr output_attrs[io_num.n_output];
71
    memset(output_attrs, 0, sizeof(output_attrs));
72
    for (int i = 0; i < io_num.n_output; i++)
73
    {
74
        output_attrs[i].index = i;
75
        ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]), sizeof(rknn_tensor_attr));
76
        if (ret != RKNN_SUCC)
77
        {
78
            printf("rknn_query fail! ret=%d\n", ret);
79
            return -1;
80
        }
81
        dump_tensor_attr(&(output_attrs[i]));
82
    }
83

84
    // Set to context
85
    app_ctx->rknn_ctx = ctx;
86

87
    // TODO
88
    if (output_attrs[0].qnt_type == RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC && output_attrs[0].type != RKNN_TENSOR_FLOAT16)
89
    {
90
        app_ctx->is_quant = true;
91
    }
92
    else
93
    {
94
        app_ctx->is_quant = false;
95
    }
96

97
    app_ctx->io_num = io_num;
98
    app_ctx->input_attrs = (rknn_tensor_attr *)malloc(io_num.n_input * sizeof(rknn_tensor_attr));
99
    memcpy(app_ctx->input_attrs, input_attrs, io_num.n_input * sizeof(rknn_tensor_attr));
100
    app_ctx->output_attrs = (rknn_tensor_attr *)malloc(io_num.n_output * sizeof(rknn_tensor_attr));
101
    memcpy(app_ctx->output_attrs, output_attrs, io_num.n_output * sizeof(rknn_tensor_attr));
102

103
    if (input_attrs[0].fmt == RKNN_TENSOR_NCHW)
104
    {
105
        printf("model is NCHW input fmt\n");
106
        app_ctx->model_channel = input_attrs[0].dims[1];
107
        app_ctx->model_height = input_attrs[0].dims[2];
108
        app_ctx->model_width = input_attrs[0].dims[3];
109
    }
110
    else
111
    {
112
        printf("model is NHWC input fmt\n");
113
        app_ctx->model_height = input_attrs[0].dims[1];
114
        app_ctx->model_width = input_attrs[0].dims[2];
115
        app_ctx->model_channel = input_attrs[0].dims[3];
116
    }
117
    printf("model input height=%d, width=%d, channel=%d\n",
118
           app_ctx->model_height, app_ctx->model_width, app_ctx->model_channel);
119

120
    return 0;
121
}
122

123
/**
 * Releases everything init_yolov8_pose_model() stored in `app_ctx`: the two
 * tensor-attribute arrays and the RKNN runtime context. Each field is reset
 * after release, so calling this twice on the same context is harmless.
 *
 * @param app_ctx context to tear down.
 * @return 0 on success, -1 if `app_ctx` is NULL.
 */
int release_yolov8_pose_model(rknn_app_context_t *app_ctx)
{
    if (app_ctx == NULL)
    {
        return -1;
    }

    // free(NULL) is a no-op, so no guards are needed around these.
    free(app_ctx->input_attrs);
    app_ctx->input_attrs = NULL;

    free(app_ctx->output_attrs);
    app_ctx->output_attrs = NULL;

    if (app_ctx->rknn_ctx != 0)
    {
        rknn_destroy(app_ctx->rknn_ctx);
        app_ctx->rknn_ctx = 0;
    }
    return 0;
}
142

143
/**
 * Runs one pose-detection inference: letterboxes `img` into a model-sized RGB
 * buffer, feeds it to the RKNN runtime, fetches the outputs and passes them to
 * post_process(), which fills `od_results`.
 *
 * Timing for rknn_run() and post_process() is printed to stdout.
 *
 * @param app_ctx    initialised application context.
 * @param img        source image.
 * @param od_results receives the detections; zeroed on entry.
 * @return 0 on success; -1 for invalid arguments or an allocation failure;
 *         otherwise the failing callee's return code.
 */
int inference_yolov8_pose_model(rknn_app_context_t *app_ctx, image_buffer_t *img, object_detect_result_list *od_results)
{
    // Validate first: app_ctx is dereferenced below to size the I/O arrays.
    if ((!app_ctx) || !(img) || (!od_results))
    {
        return -1;
    }

    // All locals are declared before the first `goto out` so that no jump
    // crosses an initialisation.
    int ret = -1; // returned as-is when the buffer allocation fails
    int64_t start_us;
    int64_t elapsed_us;
    image_buffer_t dst_img;
    letterbox_t letter_box;
    rknn_input inputs[app_ctx->io_num.n_input];
    rknn_output outputs[app_ctx->io_num.n_output];
    const float nms_threshold = NMS_THRESH;      // Default NMS threshold
    const float box_conf_threshold = BOX_THRESH; // Default box threshold
    int bg_color = 114;                          // letterbox padding value

    memset(od_results, 0x00, sizeof(*od_results));
    memset(&letter_box, 0, sizeof(letterbox_t));
    memset(&dst_img, 0, sizeof(image_buffer_t));
    memset(inputs, 0, sizeof(inputs));
    memset(outputs, 0, sizeof(outputs));

    // Pre Process
    dst_img.width = app_ctx->model_width;
    dst_img.height = app_ctx->model_height;
    dst_img.format = IMAGE_FORMAT_RGB888;
    dst_img.size = get_image_size(&dst_img);
    dst_img.virt_addr = (unsigned char *)malloc(dst_img.size);
    if (dst_img.virt_addr == NULL)
    {
        printf("malloc buffer size:%d fail!\n", dst_img.size);
        goto out;
    }

    // letterbox
    ret = convert_image_with_letterbox(img, &dst_img, &letter_box, bg_color);
    if (ret < 0)
    {
        printf("convert_image_with_letterbox fail! ret=%d\n", ret);
        goto out;
    }

    // Set Input Data
    inputs[0].index = 0;
    inputs[0].type = RKNN_TENSOR_UINT8;
    inputs[0].fmt = RKNN_TENSOR_NHWC;
    inputs[0].size = app_ctx->model_width * app_ctx->model_height * app_ctx->model_channel;
    inputs[0].buf = dst_img.virt_addr;

    ret = rknn_inputs_set(app_ctx->rknn_ctx, app_ctx->io_num.n_input, inputs);
    if (ret < 0)
    {
        printf("rknn_input_set fail! ret=%d\n", ret);
        goto out;
    }

    // Run
    printf("rknn_run\n");
    start_us = getCurrentTimeUs();
    ret = rknn_run(app_ctx->rknn_ctx, NULL);
    elapsed_us = getCurrentTimeUs() - start_us;
    printf("rknn_run time=%.2fms, FPS = %.2f\n", elapsed_us / 1000.f,
           1000.f * 1000.f / elapsed_us);

    if (ret < 0)
    {
        printf("rknn_run fail! ret=%d\n", ret);
        goto out;
    }

    // Get Output: request float data only when the model is not quantized.
    memset(outputs, 0, sizeof(outputs));
    for (uint32_t i = 0; i < app_ctx->io_num.n_output; i++)
    {
        outputs[i].index = i;
        outputs[i].want_float = (!app_ctx->is_quant);
    }
    ret = rknn_outputs_get(app_ctx->rknn_ctx, app_ctx->io_num.n_output, outputs, NULL);
    if (ret < 0)
    {
        printf("rknn_outputs_get fail! ret=%d\n", ret);
        goto out;
    }

    // Post Process
    start_us = getCurrentTimeUs();
    ret = post_process(app_ctx, outputs, &letter_box, box_conf_threshold, nms_threshold, od_results);
    elapsed_us = getCurrentTimeUs() - start_us;
    printf("post_process time=%.2fms, FPS = %.2f\n", elapsed_us / 1000.f,
           1000.f * 1000.f / elapsed_us);

    // Remember to release the rknn outputs, whatever post_process returned.
    rknn_outputs_release(app_ctx->rknn_ctx, app_ctx->io_num.n_output, outputs);

out:
    free(dst_img.virt_addr); // free(NULL) is a no-op

    return ret;
}
243

244
// 2、核心后处理代码(适配rk3588的代码,也就是RKNPU2)
245
#include "yolov8-pose.h"
246
#include <math.h>
247
#include <stdint.h>
248
#include <stdio.h>
249
#include <stdlib.h>
250
#include <string.h>
251
#include <sys/time.h>
252

253
#include "Float16.h"
254

255
#include <set>
256
#include <vector>
257
#define LABEL_NALE_TXT_PATH "/home/app/bin/yolov8_pose_labels_list.txt"
258

259
static char *labels[OBJ_CLASS_NUM];
260

261
/**
 * Limits `val` to the range [min, max] and returns it truncated to int.
 * Both bounds are compared (and, when selected, returned) as floats.
 * A value that is not greater than `min` (including NaN) yields `min`.
 */
inline static int clamp(float val, int min, int max)
{
    const float lower = (float)min;
    const float upper = (float)max;
    float bounded;

    if (val > lower) {
        bounded = (val < upper) ? val : upper;
    } else {
        bounded = lower;
    }
    return (int)bounded;
}
262

263
/**
 * Reads one line from `fp` into a freshly allocated, NUL-terminated buffer.
 * The terminating '\n' is consumed but not stored.
 *
 * The buffer grows geometrically, so a line of n characters costs O(log n)
 * reallocations instead of one per character.
 *
 * @param fp     stream to read from.
 * @param buffer ignored on input (kept for interface compatibility); the
 *               function always allocates its own buffer.
 * @param len    receives the number of characters stored (excluding the NUL).
 * @return the allocated line (caller frees it), or NULL on out-of-memory, on a
 *         read error, or when EOF is reached before any character was read.
 */
static char *readLine(FILE *fp, char *buffer, int *len) {
    int ch;
    size_t used = 0;
    size_t capacity = 64;

    buffer = (char *)malloc(capacity);
    if (!buffer)
        return NULL; // Out of memory

    while ((ch = fgetc(fp)) != '\n' && ch != EOF) {
        // Always keep one spare byte for the terminating NUL.
        if (used + 1 >= capacity) {
            size_t grown = capacity * 2;
            void *tmp = realloc(buffer, grown);
            if (tmp == NULL) {
                free(buffer);
                return NULL; // Out of memory
            }
            buffer = (char *)tmp;
            capacity = grown;
        }
        buffer[used++] = (char)ch;
    }
    buffer[used] = '\0';

    *len = (int)used;

    // EOF with nothing read, or a stream error, means there is no line to return.
    if (ch == EOF && (used == 0 || ferror(fp))) {
        free(buffer);
        return NULL;
    }
    return buffer;
}
295

296
static int readLines(const char *fileName, char *lines[], int max_line) {
297
    FILE *file = fopen(fileName, "r");
298
    char *s;
299
    int i = 0;
300
    int n = 0;
301

302
    if (file == NULL) {
303
        printf("Open %s fail!\n", fileName);
304
        return -1;
305
    }
306

307
    while ((s = readLine(file, s, &n)) != NULL) {
308
        lines[i++] = s;
309
        if (i >= max_line)
310
            break;
311
    }
312
    fclose(file);
313
    return i;
314
}
315

316
static int loadLabelName(const char *locationFilename, char *label[]) {
317
    printf("load lable %s\n", locationFilename);
318
    readLines(locationFilename, label, OBJ_CLASS_NUM);
319
    return 0;
320
}
321

322
/**
 * Intersection-over-union of two axis-aligned boxes given by their min/max
 * corners. Widths and heights use the inclusive "+1" convention.
 *
 * @return the IoU, or 0 when the computed union is not positive.
 */
static float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1,
                              float ymax1)
{
    // Overlap extents, clamped at zero for disjoint boxes.
    const float overlap_w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0);
    const float overlap_h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0);
    const float inter = overlap_w * overlap_h;

    // Areas are accumulated in double, then the union is narrowed to float.
    const double area0 = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0);
    const double area1 = (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0);
    const float uni = (float)(area0 + area1 - inter);

    if (uni <= 0.f) {
        return 0.f;
    }
    return inter / uni;
}
331

332
static int nms(int validCount, std::vector<float> &outputLocations, std::vector<int> classIds, std::vector<int> &order,
333
               int filterId, float threshold)
334
{
335
    for (int i = 0; i < validCount; ++i)
336
    {
337
        int n = order[i];
338
        if (n == -1 || classIds[n] != filterId)
339
        {
340
            continue;
341
        }
342
        for (int j = i + 1; j < validCount; ++j)
343
        {
344
            int m = order[j];
345
            if (m == -1 || classIds[m] != filterId)
346
            {
347
                continue;
348
            }
349
            float xmin0 = outputLocations[n * 5 + 0];
350
            float ymin0 = outputLocations[n * 5 + 1];
351
            float xmax0 = outputLocations[n * 5 + 0] + outputLocations[n * 5 + 2];
352
            float ymax0 = outputLocations[n * 5 + 1] + outputLocations[n * 5 + 3];
353

354
            float xmin1 = outputLocations[m * 5 + 0];
355
            float ymin1 = outputLocations[m * 5 + 1];
356
            float xmax1 = outputLocations[m * 5 + 0] + outputLocations[m * 5 + 2];
357
            float ymax1 = outputLocations[m * 5 + 1] + outputLocations[m * 5 + 3];
358

359
            float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1);
360

361
            if (iou > threshold)
362
            {
363
                order[j] = -1;
364
            }
365
        }
366
    }
367
    return 0;
368
}
369

370
/**
 * In-place quicksort of input[left..right] into descending order, applying the
 * same permutation to `indices`. The first element of the range is the pivot.
 *
 * @param input   values to sort (modified).
 * @param left    first index of the range.
 * @param right   last index of the range (inclusive).
 * @param indices companion array permuted alongside `input`.
 * @return the final pivot position, or `left` when the range has fewer than
 *         two elements.
 */
static int quick_sort_indice_inverse(std::vector<float> &input, int left, int right, std::vector<int> &indices) {
    int lo = left;
    int hi = right;

    if (left >= right) {
        return lo;
    }

    const float pivot = input[left];
    const int pivot_index = indices[left];

    while (lo < hi) {
        // Scan from the right for an element larger than the pivot.
        while (lo < hi && input[hi] <= pivot) {
            --hi;
        }
        input[lo] = input[hi];
        indices[lo] = indices[hi];

        // Scan from the left for an element smaller than the pivot.
        while (lo < hi && input[lo] >= pivot) {
            ++lo;
        }
        input[hi] = input[lo];
        indices[hi] = indices[lo];
    }

    input[lo] = pivot;
    indices[lo] = pivot_index;

    quick_sort_indice_inverse(input, left, lo - 1, indices);
    quick_sort_indice_inverse(input, lo + 1, right, indices);
    return lo;
}
397

398
/** Logistic function 1 / (1 + e^-x); the division is carried out in double. */
static float sigmoid(float x) {
    const double denominator = 1.0 + expf(-x);
    return (float)(1.0 / denominator);
}
401

402
/** Inverse of sigmoid(): returns -ln(1/y - 1), i.e. the logit of `y`. */
static float unsigmoid(float y) {
    const float inverse_odds = (float)((1.0 / y) - 1.0);
    return (float)(-1.0 * logf(inverse_odds));
}
405

406
/**
 * Limits `val` to [min, max] and converts the result to int32_t
 * (fractional part discarded by the float-to-int conversion).
 */
inline static int32_t __clip(float val, float min, float max) {
    float limited = val;
    if (val <= min) {
        limited = min;
    } else if (val >= max) {
        limited = max;
    }
    return limited;
}
410

411
static int8_t qnt_f32_to_affine(float f32, int32_t zp, float scale) {
412
    float dst_val = (f32 / scale) + zp;
413
    int8_t res = (int8_t)__clip(dst_val, -128, 127);
414
    return res;
415
}
416

417
static uint8_t qnt_f32_to_affine_u8(float f32, int32_t zp, float scale) {
418
    float dst_val = (f32 / scale) + zp;
419
    uint8_t res = (uint8_t)__clip(dst_val, 0, 255);
420
    return res;
421
}
422

423
/** Dequantizes a signed 8-bit affine value: (qnt - zp) * scale. */
static float deqnt_affine_to_f32(int8_t qnt, int32_t zp, float scale) {
    const float centered = (float)qnt - (float)zp;
    return centered * scale;
}
426
/** Dequantizes an unsigned 8-bit affine value: (qnt - zp) * scale. */
static float deqnt_affine_u8_to_f32(uint8_t qnt, int32_t zp, float scale) {
    const float centered = (float)qnt - (float)zp;
    return centered * scale;
}
429

430
/**
 * Replaces `input[0..size)` with its softmax, in place. The maximum is
 * subtracted before exponentiation for numerical stability.
 *
 * A NULL pointer or a non-positive size is a no-op. Each exponential is
 * computed once and stored, then normalised by the accumulated sum.
 *
 * @param input values to transform (modified in place).
 * @param size  number of elements in `input`.
 */
void softmax(float *input, int size) {
    if (input == NULL || size <= 0) {
        return;
    }

    float max_val = input[0];
    for (int i = 1; i < size; ++i) {
        if (input[i] > max_val) {
            max_val = input[i];
        }
    }

    float sum_exp = 0.0f;
    for (int i = 0; i < size; ++i) {
        input[i] = expf(input[i] - max_val);
        sum_exp += input[i];
    }

    for (int i = 0; i < size; ++i) {
        input[i] = input[i] / sum_exp;
    }
}
447

448
static int process_i8(int8_t *input, int grid_h, int grid_w, int stride,
449
                      std::vector<float> &boxes, std::vector<float> &boxScores, std::vector<int> &classId, float threshold,
450
                      int32_t zp, float scale, int index) {
451
    int input_loc_len = 64;
452
    int tensor_len = input_loc_len + OBJ_CLASS_NUM;
453
    int validCount = 0;
454

455
    int8_t thres_i8 = qnt_f32_to_affine(unsigmoid(threshold), zp, scale);
456
    for (int h = 0; h < grid_h; h++) {
457
        for (int w = 0; w < grid_w; w++) {
458
            for (int a = 0; a < OBJ_CLASS_NUM; a++) {
459
                if(input[(input_loc_len + a)*grid_w * grid_h + h * grid_w + w ] >= thres_i8) { //[1,tensor_len,grid_h,grid_w]
460
                    float box_conf_f32 = sigmoid(deqnt_affine_to_f32(input[(input_loc_len + a) * grid_w * grid_h + h * grid_w + w ],
461
                                                 zp, scale));
462
                    float loc[input_loc_len];
463
                    for (int i = 0; i < input_loc_len; ++i) {
464
                        loc[i] = deqnt_affine_to_f32(input[i * grid_w * grid_h + h * grid_w + w], zp, scale);
465
                    }
466

467
                    for (int i = 0; i < input_loc_len / 16; ++i) {
468
                        softmax(&loc[i * 16], 16);
469
                    }
470
                    float xywh_[4] = {0, 0, 0, 0};
471
                    float xywh[4] = {0, 0, 0, 0};
472
                    for (int dfl = 0; dfl < 16; ++dfl) {
473
                        xywh_[0] += loc[dfl] * dfl;
474
                        xywh_[1] += loc[1 * 16 + dfl] * dfl;
475
                        xywh_[2] += loc[2 * 16 + dfl] * dfl;
476
                        xywh_[3] += loc[3 * 16 + dfl] * dfl;
477
                    }
478
                    xywh_[0]=(w+0.5)-xywh_[0];
479
                    xywh_[1]=(h+0.5)-xywh_[1];
480
                    xywh_[2]=(w+0.5)+xywh_[2];
481
                    xywh_[3]=(h+0.5)+xywh_[3];
482
                    xywh[0]=((xywh_[0]+xywh_[2])/2)*stride;
483
                    xywh[1]=((xywh_[1]+xywh_[3])/2)*stride;
484
                    xywh[2]=(xywh_[2]-xywh_[0])*stride;
485
                    xywh[3]=(xywh_[3]-xywh_[1])*stride;
486
                    xywh[0]=xywh[0]-xywh[2]/2;
487
                    xywh[1]=xywh[1]-xywh[3]/2;
488
                    boxes.push_back(xywh[0]);//x
489
                    boxes.push_back(xywh[1]);//y
490
                    boxes.push_back(xywh[2]);//w
491
                    boxes.push_back(xywh[3]);//h
492
                    boxes.push_back(float(index + (h * grid_w) + w));//keypoints index
493
                    boxScores.push_back(box_conf_f32);
494
                    classId.push_back(a);
495
                    validCount++;
496
                }
497
            }
498
        }
499
    }
500
    return validCount;
501
}
502

503
static int process_u8(uint8_t *input, int grid_h, int grid_w, int stride,
504
                      std::vector<float> &boxes, std::vector<float> &boxScores, std::vector<int> &classId, float threshold,
505
                      int32_t zp, float scale, int index) {
506
    int input_loc_len = 64;
507
    int tensor_len = input_loc_len + OBJ_CLASS_NUM;
508
    int validCount = 0;
509

510
    uint8_t thres_i8 = qnt_f32_to_affine_u8(unsigmoid(threshold), zp, scale);
511
    for (int h = 0; h < grid_h; h++) {
512
        for (int w = 0; w < grid_w; w++) {
513
            for (int a = 0; a < OBJ_CLASS_NUM; a++) {
514
                if(input[(input_loc_len + a)*grid_w * grid_h + h * grid_w + w ] >= thres_i8) { //[1,tensor_len,grid_h,grid_w]
515
                    float box_conf_f32 = sigmoid(deqnt_affine_u8_to_f32(input[(input_loc_len + a) * grid_w * grid_h + h * grid_w + w ],
516
                                                 zp, scale));
517
                    float loc[input_loc_len];
518
                    for (int i = 0; i < input_loc_len; ++i) {
519
                        loc[i] = deqnt_affine_u8_to_f32(input[i * grid_w * grid_h + h * grid_w + w], zp, scale);
520
                    }
521

522
                    for (int i = 0; i < input_loc_len / 16; ++i) {
523
                        softmax(&loc[i * 16], 16);
524
                    }
525
                    float xywh_[4] = {0, 0, 0, 0};
526
                    float xywh[4] = {0, 0, 0, 0};
527
                    for (int dfl = 0; dfl < 16; ++dfl) {
528
                        xywh_[0] += loc[dfl] * dfl;
529
                        xywh_[1] += loc[1 * 16 + dfl] * dfl;
530
                        xywh_[2] += loc[2 * 16 + dfl] * dfl;
531
                        xywh_[3] += loc[3 * 16 + dfl] * dfl;
532
                    }
533
                    xywh_[0]=(w+0.5)-xywh_[0];
534
                    xywh_[1]=(h+0.5)-xywh_[1];
535
                    xywh_[2]=(w+0.5)+xywh_[2];
536
                    xywh_[3]=(h+0.5)+xywh_[3];
537
                    xywh[0]=((xywh_[0]+xywh_[2])/2)*stride;
538
                    xywh[1]=((xywh_[1]+xywh_[3])/2)*stride;
539
                    xywh[2]=(xywh_[2]-xywh_[0])*stride;
540
                    xywh[3]=(xywh_[3]-xywh_[1])*stride;
541
                    xywh[0]=xywh[0]-xywh[2]/2;
542
                    xywh[1]=xywh[1]-xywh[3]/2;
543
                    boxes.push_back(xywh[0]);//x
544
                    boxes.push_back(xywh[1]);//y
545
                    boxes.push_back(xywh[2]);//w
546
                    boxes.push_back(xywh[3]);//h
547
                    boxes.push_back(float(index + (h * grid_w) + w));//keypoints index
548
                    boxScores.push_back(box_conf_f32);
549
                    classId.push_back(a);
550
                    validCount++;
551
                }
552
            }
553
        }
554
    }
555
    return validCount;
556
}
557

558
static int process_fp32(float *input, int grid_h, int grid_w, int stride,
559
                      std::vector<float> &boxes, std::vector<float> &boxScores, std::vector<int> &classId, float threshold,
560
                      int32_t zp, float scale, int index) {
561
    int input_loc_len = 64;
562
    int tensor_len = input_loc_len + OBJ_CLASS_NUM;
563
    int validCount = 0;
564
    float thres_fp = unsigmoid(threshold);
565
    for (int h = 0; h < grid_h; h++) {
566
        for (int w = 0; w < grid_w; w++) {
567
            for (int a = 0; a < OBJ_CLASS_NUM; a++) {
568
                if(input[(input_loc_len + a)*grid_w * grid_h + h * grid_w + w ] >= thres_fp) { //[1,tensor_len,grid_h,grid_w]
569
                    float box_conf_f32 = sigmoid(input[(input_loc_len + a) * grid_w * grid_h + h * grid_w + w ]);
570
                    float loc[input_loc_len];
571
                    for (int i = 0; i < input_loc_len; ++i) {
572
                        loc[i] = input[i * grid_w * grid_h + h * grid_w + w];
573
                    }
574

575
                    for (int i = 0; i < input_loc_len / 16; ++i) {
576
                        softmax(&loc[i * 16], 16);
577
                    }
578
                    float xywh_[4] = {0, 0, 0, 0};
579
                    float xywh[4] = {0, 0, 0, 0};
580
                    for (int dfl = 0; dfl < 16; ++dfl) {
581
                        xywh_[0] += loc[dfl] * dfl;
582
                        xywh_[1] += loc[1 * 16 + dfl] * dfl;
583
                        xywh_[2] += loc[2 * 16 + dfl] * dfl;
584
                        xywh_[3] += loc[3 * 16 + dfl] * dfl;
585
                    }
586
                    xywh_[0]=(w+0.5)-xywh_[0];
587
                    xywh_[1]=(h+0.5)-xywh_[1];
588
                    xywh_[2]=(w+0.5)+xywh_[2];
589
                    xywh_[3]=(h+0.5)+xywh_[3];
590
                    xywh[0]=((xywh_[0]+xywh_[2])/2)*stride;
591
                    xywh[1]=((xywh_[1]+xywh_[3])/2)*stride;
592
                    xywh[2]=(xywh_[2]-xywh_[0])*stride;
593
                    xywh[3]=(xywh_[3]-xywh_[1])*stride;
594
                    xywh[0]=xywh[0]-xywh[2]/2;
595
                    xywh[1]=xywh[1]-xywh[3]/2;
596
                    boxes.push_back(xywh[0]);//x
597
                    boxes.push_back(xywh[1]);//y
598
                    boxes.push_back(xywh[2]);//w
599
                    boxes.push_back(xywh[3]);//h
600
                    boxes.push_back(float(index + (h * grid_w) + w));//keypoints index
601
                    boxScores.push_back(box_conf_f32);
602
                    classId.push_back(a);
603
                    validCount++;
604
                }
605
            }
606
        }
607
    }
608
    return validCount;
609
}
610

611
/**
 * Turns raw model outputs into the final detection list.
 *
 * Outputs 0..2 are the box/score branches; each is scanned for candidates
 * above `conf_threshold`. Candidates are sorted by descending score,
 * de-duplicated per class with NMS, mapped back through the letterbox
 * transform and written to `od_results` together with their 17 keypoints,
 * which are read from output 3.
 *
 * The keypoint tensor is indexed as [17][3][N], where N is the total number of
 * grid cells over the three branches (8400 for a 640x640 input). N is derived
 * from the branch dimensions rather than hard-coded.
 * NOTE(review): this assumes output 3 always has exactly N columns; confirm
 * against the exported model if a non-640 input size is used.
 *
 * @param app_ctx        initialised application context.
 * @param outputs        outputs returned by rknn_outputs_get().
 * @param letter_box     padding/scale applied during preprocessing.
 * @param conf_threshold minimum class confidence (probability space).
 * @param nms_threshold  IoU above which overlapping boxes are suppressed.
 * @param od_results     receives the detections; zeroed on entry.
 * @return 0 on success (including "nothing detected"), -1 if the outputs do
 *         not have the expected layout.
 */
int post_process(rknn_app_context_t *app_ctx, rknn_output *outputs, letterbox_t *letter_box, float conf_threshold, float nms_threshold, object_detect_result_list *od_results)
{
    const int kBoxBranches = 3;    // outputs[0..2]: box + score branches
    const int kKeypointOutput = 3; // outputs[3]: keypoints
    const int kNumKeypoints = 17;
    const int kKeypointDims = 3;   // x, y, confidence

    std::vector<float> filterBoxes;
    std::vector<float> objProbs;
    std::vector<int> classId;
    int validCount = 0;
    const int model_in_w = app_ctx->model_width;
    const int model_in_h = app_ctx->model_height;
    memset(od_results, 0, sizeof(object_detect_result_list));

    if ((int)app_ctx->io_num.n_output <= kKeypointOutput || outputs[kKeypointOutput].buf == NULL) {
        printf("post_process: keypoint output %d is not available\n", kKeypointOutput);
        return -1;
    }

    // Scan the three box branches; total_cells doubles as the running global
    // cell index handed to each branch.
    int total_cells = 0;
    for (int i = 0; i < kBoxBranches; i++) {
        const int grid_h = app_ctx->output_attrs[i].dims[2];
        const int grid_w = app_ctx->output_attrs[i].dims[3];
        if (grid_h <= 0 || grid_w <= 0) {
            printf("post_process: output %d has an empty grid\n", i);
            return -1;
        }
        const int stride = model_in_h / grid_h;
        if (app_ctx->is_quant) {
            validCount += process_i8((int8_t *)outputs[i].buf, grid_h, grid_w, stride, filterBoxes, objProbs,
                                     classId, conf_threshold, app_ctx->output_attrs[i].zp, app_ctx->output_attrs[i].scale, total_cells);
        }
        else
        {
            validCount += process_fp32((float *)outputs[i].buf, grid_h, grid_w, stride, filterBoxes, objProbs,
                                       classId, conf_threshold, app_ctx->output_attrs[i].zp, app_ctx->output_attrs[i].scale, total_cells);
        }
        total_cells += grid_h * grid_w;
    }

    // no object detect
    if (validCount <= 0) {
        return 0;
    }

    // Sort scores descending; indexArray[i] maps sorted rank -> candidate.
    std::vector<int> indexArray(validCount);
    for (int i = 0; i < validCount; ++i) {
        indexArray[i] = i;
    }
    quick_sort_indice_inverse(objProbs, 0, validCount - 1, indexArray);

    std::set<int> class_set(std::begin(classId), std::end(classId));

    for (auto c : class_set) {
        nms(validCount, filterBoxes, classId, indexArray, c, nms_threshold);
    }

    // In the quantized path the keypoint output is read as float16, otherwise
    // as float; both views point at the same buffer.
    rknpu2::float16 *kpt_f16 = (rknpu2::float16 *)outputs[kKeypointOutput].buf;
    float *kpt_f32 = (float *)outputs[kKeypointOutput].buf;

    int last_count = 0;
    od_results->count = 0;

    /* box valid detect target */
    for (int i = 0; i < validCount && last_count < OBJ_NUMB_MAX_SIZE; ++i) {
        if (indexArray[i] == -1) {
            continue; // suppressed by NMS
        }
        const int n = indexArray[i];
        const float x1 = filterBoxes[n * 5 + 0] - letter_box->x_pad;
        const float y1 = filterBoxes[n * 5 + 1] - letter_box->y_pad;
        const float w = filterBoxes[n * 5 + 2];
        const float h = filterBoxes[n * 5 + 3];
        const int keypoints_index = (int)filterBoxes[n * 5 + 4];

        for (int j = 0; j < kNumKeypoints; ++j) {
            float raw[3];
            for (int c = 0; c < kKeypointDims; ++c) {
                const int offset = (j * kKeypointDims + c) * total_cells + keypoints_index;
                raw[c] = app_ctx->is_quant ? (float)kpt_f16[offset] : kpt_f32[offset];
            }
            // Undo the letterbox for x/y; the confidence is passed through.
            od_results->results[last_count].keypoints[j][0] = (raw[0] - letter_box->x_pad) / letter_box->scale;
            od_results->results[last_count].keypoints[j][1] = (raw[1] - letter_box->y_pad) / letter_box->scale;
            od_results->results[last_count].keypoints[j][2] = raw[2];
        }

        // objProbs was sorted in place, so rank i holds this candidate's score.
        od_results->results[last_count].box.left = (int)(clamp(x1, 0, model_in_w) / letter_box->scale);
        od_results->results[last_count].box.top = (int)(clamp(y1, 0, model_in_h) / letter_box->scale);
        od_results->results[last_count].box.right = (int)(clamp(x1 + w, 0, model_in_w) / letter_box->scale);
        od_results->results[last_count].box.bottom = (int)(clamp(y1 + h, 0, model_in_h) / letter_box->scale);
        od_results->results[last_count].prop = objProbs[i];
        od_results->results[last_count].cls_id = classId[n];
        last_count++;
    }
    od_results->count = last_count;
    return 0;
}
718

719
int init_post_process() {
720
    int ret = 0;
721
    ret = loadLabelName(LABEL_NALE_TXT_PATH, labels);
722
    if (ret < 0) {
723
        printf("Load %s failed!\n", LABEL_NALE_TXT_PATH);
724
        return -1;
725
    }
726
    return 0;
727
}
728

729
char *coco_cls_to_name(int cls_id) {
730

731
    if (cls_id >= OBJ_CLASS_NUM) {
732
        return "null";
733
    }
734

735
    if (labels[cls_id]) {
736
        return labels[cls_id];
737
    }
738

739
    return "null";
740
}
741

742
void deinit_post_process() {
743
    for (int i = 0; i < OBJ_CLASS_NUM; i++) {
744
        if (labels[i] != nullptr) {
745
            free(labels[i]);
746
            labels[i] = nullptr;
747
        }
748
    }
749
}
音乐

音乐

1、下载模型转换项目rknn_model_zoo#

2、在板子(瑞芯微)配置后对应的运行环境#

3、使用该项目下对应的后处理postprocess代码(很重要!!!)#

4、参考链接:#

支持与分享

音乐

文章目录

音乐

音乐

Yolo_Pose姿态模型在arm linux的验证与部署

1、下载模型转换项目rknn_model_zoo#

2、在板子(瑞芯微)配置后对应的运行环境#

3、使用该项目下对应的后处理postprocess代码(很重要!!!)#

4、参考链接:#

支持与分享

音乐

文章目录