@@ -94,6 +94,7 @@ mean median min input size model
94
94
46.10 47.53 43.06 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov_int8.onnx']
95
95
144.89 149.58 125.71 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
96
96
143.83 146.39 119.75 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov_int8.onnx']
97
+ 12.52 14.47 11.63 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
97
98
12.99 13.11 12.14 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
98
99
12.64 12.44 10.82 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb_int8.onnx']
99
100
12.64 11.83 11.03 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
@@ -148,6 +149,7 @@ mean median min input size model
148
149
212.90 212.93 209.55 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov_int8.onnx']
149
150
1690.06 2303.34 1480.63 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
150
151
1489.54 1435.48 1308.12 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov_int8.onnx']
152
+ 90.49 89.23 86.83 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
151
153
356.63 357.29 354.42 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
152
154
217.52 229.39 101.61 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb_int8.onnx']
153
155
198.63 198.25 196.68 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
@@ -203,6 +205,7 @@ mean median min input size model
203
205
216.18 216.19 214.30 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov_int8.onnx']
204
206
1207.83 1208.71 1203.64 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
205
207
1236.98 1250.21 1203.64 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov_int8.onnx']
208
+ 123.30 125.37 116.69 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
206
209
124.89 125.25 124.53 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
207
210
107.99 109.82 94.05 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb_int8.onnx']
208
211
108.41 108.33 107.91 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
@@ -241,6 +244,7 @@ mean median min input size model
241
244
54.24 55.24 52.87 [ 320, 240] LPD_YuNet with [ 'license_plate_detection_lpd_yunet_2023mar.onnx']
242
245
63.63 63.43 63.32 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov.onnx']
243
246
371.45 378.00 366.39 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
247
+ 43.06 42.32 39.92 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
244
248
33.85 33.90 33.61 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
245
249
38.16 37.33 37.10 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
246
250
91.65 91.98 89.90 [ 128, 256] YoutuReID with [ 'person_reid_youtu_2021nov.onnx']
@@ -271,6 +275,7 @@ mean median min input size model
271
275
366.46 366.88 363.46 [ 320, 240] LPD_YuNet with [ 'license_plate_detection_lpd_yunet_2023mar.onnx']
272
276
163.06 163.34 161.77 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov.onnx']
273
277
301.10 311.52 297.74 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
278
+ 43.36 40.65 39.85 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
274
279
149.37 149.95 148.01 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
275
280
153.89 153.96 153.43 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
276
281
44.29 44.03 43.62 [ 128, 256] YoutuReID with [ 'person_reid_youtu_2021nov.onnx']
@@ -318,6 +323,7 @@ mean median min input size model
318
323
212.69 262.75 170.88 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov_int8.onnx']
319
324
1110.87 1112.27 1085.31 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
320
325
1128.73 1157.12 1085.31 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov_int8.onnx']
326
+ 67.31 67.41 66.23 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
321
327
147.01 144.01 139.27 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
322
328
119.70 118.95 94.09 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb_int8.onnx']
323
329
107.63 107.09 105.61 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
@@ -398,6 +404,7 @@ mean median min input size model
398
404
322.98 323.45 312.13 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov_int8.onnx']
399
405
1875.33 1877.53 1871.26 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
400
406
1989.04 2005.25 1871.26 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov_int8.onnx']
407
+ 143.62 143.19 137.16 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
401
408
159.80 159.62 159.40 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
402
409
152.18 152.86 145.56 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb_int8.onnx']
403
410
145.83 145.77 145.45 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
@@ -418,8 +425,10 @@ mean median min input size model
418
425
419
426
NPU (CANN):
420
427
428
+ <!-- vittrack is excluded because HardSwish is not yet supported by the CANN backend -->
429
+
421
430
```
422
- $ python3 benchmark.py --all --fp32 --cfg_exclude wechat: crnn --model_exclude pose_estimation_mediapipe_2023mar.onnx --cfg_overwrite_backend_target 4
431
+ $ python3 benchmark.py --all --fp32 --cfg_exclude wechat:crnn:vittrack --model_exclude pose_estimation_mediapipe_2023mar.onnx --cfg_overwrite_backend_target 4
423
432
Benchmarking ...
424
433
backend=cv.dnn.DNN_BACKEND_CANN
425
434
target=cv.dnn.DNN_TARGET_NPU
@@ -478,6 +487,7 @@ mean median min input size model
478
487
1903.82 1962.71 1533.79 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov_int8.onnx']
479
488
37604.10 37569.30 37502.48 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
480
489
24229.20 25577.94 13483.54 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov_int8.onnx']
490
+ 415.72 403.04 399.44 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
481
491
1133.44 1131.54 1124.83 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
482
492
883.96 919.07 655.33 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb_int8.onnx']
483
493
1430.98 1424.55 1415.68 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
@@ -533,6 +543,7 @@ mean median min input size model
533
543
117.28 150.31 83.33 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov_int8.onnx']
534
544
553.58 558.76 535.47 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
535
545
594.18 592.64 535.47 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov_int8.onnx']
546
+ 49.47 49.21 48.84 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
536
547
56.35 55.73 55.25 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
537
548
57.07 57.19 55.25 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb_int8.onnx']
538
549
47.94 48.41 47.05 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
@@ -587,6 +598,7 @@ mean median min input size model
587
598
406.28 416.58 385.68 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov_int8.onnx']
588
599
2608.90 2612.42 2597.93 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
589
600
2609.88 2609.39 2597.93 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov_int8.onnx']
601
+ 189.23 188.72 182.28 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
590
602
228.95 228.74 228.35 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
591
603
227.97 228.61 226.76 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb_int8.onnx']
592
604
192.29 192.26 191.74 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
@@ -643,6 +655,7 @@ mean median min input size model
643
655
3002.36 3047.94 2655.38 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov_int8.onnx']
644
656
50678.08 50651.82 50651.19 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
645
657
36249.71 37771.22 24606.37 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov_int8.onnx']
658
+ 707.79 706.32 699.40 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
646
659
1502.15 1501.98 1500.99 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
647
660
1300.15 1320.44 1137.60 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb_int8.onnx']
648
661
1993.05 1993.98 1991.86 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
@@ -669,9 +682,8 @@ Specs: [details_cn](https://doc.rvspace.org/VisionFive2/PB/VisionFive_2/specific
669
682
670
683
CPU:
671
684
<!-- config wechat is excluded due to it needs building with opencv_contrib -->
672
- <!-- config vittrack is excluded due to opencv cannot find ffmpeg and its components -->
673
685
```
674
- $ python3 benchmark.py --all --cfg_exclude wechat: vittrack
686
+ $ python3 benchmark.py --all --cfg_exclude wechat
675
687
Benchmarking ...
676
688
backend=cv.dnn.DNN_BACKEND_OPENCV
677
689
target=cv.dnn.DNN_TARGET_CPU
@@ -698,6 +710,7 @@ mean median min input size model
698
710
1434.56 1463.32 1194.57 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov_int8.onnx']
699
711
26172.62 26160.04 26151.67 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
700
712
17004.06 17909.88 9659.54 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov_int8.onnx']
713
+ 304.58 309.56 280.05 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
701
714
734.97 735.58 733.95 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
702
715
609.61 621.69 508.04 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb_int8.onnx']
703
716
961.41 962.26 960.39 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
@@ -715,3 +728,43 @@ mean median min input size model
715
728
1237.00 1395.68 807.66 [ 1280, 720] CRNN with [ 'text_recognition_CRNN_CN_2021nov_int8.onnx']
716
729
1169.59 1415.29 774.09 [ 1280, 720] CRNN with [ 'text_recognition_CRNN_EN_2022oct_int8.onnx']
717
730
```
731
+
732
+ <!--
733
+
734
+ ### Khadas VIM4
735
+
736
+ CPU:
737
+
738
+ ```
739
+ 67.65 67.84 66.39 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
740
+ ```
741
+
742
+ ### NVIDIA Jetson Orin Nano
743
+
744
+ CPU:
745
+
746
+ ```
747
+ 59.30 58.45 57.90 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
748
+ ```
749
+
750
+ CUDA:
751
+
752
+ ```
753
+ 13.69 13.69 13.04 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
754
+ ```
755
+
756
+ CUDA-FP16:
757
+
758
+ ```
759
+ 16.29 15.77 15.77 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
760
+ ```
761
+
762
+ ### Atlas 200I DK
763
+
764
+ CPU:
765
+
766
+ ```
767
+ 88.24 87.00 84.23 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
768
+ ```
769
+
770
+ -->
0 commit comments