@@ -94,6 +94,7 @@ mean median min input size model
94
94
46.10 47.53 43.06 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov_int8.onnx']
95
95
144.89 149.58 125.71 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
96
96
143.83 146.39 119.75 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov_int8.onnx']
97
+ 12.52 14.47 11.63 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
97
98
12.99 13.11 12.14 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
98
99
12.64 12.44 10.82 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb_int8.onnx']
99
100
12.64 11.83 11.03 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
@@ -148,6 +149,7 @@ mean median min input size model
148
149
212.90 212.93 209.55 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov_int8.onnx']
149
150
1690.06 2303.34 1480.63 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
150
151
1489.54 1435.48 1308.12 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov_int8.onnx']
152
+ 90.49 89.23 86.83 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
151
153
356.63 357.29 354.42 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
152
154
217.52 229.39 101.61 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb_int8.onnx']
153
155
198.63 198.25 196.68 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
@@ -203,6 +205,7 @@ mean median min input size model
203
205
216.18 216.19 214.30 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov_int8.onnx']
204
206
1207.83 1208.71 1203.64 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
205
207
1236.98 1250.21 1203.64 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov_int8.onnx']
208
+ 123.30 125.37 116.69 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
206
209
124.89 125.25 124.53 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
207
210
107.99 109.82 94.05 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb_int8.onnx']
208
211
108.41 108.33 107.91 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
@@ -241,6 +244,7 @@ mean median min input size model
241
244
54.24 55.24 52.87 [ 320, 240] LPD_YuNet with [ 'license_plate_detection_lpd_yunet_2023mar.onnx']
242
245
63.63 63.43 63.32 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov.onnx']
243
246
371.45 378.00 366.39 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
247
+ 43.06 42.32 39.92 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
244
248
33.85 33.90 33.61 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
245
249
38.16 37.33 37.10 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
246
250
91.65 91.98 89.90 [ 128, 256] YoutuReID with [ 'person_reid_youtu_2021nov.onnx']
@@ -271,6 +275,7 @@ mean median min input size model
271
275
366.46 366.88 363.46 [ 320, 240] LPD_YuNet with [ 'license_plate_detection_lpd_yunet_2023mar.onnx']
272
276
163.06 163.34 161.77 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov.onnx']
273
277
301.10 311.52 297.74 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
278
+ 43.36 40.65 39.85 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
274
279
149.37 149.95 148.01 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
275
280
153.89 153.96 153.43 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
276
281
44.29 44.03 43.62 [ 128, 256] YoutuReID with [ 'person_reid_youtu_2021nov.onnx']
@@ -318,6 +323,7 @@ mean median min input size model
318
323
212.69 262.75 170.88 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov_int8.onnx']
319
324
1110.87 1112.27 1085.31 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
320
325
1128.73 1157.12 1085.31 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov_int8.onnx']
326
+ 67.31 67.41 66.23 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
321
327
147.01 144.01 139.27 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
322
328
119.70 118.95 94.09 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb_int8.onnx']
323
329
107.63 107.09 105.61 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
@@ -398,6 +404,7 @@ mean median min input size model
398
404
322.98 323.45 312.13 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov_int8.onnx']
399
405
1875.33 1877.53 1871.26 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
400
406
1989.04 2005.25 1871.26 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov_int8.onnx']
407
+ 143.62 143.19 137.16 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
401
408
159.80 159.62 159.40 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
402
409
152.18 152.86 145.56 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb_int8.onnx']
403
410
145.83 145.77 145.45 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
@@ -418,8 +425,10 @@ mean median min input size model
418
425
419
426
NPU (CANN):
420
427
428
+ <!-- vittrack is excluded because HardSwish is not yet supported by the CANN backend -->
429
+
421
430
```
422
- $ python3 benchmark.py --all --fp32 --cfg_exclude wechat: crnn --model_exclude pose_estimation_mediapipe_2023mar.onnx --cfg_overwrite_backend_target 4
431
+ $ python3 benchmark.py --all --fp32 --cfg_exclude wechat:crnn:vittrack --model_exclude pose_estimation_mediapipe_2023mar.onnx --cfg_overwrite_backend_target 4
423
432
Benchmarking ...
424
433
backend=cv.dnn.DNN_BACKEND_CANN
425
434
target=cv.dnn.DNN_TARGET_NPU
@@ -478,6 +487,7 @@ mean median min input size model
478
487
1903.82 1962.71 1533.79 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov_int8.onnx']
479
488
37604.10 37569.30 37502.48 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
480
489
24229.20 25577.94 13483.54 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov_int8.onnx']
490
+ 415.72 403.04 399.44 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
481
491
1133.44 1131.54 1124.83 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
482
492
883.96 919.07 655.33 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb_int8.onnx']
483
493
1430.98 1424.55 1415.68 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
@@ -533,6 +543,7 @@ mean median min input size model
533
543
117.28 150.31 83.33 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov_int8.onnx']
534
544
553.58 558.76 535.47 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
535
545
594.18 592.64 535.47 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov_int8.onnx']
546
+ 49.47 49.21 48.84 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
536
547
56.35 55.73 55.25 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
537
548
57.07 57.19 55.25 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb_int8.onnx']
538
549
47.94 48.41 47.05 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
@@ -587,6 +598,7 @@ mean median min input size model
587
598
406.28 416.58 385.68 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov_int8.onnx']
588
599
2608.90 2612.42 2597.93 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
589
600
2609.88 2609.39 2597.93 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov_int8.onnx']
601
+ 189.23 188.72 182.28 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
590
602
228.95 228.74 228.35 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
591
603
227.97 228.61 226.76 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb_int8.onnx']
592
604
192.29 192.26 191.74 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
@@ -643,6 +655,7 @@ mean median min input size model
643
655
3002.36 3047.94 2655.38 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov_int8.onnx']
644
656
50678.08 50651.82 50651.19 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
645
657
36249.71 37771.22 24606.37 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov_int8.onnx']
658
+ 707.79 706.32 699.40 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
646
659
1502.15 1501.98 1500.99 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
647
660
1300.15 1320.44 1137.60 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb_int8.onnx']
648
661
1993.05 1993.98 1991.86 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
@@ -669,9 +682,8 @@ Specs: [details_cn](https://doc.rvspace.org/VisionFive2/PB/VisionFive_2/specific
669
682
670
683
CPU:
671
684
<!-- config wechat is excluded due to it needs building with opencv_contrib -->
672
- <!-- config vittrack is excluded due to opencv cannot find ffmpeg and its components -->
673
685
```
674
- $ python3 benchmark.py --all --cfg_exclude wechat: vittrack
686
+ $ python3 benchmark.py --all --cfg_exclude wechat
675
687
Benchmarking ...
676
688
backend=cv.dnn.DNN_BACKEND_OPENCV
677
689
target=cv.dnn.DNN_TARGET_CPU
@@ -698,6 +710,7 @@ mean median min input size model
698
710
1434.56 1463.32 1194.57 [ 416, 416] NanoDet with [ 'object_detection_nanodet_2022nov_int8.onnx']
699
711
26172.62 26160.04 26151.67 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov.onnx']
700
712
17004.06 17909.88 9659.54 [ 640, 640] YoloX with [ 'object_detection_yolox_2022nov_int8.onnx']
713
+ 304.58 309.56 280.05 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
701
714
734.97 735.58 733.95 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb.onnx']
702
715
609.61 621.69 508.04 [ 192, 192] MPPalmDet with [ 'palm_detection_mediapipe_2023feb_int8.onnx']
703
716
961.41 962.26 960.39 [ 224, 224] MPPersonDet with [ 'person_detection_mediapipe_2023mar.onnx']
@@ -715,3 +728,43 @@ mean median min input size model
715
728
1237.00 1395.68 807.66 [ 1280, 720] CRNN with [ 'text_recognition_CRNN_CN_2021nov_int8.onnx']
716
729
1169.59 1415.29 774.09 [ 1280, 720] CRNN with [ 'text_recognition_CRNN_EN_2022oct_int8.onnx']
717
730
```
731
+
732
+ <!--
733
+
734
+ ### Khadas VIM4
735
+
736
+ CPU:
737
+
738
+ ```
739
+ 67.65 67.84 66.39 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
740
+ ```
741
+
742
+ ### NVIDIA Jetson Orin Nano
743
+
744
+ CPU:
745
+
746
+ ```
747
+ 59.30 58.45 57.90 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
748
+ ```
749
+
750
+ CUDA:
751
+
752
+ ```
753
+ 13.69 13.69 13.04 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
754
+ ```
755
+
756
+ CUDA-FP16:
757
+
758
+ ```
759
+ 16.29 15.77 15.77 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
760
+ ```
761
+
762
+ ### Atlas 200I DK
763
+
764
+ CPU:
765
+
766
+ ```
767
+ 88.24 87.00 84.23 [ 1280, 720] VitTrack with [ 'object_tracking_vittrack_2023sep.onnx']
768
+ ```
769
+
770
+ -->
0 commit comments