[feat] support l37 split point for YOLOX-Darknet53

chyomin06 · chyomin06 · commit b70271f224e6 · 2025-02-03T20:37:56.000-05:00
diff --git a/cfgs/vision_model/default.yaml b/cfgs/vision_model/default.yaml
@@ -46,4 +46,4 @@ yolox_darknet53:
   conf_thres: 0.001
   nms_thres: 0.65
   weights: "weights/yolox/darknet53/yolox_darknet.pth"
-  splits: "l13"
+  splits: "l13"  # or "l37"
diff --git a/compressai_vision/model_wrappers/yolox.py b/compressai_vision/model_wrappers/yolox.py
@@ -54,6 +54,7 @@ def __str__(self):
         return str(self.value)
 
     Layer13_Single = "l13"
+    Layer37_Single = "l37"
 
 
 @register_vision_model("yolox_darknet53")
@@ -86,6 +87,8 @@ def __init__(self, device: str, **kwargs):
         self.split_id = str(kwargs["splits"]).lower()
         if self.split_id == str(self.supported_split_points.Layer13_Single):
             self.split_layer_list = ["l13"]
+        elif self.split_id == str(self.supported_split_points.Layer37_Single):
+            self.split_layer_list = ["l37"]
         else:
             raise NotImplementedError
 
@@ -111,6 +114,10 @@ def __init__(self, device: str, **kwargs):
     def SPLIT_L13(self):
         return str(self.supported_split_points.Layer13_Single)
 
+    @property
+    def SPLIT_L37(self):
+        return str(self.supported_split_points.Layer37_Single)
+
     def input_to_features(self, x, device: str) -> Dict:
         """Computes deep features at the intermediate layer(s) all the way from the input"""
 
@@ -120,12 +127,14 @@ def input_to_features(self, x, device: str) -> Dict:
 
         if self.split_id == self.SPLIT_L13:
             output = self._input_to_feature_at_l13(img)
-            output["input_size"] = [input_size]
-            return output
+        elif self.split_id == self.SPLIT_L37:
+            output = self._input_to_feature_at_l37(img)
         else:
             self.logger.error(f"Not supported split point {self.split_id}")
+            raise NotImplementedError
 
-        raise NotImplementedError
+        output["input_size"] = [input_size]
+        return output
 
     def features_to_output(self, x: Dict, device: str):
         """Complete the downstream task from the intermediate deep features"""
@@ -136,6 +145,10 @@ def features_to_output(self, x: Dict, device: str):
             return self._feature_at_l13_to_output(
                 x["data"], x["org_input_size"], x["input_size"]
             )
+        elif self.split_id == self.SPLIT_L37:
+            return self._feature_at_l37_to_output(
+                x["data"], x["org_input_size"], x["input_size"]
+            )
         else:
             self.logger.error(f"Not supported split points {self.split_id}")
 
@@ -151,6 +164,17 @@ def _input_to_feature_at_l13(self, x):
 
         return {"data": self.features_at_splits}
 
+    @torch.no_grad()
+    def _input_to_feature_at_l37(self, x):
+        """Computes and returns the feature at layer 37 with the 11th residual layer output, all the way from the input"""
+
+        y = self.backbone.stem(x)
+        y = self.backbone.dark2(y)
+        y = self.backbone.dark3(y)
+        self.features_at_splits[self.SPLIT_L37] = y
+
+        return {"data": self.features_at_splits}
+
     @torch.no_grad()
     def _feature_at_l13_to_output(
         self, x: Dict, org_img_size: Dict, input_img_size: List
@@ -194,6 +218,45 @@ def _feature_at_l13_to_output(
 
         return pred
 
+    @torch.no_grad()
+    def _feature_at_l37_to_output(
+        self, x: Dict, org_img_size: Dict, input_img_size: List
+    ):
+        """
+        Performs the downstream task using the features from layer 37.
+
+        The YOLOX source code is referenced for this function.
+        <https://github.com/Megvii-BaseDetection/YOLOX/yolox/data/data_augment.py>
+
+        Parts unnecessary for split inference are removed or modified accordingly.
+
+        Please find the license statement in the downloaded original YOLOX source code or here:
+        <https://github.com/Megvii-BaseDetection/YOLOX?tab=Apache-2.0-1-ov-file#readme>
+
+        """
+
+        fp_lvl2 = x[self.SPLIT_L37]
+        fp_lvl1 = self.backbone.dark4(fp_lvl2)
+        fp_lvl0 = self.backbone.dark5(fp_lvl1)
+
+        # yolo branch 1
+        b1_in = self.yolo_fpn.out1_cbl(fp_lvl0)
+        b1_in = self.yolo_fpn.upsample(b1_in)
+        b1_in = torch.cat([b1_in, fp_lvl1], 1)
+        fp_lvl1 = self.yolo_fpn.out1(b1_in)
+
+        # yolo branch 2
+        b2_in = self.yolo_fpn.out2_cbl(fp_lvl1)
+        b2_in = self.yolo_fpn.upsample(b2_in)
+        b2_in = torch.cat([b2_in, fp_lvl2], 1)
+        fp_lvl2 = self.yolo_fpn.out2(b2_in)
+
+        outputs = self.head((fp_lvl2, fp_lvl1, fp_lvl0))
+
+        pred = postprocess(outputs, self.num_classes, self.conf_thres, self.nms_thres)
+
+        return pred
+
     @torch.no_grad()
     def forward(self, x):
         """Complete the downstream task with end-to-end manner all the way from the input"""
diff --git a/scripts/evaluation/default_yolox_darknet3_performance.sh b/scripts/evaluation/default_yolox_darknet3_performance.sh
@@ -40,6 +40,9 @@ fi
 COCO_2017_VAL_SRC="${TESTDATA_DIR}/coco2017"
 
 # COCO 2017 Val - Detection with YOLOX-Darknet53
+
+# Split point options: "l13" or "l37". To override the default, add this line to the command below:
+# ++vision_model.yolox_darknet53.splits="l37" \
 ${ENTRY_CMD} --config-name=${CONF_NAME}.yaml \
              ++pipeline.type=image \
              ++pipeline.conformance.save_conformance_files=False \