Add more models as part of GA models #12340

Open · wants to merge 1 commit into main
5 changes: 3 additions & 2 deletions .ci/scripts/test_model.sh
@@ -317,8 +317,9 @@ elif [[ "${BACKEND}" == *"xnnpack"* ]]; then
echo "Testing ${MODEL_NAME} with xnnpack..."
WITH_QUANTIZATION=true
WITH_DELEGATION=true
if [[ "$MODEL_NAME" == "mobilebert" ]]; then
# TODO(T197452682)
if [[ "$MODEL_NAME" == "mobilebert" || "$MODEL_NAME" == "albert" ]]; then
# TODO(https://github.com/pytorch/executorch/issues/12341)
# mobilebert, albert incompatible with XNNPACK quantization
WITH_QUANTIZATION=false
fi
test_model_with_xnnpack "${WITH_QUANTIZATION}" "${WITH_DELEGATION}"
2 changes: 1 addition & 1 deletion .github/workflows/trunk.yml
@@ -63,7 +63,7 @@ jobs:
      contents: read
    strategy:
      matrix:
-       model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe]
+       model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe, efficientnet_b4, detr_resnet50, segformer_ade, albert, wav2vec2]
        backend: [portable, xnnpack-quantization-delegation]
        runner: [linux.arm64.2xlarge]
        include:
15 changes: 15 additions & 0 deletions examples/models/__init__.py
@@ -37,6 +37,14 @@ class Model(str, Enum):
    EfficientSam = "efficient_sam"
    Qwen25 = "qwen2_5"
    Phi4Mini = "phi_4_mini"
    EfficientNetB4 = "efficientnet_b4"
    DetrResNet50 = "detr_resnet50"
    SegformerADE = "segformer_ade"
    Albert = "albert"
    BiLSTM = "bilstm"
    Swin2SR2x = "swin2sr_2x"
    TrOCRHandwritten = "trocr_handwritten"
    Wav2Vec2 = "wav2vec2"

    def __str__(self) -> str:
        return self.value
@@ -82,6 +90,13 @@ def __str__(self) -> str:
    str(Model.EfficientSam): ("efficient_sam", "EfficientSAM"),
    str(Model.Qwen25): ("qwen2_5", "Qwen2_5Model"),
    str(Model.Phi4Mini): ("phi_4_mini", "Phi4MiniModel"),
    str(Model.EfficientNetB4): ("efficientnet_b4", "EfficientNetB4Model"),
    str(Model.DetrResNet50): ("detr_resnet50", "DetrResNet50Model"),
    str(Model.SegformerADE): ("segformer_ade", "SegformerADEModel"),
    str(Model.Albert): ("albert", "AlbertModelExample"),
    str(Model.Swin2SR2x): ("swin2sr_2x", "Swin2SR2xModel"),
    str(Model.TrOCRHandwritten): ("trocr_handwritten", "TrOCRHandwrittenModel"),
    str(Model.Wav2Vec2): ("wav2vec2", "Wav2Vec2Model"),
}

__all__ = [
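Note (illustrative, not part of the diff): each registry entry above maps a model name to a (module, class) pair under examples/models/, which is how the export scripts locate the eager model. A minimal lookup sketch follows, assuming the dict is exposed as MODEL_NAME_TO_MODEL and the package path is executorch.examples.models; the in-tree examples go through the model factory rather than importlib.

# Hypothetical sketch: resolve a registry entry to an eager model and its example inputs.
# MODEL_NAME_TO_MODEL and the package path are assumptions based on this file.
import importlib

from executorch.examples.models import MODEL_NAME_TO_MODEL

module_name, class_name = MODEL_NAME_TO_MODEL["albert"]  # ("albert", "AlbertModelExample")
module = importlib.import_module(f"executorch.examples.models.{module_name}")
wrapper = getattr(module, class_name)()
eager_model = wrapper.get_eager_model()
example_inputs = wrapper.get_example_inputs()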
11 changes: 11 additions & 0 deletions examples/models/albert/__init__.py
@@ -0,0 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from .model import AlbertModelExample

__all__ = [
"AlbertModelExample",
]
30 changes: 30 additions & 0 deletions examples/models/albert/model.py
@@ -0,0 +1,30 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import logging

import torch

from transformers import AlbertModel, AutoTokenizer # @manual

from ..model_base import EagerModelBase


class AlbertModelExample(EagerModelBase):
    def __init__(self):
        pass

    def get_eager_model(self) -> torch.nn.Module:
        logging.info("Loading ALBERT model")
        # pyre-ignore
        model = AlbertModel.from_pretrained("albert-base-v2", return_dict=False)
        model.eval()
        logging.info("Loaded ALBERT model")
        return model

    def get_example_inputs(self):
        tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")
        return (tokenizer("Hello, my dog is cute", return_tensors="pt")["input_ids"],)
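Note (illustrative, not part of the diff): the class above only supplies the eager model and example inputs; lowering happens in the example export flows. A rough sketch of an XNNPACK lowering without quantization (matching the CI gating above, which disables quantization for albert), using documented ExecuTorch APIs; treat the exact call sequence as an assumption.

# Hedged sketch: export the eager ALBERT example and delegate it to XNNPACK (fp32 only).
import torch

from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
from executorch.exir import to_edge_transform_and_lower
from executorch.examples.models.albert import AlbertModelExample  # assumed package path

wrapper = AlbertModelExample()
exported = torch.export.export(wrapper.get_eager_model(), wrapper.get_example_inputs())
program = to_edge_transform_and_lower(exported, partitioner=[XnnpackPartitioner()]).to_executorch()

with open("albert_xnnpack_fp32.pte", "wb") as f:
    f.write(program.buffer)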
11 changes: 11 additions & 0 deletions examples/models/detr_resnet50/__init__.py
@@ -0,0 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from .model import DetrResNet50Model

__all__ = [
"DetrResNet50Model",
]
45 changes: 45 additions & 0 deletions examples/models/detr_resnet50/model.py
@@ -0,0 +1,45 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import logging

import torch
from transformers import DetrForObjectDetection

from ..model_base import EagerModelBase


class DetrWrapper(torch.nn.Module):
    """Wrapper for HuggingFace DETR model to make it torch.export compatible"""

    def __init__(self, model_name="facebook/detr-resnet-50"):
        super().__init__()
        self.detr = DetrForObjectDetection.from_pretrained(model_name)
        self.detr.eval()

    def forward(self, pixel_values):
        # pixel_values: [batch, 3, height, width] - RGB image
        with torch.no_grad():
            outputs = self.detr(pixel_values)
        # Return logits and boxes for object detection
        return outputs.logits, outputs.pred_boxes


class DetrResNet50Model(EagerModelBase):
    def __init__(self):
        pass

    def get_eager_model(self) -> torch.nn.Module:
        logging.info("Loading DETR ResNet-50 model from HuggingFace")
        model = DetrWrapper("facebook/detr-resnet-50")
        model.eval()
        logging.info("Loaded DETR ResNet-50 model")
        return model

    def get_example_inputs(self):
        # DETR standard input size: 800x800 RGB image (can handle various sizes)
        tensor_size = (1, 3, 800, 800)
        return (torch.randn(tensor_size),)
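Note (illustrative, not part of the diff): the wrapper returns raw (logits, pred_boxes). DETR's convention is a trailing "no object" class and boxes in normalized (cx, cy, w, h), so a consumer typically converts them roughly like this:

# Sketch: turn the wrapper's outputs into per-query scores, labels, and corner boxes.
# Assumes DETR's usual output conventions; the threshold is an arbitrary illustrative value.
import torch

def postprocess(logits: torch.Tensor, pred_boxes: torch.Tensor, threshold: float = 0.7):
    probs = logits.softmax(-1)[..., :-1]  # drop the trailing "no object" class
    scores, labels = probs.max(-1)
    cx, cy, w, h = pred_boxes.unbind(-1)  # normalized center-size boxes
    boxes = torch.stack((cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2), dim=-1)
    keep = scores > threshold
    return scores[keep], labels[keep], boxes[keep]

logits, pred_boxes = DetrWrapper()(torch.randn(1, 3, 800, 800))
scores, labels, boxes = postprocess(logits[0], pred_boxes[0])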
11 changes: 11 additions & 0 deletions examples/models/efficientnet_b4/__init__.py
@@ -0,0 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from .model import EfficientNetB4Model

__all__ = [
"EfficientNetB4Model",
]
31 changes: 31 additions & 0 deletions examples/models/efficientnet_b4/model.py
@@ -0,0 +1,31 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import logging

import torch

from torchvision.models import efficientnet_b4 # @manual
from torchvision.models.efficientnet import EfficientNet_B4_Weights

from ..model_base import EagerModelBase


class EfficientNetB4Model(EagerModelBase):
    def __init__(self):
        pass

    def get_eager_model(self) -> torch.nn.Module:
        logging.info("Loading EfficientNet-B4 model")
        model = efficientnet_b4(weights=EfficientNet_B4_Weights.IMAGENET1K_V1)
        model.eval()
        logging.info("Loaded EfficientNet-B4 model")
        return model

    def get_example_inputs(self):
        # EfficientNet-B4 uses 380x380 input size
        tensor_size = (1, 3, 380, 380)
        return (torch.randn(tensor_size),)
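Note (illustrative, not part of the diff): the random 380x380 tensor above only exercises the graph shape; for real images, the torchvision weights object bundles the matching preprocessing (resize, center-crop to 380, ImageNet normalization). A sketch:

# Sketch: classify a real image with the same weights the example model loads.
# "example.jpg" is a hypothetical input file.
import torch
from PIL import Image
from torchvision.models.efficientnet import EfficientNet_B4_Weights

weights = EfficientNet_B4_Weights.IMAGENET1K_V1
preprocess = weights.transforms()

model = EfficientNetB4Model().get_eager_model()
batch = preprocess(Image.open("example.jpg").convert("RGB")).unsqueeze(0)  # [1, 3, 380, 380]

with torch.no_grad():
    logits = model(batch)
print(weights.meta["categories"][logits.argmax(-1).item()])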
11 changes: 11 additions & 0 deletions examples/models/segformer_ade/__init__.py
@@ -0,0 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from .model import SegformerADEModel

__all__ = [
"SegformerADEModel",
]
44 changes: 44 additions & 0 deletions examples/models/segformer_ade/model.py
@@ -0,0 +1,44 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import logging

import torch
from transformers import SegformerForSemanticSegmentation

from ..model_base import EagerModelBase


class SegformerWrapper(torch.nn.Module):
    """Wrapper for HuggingFace SegFormer model to make it torch.export compatible"""

    def __init__(self, model_name="nvidia/segformer-b0-finetuned-ade-512-512"):
        super().__init__()
        self.segformer = SegformerForSemanticSegmentation.from_pretrained(model_name)
        self.segformer.eval()

    def forward(self, pixel_values):
        # pixel_values: [batch, 3, height, width] - RGB image
        with torch.no_grad():
            outputs = self.segformer(pixel_values)
        return outputs.logits


class SegformerADEModel(EagerModelBase):
    def __init__(self):
        pass

    def get_eager_model(self) -> torch.nn.Module:
        logging.info("Loading SegFormer ADE model from HuggingFace")
        model = SegformerWrapper("nvidia/segformer-b0-finetuned-ade-512-512")
        model.eval()
        logging.info("Loaded SegFormer ADE model")
        return model

    def get_example_inputs(self):
        # SegFormer standard input size: 512x512 RGB image
        tensor_size = (1, 3, 512, 512)
        return (torch.randn(tensor_size),)
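Note (illustrative, not part of the diff): SegformerForSemanticSegmentation emits logits at 1/4 of the input resolution, so consumers usually upsample before taking the per-pixel argmax. A minimal sketch:

# Sketch: upsample the wrapper's logits to the input size and take the class argmax.
# The ADE20K checkpoint used above has 150 classes.
import torch
import torch.nn.functional as F

model = SegformerWrapper()  # wrapper defined above
pixel_values = torch.randn(1, 3, 512, 512)

logits = model(pixel_values)  # [1, 150, 128, 128], i.e. 1/4 resolution
upsampled = F.interpolate(logits, size=pixel_values.shape[-2:], mode="bilinear", align_corners=False)
segmentation = upsampled.argmax(dim=1)  # [1, 512, 512] class ids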
11 changes: 11 additions & 0 deletions examples/models/swin2sr_2x/__init__.py
@@ -0,0 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from .model import Swin2SR2xModel

__all__ = [
"Swin2SR2xModel",
]
44 changes: 44 additions & 0 deletions examples/models/swin2sr_2x/model.py
@@ -0,0 +1,44 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import logging

import torch
from transformers import Swin2SRForImageSuperResolution

from ..model_base import EagerModelBase


class Swin2SRWrapper(torch.nn.Module):
    """Wrapper for HuggingFace Swin2SR model to make it torch.export compatible"""

    def __init__(self, model_name="caidas/swin2SR-classical-sr-x2-64"):
        super().__init__()
        self.swin2sr = Swin2SRForImageSuperResolution.from_pretrained(model_name)
        self.swin2sr.eval()

    def forward(self, pixel_values):
        # pixel_values: [batch, 3, height, width] - RGB image
        with torch.no_grad():
            outputs = self.swin2sr(pixel_values)
        return outputs.reconstruction


class Swin2SR2xModel(EagerModelBase):
    def __init__(self):
        pass

    def get_eager_model(self) -> torch.nn.Module:
        logging.info("Loading Swin2SR 2x model from HuggingFace")
        model = Swin2SRWrapper("caidas/swin2SR-classical-sr-x2-64")
        model.eval()
        logging.info("Loaded Swin2SR 2x model")
        return model

    def get_example_inputs(self):
        # Swin2SR input size: 64x64 RGB image for 2x super-resolution
        tensor_size = (1, 3, 64, 64)
        return (torch.randn(tensor_size),)
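Note (illustrative, not part of the diff): for a 64x64 input the 2x checkpoint returns a 128x128 reconstruction with values roughly in [0, 1], so producing an image is a clamp, scale, and cast. A sketch:

# Sketch: convert the wrapper's reconstruction tensor into an 8-bit RGB image.
import numpy as np
import torch
from PIL import Image

model = Swin2SR2xModel().get_eager_model()  # wrapper defined above
output = model(torch.randn(1, 3, 64, 64))   # [1, 3, 128, 128] for the 2x checkpoint

array = output.squeeze(0).clamp(0, 1).permute(1, 2, 0).numpy()
Image.fromarray((array * 255).round().astype(np.uint8)).save("upscaled.png")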
11 changes: 11 additions & 0 deletions examples/models/trocr_handwritten/__init__.py
@@ -0,0 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from .model import TrOCRHandwrittenModel

__all__ = [
"TrOCRHandwrittenModel",
]
45 changes: 45 additions & 0 deletions examples/models/trocr_handwritten/model.py
@@ -0,0 +1,45 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import logging

import torch
from transformers import VisionEncoderDecoderModel

from ..model_base import EagerModelBase


class TrOCRWrapper(torch.nn.Module):
    """Wrapper for HuggingFace TrOCR model to make it torch.export compatible"""

    def __init__(self, model_name="microsoft/trocr-base-handwritten"):
        super().__init__()
        self.trocr = VisionEncoderDecoderModel.from_pretrained(model_name)
        self.trocr.eval()

    def forward(self, pixel_values):
        # pixel_values: [batch, 3, height, width] - RGB image
        with torch.no_grad():
            # Generate text from image
            generated_ids = self.trocr.generate(pixel_values, max_length=50)
        return generated_ids


class TrOCRHandwrittenModel(EagerModelBase):
    def __init__(self):
        pass

    def get_eager_model(self) -> torch.nn.Module:
        logging.info("Loading TrOCR handwritten model from HuggingFace")
        model = TrOCRWrapper("microsoft/trocr-base-handwritten")
        model.eval()
        logging.info("Loaded TrOCR handwritten model")
        return model

    def get_example_inputs(self):
        # TrOCR input: 384x384 RGB text image
        pixel_values = torch.randn(1, 3, 384, 384)
        return (pixel_values,)
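Note (illustrative, not part of the diff): the wrapper returns raw token ids from generate(), so the matching TrOCR processor is needed both to build pixel_values from an image and to decode the ids back into text. A sketch against the same checkpoint:

# Sketch: run the handwritten-text example end to end with the matching processor.
# "handwritten_line.png" is a hypothetical input file.
from PIL import Image
from transformers import TrOCRProcessor

processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
model = TrOCRHandwrittenModel().get_eager_model()  # wrapper defined above

image = Image.open("handwritten_line.png").convert("RGB")
pixel_values = processor(images=image, return_tensors="pt").pixel_values
generated_ids = model(pixel_values)
text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]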
9 changes: 9 additions & 0 deletions examples/models/wav2vec2/__init__.py
@@ -0,0 +1,9 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from .model import Wav2Vec2Model

__all__ = ["Wav2Vec2Model"]