Summary

mergennachin · mergennachin · commit 362b08b989c4 · 2025-07-10T17:08:00.000-04:00
Added 8 new representative models to ExecutorTorch examples:

EfficientNet-B4: Image classification with CNN architecture
DETR-ResNet50: Object detection using transformer decoder
SegFormer-ADE: Semantic segmentation transformer
Swin2SR: Super-resolution with Swin transformer
ALBERT: Lightweight BERT for NLP tasks
TrOCR: Optical character recognition transformer
XLSR-Wav2Vec2: Cross-lingual speech representation learning
BiLSTM: Bidirectional LSTM for sequence modeling
All models include XNNPACK backend support with appropriate quantization configurations and full CI integration.

Test plan:

Validate model export and execution with portable backend
Test XNNPACK delegation and quantization (with appropriate exclusions)
Integrate into CI workflows for automated testing
Verify all models perform their intended tasks accurately
diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh
@@ -317,8 +317,9 @@ elif [[ "${BACKEND}" == *"xnnpack"* ]]; then
   echo "Testing ${MODEL_NAME} with xnnpack..."
   WITH_QUANTIZATION=true
   WITH_DELEGATION=true
-  if [[ "$MODEL_NAME" == "mobilebert" ]]; then
-    # TODO(T197452682)
+  if [[ "$MODEL_NAME" == "mobilebert" || "$MODEL_NAME" == "albert" ]]; then
+    # TODO(https://github.com/pytorch/executorch/issues/12341) 
+    # mobilebert, albert, xlsr, bilstm incompatible with XNNPACK quantization
     WITH_QUANTIZATION=false
   fi
   test_model_with_xnnpack "${WITH_QUANTIZATION}" "${WITH_DELEGATION}"
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
@@ -63,7 +63,7 @@ jobs:
       contents: read
     strategy:
       matrix:
-        model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe]
+        model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe, efficientnet_b4, detr_resnet50, segformer_ade, albert, xlsr, bilstm]
         backend: [portable, xnnpack-quantization-delegation]
         runner: [linux.arm64.2xlarge]
         include:
diff --git a/examples/models/__init__.py b/examples/models/__init__.py
@@ -37,6 +37,14 @@ class Model(str, Enum):
     EfficientSam = "efficient_sam"
     Qwen25 = "qwen2_5"
     Phi4Mini = "phi_4_mini"
+    EfficientNetB4 = "efficientnet_b4"
+    DetrResNet50 = "detr_resnet50"
+    SegformerADE = "segformer_ade"
+    Albert = "albert"
+    BiLSTM = "bilstm"
+    Swin2SR2x = "swin2sr_2x"
+    TrOCRHandwritten = "trocr_handwritten"
+    XLSR = "xlsr"
 
     def __str__(self) -> str:
         return self.value
@@ -82,6 +90,14 @@ def __str__(self) -> str:
     str(Model.EfficientSam): ("efficient_sam", "EfficientSAM"),
     str(Model.Qwen25): ("qwen2_5", "Qwen2_5Model"),
     str(Model.Phi4Mini): ("phi_4_mini", "Phi4MiniModel"),
+    str(Model.EfficientNetB4): ("efficientnet_b4", "EfficientNetB4Model"),
+    str(Model.DetrResNet50): ("detr_resnet50", "DetrResNet50Model"),
+    str(Model.SegformerADE): ("segformer_ade", "SegformerADEModel"),
+    str(Model.Albert): ("albert", "AlbertModelExample"),
+    str(Model.BiLSTM): ("bilstm", "BidirectionalLSTMModel"),
+    str(Model.Swin2SR2x): ("swin2sr_2x", "Swin2SR2xModel"),
+    str(Model.TrOCRHandwritten): ("trocr_handwritten", "TrOCRHandwrittenModel"),
+    str(Model.XLSR): ("xlsr", "XLSRModel"),
 }
 
 __all__ = [
diff --git a/examples/models/albert/__init__.py b/examples/models/albert/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import AlbertModelExample
+
+__all__ = [
+    "AlbertModelExample",
+]
diff --git a/examples/models/albert/model.py b/examples/models/albert/model.py
@@ -0,0 +1,30 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch
+
+from transformers import AlbertModel, AutoTokenizer  # @manual
+
+from ..model_base import EagerModelBase
+
+
+class AlbertModelExample(EagerModelBase):
+    def __init__(self):
+        pass
+
+    def get_eager_model(self) -> torch.nn.Module:
+        logging.info("Loading ALBERT model")
+        # pyre-ignore
+        model = AlbertModel.from_pretrained("albert-base-v2", return_dict=False)
+        model.eval()
+        logging.info("Loaded ALBERT model")
+        return model
+
+    def get_example_inputs(self):
+        tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")
+        return (tokenizer("Hello, my dog is cute", return_tensors="pt")["input_ids"],)
diff --git a/examples/models/bilstm/__init__.py b/examples/models/bilstm/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import BidirectionalLSTMModel
+
+__all__ = [
+    "BidirectionalLSTMModel",
+]
diff --git a/examples/models/bilstm/model.py b/examples/models/bilstm/model.py
@@ -0,0 +1,96 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch
+import torch.nn as nn
+
+from ..model_base import EagerModelBase
+
+
+class BidirectionalLSTM(nn.Module):
+    """Bidirectional LSTM for sequence modeling"""
+
+    def __init__(self, input_size=100, hidden_size=128, num_layers=2, num_classes=10):
+        super(BidirectionalLSTM, self).__init__()
+        self.hidden_size = hidden_size
+        self.num_layers = num_layers
+
+        # Bidirectional LSTM
+        self.lstm = nn.LSTM(
+            input_size, hidden_size, num_layers, batch_first=True, bidirectional=True
+        )
+
+        # Output layer (hidden_size * 2 because of bidirectional)
+        self.fc = nn.Linear(hidden_size * 2, num_classes)
+
+    def forward(self, x):
+        # Initialize hidden states
+        # For bidirectional: hidden states shape is (num_layers * 2, batch, hidden_size)
+        h0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(x.device)
+        c0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(x.device)
+
+        # LSTM forward pass
+        out, _ = self.lstm(x, (h0, c0))
+
+        # Take the last time step output
+        out = self.fc(out[:, -1, :])
+        return out
+
+
+class BidirectionalLSTMTextClassifier(nn.Module):
+    """Bidirectional LSTM for text classification with embedding layer"""
+
+    def __init__(
+        self, vocab_size=10000, embedding_dim=128, hidden_size=256, num_classes=2
+    ):
+        super(BidirectionalLSTMTextClassifier, self).__init__()
+        self.hidden_size = hidden_size
+
+        # Embedding layer
+        self.embedding = nn.Embedding(vocab_size, embedding_dim)
+
+        # Bidirectional LSTM
+        self.lstm = nn.LSTM(
+            embedding_dim, hidden_size, bidirectional=True, batch_first=True
+        )
+
+        # Output layer
+        self.fc = nn.Linear(hidden_size * 2, num_classes)
+
+    def forward(self, x):
+        # Embedding
+        embedded = self.embedding(x)
+
+        # LSTM
+        lstm_out, _ = self.lstm(embedded)
+
+        # Global max pooling over sequence dimension
+        pooled = torch.max(lstm_out, dim=1)[0]
+
+        # Classification
+        output = self.fc(pooled)
+        return output
+
+
+class BidirectionalLSTMModel(EagerModelBase):
+    def __init__(self):
+        pass
+
+    def get_eager_model(self) -> torch.nn.Module:
+        logging.info("Loading Bidirectional LSTM model")
+        model = BidirectionalLSTM(
+            input_size=100, hidden_size=128, num_layers=2, num_classes=10
+        )
+        model.eval()
+        logging.info("Loaded Bidirectional LSTM model")
+        return model
+
+    def get_example_inputs(self):
+        # Example: (batch_size=1, seq_len=50, input_size=100)
+        tensor_size = (1, 50, 100)
+        return (torch.randn(tensor_size),)
diff --git a/examples/models/detr_resnet50/__init__.py b/examples/models/detr_resnet50/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import DetrResNet50Model
+
+__all__ = [
+    "DetrResNet50Model",
+]
diff --git a/examples/models/detr_resnet50/model.py b/examples/models/detr_resnet50/model.py
@@ -0,0 +1,45 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch
+from transformers import DetrForObjectDetection
+
+from ..model_base import EagerModelBase
+
+
+class DetrWrapper(torch.nn.Module):
+    """Wrapper for HuggingFace DETR model to make it torch.export compatible"""
+
+    def __init__(self, model_name="facebook/detr-resnet-50"):
+        super().__init__()
+        self.detr = DetrForObjectDetection.from_pretrained(model_name)
+        self.detr.eval()
+
+    def forward(self, pixel_values):
+        # pixel_values: [batch, 3, height, width] - RGB image
+        with torch.no_grad():
+            outputs = self.detr(pixel_values)
+        # Return logits and boxes for object detection
+        return outputs.logits, outputs.pred_boxes
+
+
+class DetrResNet50Model(EagerModelBase):
+    def __init__(self):
+        pass
+
+    def get_eager_model(self) -> torch.nn.Module:
+        logging.info("Loading DETR ResNet-50 model from HuggingFace")
+        model = DetrWrapper("facebook/detr-resnet-50")
+        model.eval()
+        logging.info("Loaded DETR ResNet-50 model")
+        return model
+
+    def get_example_inputs(self):
+        # DETR standard input size: 800x800 RGB image (can handle various sizes)
+        tensor_size = (1, 3, 800, 800)
+        return (torch.randn(tensor_size),)
diff --git a/examples/models/efficientnet_b4/__init__.py b/examples/models/efficientnet_b4/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import EfficientNetB4Model
+
+__all__ = [
+    "EfficientNetB4Model",
+]
diff --git a/examples/models/efficientnet_b4/model.py b/examples/models/efficientnet_b4/model.py
@@ -0,0 +1,31 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch
+
+from torchvision.models import efficientnet_b4  # @manual
+from torchvision.models.efficientnet import EfficientNet_B4_Weights
+
+from ..model_base import EagerModelBase
+
+
+class EfficientNetB4Model(EagerModelBase):
+    def __init__(self):
+        pass
+
+    def get_eager_model(self) -> torch.nn.Module:
+        logging.info("Loading EfficientNet-B4 model")
+        model = efficientnet_b4(weights=EfficientNet_B4_Weights.IMAGENET1K_V1)
+        model.eval()
+        logging.info("Loaded EfficientNet-B4 model")
+        return model
+
+    def get_example_inputs(self):
+        # EfficientNet-B4 uses 380x380 input size
+        tensor_size = (1, 3, 380, 380)
+        return (torch.randn(tensor_size),)
diff --git a/examples/models/segformer_ade/__init__.py b/examples/models/segformer_ade/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import SegformerADEModel
+
+__all__ = [
+    "SegformerADEModel",
+]
diff --git a/examples/models/segformer_ade/model.py b/examples/models/segformer_ade/model.py
@@ -0,0 +1,44 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch
+from transformers import SegformerForSemanticSegmentation
+
+from ..model_base import EagerModelBase
+
+
+class SegformerWrapper(torch.nn.Module):
+    """Wrapper for HuggingFace SegFormer model to make it torch.export compatible"""
+
+    def __init__(self, model_name="nvidia/segformer-b0-finetuned-ade-512-512"):
+        super().__init__()
+        self.segformer = SegformerForSemanticSegmentation.from_pretrained(model_name)
+        self.segformer.eval()
+
+    def forward(self, pixel_values):
+        # pixel_values: [batch, 3, height, width] - RGB image
+        with torch.no_grad():
+            outputs = self.segformer(pixel_values)
+        return outputs.logits
+
+
+class SegformerADEModel(EagerModelBase):
+    def __init__(self):
+        pass
+
+    def get_eager_model(self) -> torch.nn.Module:
+        logging.info("Loading SegFormer ADE model from HuggingFace")
+        model = SegformerWrapper("nvidia/segformer-b0-finetuned-ade-512-512")
+        model.eval()
+        logging.info("Loaded SegFormer ADE model")
+        return model
+
+    def get_example_inputs(self):
+        # SegFormer standard input size: 512x512 RGB image
+        tensor_size = (1, 3, 512, 512)
+        return (torch.randn(tensor_size),)
diff --git a/examples/models/swin2sr_2x/__init__.py b/examples/models/swin2sr_2x/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import Swin2SR2xModel
+
+__all__ = [
+    "Swin2SR2xModel",
+]
diff --git a/examples/models/swin2sr_2x/model.py b/examples/models/swin2sr_2x/model.py
diff --git a/examples/models/trocr_handwritten/__init__.py b/examples/models/trocr_handwritten/__init__.py
diff --git a/examples/models/trocr_handwritten/model.py b/examples/models/trocr_handwritten/model.py
diff --git a/examples/models/xlsr/__init__.py b/examples/models/xlsr/__init__.py
diff --git a/examples/models/xlsr/model.py b/examples/models/xlsr/model.py
diff --git a/examples/xnnpack/__init__.py b/examples/xnnpack/__init__.py