Add more models as part of GA models

mergennachin · mergennachin · commit 8265cc3cdae1 · 2025-07-10T17:03:18.000-04:00
Summary: I realized that we don't have object detection models in our repo. This change is based on reviewing the model lists at https://mlcommons.org/benchmarks/inference-mobile/ and https://ai-benchmark.com/tests.html and identifying the models that are missing from our examples.
diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh
@@ -317,8 +317,9 @@ elif [[ "${BACKEND}" == *"xnnpack"* ]]; then
   echo "Testing ${MODEL_NAME} with xnnpack..."
   WITH_QUANTIZATION=true
   WITH_DELEGATION=true
-  if [[ "$MODEL_NAME" == "mobilebert" ]]; then
-    # TODO(T197452682)
+  if [[ "$MODEL_NAME" == "mobilebert" || "$MODEL_NAME" == "albert" ]]; then
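+    # TODO(https://github.com/pytorch/executorch/issues/12341)
+    # mobilebert, albert, xlsr, bilstm are incompatible with XNNPACK quantization.
+    # NOTE(review): only mobilebert and albert are checked in the condition above;
+    # confirm whether xlsr and bilstm need WITH_QUANTIZATION=false as well.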
     WITH_QUANTIZATION=false
   fi
   test_model_with_xnnpack "${WITH_QUANTIZATION}" "${WITH_DELEGATION}"
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
@@ -63,7 +63,7 @@ jobs:
       contents: read
     strategy:
       matrix:
-        model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe]
+        model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe, efficientnet_b4, detr_resnet50, segformer_ade, albert, xlsr, bilstm]
         backend: [portable, xnnpack-quantization-delegation]
         runner: [linux.arm64.2xlarge]
         include:
diff --git a/examples/models/__init__.py b/examples/models/__init__.py
@@ -37,6 +37,14 @@ class Model(str, Enum):
     EfficientSam = "efficient_sam"
     Qwen25 = "qwen2_5"
     Phi4Mini = "phi_4_mini"
+    EfficientNetB4 = "efficientnet_b4"
+    DetrResNet50 = "detr_resnet50"
+    SegformerADE = "segformer_ade"
+    Albert = "albert"
+    BiLSTM = "bilstm"
+    Swin2SR2x = "swin2sr_2x"
+    TrOCRHandwritten = "trocr_handwritten"
+    XLSR = "xlsr"
 
     def __str__(self) -> str:
         return self.value
@@ -82,6 +90,14 @@ def __str__(self) -> str:
     str(Model.EfficientSam): ("efficient_sam", "EfficientSAM"),
     str(Model.Qwen25): ("qwen2_5", "Qwen2_5Model"),
     str(Model.Phi4Mini): ("phi_4_mini", "Phi4MiniModel"),
+    str(Model.EfficientNetB4): ("efficientnet_b4", "EfficientNetB4Model"),
+    str(Model.DetrResNet50): ("detr_resnet50", "DetrResNet50Model"),
+    str(Model.SegformerADE): ("segformer_ade", "SegformerADEModel"),
+    str(Model.Albert): ("albert", "AlbertModelExample"),
+    str(Model.BiLSTM): ("bilstm", "BidirectionalLSTMModel"),
+    str(Model.Swin2SR2x): ("swin2sr_2x", "Swin2SR2xModel"),
+    str(Model.TrOCRHandwritten): ("trocr_handwritten", "TrOCRHandwrittenModel"),
+    str(Model.XLSR): ("xlsr", "XLSRModel"),
 }
 
 __all__ = [
diff --git a/examples/models/albert/__init__.py b/examples/models/albert/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import AlbertModelExample
+
+__all__ = [
+    "AlbertModelExample",
+]
diff --git a/examples/models/albert/model.py b/examples/models/albert/model.py
@@ -0,0 +1,30 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch
+
+from transformers import AlbertModel, AutoTokenizer  # @manual
+
+from ..model_base import EagerModelBase
+
+
+class AlbertModelExample(EagerModelBase):
+    """Example entry exposing the pretrained ``albert-base-v2`` model and a sample input."""
+
+    def __init__(self):
+        """Set up no state; the model and tokenizer are loaded on demand."""
+
+    def get_eager_model(self) -> torch.nn.Module:
+        """Load ``albert-base-v2`` with ``return_dict=False`` and return it in eval mode."""
+        logging.info("Loading ALBERT model")
+        # pyre-ignore
+        albert = AlbertModel.from_pretrained("albert-base-v2", return_dict=False)
+        albert.eval()
+        logging.info("Loaded ALBERT model")
+        return albert
+
+    def get_example_inputs(self):
+        """Return a one-element tuple with the ``input_ids`` of a fixed sample sentence."""
+        albert_tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")
+        encoding = albert_tokenizer("Hello, my dog is cute", return_tensors="pt")
+        return (encoding["input_ids"],)
diff --git a/examples/models/bilstm/__init__.py b/examples/models/bilstm/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import BidirectionalLSTMModel
+
+__all__ = [
+    "BidirectionalLSTMModel",
+]
diff --git a/examples/models/bilstm/model.py b/examples/models/bilstm/model.py
@@ -0,0 +1,96 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch
+import torch.nn as nn
+
+from ..model_base import EagerModelBase
+
+
+class BidirectionalLSTM(nn.Module):
+    """Bidirectional LSTM followed by a linear layer applied to the last time step.
+
+    Args:
+        input_size: Number of features per time step.
+        hidden_size: Hidden units per LSTM direction.
+        num_layers: Number of stacked LSTM layers.
+        num_classes: Number of outputs produced by the final linear layer.
+    """
+
+    def __init__(self, input_size=100, hidden_size=128, num_layers=2, num_classes=10):
+        super().__init__()
+        self.hidden_size = hidden_size
+        self.num_layers = num_layers
+
+        # batch_first=True: inputs are laid out as (batch, seq_len, input_size).
+        self.lstm = nn.LSTM(
+            input_size, hidden_size, num_layers, batch_first=True, bidirectional=True
+        )
+
+        # Output layer (hidden_size * 2 because both directions are concatenated).
+        self.fc = nn.Linear(hidden_size * 2, num_classes)
+
+    def forward(self, x):
+        """Return scores of shape (batch, num_classes) for ``x`` of shape (batch, seq_len, input_size)."""
+        # Explicit zero initial states, one slice per layer and direction:
+        # (num_layers * 2, batch, hidden_size). They are created directly with
+        # x's device and dtype so the module also works after .double()/.half()
+        # and avoids an allocate-then-copy round trip.
+        state_shape = (self.num_layers * 2, x.size(0), self.hidden_size)
+        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
+        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
+
+        # LSTM forward pass; the final hidden/cell states are not needed.
+        out, _ = self.lstm(x, (h0, c0))
+
+        # Classify from the features of the last time step only.
+        return self.fc(out[:, -1, :])
+
+
+class BidirectionalLSTMTextClassifier(nn.Module):
+    """Text classifier: embedding, bidirectional LSTM, max over time, linear layer."""
+
+    def __init__(
+        self, vocab_size=10000, embedding_dim=128, hidden_size=256, num_classes=2
+    ):
+        super().__init__()
+        self.hidden_size = hidden_size
+
+        # Token ids -> dense vectors.
+        self.embedding = nn.Embedding(vocab_size, embedding_dim)
+
+        # Single-layer bidirectional LSTM over (batch, seq_len, embedding_dim).
+        self.lstm = nn.LSTM(
+            embedding_dim, hidden_size, batch_first=True, bidirectional=True
+        )
+
+        # Both directions are concatenated, hence hidden_size * 2 inputs.
+        self.fc = nn.Linear(hidden_size * 2, num_classes)
+
+    def forward(self, x):
+        """Map token ids ``x`` to class scores."""
+        sequence_features, _ = self.lstm(self.embedding(x))
+
+        # Element-wise maximum across the sequence dimension (dim=1).
+        pooled = sequence_features.max(dim=1).values
+
+        return self.fc(pooled)
+
+
+class BidirectionalLSTMModel(EagerModelBase):
+    """Example entry that builds a ``BidirectionalLSTM`` and a matching random input."""
+
+    def __init__(self):
+        """Set up no state; the model is built in ``get_eager_model``."""
+
+    def get_eager_model(self) -> torch.nn.Module:
+        """Construct the LSTM with its fixed example configuration, in eval mode."""
+        logging.info("Loading Bidirectional LSTM model")
+        config = {
+            "input_size": 100,
+            "hidden_size": 128,
+            "num_layers": 2,
+            "num_classes": 10,
+        }
+        bilstm = BidirectionalLSTM(**config)
+        bilstm.eval()
+        logging.info("Loaded Bidirectional LSTM model")
+        return bilstm
+
+    def get_example_inputs(self):
+        """Return a one-element tuple with a random (1, 50, 100) tensor."""
+        # Layout: (batch_size, seq_len, input_size)
+        batch_size, seq_len, input_size = 1, 50, 100
+        return (torch.randn(batch_size, seq_len, input_size),)
diff --git a/examples/models/detr_resnet50/__init__.py b/examples/models/detr_resnet50/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import DetrResNet50Model
+
+__all__ = [
+    "DetrResNet50Model",
+]
diff --git a/examples/models/detr_resnet50/model.py b/examples/models/detr_resnet50/model.py
@@ -0,0 +1,45 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch
+from transformers import DetrForObjectDetection
+
+from ..model_base import EagerModelBase
+
+
+class DetrWrapper(torch.nn.Module):
+    """Adapter around the HuggingFace DETR model intended to make it torch.export compatible.
+
+    ``forward`` returns a plain ``(logits, pred_boxes)`` tuple instead of the
+    HuggingFace output object.
+    """
+
+    def __init__(self, model_name="facebook/detr-resnet-50"):
+        super().__init__()
+        detr = DetrForObjectDetection.from_pretrained(model_name)
+        detr.eval()
+        self.detr = detr
+
+    def forward(self, pixel_values):
+        """Run detection on ``pixel_values`` ([batch, 3, height, width] RGB image)."""
+        # Gradients are never needed for this inference-only wrapper.
+        with torch.no_grad():
+            detections = self.detr(pixel_values)
+        return detections.logits, detections.pred_boxes
+
+
+class DetrResNet50Model(EagerModelBase):
+    """Example entry for the ``facebook/detr-resnet-50`` object detection model."""
+
+    def __init__(self):
+        """Set up no state; the model is loaded in ``get_eager_model``."""
+
+    def get_eager_model(self) -> torch.nn.Module:
+        """Build the ``DetrWrapper`` around the pretrained checkpoint, in eval mode."""
+        logging.info("Loading DETR ResNet-50 model from HuggingFace")
+        wrapper = DetrWrapper("facebook/detr-resnet-50")
+        wrapper.eval()
+        logging.info("Loaded DETR ResNet-50 model")
+        return wrapper
+
+    def get_example_inputs(self):
+        """Return a one-element tuple with a random (1, 3, 800, 800) tensor."""
+        # DETR standard input size: 800x800 RGB image (can handle various sizes)
+        batch, channels, height, width = 1, 3, 800, 800
+        return (torch.randn(batch, channels, height, width),)
diff --git a/examples/models/efficientnet_b4/__init__.py b/examples/models/efficientnet_b4/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import EfficientNetB4Model
+
+__all__ = [
+    "EfficientNetB4Model",
+]
diff --git a/examples/models/efficientnet_b4/model.py b/examples/models/efficientnet_b4/model.py
@@ -0,0 +1,31 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch
+
+from torchvision.models import efficientnet_b4  # @manual
+from torchvision.models.efficientnet import EfficientNet_B4_Weights
+
+from ..model_base import EagerModelBase
+
+
+class EfficientNetB4Model(EagerModelBase):
+    """Example entry for torchvision's EfficientNet-B4 with IMAGENET1K_V1 weights."""
+
+    def __init__(self):
+        """Set up no state; the model is loaded in ``get_eager_model``."""
+
+    def get_eager_model(self) -> torch.nn.Module:
+        """Load the pretrained EfficientNet-B4 and return it in eval mode."""
+        logging.info("Loading EfficientNet-B4 model")
+        pretrained = EfficientNet_B4_Weights.IMAGENET1K_V1
+        network = efficientnet_b4(weights=pretrained)
+        network.eval()
+        logging.info("Loaded EfficientNet-B4 model")
+        return network
+
+    def get_example_inputs(self):
+        """Return a one-element tuple with a random (1, 3, 380, 380) tensor."""
+        # EfficientNet-B4 uses 380x380 input size
+        batch, channels, height, width = 1, 3, 380, 380
+        return (torch.randn(batch, channels, height, width),)
diff --git a/examples/models/segformer_ade/__init__.py b/examples/models/segformer_ade/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import SegformerADEModel
+
+__all__ = [
+    "SegformerADEModel",
+]
diff --git a/examples/models/segformer_ade/model.py b/examples/models/segformer_ade/model.py
@@ -0,0 +1,44 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch
+from transformers import SegformerForSemanticSegmentation
+
+from ..model_base import EagerModelBase
+
+
+class SegformerWrapper(torch.nn.Module):
+    """Adapter around the HuggingFace SegFormer model intended to make it torch.export compatible.
+
+    ``forward`` returns only the ``logits`` tensor of the HuggingFace output.
+    """
+
+    def __init__(self, model_name="nvidia/segformer-b0-finetuned-ade-512-512"):
+        super().__init__()
+        segformer = SegformerForSemanticSegmentation.from_pretrained(model_name)
+        segformer.eval()
+        self.segformer = segformer
+
+    def forward(self, pixel_values):
+        """Run segmentation on ``pixel_values`` ([batch, 3, height, width] RGB image)."""
+        # Gradients are never needed for this inference-only wrapper.
+        with torch.no_grad():
+            segmentation = self.segformer(pixel_values)
+        return segmentation.logits
+
+
+class SegformerADEModel(EagerModelBase):
+    """Example entry for the ``nvidia/segformer-b0-finetuned-ade-512-512`` model."""
+
+    def __init__(self):
+        """Set up no state; the model is loaded in ``get_eager_model``."""
+
+    def get_eager_model(self) -> torch.nn.Module:
+        """Build the ``SegformerWrapper`` around the pretrained checkpoint, in eval mode."""
+        logging.info("Loading SegFormer ADE model from HuggingFace")
+        wrapper = SegformerWrapper("nvidia/segformer-b0-finetuned-ade-512-512")
+        wrapper.eval()
+        logging.info("Loaded SegFormer ADE model")
+        return wrapper
+
+    def get_example_inputs(self):
+        """Return a one-element tuple with a random (1, 3, 512, 512) tensor."""
+        # SegFormer standard input size: 512x512 RGB image
+        batch, channels, height, width = 1, 3, 512, 512
+        return (torch.randn(batch, channels, height, width),)
diff --git a/examples/models/swin2sr_2x/__init__.py b/examples/models/swin2sr_2x/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import Swin2SR2xModel
+
+__all__ = [
+    "Swin2SR2xModel",
+]
diff --git a/examples/models/swin2sr_2x/model.py b/examples/models/swin2sr_2x/model.py
diff --git a/examples/models/trocr_handwritten/__init__.py b/examples/models/trocr_handwritten/__init__.py
diff --git a/examples/models/trocr_handwritten/model.py b/examples/models/trocr_handwritten/model.py
diff --git a/examples/models/xlsr/__init__.py b/examples/models/xlsr/__init__.py
diff --git a/examples/models/xlsr/model.py b/examples/models/xlsr/model.py
diff --git a/examples/xnnpack/__init__.py b/examples/xnnpack/__init__.py