Add more models as part of GA models #12340

Open · wants to merge 1 commit into main
5 changes: 3 additions & 2 deletions .ci/scripts/test_model.sh
@@ -317,8 +317,9 @@ elif [[ "${BACKEND}" == *"xnnpack"* ]]; then
echo "Testing ${MODEL_NAME} with xnnpack..."
WITH_QUANTIZATION=true
WITH_DELEGATION=true
if [[ "$MODEL_NAME" == "mobilebert" ]]; then
# TODO(T197452682)
if [[ "$MODEL_NAME" == "mobilebert" || "$MODEL_NAME" == "albert" ]]; then
# TODO(https://github.com/pytorch/executorch/issues/12341)
# mobilebert, albert incompatible with XNNPACK quantization
WITH_QUANTIZATION=false
fi
test_model_with_xnnpack "${WITH_QUANTIZATION}" "${WITH_DELEGATION}"
2 changes: 1 addition & 1 deletion .github/workflows/trunk.yml
@@ -63,7 +63,7 @@ jobs:
      contents: read
    strategy:
      matrix:
-       model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe]
+       model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe, efficientnet_b4, detr_resnet50, segformer_ade, albert, wav2vec2]
        backend: [portable, xnnpack-quantization-delegation]
        runner: [linux.arm64.2xlarge]
        include:
15 changes: 15 additions & 0 deletions examples/models/__init__.py
@@ -37,6 +37,14 @@ class Model(str, Enum):
    EfficientSam = "efficient_sam"
    Qwen25 = "qwen2_5"
    Phi4Mini = "phi_4_mini"
    EfficientNetB4 = "efficientnet_b4"
    DetrResNet50 = "detr_resnet50"
    SegformerADE = "segformer_ade"
    Albert = "albert"
    BiLSTM = "bilstm"
    Swin2SR2x = "swin2sr_2x"
    TrOCRHandwritten = "trocr_handwritten"
    Wav2Vec2 = "wav2vec2"

    def __str__(self) -> str:
        return self.value
@@ -82,6 +90,13 @@ def __str__(self) -> str:
    str(Model.EfficientSam): ("efficient_sam", "EfficientSAM"),
    str(Model.Qwen25): ("qwen2_5", "Qwen2_5Model"),
    str(Model.Phi4Mini): ("phi_4_mini", "Phi4MiniModel"),
    str(Model.EfficientNetB4): ("efficientnet_b4", "EfficientNetB4Model"),
    str(Model.DetrResNet50): ("detr_resnet50", "DetrResNet50Model"),
    str(Model.SegformerADE): ("segformer_ade", "SegformerADEModel"),
    str(Model.Albert): ("albert", "AlbertModelExample"),
    str(Model.Swin2SR2x): ("swin2sr_2x", "Swin2SR2xModel"),
    str(Model.TrOCRHandwritten): ("trocr_handwritten", "TrOCRHandwrittenModel"),
    str(Model.Wav2Vec2): ("wav2vec2", "Wav2Vec2Model"),
}

__all__ = [
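Note (illustrative, not part of the diff): each registry entry above maps a model name to a (module, class) pair under examples/models/, which is how the export scripts locate the eager model. A minimal lookup sketch follows, assuming the dict is exposed as MODEL_NAME_TO_MODEL and the package path is executorch.examples.models; the in-tree examples go through the model factory rather than importlib.

# Hypothetical sketch: resolve a registry entry to an eager model and its example inputs.
# MODEL_NAME_TO_MODEL and the package path are assumptions based on this file.
import importlib

from executorch.examples.models import MODEL_NAME_TO_MODEL

module_name, class_name = MODEL_NAME_TO_MODEL["albert"]  # ("albert", "AlbertModelExample")
module = importlib.import_module(f"executorch.examples.models.{module_name}")
wrapper = getattr(module, class_name)()
eager_model = wrapper.get_eager_model()
example_inputs = wrapper.get_example_inputs()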
11 changes: 11 additions & 0 deletions examples/models/albert/__init__.py
@@ -0,0 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from .model import AlbertModelExample

__all__ = [
"AlbertModelExample",
]
30 changes: 30 additions & 0 deletions examples/models/albert/model.py
@@ -0,0 +1,30 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import logging

import torch

from transformers import AlbertModel, AutoTokenizer # @manual

from ..model_base import EagerModelBase


class AlbertModelExample(EagerModelBase):
    def __init__(self):
        pass

    def get_eager_model(self) -> torch.nn.Module:
        logging.info("Loading ALBERT model")
        # pyre-ignore
        model = AlbertModel.from_pretrained("albert-base-v2", return_dict=False)
        model.eval()
        logging.info("Loaded ALBERT model")
        return model

    def get_example_inputs(self):
        tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")
        return (tokenizer("Hello, my dog is cute", return_tensors="pt")["input_ids"],)
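Note (illustrative, not part of the diff): the class above only supplies the eager model and example inputs; lowering happens in the example export flows. A rough sketch of an XNNPACK lowering without quantization (matching the CI gating above, which disables quantization for albert), using documented ExecuTorch APIs; treat the exact call sequence as an assumption.

# Hedged sketch: export the eager ALBERT example and delegate it to XNNPACK (fp32 only).
import torch

from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
from executorch.exir import to_edge_transform_and_lower
from executorch.examples.models.albert import AlbertModelExample  # assumed package path

wrapper = AlbertModelExample()
exported = torch.export.export(wrapper.get_eager_model(), wrapper.get_example_inputs())
program = to_edge_transform_and_lower(exported, partitioner=[XnnpackPartitioner()]).to_executorch()

with open("albert_xnnpack_fp32.pte", "wb") as f:
    f.write(program.buffer)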
11 changes: 11 additions & 0 deletions examples/models/detr_resnet50/__init__.py
@@ -0,0 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from .model import DetrResNet50Model

__all__ = [
"DetrResNet50Model",
]
45 changes: 45 additions & 0 deletions examples/models/detr_resnet50/model.py
@@ -0,0 +1,45 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import logging

import torch
from transformers import DetrForObjectDetection

from ..model_base import EagerModelBase


class DetrWrapper(torch.nn.Module):
    """Wrapper for HuggingFace DETR model to make it torch.export compatible"""

    def __init__(self, model_name="facebook/detr-resnet-50"):
        super().__init__()
        self.detr = DetrForObjectDetection.from_pretrained(model_name)
        self.detr.eval()

    def forward(self, pixel_values):
        # pixel_values: [batch, 3, height, width] - RGB image
        with torch.no_grad():
            outputs = self.detr(pixel_values)
        # Return logits and boxes for object detection
        return outputs.logits, outputs.pred_boxes


class DetrResNet50Model(EagerModelBase):
    def __init__(self):
        pass

    def get_eager_model(self) -> torch.nn.Module:
        logging.info("Loading DETR ResNet-50 model from HuggingFace")
        model = DetrWrapper("facebook/detr-resnet-50")
        model.eval()
        logging.info("Loaded DETR ResNet-50 model")
        return model

    def get_example_inputs(self):
        # DETR standard input size: 800x800 RGB image (can handle various sizes)
        tensor_size = (1, 3, 800, 800)
        return (torch.randn(tensor_size),)
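Note (illustrative, not part of the diff): the wrapper returns raw (logits, pred_boxes). DETR's convention is a trailing "no object" class and boxes in normalized (cx, cy, w, h), so a consumer typically converts them roughly like this:

# Sketch: turn the wrapper's outputs into per-query scores, labels, and corner boxes.
# Assumes DETR's usual output conventions; the threshold is an arbitrary illustrative value.
import torch

def postprocess(logits: torch.Tensor, pred_boxes: torch.Tensor, threshold: float = 0.7):
    probs = logits.softmax(-1)[..., :-1]  # drop the trailing "no object" class
    scores, labels = probs.max(-1)
    cx, cy, w, h = pred_boxes.unbind(-1)  # normalized center-size boxes
    boxes = torch.stack((cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2), dim=-1)
    keep = scores > threshold
    return scores[keep], labels[keep], boxes[keep]

logits, pred_boxes = DetrWrapper()(torch.randn(1, 3, 800, 800))
scores, labels, boxes = postprocess(logits[0], pred_boxes[0])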
11 changes: 11 additions & 0 deletions examples/models/efficientnet_b4/__init__.py
@@ -0,0 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from .model import EfficientNetB4Model

__all__ = [
"EfficientNetB4Model",
]
31 changes: 31 additions & 0 deletions examples/models/efficientnet_b4/model.py
@@ -0,0 +1,31 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import logging

import torch

from torchvision.models import efficientnet_b4 # @manual
from torchvision.models.efficientnet import EfficientNet_B4_Weights

from ..model_base import EagerModelBase


class EfficientNetB4Model(EagerModelBase):
    def __init__(self):
        pass

    def get_eager_model(self) -> torch.nn.Module:
        logging.info("Loading EfficientNet-B4 model")
        model = efficientnet_b4(weights=EfficientNet_B4_Weights.IMAGENET1K_V1)
        model.eval()
        logging.info("Loaded EfficientNet-B4 model")
        return model

    def get_example_inputs(self):
        # EfficientNet-B4 uses 380x380 input size
        tensor_size = (1, 3, 380, 380)
        return (torch.randn(tensor_size),)
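Note (illustrative, not part of the diff): the random 380x380 tensor above only exercises the graph shape; for real images, the torchvision weights object bundles the matching preprocessing (resize, center-crop to 380, ImageNet normalization). A sketch:

# Sketch: classify a real image with the same weights the example model loads.
# "example.jpg" is a hypothetical input file.
import torch
from PIL import Image
from torchvision.models.efficientnet import EfficientNet_B4_Weights

weights = EfficientNet_B4_Weights.IMAGENET1K_V1
preprocess = weights.transforms()

model = EfficientNetB4Model().get_eager_model()
batch = preprocess(Image.open("example.jpg").convert("RGB")).unsqueeze(0)  # [1, 3, 380, 380]

with torch.no_grad():
    logits = model(batch)
print(weights.meta["categories"][logits.argmax(-1).item()])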
11 changes: 11 additions & 0 deletions examples/models/segformer_ade/__init__.py
@@ -0,0 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from .model import SegformerADEModel

__all__ = [
"SegformerADEModel",
]
44 changes: 44 additions & 0 deletions examples/models/segformer_ade/model.py
@@ -0,0 +1,44 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import logging

import torch
from transformers import SegformerForSemanticSegmentation

from ..model_base import EagerModelBase


class SegformerWrapper(torch.nn.Module):
    """Wrapper for HuggingFace SegFormer model to make it torch.export compatible"""

    def __init__(self, model_name="nvidia/segformer-b0-finetuned-ade-512-512"):
        super().__init__()
        self.segformer = SegformerForSemanticSegmentation.from_pretrained(model_name)
        self.segformer.eval()

    def forward(self, pixel_values):
        # pixel_values: [batch, 3, height, width] - RGB image
        with torch.no_grad():
            outputs = self.segformer(pixel_values)
        return outputs.logits


class SegformerADEModel(EagerModelBase):
    def __init__(self):
        pass

    def get_eager_model(self) -> torch.nn.Module:
        logging.info("Loading SegFormer ADE model from HuggingFace")
        model = SegformerWrapper("nvidia/segformer-b0-finetuned-ade-512-512")
        model.eval()
        logging.info("Loaded SegFormer ADE model")
        return model

    def get_example_inputs(self):
        # SegFormer standard input size: 512x512 RGB image
        tensor_size = (1, 3, 512, 512)
        return (torch.randn(tensor_size),)
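Note (illustrative, not part of the diff): SegformerForSemanticSegmentation emits logits at 1/4 of the input resolution, so consumers usually upsample before taking the per-pixel argmax. A minimal sketch:

# Sketch: upsample the wrapper's logits to the input size and take the class argmax.
# The ADE20K checkpoint used above has 150 classes.
import torch
import torch.nn.functional as F

model = SegformerWrapper()  # wrapper defined above
pixel_values = torch.randn(1, 3, 512, 512)

logits = model(pixel_values)  # [1, 150, 128, 128], i.e. 1/4 resolution
upsampled = F.interpolate(logits, size=pixel_values.shape[-2:], mode="bilinear", align_corners=False)
segmentation = upsampled.argmax(dim=1)  # [1, 512, 512] class ids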
11 changes: 11 additions & 0 deletions examples/models/swin2sr_2x/__init__.py
@@ -0,0 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from .model import Swin2SR2xModel

__all__ = [
"Swin2SR2xModel",
]
44 changes: 44 additions & 0 deletions examples/models/swin2sr_2x/model.py
@@ -0,0 +1,44 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import logging

import torch
from transformers import Swin2SRForImageSuperResolution

from ..model_base import EagerModelBase


class Swin2SRWrapper(torch.nn.Module):
    """Wrapper for HuggingFace Swin2SR model to make it torch.export compatible"""

    def __init__(self, model_name="caidas/swin2SR-classical-sr-x2-64"):
        super().__init__()
        self.swin2sr = Swin2SRForImageSuperResolution.from_pretrained(model_name)
        self.swin2sr.eval()

    def forward(self, pixel_values):
        # pixel_values: [batch, 3, height, width] - RGB image
        with torch.no_grad():
            outputs = self.swin2sr(pixel_values)
        return outputs.reconstruction


class Swin2SR2xModel(EagerModelBase):
    def __init__(self):
        pass

    def get_eager_model(self) -> torch.nn.Module:
        logging.info("Loading Swin2SR 2x model from HuggingFace")
        model = Swin2SRWrapper("caidas/swin2SR-classical-sr-x2-64")
        model.eval()
        logging.info("Loaded Swin2SR 2x model")
        return model

    def get_example_inputs(self):
        # Swin2SR input size: 64x64 RGB image for 2x super-resolution
        tensor_size = (1, 3, 64, 64)
        return (torch.randn(tensor_size),)
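Note (illustrative, not part of the diff): for a 64x64 input the 2x checkpoint returns a 128x128 reconstruction with values roughly in [0, 1], so producing an image is a clamp, scale, and cast. A sketch:

# Sketch: convert the wrapper's reconstruction tensor into an 8-bit RGB image.
import numpy as np
import torch
from PIL import Image

model = Swin2SR2xModel().get_eager_model()  # wrapper defined above
output = model(torch.randn(1, 3, 64, 64))   # [1, 3, 128, 128] for the 2x checkpoint

array = output.squeeze(0).clamp(0, 1).permute(1, 2, 0).numpy()
Image.fromarray((array * 255).round().astype(np.uint8)).save("upscaled.png")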
11 changes: 11 additions & 0 deletions examples/models/trocr_handwritten/__init__.py
@@ -0,0 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from .model import TrOCRHandwrittenModel

__all__ = [
"TrOCRHandwrittenModel",
]
45 changes: 45 additions & 0 deletions examples/models/trocr_handwritten/model.py
@@ -0,0 +1,45 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import logging

import torch
from transformers import VisionEncoderDecoderModel

from ..model_base import EagerModelBase


class TrOCRWrapper(torch.nn.Module):
    """Wrapper for HuggingFace TrOCR model to make it torch.export compatible"""

    def __init__(self, model_name="microsoft/trocr-base-handwritten"):
        super().__init__()
        self.trocr = VisionEncoderDecoderModel.from_pretrained(model_name)
        self.trocr.eval()

    def forward(self, pixel_values):
        # pixel_values: [batch, 3, height, width] - RGB image
        with torch.no_grad():
            # Generate text from image
            generated_ids = self.trocr.generate(pixel_values, max_length=50)
        return generated_ids


class TrOCRHandwrittenModel(EagerModelBase):
    def __init__(self):
        pass

    def get_eager_model(self) -> torch.nn.Module:
        logging.info("Loading TrOCR handwritten model from HuggingFace")
        model = TrOCRWrapper("microsoft/trocr-base-handwritten")
        model.eval()
        logging.info("Loaded TrOCR handwritten model")
        return model

    def get_example_inputs(self):
        # TrOCR input: 384x384 RGB text image
        pixel_values = torch.randn(1, 3, 384, 384)
        return (pixel_values,)
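Note (illustrative, not part of the diff): the wrapper returns raw token ids from generate(), so the matching TrOCR processor is needed both to build pixel_values from an image and to decode the ids back into text. A sketch against the same checkpoint:

# Sketch: run the handwritten-text example end to end with the matching processor.
# "handwritten_line.png" is a hypothetical input file.
from PIL import Image
from transformers import TrOCRProcessor

processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
model = TrOCRHandwrittenModel().get_eager_model()  # wrapper defined above

image = Image.open("handwritten_line.png").convert("RGB")
pixel_values = processor(images=image, return_tensors="pt").pixel_values
generated_ids = model(pixel_values)
text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]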
9 changes: 9 additions & 0 deletions examples/models/wav2vec2/__init__.py
@@ -0,0 +1,9 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from .model import Wav2Vec2Model

__all__ = ["Wav2Vec2Model"]