diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh
index bc9bbb8bae0..cdcd0932690 100755
--- a/.ci/scripts/test_model.sh
+++ b/.ci/scripts/test_model.sh
@@ -317,8 +317,9 @@ elif [[ "${BACKEND}" == *"xnnpack"* ]]; then
   echo "Testing ${MODEL_NAME} with xnnpack..."
   WITH_QUANTIZATION=true
   WITH_DELEGATION=true
-  if [[ "$MODEL_NAME" == "mobilebert" ]]; then
-    # TODO(T197452682)
+  if [[ "$MODEL_NAME" == "mobilebert" || "$MODEL_NAME" == "albert" ]]; then
+    # TODO(https://github.com/pytorch/executorch/issues/12341)
+    # mobilebert, albert incompatible with XNNPACK quantization
     WITH_QUANTIZATION=false
   fi
   test_model_with_xnnpack "${WITH_QUANTIZATION}" "${WITH_DELEGATION}"
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
index b90ef543133..52416724e5a 100644
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@@ -63,7 +63,7 @@ jobs:
       contents: read
     strategy:
       matrix:
-        model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe]
+        model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe, efficientnet_b4, detr_resnet50, segformer_ade, albert, wav2vec2]
         backend: [portable, xnnpack-quantization-delegation]
         runner: [linux.arm64.2xlarge]
         include:
diff --git a/examples/models/__init__.py b/examples/models/__init__.py
index 76469846608..1bfc4a3d556 100644
--- a/examples/models/__init__.py
+++ b/examples/models/__init__.py
@@ -37,6 +37,14 @@ class Model(str, Enum):
     EfficientSam = "efficient_sam"
     Qwen25 = "qwen2_5"
     Phi4Mini = "phi_4_mini"
+    EfficientNetB4 = "efficientnet_b4"
+    DetrResNet50 = "detr_resnet50"
+    SegformerADE = "segformer_ade"
+    Albert = "albert"
+    BiLSTM = "bilstm"
+    Swin2SR2x = "swin2sr_2x"
+    TrOCRHandwritten = "trocr_handwritten"
+    Wav2Vec2 = "wav2vec2"
 
     def __str__(self) -> str:
         return self.value
@@ -82,6 +90,13 @@ def __str__(self) -> str:
     str(Model.EfficientSam): ("efficient_sam", "EfficientSAM"),
     str(Model.Qwen25): ("qwen2_5", "Qwen2_5Model"),
     str(Model.Phi4Mini): ("phi_4_mini", "Phi4MiniModel"),
+    str(Model.EfficientNetB4): ("efficientnet_b4", "EfficientNetB4Model"),
+    str(Model.DetrResNet50): ("detr_resnet50", "DetrResNet50Model"),
+    str(Model.SegformerADE): ("segformer_ade", "SegformerADEModel"),
+    str(Model.Albert): ("albert", "AlbertModelExample"),
+    str(Model.Swin2SR2x): ("swin2sr_2x", "Swin2SR2xModel"),
+    str(Model.TrOCRHandwritten): ("trocr_handwritten", "TrOCRHandwrittenModel"),
+    str(Model.Wav2Vec2): ("wav2vec2", "Wav2Vec2Model"),
 }
 
 __all__ = [
diff --git a/examples/models/albert/__init__.py b/examples/models/albert/__init__.py
new file mode 100644
index 00000000000..70457453cbd
--- /dev/null
+++ b/examples/models/albert/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import AlbertModelExample
+
+__all__ = [
+    "AlbertModelExample",
+]
diff --git a/examples/models/albert/model.py b/examples/models/albert/model.py
new file mode 100644
index 00000000000..27c53890d6c
--- /dev/null
+++ b/examples/models/albert/model.py
@@ -0,0 +1,30 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch
+
+from transformers import AlbertModel, AutoTokenizer  # @manual
+
+from ..model_base import EagerModelBase
+
+
+class AlbertModelExample(EagerModelBase):
+    def __init__(self):
+        pass
+
+    def get_eager_model(self) -> torch.nn.Module:
+        logging.info("Loading ALBERT model")
+        # pyre-ignore
+        model = AlbertModel.from_pretrained("albert-base-v2", return_dict=False)
+        model.eval()
+        logging.info("Loaded ALBERT model")
+        return model
+
+    def get_example_inputs(self):
+        tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")
+        return (tokenizer("Hello, my dog is cute", return_tensors="pt")["input_ids"],)
diff --git a/examples/models/detr_resnet50/__init__.py b/examples/models/detr_resnet50/__init__.py
new file mode 100644
index 00000000000..916422ee03f
--- /dev/null
+++ b/examples/models/detr_resnet50/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import DetrResNet50Model
+
+__all__ = [
+    "DetrResNet50Model",
+]
diff --git a/examples/models/detr_resnet50/model.py b/examples/models/detr_resnet50/model.py
new file mode 100644
index 00000000000..a9a97df1dc7
--- /dev/null
+++ b/examples/models/detr_resnet50/model.py
@@ -0,0 +1,45 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch
+from transformers import DetrForObjectDetection
+
+from ..model_base import EagerModelBase
+
+
+class DetrWrapper(torch.nn.Module):
+    """Wrapper for HuggingFace DETR model to make it torch.export compatible"""
+
+    def __init__(self, model_name="facebook/detr-resnet-50"):
+        super().__init__()
+        self.detr = DetrForObjectDetection.from_pretrained(model_name)
+        self.detr.eval()
+
+    def forward(self, pixel_values):
+        # pixel_values: [batch, 3, height, width] - RGB image
+        with torch.no_grad():
+            outputs = self.detr(pixel_values)
+        # Return logits and boxes for object detection
+        return outputs.logits, outputs.pred_boxes
+
+
+class DetrResNet50Model(EagerModelBase):
+    def __init__(self):
+        pass
+
+    def get_eager_model(self) -> torch.nn.Module:
+        logging.info("Loading DETR ResNet-50 model from HuggingFace")
+        model = DetrWrapper("facebook/detr-resnet-50")
+        model.eval()
+        logging.info("Loaded DETR ResNet-50 model")
+        return model
+
+    def get_example_inputs(self):
+        # DETR standard input size: 800x800 RGB image (can handle various sizes)
+        tensor_size = (1, 3, 800, 800)
+        return (torch.randn(tensor_size),)
diff --git a/examples/models/efficientnet_b4/__init__.py b/examples/models/efficientnet_b4/__init__.py
new file mode 100644
index 00000000000..bef4f483e15
--- /dev/null
+++ b/examples/models/efficientnet_b4/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import EfficientNetB4Model
+
+__all__ = [
+    "EfficientNetB4Model",
+]
diff --git a/examples/models/efficientnet_b4/model.py b/examples/models/efficientnet_b4/model.py
new file mode 100644
index 00000000000..bfd7a300b37
--- /dev/null
+++ b/examples/models/efficientnet_b4/model.py
@@ -0,0 +1,31 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch
+
+from torchvision.models import efficientnet_b4  # @manual
+from torchvision.models.efficientnet import EfficientNet_B4_Weights
+
+from ..model_base import EagerModelBase
+
+
+class EfficientNetB4Model(EagerModelBase):
+    def __init__(self):
+        pass
+
+    def get_eager_model(self) -> torch.nn.Module:
+        logging.info("Loading EfficientNet-B4 model")
+        model = efficientnet_b4(weights=EfficientNet_B4_Weights.IMAGENET1K_V1)
+        model.eval()
+        logging.info("Loaded EfficientNet-B4 model")
+        return model
+
+    def get_example_inputs(self):
+        # EfficientNet-B4 uses 380x380 input size
+        tensor_size = (1, 3, 380, 380)
+        return (torch.randn(tensor_size),)
diff --git a/examples/models/segformer_ade/__init__.py b/examples/models/segformer_ade/__init__.py
new file mode 100644
index 00000000000..352f47de94e
--- /dev/null
+++ b/examples/models/segformer_ade/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import SegformerADEModel
+
+__all__ = [
+    "SegformerADEModel",
+]
diff --git a/examples/models/segformer_ade/model.py b/examples/models/segformer_ade/model.py
new file mode 100644
index 00000000000..7cb97367bef
--- /dev/null
+++ b/examples/models/segformer_ade/model.py
@@ -0,0 +1,44 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch
+from transformers import SegformerForSemanticSegmentation
+
+from ..model_base import EagerModelBase
+
+
+class SegformerWrapper(torch.nn.Module):
+    """Wrapper for HuggingFace SegFormer model to make it torch.export compatible"""
+
+    def __init__(self, model_name="nvidia/segformer-b0-finetuned-ade-512-512"):
+        super().__init__()
+        self.segformer = SegformerForSemanticSegmentation.from_pretrained(model_name)
+        self.segformer.eval()
+
+    def forward(self, pixel_values):
+        # pixel_values: [batch, 3, height, width] - RGB image
+        with torch.no_grad():
+            outputs = self.segformer(pixel_values)
+        return outputs.logits
+
+
+class SegformerADEModel(EagerModelBase):
+    def __init__(self):
+        pass
+
+    def get_eager_model(self) -> torch.nn.Module:
+        logging.info("Loading SegFormer ADE model from HuggingFace")
+        model = SegformerWrapper("nvidia/segformer-b0-finetuned-ade-512-512")
+        model.eval()
+        logging.info("Loaded SegFormer ADE model")
+        return model
+
+    def get_example_inputs(self):
+        # SegFormer standard input size: 512x512 RGB image
+        tensor_size = (1, 3, 512, 512)
+        return (torch.randn(tensor_size),)
diff --git a/examples/models/swin2sr_2x/__init__.py b/examples/models/swin2sr_2x/__init__.py
new file mode 100644
index 00000000000..2a761642029
--- /dev/null
+++ b/examples/models/swin2sr_2x/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import Swin2SR2xModel
+
+__all__ = [
+    "Swin2SR2xModel",
+]
diff --git a/examples/models/swin2sr_2x/model.py b/examples/models/swin2sr_2x/model.py
new file mode 100644
index 00000000000..5263cdf663c
--- /dev/null
+++ b/examples/models/swin2sr_2x/model.py
@@ -0,0 +1,44 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch
+from transformers import Swin2SRForImageSuperResolution
+
+from ..model_base import EagerModelBase
+
+
+class Swin2SRWrapper(torch.nn.Module):
+    """Wrapper for HuggingFace Swin2SR model to make it torch.export compatible"""
+
+    def __init__(self, model_name="caidas/swin2SR-classical-sr-x2-64"):
+        super().__init__()
+        self.swin2sr = Swin2SRForImageSuperResolution.from_pretrained(model_name)
+        self.swin2sr.eval()
+
+    def forward(self, pixel_values):
+        # pixel_values: [batch, 3, height, width] - RGB image
+        with torch.no_grad():
+            outputs = self.swin2sr(pixel_values)
+        return outputs.reconstruction
+
+
+class Swin2SR2xModel(EagerModelBase):
+    def __init__(self):
+        pass
+
+    def get_eager_model(self) -> torch.nn.Module:
+        logging.info("Loading Swin2SR 2x model from HuggingFace")
+        model = Swin2SRWrapper("caidas/swin2SR-classical-sr-x2-64")
+        model.eval()
+        logging.info("Loaded Swin2SR 2x model")
+        return model
+
+    def get_example_inputs(self):
+        # Swin2SR input size: 64x64 RGB image for 2x super-resolution
+        tensor_size = (1, 3, 64, 64)
+        return (torch.randn(tensor_size),)
diff --git a/examples/models/trocr_handwritten/__init__.py b/examples/models/trocr_handwritten/__init__.py
new file mode 100644
index 00000000000..57880691a30
--- /dev/null
+++ b/examples/models/trocr_handwritten/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import TrOCRHandwrittenModel
+
+__all__ = [
+    "TrOCRHandwrittenModel",
+]
diff --git a/examples/models/trocr_handwritten/model.py b/examples/models/trocr_handwritten/model.py
new file mode 100644
index 00000000000..1975d953e78
--- /dev/null
+++ b/examples/models/trocr_handwritten/model.py
@@ -0,0 +1,45 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch
+from transformers import VisionEncoderDecoderModel
+
+from ..model_base import EagerModelBase
+
+
+class TrOCRWrapper(torch.nn.Module):
+    """Wrapper for HuggingFace TrOCR model to make it torch.export compatible"""
+
+    def __init__(self, model_name="microsoft/trocr-base-handwritten"):
+        super().__init__()
+        self.trocr = VisionEncoderDecoderModel.from_pretrained(model_name)
+        self.trocr.eval()
+
+    def forward(self, pixel_values):
+        # pixel_values: [batch, 3, height, width] - RGB image
+        with torch.no_grad():
+            # Generate text from image
+            generated_ids = self.trocr.generate(pixel_values, max_length=50)
+        return generated_ids
+
+
+class TrOCRHandwrittenModel(EagerModelBase):
+    def __init__(self):
+        pass
+
+    def get_eager_model(self) -> torch.nn.Module:
+        logging.info("Loading TrOCR handwritten model from HuggingFace")
+        model = TrOCRWrapper("microsoft/trocr-base-handwritten")
+        model.eval()
+        logging.info("Loaded TrOCR handwritten model")
+        return model
+
+    def get_example_inputs(self):
+        # TrOCR input: 384x384 RGB text image
+        pixel_values = torch.randn(1, 3, 384, 384)
+        return (pixel_values,)
diff --git a/examples/models/wav2vec2/__init__.py b/examples/models/wav2vec2/__init__.py
new file mode 100644
index 00000000000..621c476f7b8
--- /dev/null
+++ b/examples/models/wav2vec2/__init__.py
@@ -0,0 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import Wav2Vec2Model
+
+__all__ = ["Wav2Vec2Model"]
diff --git a/examples/models/wav2vec2/model.py b/examples/models/wav2vec2/model.py
new file mode 100644
index 00000000000..6ee2564880a
--- /dev/null
+++ b/examples/models/wav2vec2/model.py
@@ -0,0 +1,44 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch
+from transformers import Wav2Vec2Model as HFWav2Vec2Model  # aliased: the example class below reuses this name
+
+from ..model_base import EagerModelBase
+
+
+class Wav2Vec2Wrapper(torch.nn.Module):
+    """Wrapper for HuggingFace Wav2Vec2 model to make it torch.export compatible"""
+
+    def __init__(self, model_name="facebook/wav2vec2-base-960h"):
+        super().__init__()
+        # Use the alias: by the time __init__ runs, the module-level name
+        # Wav2Vec2Model refers to the EagerModelBase subclass below, which
+        # has no from_pretrained.
+        self.wav2vec2 = HFWav2Vec2Model.from_pretrained(model_name)
+        self.wav2vec2.eval()
+
+    def forward(self, input_values):
+        # input_values: [batch, sequence_length] - raw audio waveform
+        with torch.no_grad():
+            outputs = self.wav2vec2(input_values)
+        return outputs.last_hidden_state
+
+
+class Wav2Vec2Model(EagerModelBase):
+    def __init__(self):
+        pass
+
+    def get_eager_model(self) -> torch.nn.Module:
+        logging.info("Loading Wav2Vec2 model from HuggingFace")
+        model = Wav2Vec2Wrapper("facebook/wav2vec2-base-960h")
+        model.eval()
+        logging.info("Loaded Wav2Vec2 model")
+        return model
+
+    def get_example_inputs(self):
+        # Raw audio input: 1 second of 16kHz audio
+        input_values = torch.randn(1, 16000)
+        return (input_values,)
diff --git a/examples/xnnpack/__init__.py b/examples/xnnpack/__init__.py
index e78e1fec5be..1c914305158 100644
--- a/examples/xnnpack/__init__.py
+++ b/examples/xnnpack/__init__.py
@@ -45,6 +45,13 @@ class XNNPACKOptions(object):
     "emformer_join": XNNPACKOptions(QuantType.DYNAMIC_PER_CHANNEL, True),
     "emformer_predict": XNNPACKOptions(QuantType.DYNAMIC_PER_CHANNEL, True),
     "emformer_transcribe": XNNPACKOptions(QuantType.STATIC_PER_CHANNEL, True),
+    "efficientnet_b4": XNNPACKOptions(QuantType.STATIC_PER_CHANNEL, True),
+    "detr_resnet50": XNNPACKOptions(QuantType.STATIC_PER_CHANNEL, True),
+    "segformer_ade": XNNPACKOptions(QuantType.STATIC_PER_CHANNEL, True),
+    "swin2sr_2x": XNNPACKOptions(QuantType.STATIC_PER_CHANNEL, True),
+    "albert": XNNPACKOptions(QuantType.DYNAMIC_PER_CHANNEL, True),
+    "trocr_handwritten": XNNPACKOptions(QuantType.STATIC_PER_CHANNEL, True),
+    "wav2vec2": XNNPACKOptions(QuantType.DYNAMIC_PER_CHANNEL, True),
 }
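
The registry entries added to examples/models/__init__.py are consumed by name. A minimal sketch of that lookup, assuming the existing EagerModelFactory helper in examples/models/model_factory.py (not part of this diff; its exact return tuple varies across versions, hence the starred unpack):

    from examples.models import MODEL_NAME_TO_MODEL
    from examples.models.model_factory import EagerModelFactory

    # Resolve "albert" -> ("albert", "AlbertModelExample") via the dict
    # extended above, then instantiate the example class by name.
    module_name, class_name = MODEL_NAME_TO_MODEL["albert"]
    model, example_inputs, *rest = EagerModelFactory.create_model(module_name, class_name)
    print(type(model).__name__)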
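For a standalone smoke test of one of the new wrappers, a sketch assuming the repo root is on PYTHONPATH so the examples package imports; torch.export is standard PyTorch, not something this diff adds:

    import torch

    from examples.models.wav2vec2 import Wav2Vec2Model

    # Instantiate the example wrapper added in this PR and pull out the
    # eager module plus its example inputs.
    wrapper = Wav2Vec2Model()
    model = wrapper.get_eager_model()
    example_inputs = wrapper.get_example_inputs()

    # torch.export produces the ExportedProgram that ExecuTorch's
    # to_edge/to_backend lowering consumes downstream.
    ep = torch.export.export(model, example_inputs)
    print(ep.graph_module.graph)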