Skip to content

Qualcomm AI Engine Direct - GA FocalNet #11097

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backends/qualcomm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64")

pybind11_extension(PyQnnManagerAdaptor)
pybind11_extension(PyQnnWrapperAdaptor)
if(NOT MSVC AND NOT ${CMAKE_BUILD_TYPE} MATCHES Debug|RelWithDebInfo)
if(NOT MSVC AND NOT ${CMAKE_BUILD_TYPE} MATCHES RelWithDebInfo)
# Strip unnecessary sections of the binary
pybind11_strip(PyQnnManagerAdaptor)
pybind11_strip(PyQnnWrapperAdaptor)
Expand Down
2 changes: 2 additions & 0 deletions backends/qualcomm/_passes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from .annotate_adaptive_avg_pool1d import AnnotateAdaptiveAvgPool1D
from .annotate_quant_attrs import AnnotateQuantAttrs
from .annotate_stack import AnnotateStack
from .annotate_unbind import AnnotateUnbind
Expand Down Expand Up @@ -38,6 +39,7 @@


__all__ = [
AnnotateAdaptiveAvgPool1D,
AnnotateQuantAttrs,
AnnotateStack,
AnnotateUnbind,
Expand Down
43 changes: 43 additions & 0 deletions backends/qualcomm/_passes/annotate_adaptive_avg_pool1d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import torch
from executorch.backends.qualcomm.builders.node_visitor import q_ops
from executorch.backends.qualcomm.utils.constants import QCOM_QUANT_ATTRS
from executorch.exir.pass_base import ExportPass, PassResult
from torch.fx.passes.utils.source_matcher_utils import get_source_partitions

from .utils import get_quant_attrs


class AnnotateAdaptiveAvgPool1D(ExportPass):
    """
    Add "quant_attrs" to graph nodes' meta from the QDQ information
    generated after quantization process.

    adaptive_avg_pool1d got decomposed to unsqueeze -> adaptive_avg_pool2d -> squeeze,
    so every node of the matched source partition receives its own copy of
    the attributes read from the quantize node that consumes the partition.
    """

    def __init__(self, edge_program: torch.export.ExportedProgram):
        super().__init__()
        self.edge_program = edge_program

    def _annotate_adaptive_avg_pool1d(self, graph_module: torch.fx.GraphModule):
        """Annotate every adaptive_avg_pool1d source partition in the graph.

        Partitions whose first consumer is not one of ``q_ops`` are left
        untouched, as are partitions without an output node or without any
        consumer (previously these raised ``IndexError``).
        """
        partitions = get_source_partitions(
            graph_module.graph, [torch.ops.aten.adaptive_avg_pool1d.default]
        )
        for src_partitions in partitions.values():
            for src_partition in src_partitions:
                if not src_partition.output_nodes:
                    continue
                output = src_partition.output_nodes[0]
                # Only the first user is inspected, matching the original
                # `list(output.users)[0]` lookup; `None` means no consumer.
                first_user = next(iter(output.users), None)
                if first_user is None or first_user.target not in q_ops:
                    continue
                quant_attrs = get_quant_attrs(self.edge_program, first_user)
                for n in src_partition.nodes:
                    # Copy per node so later edits to one node's attributes
                    # do not leak into the others.
                    n.meta[QCOM_QUANT_ATTRS] = quant_attrs.copy()

    def call(self, graph_module: torch.fx.GraphModule):
        """Run the annotation, recompile the module and report it as modified."""
        self._annotate_adaptive_avg_pool1d(graph_module)
        graph_module.recompile()
        return PassResult(graph_module, True)
3 changes: 2 additions & 1 deletion backends/qualcomm/_passes/annotate_quant_attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import Any, Dict

import torch
from executorch.backends.qualcomm.builders.node_visitor import dq_ops, q_ops
from executorch.backends.qualcomm.builders.utils import get_parameter
from executorch.backends.qualcomm.utils.constants import (
QCOM_DTYPE,
Expand All @@ -20,7 +21,7 @@
)
from executorch.exir.pass_base import ExportPass, PassResult

from .utils import dq_ops, get_quant_attrs, q_ops
from .utils import get_quant_attrs


class AnnotateQuantAttrs(ExportPass):
Expand Down
5 changes: 3 additions & 2 deletions backends/qualcomm/_passes/annotate_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import torch
from executorch.backends.qualcomm.builders.node_visitor import q_ops
from executorch.backends.qualcomm.utils.constants import QCOM_QUANT_ATTRS
from executorch.exir.pass_base import ExportPass, PassResult
from torch.fx.passes.utils.source_matcher_utils import get_source_partitions

from .utils import get_quant_attrs, q_ops
from .utils import get_quant_attrs


class AnnotateStack(ExportPass):
Expand All @@ -27,7 +28,7 @@ def _annotate_stack(self, graph_module: torch.fx.GraphModule):
partitions = get_source_partitions(
graph_module.graph, [torch.stack, torch.ops.aten.stack.default, "stack"]
)
for _, src_partitions in partitions.items():
for src_partitions in partitions.values():
for src_partition in src_partitions:
output = src_partition.output_nodes[0]
if (list(output.users)[0].target) in q_ops:
Expand Down
6 changes: 4 additions & 2 deletions backends/qualcomm/_passes/annotate_unbind.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import torch

from executorch.backends.qualcomm.builders.node_visitor import dq_ops
from executorch.backends.qualcomm.utils.constants import QCOM_QUANT_ATTRS
from executorch.exir.pass_base import ExportPass, PassResult
from torch.fx.passes.utils.source_matcher_utils import get_source_partitions

from .utils import dq_ops, get_quant_attrs
from .utils import get_quant_attrs


class AnnotateUnbind(ExportPass):
Expand All @@ -27,7 +29,7 @@ def _annotate_unbind(self, graph_module: torch.fx.GraphModule):
partitions = get_source_partitions(
graph_module.graph, [torch.unbind, torch.ops.aten.unbind.int, "unbind"]
)
for _, src_partitions in partitions.items():
for src_partitions in partitions.values():
for src_partition in src_partitions:
if src_partition.input_nodes[0].target in dq_ops:
q_node = src_partition.input_nodes[0].args[0]
Expand Down
4 changes: 2 additions & 2 deletions backends/qualcomm/_passes/expand_broadcast_tensor_shape.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
# LICENSE file in the root directory of this source tree.

import torch

from executorch.backends.qualcomm.builders.node_visitor import dq_ops
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass, PassResult
from executorch.exir.passes import dead_code_elimination_pass

from .utils import dq_ops


class ExpandBroadcastTensorShape(ExportPass):
"""
Expand Down
4 changes: 2 additions & 2 deletions backends/qualcomm/_passes/fold_qdq.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import torch

from executorch.backends.qualcomm.builders.node_visitor import dq_ops, q_ops
from executorch.backends.qualcomm.builders.utils import is_parameter
from executorch.backends.qualcomm.utils.constants import QCOM_BYPASS_NODE
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass, PassResult
from executorch.exir.passes import dead_code_elimination_pass

from .utils import dq_ops, q_ops


class FoldQDQ(ExportPass):
"""
Expand Down
4 changes: 2 additions & 2 deletions backends/qualcomm/_passes/insert_io_qdq.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

import torch

from executorch.backends.qualcomm.builders.node_visitor import q_ops

from executorch.backends.qualcomm.builders.utils import is_parameter
from executorch.backends.qualcomm.utils.constants import (
QCOM_ENCODING,
Expand All @@ -16,8 +18,6 @@
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass, PassResult

from .utils import q_ops


class InsertIOQDQ(ExportPass):
"""
Expand Down
8 changes: 5 additions & 3 deletions backends/qualcomm/_passes/qnn_pass_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from typing import Dict

from executorch.backends.qualcomm._passes import (
AnnotateAdaptiveAvgPool1D,
AnnotateQuantAttrs,
AnnotateStack,
AnnotateUnbind,
Expand Down Expand Up @@ -73,6 +74,7 @@ def get_capture_program_passes():
# The second value in each tuple in `default_passes_and_setting` indicates whether the corresponding pass is activated by default.
# If a pass is activated, it will be executed by default.
default_passes_and_setting = [
(AnnotateAdaptiveAvgPool1D, True),
(AnnotateQuantAttrs, True),
(AnnotateStack, True),
(AnnotateUnbind, True),
Expand Down Expand Up @@ -128,11 +130,11 @@ def get_to_edge_transform_passes(
dep_table: Dict = None,
):
# TODO: remove this workaround when target could be correctly detected
from executorch.backends.qualcomm._passes import utils
from executorch.backends.qualcomm.builders import node_visitor
from executorch.exir.dialects._ops import ops as exir_ops

utils.q_ops.add(exir_ops.edge.pt2e_quant.quantize_affine.default)
utils.dq_ops.add(exir_ops.edge.pt2e_quant.dequantize_affine.default)
node_visitor.q_ops.add(exir_ops.edge.pt2e_quant.quantize_affine.default)
node_visitor.dq_ops.add(exir_ops.edge.pt2e_quant.dequantize_affine.default)

passes_job = (
passes_job if passes_job is not None else get_capture_program_passes()
Expand Down
4 changes: 2 additions & 2 deletions backends/qualcomm/_passes/recompose_rms_norm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import torch

from executorch.backends.qualcomm.builders.node_visitor import dq_ops
from executorch.backends.qualcomm.builders.utils import get_parameter, is_parameter
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass, PassResult
from torch.fx.passes.utils.source_matcher_utils import get_source_partitions

from .utils import dq_ops


class RecomposeRmsNorm(ExportPass):
"""
Expand Down
15 changes: 2 additions & 13 deletions backends/qualcomm/_passes/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,6 @@
from torch._subclasses import FakeTensor


q_ops = {
exir_ops.edge.quantized_decomposed.quantize_per_channel.default,
exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
exir_ops.edge.quantized_decomposed.quantize_per_tensor.tensor,
}

dq_ops = {
exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
exir_ops.edge.quantized_decomposed.dequantize_per_tensor.tensor,
exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
}


def copy_meta(meta: Dict, callback=None):
copied = {}
for k, v in meta.items():
Expand Down Expand Up @@ -73,6 +60,7 @@ def get_passes_dependency_for_capture_program():
dict: A dictionary mapping each pass to its corresponding list of dependencies.
"""
from executorch.backends.qualcomm._passes import (
AnnotateAdaptiveAvgPool1D,
AnnotateQuantAttrs,
AnnotateStack,
AnnotateUnbind,
Expand All @@ -94,6 +82,7 @@ def get_passes_dependency_for_capture_program():
)

return {
AnnotateAdaptiveAvgPool1D: [RemoveRedundancy],
AnnotateQuantAttrs: [
RecomposePixelUnshuffle,
ConvertBmmToMatmul,
Expand Down
13 changes: 12 additions & 1 deletion backends/qualcomm/builders/node_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

import numpy as np
import torch
from executorch.backends.qualcomm._passes.utils import dq_ops
from executorch.backends.qualcomm.utils.constants import (
QCOM_AXIS,
QCOM_AXIS_ORDER,
Expand Down Expand Up @@ -79,6 +78,18 @@
exir_ops.edge.quantized_decomposed.dequantize_per_tensor.tensor,
}

# Edge-dialect quantize operator targets. Passes test `node.target in q_ops`
# to recognize quantize nodes. The set is intentionally mutable: the pass
# manager adds further targets to it at runtime.
q_ops = {
exir_ops.edge.quantized_decomposed.quantize_per_channel.default,
exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
exir_ops.edge.quantized_decomposed.quantize_per_tensor.tensor,
}

# Edge-dialect dequantize operator targets, the counterpart of `q_ops`.
# Also mutable and extended at runtime in the same way.
dq_ops = {
exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
exir_ops.edge.quantized_decomposed.dequantize_per_tensor.tensor,
exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
}


class NodeVisitor:
"""
Expand Down
17 changes: 14 additions & 3 deletions backends/qualcomm/quantizer/annotators.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,8 +462,13 @@ def annotate_neg(node: Node, quantization_config: QuantizationConfig) -> None:
annotate_single_in_single_out(node, quantization_config)


@register_annotator([torch.ops.aten.adaptive_avg_pool2d.default])
def annotate_adaptive_avgpool2d(
@register_annotator(
[
torch.ops.aten.adaptive_avg_pool1d.default,
torch.ops.aten.adaptive_avg_pool2d.default,
]
)
def annotate_adaptive_avg_pool(
node: Node, quantization_config: QuantizationConfig
) -> None:
annotate_single_in_single_out(node, quantization_config)
Expand Down Expand Up @@ -1170,7 +1175,13 @@ def annotate_unbind(node: Node, quantization_config: QuantizationConfig) -> None
)


@register_annotator([torch.ops.aten.split.Tensor, torch.ops.aten.chunk.default])
@register_annotator(
[
torch.ops.aten.split_with_sizes.default,
torch.ops.aten.split.Tensor,
torch.ops.aten.chunk.default,
]
)
def annotate_chunk(node: Node, quantization_config: QuantizationConfig) -> None:
if _is_annotated([node]):
return
Expand Down
6 changes: 3 additions & 3 deletions backends/qualcomm/scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ CMAKE_X86_64="build-x86"
BUILD_AARCH64="true"
CMAKE_AARCH64="build-android"
CLEAN="true"
BUILD_TYPE="Debug"
BUILD_TYPE="RelWithDebInfo"
BUILD_JOB_NUMBER="16"

if [ -z PYTHON_EXECUTABLE ]; then
Expand Down Expand Up @@ -71,7 +71,7 @@ if [ "$BUILD_AARCH64" = true ]; then
rm -rf $BUILD_ROOT && mkdir $BUILD_ROOT
else
# Force rebuild flatccrt for the correct platform
cd $BUILD_ROOT/devtools && make clean
cd $BUILD_ROOT/third-party/flatcc && make clean
fi

cd $BUILD_ROOT
Expand Down Expand Up @@ -116,7 +116,7 @@ if [ "$BUILD_X86_64" = true ]; then
rm -rf $BUILD_ROOT && mkdir $BUILD_ROOT
else
# Force rebuild flatccrt for the correct platform
cd $BUILD_ROOT/devtools && make clean
cd $BUILD_ROOT/third-party/flatcc && make clean
fi

cd $BUILD_ROOT
Expand Down
38 changes: 38 additions & 0 deletions backends/qualcomm/tests/test_qnn_delegate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3868,6 +3868,44 @@ def test_fbnet(self):
self.assertGreaterEqual(msg["top_1"], 60)
self.assertGreaterEqual(msg["top_5"], 90)

def test_focalnet(self):
    """Run the FocalNet example script and check its reported accuracy.

    The script is launched as a subprocess and sends a JSON message back
    over a listener bound to ``(self.ip, self.port)``. The test fails with
    the reported text when the message carries an "Error" entry; otherwise
    top-1 must be at least 55 and top-5 at least 80.
    """
    if not self.required_envs([self.image_dataset]):
        self.skipTest("missing required envs")

    script = f"{self.executorch_root}/examples/qualcomm/oss_scripts/focalnet.py"
    cmds = ["python", script]
    cmds += ["--dataset", self.image_dataset]
    cmds += ["--artifact", self.artifact_dir]
    cmds += ["--build_folder", self.build_folder]
    cmds += ["--device", self.device]
    cmds += ["--model", self.model]
    cmds += ["--ip", self.ip]
    cmds += ["--port", str(self.port)]
    # Optional flags are appended only when configured on the test case.
    if self.host:
        cmds += ["--host", self.host]
    if self.shared_buffer:
        cmds.append("--shared_buffer")

    proc = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
    with Listener((self.ip, self.port)) as listener:
        connection = listener.accept()
        proc.communicate()
        result = json.loads(connection.recv())
        if "Error" in result:
            # self.fail raises, so the accuracy checks below are skipped.
            self.fail(result["Error"])
        self.assertGreaterEqual(result["top_1"], 55)
        self.assertGreaterEqual(result["top_5"], 80)

def test_gMLP(self):
if not self.required_envs([self.image_dataset]):
self.skipTest("missing required envs")
Expand Down
Loading
Loading