Merge branch 'main' into export-D72726244

JakeStevens · web-flow · commit 71e92329994d · 2025-07-11T14:05:36.000-04:00
diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj b/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj
@@ -64,6 +64,7 @@
 		306A71512DC1DC3D00936B1F /* pre_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 306A71472DC1DC3D00936B1F /* pre_tokenizer.cpp */; };
 		306A71522DC1DC3D00936B1F /* token_decoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 306A714B2DC1DC3D00936B1F /* token_decoder.cpp */; };
 		3072D5232DC3EA280083FC83 /* Constants.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3072D5222DC3EA280083FC83 /* Constants.swift */; };
+		F24909E82E207004001E5B69 /* normalizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F24909E72E207004001E5B69 /* normalizer.cpp */; };
 		F292B0752D88B0C200BE6839 /* tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B06F2D88B0C200BE6839 /* tiktoken.cpp */; };
 		F292B0762D88B0C200BE6839 /* llama2c_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B06C2D88B0C200BE6839 /* llama2c_tokenizer.cpp */; };
 		F292B0772D88B0C200BE6839 /* bpe_tokenizer_base.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B06A2D88B0C200BE6839 /* bpe_tokenizer_base.cpp */; };
@@ -152,6 +153,7 @@
 		306A714A2DC1DC3D00936B1F /* std_regex.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = std_regex.cpp; path = src/std_regex.cpp; sourceTree = "<group>"; };
 		306A714B2DC1DC3D00936B1F /* token_decoder.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = token_decoder.cpp; path = src/token_decoder.cpp; sourceTree = "<group>"; };
 		3072D5222DC3EA280083FC83 /* Constants.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Constants.swift; sourceTree = "<group>"; };
+		F24909E72E207004001E5B69 /* normalizer.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = normalizer.cpp; path = src/normalizer.cpp; sourceTree = "<group>"; };
 		F292B06A2D88B0C200BE6839 /* bpe_tokenizer_base.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = bpe_tokenizer_base.cpp; path = src/bpe_tokenizer_base.cpp; sourceTree = "<group>"; };
 		F292B06C2D88B0C200BE6839 /* llama2c_tokenizer.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = llama2c_tokenizer.cpp; path = src/llama2c_tokenizer.cpp; sourceTree = "<group>"; };
 		F292B06F2D88B0C200BE6839 /* tiktoken.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = tiktoken.cpp; path = src/tiktoken.cpp; sourceTree = "<group>"; };
@@ -309,6 +311,7 @@
 		03729F0E2BB203D700152F2E /* tokenizers */ = {
 			isa = PBXGroup;
 			children = (
+				F24909E72E207004001E5B69 /* normalizer.cpp */,
 				F292B06A2D88B0C200BE6839 /* bpe_tokenizer_base.cpp */,
 				306A71452DC1DC3D00936B1F /* hf_tokenizer.cpp */,
 				F292B1002D88B20C00BE6839 /* llama_tiktoken.cpp */,
@@ -598,6 +601,7 @@
 			files = (
 				03D151B82E0E0908007A38BE /* LLaVARunner.mm in Sources */,
 				03729EE12BB1F93800152F2E /* LLaMARunner.mm in Sources */,
+				F24909E82E207004001E5B69 /* normalizer.cpp in Sources */,
 				0372C3152C89418E00CD942A /* llava_runner.cpp in Sources */,
 				03D151CA2E0E98C4007A38BE /* sentencepiece.cpp in Sources */,
 				03D151D92E0E9E43007A38BE /* ExecuTorchTextLLMRunner.mm in Sources */,
diff --git a/exir/emit/test/test_emit.py b/exir/emit/test/test_emit.py
@@ -1875,3 +1875,55 @@ def forward(self, x):
                     ),
                 )
             )
+
+    def test_emit_sym_min_max(self) -> None:
+        class SymMaxModel(nn.Module):
+            def __init__(self, test_min=False):
+                super().__init__()
+                self.test_min = test_min
+
+            def forward(self, x):
+                # Get size of 0th dimension - this creates sym_size op
+                batch_size = x.shape[0]
+                # Compute max of batch_size and 10 - this should create sym_max op
+                if self.test_min:
+                    out_size = min(batch_size, 10)
+                else:
+                    out_size = max(batch_size, 10)
+                # Create a 1D tensor of zeros with the computed size
+                result = torch.zeros(out_size, dtype=x.dtype, device=x.device)
+                return result
+
+        for validate_min in [True, False]:
+            model = SymMaxModel(test_min=validate_min)
+            test_inputs = [
+                torch.randn(5, 3),  # should output zeros(10) for max zeros(5) for min
+                torch.randn(15, 3),  # should output zeros(15) for max zeros(10) for min
+                torch.randn(10, 3),  # should output zeros(10) for max zeros(10) for min
+            ]
+            model.eval()
+            reference_outputs = []
+            with torch.no_grad():
+                for _, inp in enumerate(test_inputs):
+                    output = model(inp)
+                    reference_outputs.append(output)
+
+            batch_dim = Dim("batch", min=1, max=20)
+            dynamic_shapes = {"x": {0: batch_dim}}  # 0th dimension is dynamic
+            exported_program = torch.export.export(
+                model, (test_inputs[0],), dynamic_shapes=dynamic_shapes
+            )
+            edge_program = to_edge(
+                exported_program,
+                compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
+            )
+            et_program = edge_program.to_executorch()
+            program_buffer = et_program.buffer
+            et_module = _load_for_executorch_from_buffer(program_buffer)
+            for _, (inp, expected) in enumerate(zip(test_inputs, reference_outputs)):
+                # Execute with ExecutorTorch
+                et_output = et_module.forward([inp])
+                et_result = et_output[0]  # Get first output
+                # Compare results
+                self.assertTrue(expected.shape == et_result.shape)
+                self.assertTrue(torch.allclose(expected, et_result))
diff --git a/exir/passes/executorch_prim_ops_registry.py b/exir/passes/executorch_prim_ops_registry.py
@@ -110,6 +110,20 @@ def trunc(a: _SymScalar) -> _SymScalar:
     return math.trunc(a)  # pyre-ignore
 
 
+@bind_pattern_to_op(
+    executorch_prims_lib, "sym_max.Scalar(Scalar a, Scalar b) -> Scalar"
+)
+def sym_max(a: _SymScalar, b: _SymScalar) -> bool:
+    return max(a, b)  # pyre-ignore
+
+
+@bind_pattern_to_op(
+    executorch_prims_lib, "sym_min.Scalar(Scalar a, Scalar b) -> Scalar"
+)
+def sym_min(a: _SymScalar, b: _SymScalar) -> bool:
+    return min(a, b)  # pyre-ignore
+
+
 _PYTHON_SYM_OPS_TO_EXECUTORCH_SYM_OPS: Dict[Any, OpOverload] = {
     builtins.round: ops.backend.executorch_prim.round.Scalar,
     math.ceil: ops.backend.executorch_prim.ceil.Scalar,
@@ -127,12 +141,12 @@ def trunc(a: _SymScalar) -> _SymScalar:
     operator.mod: ops.backend.executorch_prim.mod.Scalar,
     operator.neg: ops.backend.executorch_prim.neg.Scalar,
     torch.sym_float: ops.backend.executorch_prim.sym_float.Scalar,
+    torch.sym_max: ops.backend.executorch_prim.sym_max.Scalar,
+    torch.sym_min: ops.backend.executorch_prim.sym_min.Scalar,
 }
 
 
-_EXECUTORCH_SYM_OPS: Set[OpOverload] = set(
-    _PYTHON_SYM_OPS_TO_EXECUTORCH_SYM_OPS.values()
-)
+_EXECUTORCH_SYM_OPS: Set[Any] = set(_PYTHON_SYM_OPS_TO_EXECUTORCH_SYM_OPS.values())
 _EXECUTORCH_SYM_OPS.update(
     {
         torch.ops.aten.sym_stride.int,
diff --git a/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj b/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj
@@ -35,6 +35,7 @@
 		30AA4B662DC0766800B1BE50 /* re2_regex.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 30AA4B5C2DC0766800B1BE50 /* re2_regex.cpp */; };
 		3C6ABD332DFA27DE0015DE55 /* regex_lookahead.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3C6ABD322DFA27DE0015DE55 /* regex_lookahead.cpp */; };
 		F22E9E1A2DF2CBB900EC5425 /* text_llm_runner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F22E9E192DF2CBB900EC5425 /* text_llm_runner.cpp */; };
+		F24909E22E206FBA001E5B69 /* normalizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F24909E12E206FBA001E5B69 /* normalizer.cpp */; };
 		F292B01D2D88AF3500BE6839 /* bpe_tokenizer_base.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B0162D88AF3500BE6839 /* bpe_tokenizer_base.cpp */; };
 		F292B0202D88AF3500BE6839 /* llama2c_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B0172D88AF3500BE6839 /* llama2c_tokenizer.cpp */; };
 		F292B0212D88AF3500BE6839 /* tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B01A2D88AF3500BE6839 /* tiktoken.cpp */; };
@@ -100,6 +101,7 @@
 		3C6ABD322DFA27DE0015DE55 /* regex_lookahead.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = regex_lookahead.cpp; path = src/regex_lookahead.cpp; sourceTree = "<group>"; };
 		F22E9E182DF2CBB900EC5425 /* text_llm_runner.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = text_llm_runner.h; sourceTree = "<group>"; };
 		F22E9E192DF2CBB900EC5425 /* text_llm_runner.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = text_llm_runner.cpp; sourceTree = "<group>"; };
+		F24909E12E206FBA001E5B69 /* normalizer.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = normalizer.cpp; path = src/normalizer.cpp; sourceTree = "<group>"; };
 		F292B0162D88AF3500BE6839 /* bpe_tokenizer_base.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = bpe_tokenizer_base.cpp; path = src/bpe_tokenizer_base.cpp; sourceTree = "<group>"; };
 		F292B0172D88AF3500BE6839 /* llama2c_tokenizer.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = llama2c_tokenizer.cpp; path = src/llama2c_tokenizer.cpp; sourceTree = "<group>"; };
 		F292B01A2D88AF3500BE6839 /* tiktoken.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = tiktoken.cpp; path = src/tiktoken.cpp; sourceTree = "<group>"; };
@@ -185,6 +187,7 @@
 		032A74022CAFBB7800932D36 /* tokenizers */ = {
 			isa = PBXGroup;
 			children = (
+				F24909E12E206FBA001E5B69 /* normalizer.cpp */,
 				F2E1B5162E03AC19002C9718 /* sentencepiece.cpp */,
 				3C6ABD322DFA27DE0015DE55 /* regex_lookahead.cpp */,
 				30AA4B592DC0766800B1BE50 /* hf_tokenizer.cpp */,
@@ -430,6 +433,7 @@
 				F292B0202D88AF3500BE6839 /* llama2c_tokenizer.cpp in Sources */,
 				F292B0212D88AF3500BE6839 /* tiktoken.cpp in Sources */,
 				F2E1B5172E03AC19002C9718 /* sentencepiece.cpp in Sources */,
+				F24909E22E206FBA001E5B69 /* normalizer.cpp in Sources */,
 				03E7E6792CBDCAE900205E71 /* CoreMLTests.mm in Sources */,
 				032A74232CAFC1B300932D36 /* runner.cpp in Sources */,
 				03B2D37A2C8A515C0046936E /* GenericTests.mm in Sources */,
diff --git a/extension/llm/tokenizers b/extension/llm/tokenizers
@@ -1 +1 @@
-Subproject commit d202b36fe006457c2139a423ef183ca4ce7c410c
+Subproject commit 23359bdce7bedc084e101c39e89506dff459dab8
diff --git a/kernels/prim_ops/register_prim_ops.cpp b/kernels/prim_ops/register_prim_ops.cpp
@@ -12,6 +12,7 @@
 #include <executorch/runtime/kernel/kernel_includes.h>
 #include <executorch/runtime/kernel/operator_registry.h>
 
+#include <algorithm>
 #include <cmath>
 
 using torch::executor::function::et_copy_index;
@@ -120,6 +121,48 @@ static Kernel prim_ops[] = {
           int64_t numel = self_tensor.numel();
           out = EValue(numel);
         }),
+    // executorch_prim::sym_max.Scalar(SymInt a, SymInt b) -> SymInt
+    // stack[0], stack[1] are the operands; stack[2] receives the larger one.
+    Kernel(
+        "executorch_prim::sym_max.Scalar",
+        [](KernelRuntimeContext& context, EValue** stack) {
+          EValue& lhs = *stack[0];
+          EValue& rhs = *stack[1];
+          EValue& result = *stack[2];
+          if (!lhs.isInt() || !rhs.isInt()) {
+            ET_KERNEL_CHECK_MSG(
+                context,
+                false,
+                InvalidType,
+                /* void */,
+                "sym_max only supports int inputs, got %zu, %zu",
+                (size_t)lhs.tag,
+                (size_t)rhs.tag);
+          } else {
+            result = EValue(std::max(lhs.toInt(), rhs.toInt()));
+          }
+        }),
+    // executorch_prim::sym_min.Scalar(SymInt a, SymInt b) -> SymInt
+    // stack[0], stack[1] are the operands; stack[2] receives the smaller one.
+    Kernel(
+        "executorch_prim::sym_min.Scalar",
+        [](KernelRuntimeContext& context, EValue** stack) {
+          EValue& lhs = *stack[0];
+          EValue& rhs = *stack[1];
+          EValue& result = *stack[2];
+          if (!lhs.isInt() || !rhs.isInt()) {
+            ET_KERNEL_CHECK_MSG(
+                context,
+                false,
+                InvalidType,
+                /* void */,
+                "sym_min only supports int inputs, got %zu, %zu",
+                (size_t)lhs.tag,
+                (size_t)rhs.tag);
+          } else {
+            result = EValue(std::min(lhs.toInt(), rhs.toInt()));
+          }
+        }),
     // executorch_prim::add.Scalar(Scalar, Scalar) -> Scalar
     Kernel(
         "executorch_prim::add.Scalar",
diff --git a/kernels/prim_ops/test/prim_ops_test.cpp b/kernels/prim_ops/test/prim_ops_test.cpp
@@ -37,6 +37,8 @@ class RegisterPrimOpsTest : public OperatorTest {
 TEST_F(RegisterPrimOpsTest, OpRegistered) {
   EXPECT_TRUE(hasOpsFn("aten::sym_size.int"));
   EXPECT_TRUE(hasOpsFn("aten::sym_numel"));
+  EXPECT_TRUE(hasOpsFn("executorch_prim::sym_max.Scalar"));
+  EXPECT_TRUE(hasOpsFn("executorch_prim::sym_min.Scalar"));
 }
 
 TEST_F(RegisterPrimOpsTest, SymSizeReturnsCorrectValue) {
@@ -81,6 +83,88 @@ TEST_F(RegisterPrimOpsTest, SymNumelReturnsCorrectValue) {
   EXPECT_EQ(stack[1]->toInt(), expected);
 }
 
+TEST_F(RegisterPrimOpsTest, SymMaxReturnsCorrectValue) {
+  EValue values[3]; // [0] = a, [1] = b, [2] = slot the op writes its result to
+  int64_t a = 5;
+  int64_t b = 3;
+  int64_t out = 0;
+  values[0] = EValue(a);
+  values[1] = EValue(b);
+  values[2] = EValue(out);
+
+  EValue* stack[3]; // the op is invoked with pointers into values[]
+  for (size_t i = 0; i < 3; i++) {
+    stack[i] = &values[i];
+  }
+
+  getOpsFn("executorch_prim::sym_max.Scalar")(context_, stack);
+  EXPECT_EQ(stack[2]->toInt(), 5);
+
+  // Swapped operands (3, 5): the result must not depend on argument order.
+  values[0] = EValue(b);
+  values[1] = EValue(a);
+  values[2] = EValue(out);
+  getOpsFn("executorch_prim::sym_max.Scalar")(context_, stack);
+  EXPECT_EQ(stack[2]->toInt(), 5);
+
+  // Equal operands (5, 5): the max is that same value.
+  values[0] = EValue(a);
+  values[1] = EValue(a);
+  values[2] = EValue(out);
+  getOpsFn("executorch_prim::sym_max.Scalar")(context_, stack);
+  EXPECT_EQ(stack[2]->toInt(), 5);
+
+  // Both operands negative (-2, -5): -2 is the larger.
+  a = -2;
+  b = -5;
+  values[0] = EValue(a);
+  values[1] = EValue(b);
+  values[2] = EValue(out);
+  getOpsFn("executorch_prim::sym_max.Scalar")(context_, stack);
+  EXPECT_EQ(stack[2]->toInt(), -2);
+}
+
+TEST_F(RegisterPrimOpsTest, SymMinReturnsCorrectValue) {
+  EValue values[3]; // [0] = a, [1] = b, [2] = slot the op writes its result to
+  int64_t a = 5;
+  int64_t b = 3;
+  int64_t out = 0;
+  values[0] = EValue(a);
+  values[1] = EValue(b);
+  values[2] = EValue(out);
+
+  EValue* stack[3]; // the op is invoked with pointers into values[]
+  for (size_t i = 0; i < 3; i++) {
+    stack[i] = &values[i];
+  }
+
+  getOpsFn("executorch_prim::sym_min.Scalar")(context_, stack);
+  EXPECT_EQ(stack[2]->toInt(), 3);
+
+  // Swapped operands (3, 5): the result must not depend on argument order.
+  values[0] = EValue(b);
+  values[1] = EValue(a);
+  values[2] = EValue(out);
+  getOpsFn("executorch_prim::sym_min.Scalar")(context_, stack);
+  EXPECT_EQ(stack[2]->toInt(), 3);
+
+  // Equal operands (5, 5): the min is that same value.
+  values[0] = EValue(a);
+  values[1] = EValue(a);
+  values[2] = EValue(out);
+  getOpsFn("executorch_prim::sym_min.Scalar")(context_, stack);
+  EXPECT_EQ(stack[2]->toInt(), 5);
+
+  // Both operands negative (-2, -5): -5 is the smaller.
+  a = -2;
+  b = -5;
+  values[0] = EValue(a);
+  values[1] = EValue(b);
+  values[2] = EValue(out);
+  getOpsFn("executorch_prim::sym_min.Scalar")(context_, stack);
+  EXPECT_EQ(stack[2]->toInt(), -5);
+}
+
 TEST_F(RegisterPrimOpsTest, TestAlgebraOps) {
   EValue values[3];
   int64_t a = 3;
diff --git a/third-party/CMakeLists.txt b/third-party/CMakeLists.txt
@@ -4,6 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
 add_subdirectory(json)
 add_subdirectory(gflags)
 
@@ -86,6 +87,7 @@ ExternalProject_Add(
              -DFLATCC_REFLECTION=OFF
              -DFLATCC_DEBUG_CLANG_SANITIZE=OFF
              -DFLATCC_INSTALL=ON
+             -DCMAKE_POLICY_VERSION_MINIMUM=3.5
              -DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR>
              -DCMAKE_POSITION_INDEPENDENT_CODE=ON
              -DCMAKE_TOOLCHAIN_FILE=
diff --git a/third-party/ao b/third-party/ao
@@ -1 +1 @@
-Subproject commit bc68b11f1bf77be38721ca7dd2c477aeb5e6626e
+Subproject commit aee079503e6d882e798fd58a42780f5b98ae2126