Reduce the number of retracing (canonicalization) runs in the conversion FX pass pipeline.

chunnienc · copybara-github · commit 0db3444c317c · 2025-04-18T12:01:22.000-07:00
PiperOrigin-RevId: 749104077
diff --git a/ai_edge_torch/_convert/conversion.py b/ai_edge_torch/_convert/conversion.py
@@ -35,14 +35,12 @@ def _run_convert_passes(
   )
 
   passes = [
+      fx_passes.CastInputsBf16ToF32Pass(),
       fx_passes.BuildInterpolateCompositePass(),
-      fx_passes.CanonicalizePass(),
       fx_passes.OptimizeLayoutTransposesPass(),
       fx_passes.CanonicalizePass(),
       fx_passes.BuildAtenCompositePass(),
       fx_passes.RemoveNonUserOutputsPass(),
-      fx_passes.CastInputsBf16ToF32Pass(),
-      fx_passes.CanonicalizePass(),
   ]
 
   # Debuginfo is not injected automatically by odml_torch. Only inject
diff --git a/ai_edge_torch/generative/fx_passes/__init__.py b/ai_edge_torch/generative/fx_passes/__init__.py
@@ -23,8 +23,5 @@ def run_generative_passes(
 ) -> torch.export.ExportedProgram:
   return fx_infra.run_passes(
       exported_program,
-      [
-          RemoveSDPACompositeZeroMaskPass(),
-          CanonicalizePass(),
-      ],
+      [RemoveSDPACompositeZeroMaskPass()],
   )
diff --git a/ai_edge_torch/odml_torch/export.py b/ai_edge_torch/odml_torch/export.py
@@ -264,13 +264,16 @@ def _convert_i64_to_i32(exported_program: torch.export.ExportedProgram):
     exported_program: The exported program to apply the pass.
   """
 
+  is_modified = False
+
   def in_i32(x: int):
     return -2147483648 <= x <= 2147483647
 
   def to_int32(x: torch.Tensor):
     return torch.ops.aten._to_copy.default(x, dtype=torch.int32)
 
   def rewrite_arange(node: torch.fx.Node):
+    nonlocal is_modified
     tensor_meta = node.meta.get("tensor_meta", None)
     if not tensor_meta:
       return
@@ -282,12 +285,14 @@ def rewrite_arange(node: torch.fx.Node):
       return
     op = node.target
     node.target = lambda *args, **kwargs: to_int32(op(*args, **kwargs))
+    is_modified = True
 
   graph_module = exported_program.graph_module
   for node in graph_module.graph.nodes:
 
     if node.target == torch.ops.aten.arange.start_step:
       rewrite_arange(node)
+  return is_modified
 
 
 # TODO(b/331481564) Make this a ai_edge_torch FX pass.
@@ -351,9 +356,9 @@ def exported_program_to_mlir(
       exported_program,
       fx_infra.decomp.pre_lower_decomp(),
   )
-  _convert_i64_to_i32(exported_program)
-  # Run decompositions for retracing and cananicalization.
-  exported_program = fx_infra.safe_run_decompositions(exported_program, {})
+  if _convert_i64_to_i32(exported_program):
+    # Run decompositions for retracing and canonicalization, if modified.
+    exported_program = fx_infra.safe_run_decompositions(exported_program, {})
 
   # Passes below mutate the exported program to a state not executable by torch.
   # Do not call run_decompositions after applying the passes.

Original file line number	Diff line number	Diff line change
`@@ -35,14 +35,12 @@ def _run_convert_passes(`
`35`	`35`	`)`
`36`	`36`
`37`	`37`	`passes = [`
	`38`	`+ fx_passes.CastInputsBf16ToF32Pass(),`
`38`	`39`	`fx_passes.BuildInterpolateCompositePass(),`
`39`		`- fx_passes.CanonicalizePass(),`
`40`	`40`	`fx_passes.OptimizeLayoutTransposesPass(),`
`41`	`41`	`fx_passes.CanonicalizePass(),`
`42`	`42`	`fx_passes.BuildAtenCompositePass(),`
`43`	`43`	`fx_passes.RemoveNonUserOutputsPass(),`
`44`		`- fx_passes.CastInputsBf16ToF32Pass(),`
`45`		`- fx_passes.CanonicalizePass(),`
`46`	`44`	`]`
`47`	`45`
`48`	`46`	`# Debuginfo is not injected automatically by odml_torch. Only inject`