Skip to content

Commit 20891af

Browse files
committed
review feedback
1 parent 7ccc607 commit 20891af

File tree

3 files changed

+158
-107
lines changed

3 files changed

+158
-107
lines changed

src/python/py/models/builder.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,10 @@ def create_model(
304304
elif config.architectures[0] == "SmolLM3ForCausalLM":
305305
onnx_model = SmolLM3Model(config, io_dtype, onnx_dtype, execution_provider, cache_dir, extra_options)
306306
elif config.architectures[0] == "Qwen2_5_VLForConditionalGeneration":
307+
text_config = config.text_config
308+
for key in text_config:
309+
if not hasattr(config, key):
310+
setattr(config, key, getattr(text_config, key))
307311
print(
308312
"WARNING: This is only generating the text component of the model. Setting `--extra_options exclude_embeds=true` by default."
309313
)

src/python/py/models/builders/base.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ def make_rope_init(self, config):
466466
"sections": config.rope_scaling["mrope_section"], # Sections for MRoPE
467467
}
468468

469-
def make_attention_init(self):
469+
def is_gqa_supported(self) -> bool:
470470
valid_gqa_configurations = {
471471
("cpu", ir.DataType.FLOAT),
472472
("cuda", ir.DataType.FLOAT16),
@@ -476,7 +476,10 @@ def make_attention_init(self):
476476
("webgpu", ir.DataType.FLOAT),
477477
("trt-rtx", ir.DataType.FLOAT16),
478478
}
479-
if (self.ep, self.io_dtype) in valid_gqa_configurations:
479+
return (self.ep, self.io_dtype) in valid_gqa_configurations
480+
481+
def make_attention_init(self):
482+
if self.is_gqa_supported():
480483
# Change model settings for GroupQueryAttention
481484
self.attention_attrs["op_type"] = "GroupQueryAttention"
482485
print("GroupQueryAttention (GQA) is used in this model.")

0 commit comments

Comments (0)