vllm-project
diff --git a/tests/e2e/online_serving/test_qwen3_omni.py b/tests/e2e/online_serving/test_qwen3_omni.py
Lines changed: 22 additions & 7 deletions
diff --git a/tests/entrypoints/test_pd_disaggregation.py b/tests/entrypoints/test_pd_disaggregation.py
Lines changed: 27 additions & 19 deletions
diff --git a/tests/test_config_factory.py b/tests/test_config_factory.py
Lines changed: 56 additions & 0 deletions
@@ -18,7 +18,8 @@
 
 models = ["Qwen/Qwen3-Omni-30B-A3B-Instruct"]
 
-# Set VLLM_TEST_PD_MODE=1 to test PD disaggregation (follow-up — deploy overlay not yet migrated).
+# Set VLLM_TEST_PD_MODE=1 to test PD disaggregation via the deploy config's
+# ``pd_separation`` section.
 _USE_PD = os.environ.get("VLLM_TEST_PD_MODE", "0") == "1"
 
 _CI_DEPLOY = get_deploy_config_path("ci/qwen3_omni_moe.yaml")
@@ -37,6 +38,23 @@ def get_chunk_config(config_path: str | None = None):
     return modify_stage_config(config_path, updates={"async_chunk": True})
 
 
+def get_pd_config(config_path: str | None = None):
+    """Load the qwen3_omni CI deploy yaml with PD separation enabled."""
+    if config_path is None:
+        config_path = _CI_DEPLOY
+    return modify_stage_config(
+        config_path,
+        updates={
+            "pd_separation.enabled": True,
+            "pd_separation.async_chunk": False,
+            "stages": {
+                1: {"devices": "2"},
+                2: {"devices": "2"},
+            },
+        },
+    )
+
+
 def get_prefix_caching_config(config_path: str):
     """Create a stage config with prefix caching enabled on the thinker (stage 0)."""
     path = modify_stage_config(
@@ -52,10 +70,9 @@ def get_prefix_caching_config(config_path: str):
 
 # Platform-specific overrides live inside the new deploy yaml's ``platforms:``
 # section, so a single ``_CI_DEPLOY`` path serves CUDA, ROCm, and XPU.
-# TODO: re-add VLLM_TEST_PD_MODE branch once the PD-disaggregation deploy
-# overlay has been migrated to the new schema (previously used the deleted
-# ``qwen3_omni_moe_pd_ci.yaml`` stage-configs file).
-if current_omni_platform.is_xpu():
+if _USE_PD:
+    stage_configs = [get_pd_config()]
+elif current_omni_platform.is_xpu():
     stage_configs = [_CI_DEPLOY]
 else:  # CUDA + ROCm MI325 share the same deploy config
     stage_configs = [get_chunk_config()]
@@ -111,7 +128,6 @@ def get_max_batch_size(size_type="few"):
 @pytest.mark.advanced_model
 @pytest.mark.core_model
 @pytest.mark.omni
-@pytest.mark.skipif(_USE_PD, reason="Temporarily skip PD mode in this test module.")
 @hardware_test(res={"cuda": "H100", "rocm": "MI325"}, num_cards=3 if _USE_PD else 2)
 @pytest.mark.parametrize("omni_server", test_params, indirect=True)
 def test_mix_to_text_audio_001(omni_server, openai_client) -> None:
@@ -151,7 +167,6 @@ def test_mix_to_text_audio_001(omni_server, openai_client) -> None:
 @pytest.mark.advanced_model
 @pytest.mark.core_model
 @pytest.mark.omni
-@pytest.mark.skipif(_USE_PD, reason="Temporarily skip PD mode in this test module.")
 @hardware_test(res={"cuda": "H100", "rocm": "MI325"}, num_cards=3 if _USE_PD else 2)
 @pytest.mark.parametrize("omni_server", test_params, indirect=True)
 def test_text_to_text_001(omni_server, openai_client) -> None:
 
@@ -1081,41 +1081,49 @@ def test_pop_uses_fallback_when_no_stored(self, monkeypatch):
 
 
 class TestPDYAMLConfig:
-    def test_pd_yaml_loads(self):
-        """The PD separation YAML config should load without errors."""
+    def test_pd_yaml_loads(self, tmp_path):
+        """PD deploy config should merge into a 4-stage runtime config."""
         import os
 
-        yaml_path = os.path.join(
-            os.path.dirname(__file__),
-            "../../vllm_omni/model_executor/stage_configs/qwen3_omni_moe_pd_separation.yaml",
-        )
-        yaml_path = os.path.abspath(yaml_path)
-        if not os.path.exists(yaml_path):
-            pytest.skip("PD separation YAML not found")
+        import vllm_omni.model_executor.models.qwen3_omni.pipeline  # noqa: F401
+        from vllm_omni.config.stage_config import _PIPELINE_REGISTRY, load_deploy_config, merge_pipeline_deploy
 
-        from omegaconf import OmegaConf
+        base_path = os.path.abspath(
+            os.path.join(os.path.dirname(__file__), "../../vllm_omni/deploy/qwen3_omni_moe.yaml")
+        )
+        if not os.path.exists(base_path):
+            pytest.skip("Qwen3-Omni deploy config not found")
+
+        overlay = tmp_path / "qwen3_omni_pd_overlay.yaml"
+        overlay.write_text(
+            f"base_config: {base_path}\n"
+            "pd_separation:\n"
+            "  enabled: true\n"
+            "  async_chunk: false\n",
+            encoding="utf-8",
+        )
 
-        cfg = OmegaConf.load(yaml_path)
-        stages = cfg.stage_args
+        deploy = load_deploy_config(overlay)
+        stages = merge_pipeline_deploy(_PIPELINE_REGISTRY["qwen3_omni_moe"], deploy)
         assert len(stages) == 4
 
         # Prefill stage
-        assert stages[0].is_prefill_only is True
+        assert stages[0].yaml_extras["is_prefill_only"] is True
         assert stages[0].final_output is False
         assert stages[0].is_comprehension is True
 
         # Decode stage
-        assert stages[1].is_decode_only is True
+        assert stages[1].yaml_extras["is_decode_only"] is True
         assert stages[1].final_output is True
         assert stages[1].final_output_type == "text"
         assert stages[1].is_comprehension is True
-        assert 0 in stages[1].engine_input_source
+        assert 0 in stages[1].input_sources
 
         # KV transfer configs
-        assert stages[0].engine_args.kv_transfer_config.kv_role == "kv_producer"
-        assert stages[1].engine_args.kv_transfer_config.kv_role == "kv_consumer"
-        assert stages[0].engine_args.kv_transfer_config.kv_connector == "MooncakeConnector"
-        assert stages[1].engine_args.kv_transfer_config.kv_connector == "MooncakeConnector"
+        assert stages[0].yaml_engine_args["kv_transfer_config"]["kv_role"] == "kv_producer"
+        assert stages[1].yaml_engine_args["kv_transfer_config"]["kv_role"] == "kv_consumer"
+        assert stages[0].yaml_engine_args["kv_transfer_config"]["kv_connector"] == "MooncakeConnector"
+        assert stages[1].yaml_engine_args["kv_transfer_config"]["kv_connector"] == "MooncakeConnector"
 
 
 class TestPrefillStopNeutralization:
 
@@ -812,6 +812,62 @@ def test_merge_pipeline_deploy(self):
         assert s0.yaml_engine_args["engine_output_type"] == "latent"
         assert s0.yaml_extras["default_sampling_params"]["detokenize"] is True
 
+    def test_merge_pipeline_deploy_with_pd_separation(self, tmp_path):
+        from pathlib import Path
+
+        import vllm_omni.model_executor.models.qwen3_omni.pipeline  # noqa: F401
+        from vllm_omni.config.stage_config import load_deploy_config, merge_pipeline_deploy
+
+        pipeline = _PIPELINE_REGISTRY["qwen3_omni_moe"]
+        base = Path(__file__).parent.parent / "vllm_omni" / "deploy" / "qwen3_omni_moe.yaml"
+        if not base.exists():
+            pytest.skip("Deploy config not found")
+
+        overlay = tmp_path / "qwen3_omni_pd.yaml"
+        overlay.write_text(
+            f"base_config: {base}\n"
+            "pd_separation:\n"
+            "  enabled: true\n"
+            "  target_stage_id: 0\n"
+            "  async_chunk: false\n"
+            "  stages:\n"
+            "    - role: prefill\n"
+            "      max_num_seqs: 16\n"
+            "      devices: \"0\"\n"
+            "      engine_extras:\n"
+            "        kv_transfer_config:\n"
+            "          kv_connector: MooncakeConnector\n"
+            "          kv_role: kv_producer\n"
+            "          kv_rank: 0\n"
+            "          kv_parallel_size: 2\n"
+            "    - role: decode\n"
+            "      max_num_seqs: 64\n"
+            "      devices: \"1\"\n"
+            "      engine_extras:\n"
+            "        kv_transfer_config:\n"
+            "          kv_connector: MooncakeConnector\n"
+            "          kv_role: kv_consumer\n"
+            "          kv_rank: 1\n"
+            "          kv_parallel_size: 2\n",
+            encoding="utf-8",
+        )
+
+        deploy = load_deploy_config(overlay)
+        stages = merge_pipeline_deploy(pipeline, deploy)
+
+        assert len(stages) == 4
+        assert stages[0].yaml_extras["is_prefill_only"] is True
+        assert stages[1].yaml_extras["is_decode_only"] is True
+        assert stages[1].input_sources == [0]
+        assert stages[2].input_sources == [1]
+        assert stages[3].input_sources == [2]
+        assert stages[0].yaml_engine_args.get("async_chunk") is not True
+        assert stages[1].yaml_engine_args.get("custom_process_next_stage_input_func") is None
+        assert stages[0].yaml_engine_args["kv_transfer_config"]["kv_role"] == "kv_producer"
+        assert stages[1].yaml_engine_args["kv_transfer_config"]["kv_role"] == "kv_consumer"
+        assert stages[2].yaml_extras["input_connectors"] == {"from_stage_1": "connector_of_shared_memory"}
+        assert stages[3].yaml_extras["input_connectors"] == {"from_stage_2": "connector_of_shared_memory"}
+
 
 class TestQwen3OmniPipeline:
     def test_registered(self):