@@ -411,6 +411,7 @@ class EngineArgs:
411
411
disable_async_output_proc : bool = not ModelConfig .use_async_output_proc
412
412
scheduling_policy : SchedulerPolicy = SchedulerConfig .policy
413
413
scheduler_cls : Union [str , Type [object ]] = SchedulerConfig .scheduler_cls
414
+ use_batch_scheduler : bool = SchedulerConfig .use_batch_scheduler
414
415
415
416
override_neuron_config : dict [str , Any ] = \
416
417
get_field (ModelConfig , "override_neuron_config" )
@@ -855,6 +856,8 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
855
856
** scheduler_kwargs ["disable_chunked_mm_input" ])
856
857
scheduler_group .add_argument ("--scheduler-cls" ,
857
858
** scheduler_kwargs ["scheduler_cls" ])
859
+ scheduler_group .add_argument ("--use-batch-scheduler" ,
860
+ ** scheduler_kwargs ["use_batch_scheduler" ])
858
861
scheduler_group .add_argument (
859
862
"--disable-hybrid-kv-cache-manager" ,
860
863
** scheduler_kwargs ["disable_hybrid_kv_cache_manager" ])
@@ -1182,6 +1185,7 @@ def create_engine_config(
1182
1185
and parallel_config .use_ray ),
1183
1186
policy = self .scheduling_policy ,
1184
1187
scheduler_cls = self .scheduler_cls ,
1188
+ use_batch_scheduler = self .use_batch_scheduler ,
1185
1189
max_num_partial_prefills = self .max_num_partial_prefills ,
1186
1190
max_long_partial_prefills = self .max_long_partial_prefills ,
1187
1191
long_prefill_token_threshold = self .long_prefill_token_threshold ,
@@ -1550,6 +1554,18 @@ def _set_default_args_v1(self, usage_context: UsageContext,
1550
1554
if not self .enable_chunked_prefill :
1551
1555
self .max_num_batched_tokens = model_config .max_model_len
1552
1556
1557
+ if self .use_batch_scheduler :
1558
+ if self .scheduler_cls == EngineArgs .scheduler_cls :
1559
+ self .scheduler_cls = \
1560
+ "vllm.v1.core.sched.scheduler.BatchScheduler"
1561
+ else :
1562
+ logger .warning (
1563
+ "use_batch_scheduler is set to True, "
1564
+ "but a custom scheduler_cls is also provided. "
1565
+ "The specified scheduler_cls (%s) will take precedence, "
1566
+ "and use_batch_scheduler will be ignored." ,
1567
+ self .scheduler_cls )
1568
+
1553
1569
# V1 should use the new scheduler by default.
1554
1570
# Swap it only if this arg is set to the original V0 default
1555
1571
if self .scheduler_cls == EngineArgs .scheduler_cls :
0 commit comments