Skip to content

Commit bd0b54e

Browse files
committed
Ensure token_budget respects max_num_scheduled_tokens constraint
1 parent: cce1d1d · commit: bd0b54e

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

vllm/v1/core/sched/scheduler.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1437,10 +1437,7 @@ def schedule(self) -> SchedulerOutput:
1437 1437
token_budget = self._estimate_token_budget()
1438 1438
req_index = 0
1439 1439
while self.scheduled_queue:
1440-
if token_budget <= 0:
1441-
break
1442-
1443-
scheduled_request = self.scheduled_queue.popleft()
1440+
scheduled_request = self.scheduled_queue[0]
1444 1441

1445 1442
req_id = scheduled_request.request_id
1446 1443
num_new_tokens = scheduled_request.num_new_tokens
@@ -1451,6 +1448,11 @@ def schedule(self) -> SchedulerOutput:
1451 1448
spec_token_ids = scheduled_request.spec_token_ids
1452 1449
request_data = scheduled_request.request_data
1453 1450

1451+
if num_scheduled_tokens and token_budget < num_new_tokens:
1452+
break
1453+
1454+
self.scheduled_queue.popleft()
1455+
1454 1456
# requests in the scheduled_queue can also be preempted or finished.
1455 1457
if self.requests[req_id].status >= RequestStatus.PREEMPTED:
1456 1458
continue

0 commit comments

Comments
 (0)