Skip to content

Commit 364ec25

Browse files
committed
free blocks for finished pooling requests
Signed-off-by: Max de Bayser <[email protected]>
1 parent 77f7056 commit 364ec25

File tree

1 file changed

+3
-0
lines changed

1 file changed

+3
-0
lines changed

vllm/v1/core/sched/scheduler.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -777,6 +777,8 @@ def update_from_output(
777 777   pooler_output = pooler_outputs[req_index]
778 778   stopped = check_stop(request, self.max_model_len,
779 779   pooler_output)
    780 + if stopped:
    781 + kv_transfer_params = self._free_request(request)
780 782
781 783   # Extract sample logprobs if needed.
782 784   if request.sampling_params is not None \
@@ -889,6 +891,7 @@ def finish_requests(
889 891   For example, the API server can abort a request when the client
890 892   disconnects.
891 893   """
    894 + print("finish requests")
892 895   assert RequestStatus.is_finished(finished_status)
893 896   if isinstance(request_ids, str):
894 897   request_ids = (request_ids, )

0 commit comments

Comments
 (0)