We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 77f7056, commit 364ec25 (Copy full SHA for 364ec25)
vllm/v1/core/sched/scheduler.py
@@ -777,6 +777,8 @@ def update_from_output(
777
pooler_output = pooler_outputs[req_index]
778
stopped = check_stop(request, self.max_model_len,
779
pooler_output)
780
+ if stopped:
781
+ kv_transfer_params = self._free_request(request)
782
783
# Extract sample logprobs if needed.
784
if request.sampling_params is not None \
@@ -889,6 +891,7 @@ def finish_requests(
889
891
For example, the API server can abort a request when the client
890
892
disconnects.
893
"""
894
+ print("finish requests")
895
assert RequestStatus.is_finished(finished_status)
896
if isinstance(request_ids, str):
897
request_ids = (request_ids, )
0 commit comments