We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 2985f65 · commit 3709433 — Copy full SHA for 3709433
vllm/v1/worker/gpu/spec_decode/eagle.py
@@ -139,7 +139,7 @@ def generate_draft(
         num_tokens_across_dp: torch.Tensor | None,
     ) -> None:
         pos = self.input_buffers.positions[:num_reqs]
-        query_start_loc = self.input_buffers.query_start_loc[: num_reqs + 1]
+        query_start_loc = self.input_buffers.query_start_loc.gpu[: num_reqs + 1]
         for step in range(1, self.num_speculative_steps):
             # Run the eagle model.
             last_hidden_states, hidden_states = self.run_model(
0 commit comments