Remove unnecessary output tensor pre-allocation in apply_fp4_marlin_linear (pass c=None to gptq_marlin_gemm)

jinzhen-lin · jinzhen-lin · commit 2f1d4116a014 · 2025-11-29T11:37:51.000+08:00
Signed-off-by: Jinzhen Lin <jinzhen.ljz@antgroup.com>
diff --git a/vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py b/vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py
@@ -120,11 +120,9 @@ def apply_fp4_marlin_linear(
 
         inputs, a_scales = marlin_quant_input(inputs, torch.float8_e4m3fn)
 
-    output = torch.empty(out_shape, dtype=reshaped_x.dtype, device=reshaped_x.device)
-
     output = ops.gptq_marlin_gemm(
         a=inputs,
-        c=output,
+        c=None,
         b_q_weight=weight,
         b_bias=bias,
         b_scales=weight_scale,