Try to allocate more GPU memory: raise gpu_memory_utilization from 0.6 to 0.85 in run_accuracy_unimodal

ChenTaoyu-SJTU · ChenTaoyu-SJTU · commit afb3a2009e10 · 2025-06-26T02:19:34.000Z
Signed-off-by: ChenTaoyu-SJTU <ctynb@qq.com>
diff --git a/benchmarks/scripts/run_accuracy.py b/benchmarks/scripts/run_accuracy.py
@@ -73,7 +73,7 @@
 
 def run_accuracy_unimodal(queue, model, dataset):
     try:
-        model_args = f"pretrained={model},max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6"
+        model_args = f"pretrained={model},max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.85"
         results = lm_eval.simple_evaluate(
             model="vllm",
             model_args=model_args,