wip

baptistecolle · baptistecolle · commit eb8acb14285e · 2025-02-05T12:50:58.000Z
diff --git a/.dockerignore b/.dockerignore
@@ -1,5 +1,3 @@
-optimum-benchmark
-
 **/*.pyc
 **/*.pyo
 **/*.mo
@@ -8,7 +6,7 @@ optimum-benchmark
 **/__pycache__/
 Dockerfile
 db.sqlite3
-**/*.md
+
 build
 .venv
 ruff_cache
@@ -21,3 +19,5 @@ ruff_cache
 *.rar
 *.7z
 *.iso
+
+
diff --git a/docker/cpu-openvino/Dockerfile b/docker/cpu-openvino/Dockerfile
@@ -2,11 +2,21 @@ FROM ghcr.io/huggingface/optimum-benchmark:latest-cpu
 
 WORKDIR /workspace
 
+# Copy the entire "optimum-benchmark" directory into the container
+COPY optimum-benchmark /workspace/optimum-benchmark
+
+# Install the package in editable mode with extras
+RUN pip install -e /workspace/optimum-benchmark[openvino]
+
+WORKDIR /workspace
+
 COPY setup.py .
 # COPY pyproject.toml .
 
 RUN pip install -e .[openvino]
 
 COPY . .
 
+ENV DEBUG_MODE=1
+
 CMD ["llm-perf", "run-benchmark", "--hardware", "cpu", "--backend", "openvino"]
diff --git a/llm_perf/benchmark_runners/cpu/update_llm_perf_cpu_openvino.py b/llm_perf/benchmark_runners/cpu/update_llm_perf_cpu_openvino.py
@@ -67,12 +67,13 @@ def get_benchmark_config(self, model: str, **kwargs) -> BenchmarkConfig:
         backend_config = OVConfig(
             model=model,
             device="cpu",
-            device_ids="0",
             no_weights=True,
             library="transformers",
             task="text-generation",
             quantization_config=quant_config,
             model_kwargs={"trust_remote_code": True},
+            reshape=True,
+            export=True,
         )
 
         return BenchmarkConfig(
diff --git a/llm_perf/common/benchmark_runner.py b/llm_perf/common/benchmark_runner.py
@@ -95,6 +95,7 @@ def is_benchmark_conducted(self, push_repo_id, subfolder):
             report = BenchmarkReport.from_pretrained(
                 repo_id=push_repo_id, subfolder=subfolder
             )
+            print(report.to_dict())
             if "traceback" in report.to_dict():
                 return False
             else:
diff --git a/llm_perf/common/utils.py b/llm_perf/common/utils.py
@@ -2,10 +2,9 @@
 
 from llm_perf.common.dependency import get_benchmark_top_n, is_debug_mode
 
-INPUT_SHAPES = {"batch_size": 1, "sequence_length": 256}
+INPUT_SHAPES = {"batch_size": 1, "sequence_length": 128}
 GENERATE_KWARGS = {"max_new_tokens": 64, "min_new_tokens": 64}
 
-
 OPEN_LLM_LEADERBOARD = pd.read_csv(
     "hf://datasets/optimum-benchmark/llm-perf-leaderboard/llm-df.csv"
 )
@@ -52,7 +51,7 @@ def get_top_llm_list(n: int = 10) -> list[str]:
 
 
 if is_debug_mode():
-    CANONICAL_PRETRAINED_OPEN_LLM_LIST = ["bigscience/bloomz-560m"]
+    CANONICAL_PRETRAINED_OPEN_LLM_LIST = ["meta-llama/Llama-3.1-8B-Instruct"]
 else:
     CANONICAL_PRETRAINED_OPEN_LLM_LIST = get_top_llm_list(n=get_benchmark_top_n())
     print(
diff --git a/setup.py b/setup.py
@@ -14,7 +14,7 @@
     "huggingface_hub[hf_transfer]",
     "datasets>=2.14.6",
     "beautifulsoup4",
-    "optimum-benchmark @ git+https://github.com/huggingface/optimum-benchmark.git",
+    # "optimum-benchmark @ git+https://github.com/huggingface/optimum-benchmark.git",
 ]
 
 # Optional dependencies
@@ -25,7 +25,7 @@
         "optimum-benchmark[onnxruntime] @ git+https://github.com/huggingface/optimum-benchmark.git",
     ],
     "openvino": [
-        "optimum-benchmark[openvino] @ git+https://github.com/huggingface/optimum-benchmark.git"
+        # "optimum-benchmark[openvino] @ git+https://github.com/huggingface/optimum-benchmark.git"
     ],
     "cuda": [
         "flash-attn",

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -95,6 +95,7 @@ def is_benchmark_conducted(self, push_repo_id, subfolder):` |
| `95` | `95` | `report = BenchmarkReport.from_pretrained(` |
| `96` | `96` | `repo_id=push_repo_id, subfolder=subfolder` |
| `97` | `97` | `)` |
|  | `98` | `+ print(report.to_dict())` |
| `98` | `99` | `if "traceback" in report.to_dict():` |
| `99` | `100` | `return False` |
| `100` | `101` | `else:` |