refactor(public-release): refactor backend to make it public

baptistecolle · baptistecolle · commit 86dcef2e8ba1 · 2024-12-11T12:34:03.000Z
diff --git a/.github/workflows/benchmark_cuda_pytorch.yaml b/.github/workflows/benchmark_cuda_pytorch.yaml
@@ -3,7 +3,7 @@ name: Benchmark CUDA PyTorch
 on:
   workflow_dispatch:
   schedule:
-    - cron: "0 0 * * *"
+    - cron: "0 3 * * *"
   pull_request:
 
 concurrency:
diff --git a/llm_perf/benchmark_runners/cuda/update_llm_perf_cuda_pytorch.py b/llm_perf/benchmark_runners/cuda/update_llm_perf_cuda_pytorch.py
@@ -191,6 +191,17 @@ def _get_weights_configs(self, subset) -> Dict[str, Dict[str, Any]]:
                     },
                 },
             }
+        elif subset == "torchao":
+            return {
+                "torchao-int4wo-128": {
+                    "torch_dtype": "bfloat16",
+                    "quant_scheme": "torchao",
+                    "quant_config": {
+                        "quant_type": "int4_weight_only",
+                        "group_size": 128,
+                    },
+                },
+            }
         else:
             raise ValueError(f"Unknown subset: {subset}")
 
diff --git a/setup.py b/setup.py
@@ -32,6 +32,7 @@
         "auto-gptq",
         "bitsandbytes",
         "autoawq",
+        "torchao",
     ],
 }