Skip to content

Commit 86dcef2

Browse files
committed
refactor(public-release): refactor backend to make it public
1 parent a930a6f commit 86dcef2

File tree

3 files changed

+13
-1
lines changed

3 files changed

+13
-1
lines changed

.github/workflows/benchmark_cuda_pytorch.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name: Benchmark CUDA PyTorch
33
on:
44
workflow_dispatch:
55
schedule:
6-
- cron: "0 0 * * *"
6+
- cron: "0 3 * * *"
77
pull_request:
88

99
concurrency:

llm_perf/benchmark_runners/cuda/update_llm_perf_cuda_pytorch.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,17 @@ def _get_weights_configs(self, subset) -> Dict[str, Dict[str, Any]]:
191191
},
192192
},
193193
}
194+
elif subset == "torchao":
195+
return {
196+
"torchao-int4wo-128": {
197+
"torch_dtype": "bfloat16",
198+
"quant_scheme": "torchao",
199+
"quant_config": {
200+
"quant_type": "int4_weight_only",
201+
"group_size": 128,
202+
},
203+
},
204+
}
194205
else:
195206
raise ValueError(f"Unknown subset: {subset}")
196207

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
"auto-gptq",
3333
"bitsandbytes",
3434
"autoawq",
35+
"torchao",
3536
],
3637
}
3738

0 commit comments

Comments
 (0)