
Commit 385da7c: Add 3.x readme (#1971)

Signed-off-by: Sun, Xuehao <[email protected]>
1 parent: acd8f4f

4 files changed: +259, -105 lines

README.md (1 addition, 1 deletion)

@@ -146,7 +146,7 @@ quantized_model = fit(model=float_model, conf=static_quant_conf, calib_dataloade
 <td colspan="2" align="center"><a href="./docs/source/3x/design.md#workflows">Workflow</a></td>
 <td colspan="2" align="center"><a href="https://intel.github.io/neural-compressor/latest/docs/source/api-doc/apis.html">APIs</a></td>
 <td colspan="1" align="center"><a href="./docs/source/3x/llm_recipes.md">LLMs Recipes</a></td>
-<td colspan="1" align="center">Examples</td>
+<td colspan="1" align="center"><a href="./examples/3.x_api/README.md">Examples</a></td>
 </tr>
 </tbody>
 <thead>
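The hunk context above is truncated mid-call; it comes from the README's quick-start snippet for post-training static quantization. As a point of reference, here is a minimal self-contained sketch of that 2.x-style `fit` flow; the ResNet-18 model and the random calibration data are placeholders introduced here, not part of the commit:

```python
import torch
from torch.utils.data import DataLoader, TensorDataset
import torchvision
from neural_compressor import PostTrainingQuantConfig
from neural_compressor.quantization import fit

# Placeholder FP32 model and random calibration batches; a real run would use
# the target model and a representative calibration set.
float_model = torchvision.models.resnet18(weights=None)
calib_set = TensorDataset(torch.randn(16, 3, 224, 224),
                          torch.zeros(16, dtype=torch.long))
calib_dataloader = DataLoader(calib_set, batch_size=4)

# Post-training static quantization with default settings; fit() calibrates
# activation ranges on the dataloader and returns the quantized model.
static_quant_conf = PostTrainingQuantConfig()
quantized_model = fit(model=float_model, conf=static_quant_conf,
                      calib_dataloader=calib_dataloader)
```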

docs/source/llm_recipes.md (84 additions, 84 deletions)

@@ -17,8 +17,8 @@ This document aims to publish the specific recipes we achieved for the popular L
 | EleutherAI/gpt-j-6b ||||
 | facebook/opt-1.3b ||||
 | facebook/opt-30b ||||
-| meta-llama/Llama-2-7b-hf | |||
-| meta-llama/Llama-2-13b-hf | |||
+| meta-llama/Llama-2-7b-hf | WIP |||
+| meta-llama/Llama-2-13b-hf | WIP |||
 | meta-llama/Llama-2-70b-hf ||||
 | tiiuae/falcon-7b ||||
 | tiiuae/falcon-40b ||||
@@ -29,8 +29,8 @@ This document aims to publish the specific recipes we achieved for the popular L
 | databricks/dolly-v2-12b ||||
 | EleutherAI/gpt-neox-20b ||||
 | mistralai/Mistral-7B-v0.1 ||||
-| THUDM/chatglm2-6b | || |
-| THUDM/chatglm3-6b | WIP || WIP |
+| THUDM/chatglm2-6b | WIP || WIP |
+| THUDM/chatglm3-6b | WIP || |
 
 **Detail recipes can be found [HERE](https://github.com/intel/intel-extension-for-transformers/blob/main/examples/huggingface/pytorch/text-generation/quantization/llm_quantization_recipes.md).**
 
@@ -68,48 +68,48 @@ This document aims to publish the specific recipes we achieved for the popular L
 <tr>
 <td>baichuan-inc/Baichuan-13B-Chat</td>
 <td>67.57%</td>
-<td>69.07%</td>
-<td>1.0222</td>
+<td>67.86%</td>
+<td>1.0043</td>
 <td>67.55%</td>
 <td>0.9997</td>
-<td>68.12%</td>
-<td>1.0081</td>
-<td>66.93%</td>
-<td>0.9905</td>
+<td>67.46%</td>
+<td>0.9984</td>
+<td>N/A</td>
+<td>N/A</td>
 </tr>
 <tr>
 <td>baichuan-inc/Baichuan2-13B-Chat</td>
 <td>71.51%</td>
-<td>75.57%</td>
-<td>1.0568</td>
+<td>75.51%</td>
+<td>1.0559</td>
 <td>71.57%</td>
 <td>1.0008</td>
-<td>70.81%</td>
-<td>0.9902</td>
-<td>N/A</td>
-<td>N/A</td>
+<td>71.45%</td>
+<td>0.9992</td>
+<td>70.87%</td>
+<td>0.9911</td>
 </tr>
 <tr>
 <td>baichuan-inc/Baichuan2-7B-Chat</td>
 <td>67.67%</td>
-<td>68.06%</td>
-<td>1.0058</td>
+<td>67.51%</td>
+<td>0.9976</td>
 <td>67.61%</td>
 <td>0.9991</td>
-<td>67.90%</td>
-<td>1.0034</td>
-<td>N/A</td>
-<td>N/A</td>
+<td>68.08%</td>
+<td>1.0061</td>
+<td>67.18%</td>
+<td>0.9928</td>
 </tr>
 <tr>
 <td>bigscience/bloom-1b7</td>
 <td>46.34%</td>
-<td>47.99%</td>
-<td>1.0356</td>
+<td>47.97%</td>
+<td>1.0352</td>
 <td>46.21%</td>
 <td>0.9972</td>
-<td>46.90%</td>
-<td>1.0121</td>
+<td>47.00%</td>
+<td>1.0142</td>
 <td>N/A</td>
 <td>N/A</td>
 </tr>
@@ -128,14 +128,14 @@ This document aims to publish the specific recipes we achieved for the popular L
 <tr>
 <td>EleutherAI/gpt-j-6b</td>
 <td>68.31%</td>
+<td>68.00%</td>
+<td>0.9955</td>
 <td>68.27%</td>
 <td>0.9994</td>
-<td>68.27%</td>
-<td>0.9994</td>
-<td>68.35%</td>
-<td>1.0006</td>
-<td>68.02%</td>
-<td>0.9958</td>
+<td>68.23%</td>
+<td>0.9988</td>
+<td>67.40%</td>
+<td>0.9867</td>
 </tr>
 <tr>
 <td>EleutherAI/gpt-neox-20b</td>
@@ -144,68 +144,68 @@ This document aims to publish the specific recipes we achieved for the popular L
 <td>N/A</td>
 <td>72.29%</td>
 <td>0.9994</td>
-<td>71.74%</td>
-<td>0.9918</td>
+<td>72.15%</td>
+<td>0.9975</td>
 <td>N/A</td>
 <td>N/A</td>
 </tr>
 <tr>
 <td>facebook/opt-1.3b</td>
 <td>57.89%</td>
-<td>57.68%</td>
-<td>0.9964</td>
+<td>57.35%</td>
+<td>0.9907</td>
 <td>58.12%</td>
 <td>1.0040</td>
-<td>58.26%</td>
-<td>1.0064</td>
+<td>58.01%</td>
+<td>1.0021</td>
 <td>N/A</td>
 <td>N/A</td>
 </tr>
 <tr>
 <td>facebook/opt-30b</td>
 <td>71.49%</td>
-<td>71.78%</td>
-<td>1.0041</td>
+<td>71.51%</td>
+<td>1.0003</td>
 <td>71.53%</td>
 <td>1.0006</td>
-<td>71.59%</td>
-<td>1.0014</td>
-<td>71.80%</td>
-<td>1.0043</td>
+<td>71.82%</td>
+<td>1.0046</td>
+<td>71.43%</td>
+<td>0.9992</td>
 </tr>
 <tr>
 <td>meta-llama/Llama-2-13b-hf</td>
 <td>76.77%</td>
-<td>76.25%</td>
-<td>0.9932</td>
+<td>N/A</td>
+<td>N/A</td>
 <td>76.89%</td>
 <td>1.0016</td>
-<td>77.66%</td>
-<td>1.0116</td>
-<td>76.60%</td>
-<td>0.9978</td>
+<td>76.96%</td>
+<td>1.0025</td>
+<td>N/A</td>
+<td>N/A</td>
 </tr>
 <tr>
 <td>meta-llama/Llama-2-70b-hf</td>
 <td>79.64%</td>
-<td>79.14%</td>
-<td>0.9937</td>
+<td>79.53%</td>
+<td>0.9986</td>
 <td>79.62%</td>
 <td>0.9997</td>
-<td>80.09%</td>
-<td>1.0057</td>
-<td>79.68%</td>
-<td>1.0005</td>
+<td>80.05%</td>
+<td>1.0051</td>
+<td>N/A</td>
+<td>N/A</td>
 </tr>
 <tr>
 <td>meta-llama/Llama-2-7b-hf</td>
 <td>73.92%</td>
-<td>73.45%</td>
-<td>0.9936</td>
+<td>N/A</td>
+<td>N/A</td>
 <td>73.90%</td>
 <td>0.9997</td>
-<td>73.84%</td>
-<td>0.9989</td>
+<td>73.51%</td>
+<td>0.9945</td>
 <td>N/A</td>
 <td>N/A</td>
 </tr>
@@ -216,22 +216,22 @@ This document aims to publish the specific recipes we achieved for the popular L
 <td>N/A</td>
 <td>75.80%</td>
 <td>0.9987</td>
-<td>76.25%</td>
-<td>1.0046</td>
-<td>75.74%</td>
-<td>0.9979</td>
+<td>75.37%</td>
+<td>0.9930</td>
+<td>75.82%</td>
+<td>0.9989</td>
 </tr>
 <tr>
 <td>THUDM/chatglm2-6b</td>
 <td>53.23%</td>
-<td>52.86%</td>
-<td>0.9930</td>
+<td>N/A</td>
+<td>N/A</td>
 <td>53.00%</td>
 <td>0.9957</td>
-<td>52.90%</td>
-<td>0.9938</td>
-<td>52.92%</td>
-<td>0.9942</td>
+<td>N/A</td>
+<td>N/A</td>
+<td>N/A</td>
+<td>N/A</td>
 </tr>
 <tr>
 <td>THUDM/chatglm3-6b</td>
@@ -242,31 +242,31 @@ This document aims to publish the specific recipes we achieved for the popular L
 <td>0.9990</td>
 <td>N/A</td>
 <td>N/A</td>
-<td>N/A</td>
-<td>N/A</td>
+<td>58.59%</td>
+<td>0.9915</td>
 </tr>
 <tr>
 <td>tiiuae/falcon-40b</td>
 <td>77.22%</td>
-<td>76.95%</td>
-<td>0.9965</td>
+<td>77.26%</td>
+<td>1.0005</td>
 <td>77.18%</td>
 <td>0.9995</td>
-<td>77.55%</td>
-<td>1.0043</td>
-<td>77.82%</td>
-<td>1.0078</td>
+<td>77.97%</td>
+<td>1.0097</td>
+<td>N/A</td>
+<td>N/A</td>
 </tr>
 <tr>
 <td>tiiuae/falcon-7b</td>
 <td>74.67%</td>
-<td>76.63%</td>
-<td>1.0262</td>
+<td>76.17%</td>
+<td>1.0201</td>
 <td>74.73%</td>
 <td>1.0008</td>
-<td>75.06%</td>
-<td>1.0052</td>
+<td>74.79%</td>
+<td>1.0016</td>
 <td>N/A</td>
 <td>N/A</td>
 </tr>
 </tbody></table>
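Each pair of columns edited above is a quantized accuracy plus its ratio against the FP32 baseline in the first data column; for example, Baichuan-13B-Chat's updated 67.86% over the 67.57% baseline gives 1.0043. A quick sketch of that arithmetic (the helper function is ours, purely illustrative):

```python
def accuracy_ratio(quantized_acc: float, fp32_acc: float) -> float:
    """Relative accuracy of a quantized model versus its FP32 baseline."""
    return round(quantized_acc / fp32_acc, 4)

# Values from the baichuan-inc/Baichuan-13B-Chat row after this commit:
assert accuracy_ratio(67.86, 67.57) == 1.0043
assert accuracy_ratio(67.55, 67.57) == 0.9997
```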

examples/.config/model_params_tensorflow_3x.json (5 additions, 20 deletions; the visually identical -/+ pairs below differ only in trailing whitespace)

@@ -8,20 +8,6 @@
 "batch_size": 64,
 "fp32_model_url": "https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_7_0/fp32_bert_squad.pb"
 },
-"distilbert_base": {
-"model_src_dir": "nlp/distilbert_base/quantization/ptq",
-"dataset_location": "/tf_dataset2/datasets/sst2_validation_dataset",
-"input_model": "/tf_dataset2/models/tensorflow/distilbert_base/fp32/distilbert_base_fp32.pb",
-"main_script": "main.py",
-"batch_size": 128
-},
-"distilbert_base_sq": {
-"model_src_dir": "nlp/distilbert_base/quantization/ptq",
-"dataset_location": "/tf_dataset2/datasets/sst2_validation_dataset",
-"input_model": "/tf_dataset2/models/tensorflow/distilbert_base/fp32/distilbert_base_fp32.pb",
-"main_script": "main.py",
-"batch_size": 128
-},
 "opt_125m_sq": {
 "model_src_dir": "nlp/large_language_models/quantization/ptq/smoothquant",
 "dataset_location": "",
@@ -97,9 +83,9 @@
 "model_src_dir": "object_detection/yolo_v5/quantization/ptq",
 "dataset_location": "/tf_dataset2/datasets/coco_yolov5/coco",
 "input_model": "/tf_dataset2/models/tensorflow/yolo_v5/yolov5s.pb",
-"main_script": "main.py",
+"main_script": "main.py",
 "batch_size": 1
-},
+},
 "faster_rcnn_resnet50": {
 "model_src_dir": "object_detection/faster_rcnn_resnet50/quantization/ptq",
 "dataset_location": "/tf_dataset/tensorflow/coco_val.record",
@@ -125,14 +111,14 @@
 "model_src_dir": "object_detection/ssd_mobilenet_v1/quantization/ptq",
 "dataset_location": "/tf_dataset/tensorflow/coco_val.record",
 "input_model": "/tf_dataset/pre-train-model-oob/object_detection/ssd_mobilenet_v1/frozen_inference_graph.pb",
-"main_script": "main.py",
+"main_script": "main.py",
 "batch_size": 10
 },
 "ssd_mobilenet_v1_ckpt": {
 "model_src_dir": "object_detection/ssd_mobilenet_v1/quantization/ptq",
 "dataset_location": "/tf_dataset/tensorflow/coco_val.record",
 "input_model": "/tf_dataset/pre-train-model-oob/object_detection/ssd_mobilenet_v1",
-"main_script": "main.py",
+"main_script": "main.py",
 "batch_size": 10
 },
 "wide_deep_large_ds": {
@@ -158,5 +144,4 @@
 "batch_size": 1
 }
 }
-}
-
+}
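Each entry in this config maps a model name to the parameters its example script is run with. Below is a hypothetical reader that only illustrates the schema; the top-level "tensorflow" key (suggested by the trailing closing braces) and the relative file path are assumptions, and the real test harness may consume the file differently:

```python
import json

# Assumption: per-model entries sit under a single top-level "tensorflow" key,
# matching the nesting implied by the closing braces in the diff above.
with open("examples/.config/model_params_tensorflow_3x.json") as f:
    params = json.load(f)

for name, cfg in params["tensorflow"].items():
    # Common keys per entry: model_src_dir, dataset_location, input_model,
    # main_script, batch_size (plus optional ones like fp32_model_url).
    print(f"{name}: {cfg['model_src_dir']}/{cfg['main_script']} "
          f"(batch_size={cfg['batch_size']})")
```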
