You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: docs/source/llm_recipes.md
+84 -84 Lines changed: 84 additions & 84 deletions
Original file line number
Diff line number
Diff line change
@@ -17,8 +17,8 @@ This document aims to publish the specific recipes we achieved for the popular L
17
17
| EleutherAI/gpt-j-6b | ✔ | ✔ | ✔ |
18
18
| facebook/opt-1.3b | ✔ | ✔ | ✔ |
19
19
| facebook/opt-30b | ✔ | ✔ | ✔ |
20
-
| meta-llama/Llama-2-7b-hf | ✔ | ✔ | ✔ |
21
-
| meta-llama/Llama-2-13b-hf | ✔ | ✔ | ✔ |
20
+
| meta-llama/Llama-2-7b-hf |WIP| ✔ | ✔ |
21
+
| meta-llama/Llama-2-13b-hf |WIP| ✔ | ✔ |
22
22
| meta-llama/Llama-2-70b-hf | ✔ | ✔ | ✔ |
23
23
| tiiuae/falcon-7b | ✔ | ✔ | ✔ |
24
24
| tiiuae/falcon-40b | ✔ | ✔ | ✔ |
@@ -29,8 +29,8 @@ This document aims to publish the specific recipes we achieved for the popular L
29
29
| databricks/dolly-v2-12b | ✖ | ✔ | ✖ |
30
30
| EleutherAI/gpt-neox-20b | ✖ | ✔ | ✔ |
31
31
| mistralai/Mistral-7B-v0.1 | ✖ | ✔ | ✔ |
32
-
| THUDM/chatglm2-6b | ✔ | ✔ | ✔ |
33
-
| THUDM/chatglm3-6b | WIP | ✔ |WIP|
32
+
| THUDM/chatglm2-6b |WIP | ✔ |WIP|
33
+
| THUDM/chatglm3-6b | WIP | ✔ | ✔ |
34
34
35
35
**Detailed recipes can be found [HERE](https://github.com/intel/intel-extension-for-transformers/blob/main/examples/huggingface/pytorch/text-generation/quantization/llm_quantization_recipes.md).**
36
36
@@ -68,48 +68,48 @@ This document aims to publish the specific recipes we achieved for the popular L
68
68
<tr>
69
69
<td>baichuan-inc/Baichuan-13B-Chat</td>
70
70
<td>67.57%</td>
71
-
<td>69.07%</td>
72
-
<td>1.0222</td>
71
+
<td>67.86%</td>
72
+
<td>1.0043</td>
73
73
<td>67.55%</td>
74
74
<td>0.9997</td>
75
-
<td>68.12%</td>
76
-
<td>1.0081</td>
77
-
<td>66.93%</td>
78
-
<td>0.9905</td>
75
+
<td>67.46%</td>
76
+
<td>0.9984</td>
77
+
<td>N/A</td>
78
+
<td>N/A</td>
79
79
</tr>
80
80
<tr>
81
81
<td>baichuan-inc/Baichuan2-13B-Chat</td>
82
82
<td>71.51%</td>
83
-
<td>75.57%</td>
84
-
<td>1.0568</td>
83
+
<td>75.51%</td>
84
+
<td>1.0559</td>
85
85
<td>71.57%</td>
86
86
<td>1.0008</td>
87
-
<td>70.81%</td>
88
-
<td>0.9902</td>
89
-
<td>N/A</td>
90
-
<td>N/A</td>
87
+
<td>71.45%</td>
88
+
<td>0.9992</td>
89
+
<td>70.87%</td>
90
+
<td>0.9911</td>
91
91
</tr>
92
92
<tr>
93
93
<td>baichuan-inc/Baichuan2-7B-Chat</td>
94
94
<td>67.67%</td>
95
-
<td>68.06%</td>
96
-
<td>1.0058</td>
95
+
<td>67.51%</td>
96
+
<td>0.9976</td>
97
97
<td>67.61%</td>
98
98
<td>0.9991</td>
99
-
<td>67.90%</td>
100
-
<td>1.0034</td>
101
-
<td>N/A</td>
102
-
<td>N/A</td>
99
+
<td>68.08%</td>
100
+
<td>1.0061</td>
101
+
<td>67.18%</td>
102
+
<td>0.9928</td>
103
103
</tr>
104
104
<tr>
105
105
<td>bigscience/bloom-1b7</td>
106
106
<td>46.34%</td>
107
-
<td>47.99%</td>
108
-
<td>1.0356</td>
107
+
<td>47.97%</td>
108
+
<td>1.0352</td>
109
109
<td>46.21%</td>
110
110
<td>0.9972</td>
111
-
<td>46.90%</td>
112
-
<td>1.0121</td>
111
+
<td>47.00%</td>
112
+
<td>1.0142</td>
113
113
<td>N/A</td>
114
114
<td>N/A</td>
115
115
</tr>
@@ -128,14 +128,14 @@ This document aims to publish the specific recipes we achieved for the popular L
128
128
<tr>
129
129
<td>EleutherAI/gpt-j-6b</td>
130
130
<td>68.31%</td>
131
+
<td>68.00%</td>
132
+
<td>0.9955</td>
131
133
<td>68.27%</td>
132
134
<td>0.9994</td>
133
-
<td>68.27%</td>
134
-
<td>0.9994</td>
135
-
<td>68.35%</td>
136
-
<td>1.0006</td>
137
-
<td>68.02%</td>
138
-
<td>0.9958</td>
135
+
<td>68.23%</td>
136
+
<td>0.9988</td>
137
+
<td>67.40%</td>
138
+
<td>0.9867</td>
139
139
</tr>
140
140
<tr>
141
141
<td>EleutherAI/gpt-neox-20b</td>
@@ -144,68 +144,68 @@ This document aims to publish the specific recipes we achieved for the popular L
144
144
<td>N/A</td>
145
145
<td>72.29%</td>
146
146
<td>0.9994</td>
147
-
<td>71.74%</td>
148
-
<td>0.9918</td>
147
+
<td>72.15%</td>
148
+
<td>0.9975</td>
149
149
<td>N/A</td>
150
150
<td>N/A</td>
151
151
</tr>
152
152
<tr>
153
153
<td>facebook/opt-1.3b</td>
154
154
<td>57.89%</td>
155
-
<td>57.68%</td>
156
-
<td>0.9964</td>
155
+
<td>57.35%</td>
156
+
<td>0.9907</td>
157
157
<td>58.12%</td>
158
158
<td>1.0040</td>
159
-
<td>58.26%</td>
160
-
<td>1.0064</td>
159
+
<td>58.01%</td>
160
+
<td>1.0021</td>
161
161
<td>N/A</td>
162
162
<td>N/A</td>
163
163
</tr>
164
164
<tr>
165
165
<td>facebook/opt-30b</td>
166
166
<td>71.49%</td>
167
-
<td>71.78%</td>
168
-
<td>1.0041</td>
167
+
<td>71.51%</td>
168
+
<td>1.0003</td>
169
169
<td>71.53%</td>
170
170
<td>1.0006</td>
171
-
<td>71.59%</td>
172
-
<td>1.0014</td>
173
-
<td>71.80%</td>
174
-
<td>1.0043</td>
171
+
<td>71.82%</td>
172
+
<td>1.0046</td>
173
+
<td>71.43%</td>
174
+
<td>0.9992</td>
175
175
</tr>
176
176
<tr>
177
177
<td>meta-llama/Llama-2-13b-hf</td>
178
178
<td>76.77%</td>
179
-
<td>76.25%</td>
180
-
<td>0.9932</td>
179
+
<td>N/A</td>
180
+
<td>N/A</td>
181
181
<td>76.89%</td>
182
182
<td>1.0016</td>
183
-
<td>77.66%</td>
184
-
<td>1.0116</td>
185
-
<td>76.60%</td>
186
-
<td>0.9978</td>
183
+
<td>76.96%</td>
184
+
<td>1.0025</td>
185
+
<td>N/A</td>
186
+
<td>N/A</td>
187
187
</tr>
188
188
<tr>
189
189
<td>meta-llama/Llama-2-70b-hf</td>
190
190
<td>79.64%</td>
191
-
<td>79.14%</td>
192
-
<td>0.9937</td>
191
+
<td>79.53%</td>
192
+
<td>0.9986</td>
193
193
<td>79.62%</td>
194
194
<td>0.9997</td>
195
-
<td>80.09%</td>
196
-
<td>1.0057</td>
197
-
<td>79.68%</td>
198
-
<td>1.0005</td>
195
+
<td>80.05%</td>
196
+
<td>1.0051</td>
197
+
<td>N/A</td>
198
+
<td>N/A</td>
199
199
</tr>
200
200
<tr>
201
201
<td>meta-llama/Llama-2-7b-hf</td>
202
202
<td>73.92%</td>
203
-
<td>73.45%</td>
204
-
<td>0.9936</td>
203
+
<td>N/A</td>
204
+
<td>N/A</td>
205
205
<td>73.90%</td>
206
206
<td>0.9997</td>
207
-
<td>73.84%</td>
208
-
<td>0.9989</td>
207
+
<td>73.51%</td>
208
+
<td>0.9945</td>
209
209
<td>N/A</td>
210
210
<td>N/A</td>
211
211
</tr>
@@ -216,22 +216,22 @@ This document aims to publish the specific recipes we achieved for the popular L
216
216
<td>N/A</td>
217
217
<td>75.80%</td>
218
218
<td>0.9987</td>
219
-
<td>76.25%</td>
220
-
<td>1.0046</td>
221
-
<td>75.74%</td>
222
-
<td>0.9979</td>
219
+
<td>75.37%</td>
220
+
<td>0.9930</td>
221
+
<td>75.82%</td>
222
+
<td>0.9989</td>
223
223
</tr>
224
224
<tr>
225
225
<td>THUDM/chatglm2-6b</td>
226
226
<td>53.23%</td>
227
-
<td>52.86%</td>
228
-
<td>0.9930</td>
227
+
<td>N/A</td>
228
+
<td>N/A</td>
229
229
<td>53.00%</td>
230
230
<td>0.9957</td>
231
-
<td>52.90%</td>
232
-
<td>0.9938</td>
233
-
<td>52.92%</td>
234
-
<td>0.9942</td>
231
+
<td>N/A</td>
232
+
<td>N/A</td>
233
+
<td>N/A</td>
234
+
<td>N/A</td>
235
235
</tr>
236
236
<tr>
237
237
<td>THUDM/chatglm3-6b</td>
@@ -242,31 +242,31 @@ This document aims to publish the specific recipes we achieved for the popular L
0 commit comments