@@ -124,9 +124,17 @@ const uint8_t* getConstantDataPtr(
     const uint8_t* constant_data_ptr) {
   auto buffer_idx = tensor_value->constant_buffer_idx();
   if (buffer_idx) {
-    const auto& constant_data_offsets = *flatbuffer_graph->constant_data();
-    uint64_t constant_data_offset = constant_data_offsets[buffer_idx]->offset();
-    return constant_data_ptr + constant_data_offset;
+    if (!constant_data_ptr) {
+      // TODO(T172265611): Remove constant_buffer in flatbuffer path after BC
+      // window
+      const auto& constant_buffer = *flatbuffer_graph->constant_buffer();
+      return constant_buffer[buffer_idx]->storage()->data();
+    } else {
+      const auto& constant_data_offsets = *flatbuffer_graph->constant_data();
+      uint64_t constant_data_offset =
+          constant_data_offsets[buffer_idx]->offset();
+      return constant_data_ptr + constant_data_offset;
+    }
   }
 
   return nullptr;
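For readers following along, here is a minimal, self-contained sketch of the lookup this hunk introduces: prefer the out-of-flatbuffer constant data segment when a base pointer is available, and fall back to the legacy in-flatbuffer constant_buffer during the BC window. The `ConstantDataOffset` struct, the vector containers, and the `resolveConstantData` name are hypothetical stand-ins for the flatbuffer-generated accessors, not the delegate's actual types.

```cpp
#include <cstdint>
#include <vector>

// Hypothetical stand-in for the flatbuffer-generated offset entry.
struct ConstantDataOffset {
  uint64_t offset;
};

// Resolves a tensor's constant data the same way the patched getConstantDataPtr
// does: use the external constant data segment when its base pointer exists,
// otherwise read from the legacy buffers embedded in the flatbuffer.
const uint8_t* resolveConstantData(
    uint32_t buffer_idx,
    const uint8_t* constant_data_ptr, // base of the constant data segment, may be null
    const std::vector<ConstantDataOffset>& offsets, // new path: per-buffer offsets
    const std::vector<std::vector<uint8_t>>& legacy_buffers) { // old path: embedded bytes
  if (buffer_idx == 0) {
    return nullptr; // index 0 means "no constant data" for this tensor
  }
  if (constant_data_ptr == nullptr) {
    return legacy_buffers[buffer_idx].data();
  }
  return constant_data_ptr + offsets[buffer_idx].offset;
}
```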
@@ -186,29 +194,105 @@ Error defineTensor(
 
   xnn_status status;
   // The type we might have to convert to
-  auto datatype = getDataType(tensor_value->datatype());
+  auto dq_datatype = getDataType(tensor_value->dq_datatype());
+
+  if (dq_datatype != xnn_datatype::xnn_datatype_invalid) {
+    if (dq_datatype != xnn_datatype::xnn_datatype_qint8) {
+      ET_CHECK_OR_RETURN_ERROR(
+          false,
+          Internal,
+          "Only int8_t is supported for dq_datatype for now, got: %d",
+          dq_datatype);
+    } else {
+      ET_CHECK_OR_RETURN_ERROR(
+          (tensor_value->flags() & XNN_VALUE_FLAG_EXTERNAL_INPUT),
+          Internal,
+          "Dynamic quantization of tensor is only allowed for the external input tensor value for now! got flags: %u",
+          tensor_value->flags());
+    }
+  }
 
   if (qtensor_value == nullptr) {
     // FP32 tensor
-    ET_CHECK_OR_RETURN_ERROR(
-        !isQuantizedDataType(datatype),
-        Internal,
-        "xnn_datatype is quantized, but is not quantized tensor value");
-
-    status = xnn_define_tensor_value(
-        /*subgraph=*/subgraph_ptr,
-        /*datatype=*/datatype,
-        /*num_dims=*/tensor_value->num_dims(),
-        /*dims=*/dims_data.data(),
-        /*data=*/buffer_ptr,
-        /*external_id=*/tensor_value->external_id(),
-        /*flags=*/tensor_value->flags(),
-        /*id_out=*/&id);
-    ET_CHECK_OR_RETURN_ERROR(
-        xnn_status_success == status,
-        Internal,
-        "Failed to define tensor with id %i",
-        id);
+    if (!isQuantizedDataType(dq_datatype)) {
+      // Define non-quantized tensor
+      status = xnn_define_tensor_value(
+          /*subgraph=*/subgraph_ptr,
+          /*datatype=*/getDataType(tensor_value->datatype()),
+          /*num_dims=*/tensor_value->num_dims(),
+          /*dims=*/dims_data.data(),
+          /*data=*/buffer_ptr,
+          /*external_id=*/tensor_value->external_id(),
+          /*flags=*/tensor_value->flags(),
+          /*id_out=*/&id);
+    } else if (dq_datatype != xnn_datatype::xnn_datatype_invalid) {
+      ET_CHECK_OR_RETURN_ERROR(
+          isQuantizedDataType(dq_datatype),
+          Internal,
+          "Dynamic quantization can only produce supported quantized dtypes");
+      ET_CHECK_OR_RETURN_ERROR(
+          tensor_value->external_id() != XNN_INVALID_VALUE_ID,
+          Internal,
+          "Dynamic quantization can only work with external inputs for now, got an internal ID");
+      ET_CHECK_OR_RETURN_ERROR(
+          buffer_ptr == nullptr,
+          Internal,
+          "Dynamic quantization can only work with external inputs for now, got const data");
+
+      switch (dq_datatype) {
+        case xnn_datatype::xnn_datatype_qint8: {
+          // HACK to maintain FC/BC for ASR; this will be removed after 01/2024
+
+          // When encountering a dynamically quantized tensor via dq_datatype
+          // (the old flow for serializing dynamically quantized linear), we
+          // replace the definition of a single tensor with a new dynamic
+          // quantization pattern. We change the pattern from:
+          //     serialized_qd_input
+          // to
+          //     (fp32_input --> convert --> qdint8_input)
+
+          status = xnn_define_dynamically_quantized_tensor_value(
+              /*subgraph=*/subgraph_ptr,
+              /*datatype=*/xnn_datatype_qdint8,
+              /*num_dims=*/tensor_value->num_dims(),
+              /*num_nonbatch_dims=*/1, // always do per token quantization
+              /*dims=*/dims_data.data(),
+              /*external_id=*/XNN_INVALID_VALUE_ID, // always internal value id
+              /*flags=*/0, // this is neither an external input nor an output
+              /*id_out=*/&id);
+
+          // This is the FP16 or FP32 external value that is being dynamically
+          // quantized.
+          uint32_t float_id;
+          enum xnn_datatype fp_datatype = getDataType(tensor_value->datatype());
+          status = xnn_define_tensor_value(
+              /*subgraph=*/subgraph_ptr,
+              /*datatype=*/fp_datatype,
+              /*num_dims=*/tensor_value->num_dims(),
+              /*dims=*/dims_data.data(),
+              /*data=*/buffer_ptr,
+              /*external_id=*/tensor_value->external_id(),
+              /*flags=*/tensor_value->flags(),
+              /*id_out=*/&float_id);
+
+          // Define dynamic conversion from float to qdint8
+          status = xnn_define_convert(
+              /*subgraph=*/subgraph_ptr,
+              /*input_id=*/float_id,
+              /*output_id=*/id,
+              /*flags=*/0);
+          break;
+        }
+        default:
+          ET_CHECK_OR_RETURN_ERROR(
+              false,
+              NotImplemented,
+              "Unhandled Dynamic Quantization dtype: %d",
+              dq_datatype);
+      }
+    } else {
+      ET_CHECK_OR_RETURN_ERROR(false, NotImplemented, "Unhandled fp32 tensor");
+    }
   } else {
     // define tensor for quantized
     switch (qtensor_value->quant_params_type()) {
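Put together, the new dq_datatype path expands one serialized value into the fp32 --> convert --> qdint8 pattern via the three XNNPACK subgraph calls shown above. Below is a minimal sketch of that same pattern in isolation, with a status check after each call; the subgraph argument, the 2-D dims, and external id 0 are illustrative assumptions rather than values taken from the delegate.

```cpp
#include <xnnpack.h>

// Sketch: define an FP32 external input, a dynamically quantized qdint8
// internal value, and the convert node that quantizes the former into the
// latter at runtime (per-token quantization over the last dimension).
xnn_status define_dq_input_pattern(xnn_subgraph_t subgraph, uint32_t* qd_id_out) {
  const size_t dims[2] = {1, 256}; // assumed [batch, channels] shape

  // Internal qdint8 value that downstream dynamically quantized ops consume.
  uint32_t qd_id = XNN_INVALID_VALUE_ID;
  xnn_status status = xnn_define_dynamically_quantized_tensor_value(
      subgraph,
      xnn_datatype_qdint8,
      /*num_dims=*/2,
      /*num_nonbatch_dims=*/1, // per-token quantization
      dims,
      /*external_id=*/XNN_INVALID_VALUE_ID, // internal value
      /*flags=*/0,
      &qd_id);
  if (status != xnn_status_success) return status;

  // The FP32 external input that will be quantized at runtime.
  uint32_t float_id = XNN_INVALID_VALUE_ID;
  status = xnn_define_tensor_value(
      subgraph,
      xnn_datatype_fp32,
      /*num_dims=*/2,
      dims,
      /*data=*/nullptr,
      /*external_id=*/0,
      /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT,
      &float_id);
  if (status != xnn_status_success) return status;

  // Convert node that performs the dynamic quantization.
  status = xnn_define_convert(subgraph, float_id, qd_id, /*flags=*/0);
  if (status != xnn_status_success) return status;

  *qd_id_out = qd_id;
  return xnn_status_success;
}
```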
@@ -222,7 +306,7 @@ Error defineTensor(
             qparams->zero_point());
         status = xnn_define_quantized_tensor_value(
             /*subgraph=*/subgraph_ptr,
-            /*datatype=*/datatype,
+            /*datatype=*/getDataType(tensor_value->datatype()),
             /*zero_point=*/qparams->zero_point(),
             /*scale=*/qparams->scale(),
             /*num_dims=*/tensor_value->num_dims(),
@@ -235,20 +319,21 @@ Error defineTensor(
       }
       case fb_xnnpack::XNNQuantParams::PerChannelQuant: {
         auto qparams = qtensor_value->quant_params_as_PerChannelQuant();
+        enum xnn_datatype dtype = getDataType(tensor_value->datatype());
         int32_t zero_point =
-            (datatype == xnn_datatype::xnn_datatype_qcint4 ? 8 : 0);
+            (dtype == xnn_datatype::xnn_datatype_qcint4 ? 8 : 0);
 
         ET_LOG(
             Debug,
             "define quant tensor (per channel): buffer_ptr: %p, scale.numel(): %u, channel_dim: %u, dtype: %u, zero_point: %d\n",
             buffer_ptr,
             qparams->scale()->size(),
             qparams->channel_dim(),
-            datatype,
+            dtype,
             zero_point);
         status = xnn_define_channelwise_quantized_tensor_value_v2(
             /*subgraph=*/subgraph_ptr,
-            /*datatype=*/datatype,
+            /*datatype=*/dtype,
             /*zero_point=*/zero_point,
             /*scale=*/qparams->scale()->data(),
             /*num_dims=*/tensor_value->num_dims(),
@@ -261,6 +346,7 @@ Error defineTensor(
         break;
       }
      case fb_xnnpack::XNNQuantParams::PerChannelGroupQuant: {
+        xnn_datatype datatype = getDataType(tensor_value->datatype());
         ET_CHECK_OR_RETURN_ERROR(
             datatype == xnn_datatype::xnn_datatype_qbint4,
             Internal,
@@ -324,7 +410,7 @@ Error defineTensor(
             "Dynamically Quantized Tensors currently only support per token quantization");
         status = xnn_define_dynamically_quantized_tensor_value(
             /*subgraph=*/subgraph_ptr,
-            /*datatype=*/datatype,
+            /*datatype=*/getDataType(tensor_value->datatype()),
             /*num_dims=*/tensor_value->num_dims(),
             /*num_nonbatch_dims*/ qparams->num_nonbatch_dims(),
             /*dims=*/dims_data.data(),
@@ -1508,24 +1594,23 @@ __ET_NODISCARD Error XNNCompiler::compileModel(
     constant_data = reinterpret_cast<const uint8_t*>(buffer_pointer) +
         header->constant_data_offset;
   } else if (header.error() == Error::NotFound) {
-    ET_LOG(
-        Error,
-        "XNNHeader version mismatch: '%.4s' != expected '%.4s'",
-        // Header Magic and FlatbufferIdentifier are same offset and size
-        flatbuffers::GetBufferIdentifier(buffer_pointer),
-        XNNHeader::kMagic);
-    return header.error();
+    flatbuffer_data = reinterpret_cast<const uint8_t*>(buffer_pointer);
   } else {
     ET_LOG(Error, "XNNHeader may be corrupt");
     return header.error();
   }
 
+  // Temporarily support identifiers XN00 and XN01
+  bool is_supported_version =
+      strncmp(flatbuffers::GetBufferIdentifier(flatbuffer_data), "XN00", 4) ==
+          0 ||
+      strncmp(flatbuffers::GetBufferIdentifier(flatbuffer_data), "XN01", 4) ==
+          0;
   ET_CHECK_OR_RETURN_ERROR(
-      fb_xnnpack::XNNGraphBufferHasIdentifier(flatbuffer_data),
+      is_supported_version,
       DelegateInvalidCompatibility,
-      "XNNPACK Delegate flatbuffer version mismatch: '%.4s' != expected '%.4s'",
-      flatbuffers::GetBufferIdentifier(flatbuffer_data),
-      fb_xnnpack::XNNGraphIdentifier());
+      "XNNPACK Delegate Serialization Format version identifier '%.4s' != expected XN00 or XN01",
+      flatbuffers::GetBufferIdentifier(flatbuffer_data));
 
   auto flatbuffer_graph = fb_xnnpack::GetXNNGraph(flatbuffer_data);
   // initialize xnnpack
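The last hunk makes two behavioral changes: a missing XNNHeader now means the whole buffer is treated as the flatbuffer instead of being an error, and the identifier check accepts either XN00 or XN01 rather than requiring an exact match against the generated fb_xnnpack identifier. A small sketch of that relaxed check is below; the function name is hypothetical, and it assumes the default FlatBuffers layout in which the 4-byte file identifier directly follows the 32-bit root offset (which is what flatbuffers::GetBufferIdentifier reads).

```cpp
#include <cstdint>
#include <cstring>

// Accept either serialization format version identifier, mirroring the
// "Temporarily support identifiers XN00 and XN01" check in the hunk above.
bool isSupportedXnnpackIdentifier(const uint8_t* flatbuffer_data) {
  // With the default 32-bit uoffset layout, the 4-char file identifier sits
  // right after the root table offset.
  const char* ident = reinterpret_cast<const char*>(flatbuffer_data) + 4;
  return std::strncmp(ident, "XN00", 4) == 0 ||
      std::strncmp(ident, "XN01", 4) == 0;
}
```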