Skip to content

Commit 05a7d52

Browse files
kwanghoon-meta and facebook-github-bot
authored and committed
Back out "BC Deprecate XN00 Support" (#4573)
Summary: Pull Request resolved: #4573 Original commit changeset: b7e692a06d93 Original Phabricator Diff: D60403908 Reviewed By: tarun292, mcr229 Differential Revision: D60853074 fbshipit-source-id: df42d684cb2f9890e68dadf26aa16636167b29e8
1 parent 9d9cda0 commit 05a7d52

File tree

5 files changed

+494
-43
lines changed

5 files changed

+494
-43
lines changed

backends/xnnpack/CMakeLists.txt

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,21 @@ set(_common_compile_options -Wno-deprecated-declarations -fPIC)
3737

3838
set(_xnnpack_schema__include_dir "${CMAKE_BINARY_DIR}/schema/include")
3939
# Paths to headers generated from the .fbs files.
40+
set(_xnnpack_flatbuffer__outputs)
41+
foreach(fbs_file ${_xnnpack_schema__srcs})
42+
string(REGEX REPLACE "([^/]+)[.]fbs$" "\\1_generated.h" generated
43+
"${fbs_file}"
44+
)
45+
list(APPEND _xnnpack_flatbuffer__outputs
46+
"${_xnnpack_schema__include_dir}/executorch/${generated}"
47+
)
48+
endforeach()
49+
4050
set(_xnnpack_schema__outputs)
4151
foreach(fbs_file ${_xnnpack_schema__srcs})
42-
string(REGEX REPLACE "([^/]+)[.]fbs$" "\\1_generated.h"
43-
generated "${fbs_file}")
52+
string(REGEX REPLACE "runtime_([^/]+)[.]fbs$" "\\1_generated.h" generated
53+
"${fbs_file}"
54+
)
4455
list(APPEND _xnnpack_schema__outputs
4556
"${_xnnpack_schema__include_dir}/executorch/${generated}"
4657
)
@@ -53,6 +64,7 @@ add_custom_command(
5364
${FLATC_EXECUTABLE} --cpp --cpp-std c++11 --scoped-enums -o
5465
"${_xnnpack_schema__include_dir}/executorch/backends/xnnpack/serialization"
5566
${_xnnpack_schema__srcs}
67+
COMMAND mv ${_xnnpack_flatbuffer__outputs} ${_xnnpack_schema__outputs}
5668
WORKING_DIRECTORY ${EXECUTORCH_ROOT}
5769
COMMENT "Generating xnnpack_schema headers"
5870
VERBATIM

backends/xnnpack/runtime/XNNCompiler.cpp

Lines changed: 124 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -124,9 +124,17 @@ const uint8_t* getConstantDataPtr(
124124
const uint8_t* constant_data_ptr) {
125125
auto buffer_idx = tensor_value->constant_buffer_idx();
126126
if (buffer_idx) {
127-
const auto& constant_data_offsets = *flatbuffer_graph->constant_data();
128-
uint64_t constant_data_offset = constant_data_offsets[buffer_idx]->offset();
129-
return constant_data_ptr + constant_data_offset;
127+
if (!constant_data_ptr) {
128+
// TODO(T172265611): Remove constant_buffer in flatbuffer path after BC
129+
// window
130+
const auto& constant_buffer = *flatbuffer_graph->constant_buffer();
131+
return constant_buffer[buffer_idx]->storage()->data();
132+
} else {
133+
const auto& constant_data_offsets = *flatbuffer_graph->constant_data();
134+
uint64_t constant_data_offset =
135+
constant_data_offsets[buffer_idx]->offset();
136+
return constant_data_ptr + constant_data_offset;
137+
}
130138
}
131139

132140
return nullptr;
@@ -186,29 +194,105 @@ Error defineTensor(
186194

187195
xnn_status status;
188196
// The type we might have to convert to
189-
auto datatype = getDataType(tensor_value->datatype());
197+
auto dq_datatype = getDataType(tensor_value->dq_datatype());
198+
199+
if (dq_datatype != xnn_datatype::xnn_datatype_invalid) {
200+
if (dq_datatype != xnn_datatype::xnn_datatype_qint8) {
201+
ET_CHECK_OR_RETURN_ERROR(
202+
false,
203+
Internal,
204+
"Only int8_t is supported for dq_datatype for now, got: %d",
205+
dq_datatype);
206+
} else {
207+
ET_CHECK_OR_RETURN_ERROR(
208+
(tensor_value->flags() & XNN_VALUE_FLAG_EXTERNAL_INPUT),
209+
Internal,
210+
"Dynamic quantization of tensor is only allowed for the external input tensor value for now! got flags: %u",
211+
tensor_value->flags());
212+
}
213+
}
190214

191215
if (qtensor_value == nullptr) {
192216
// FP32 tensor
193-
ET_CHECK_OR_RETURN_ERROR(
194-
!isQuantizedDataType(datatype),
195-
Internal,
196-
"xnn_datatype is quantized, but is not quantized tensor value");
197-
198-
status = xnn_define_tensor_value(
199-
/*subgraph=*/subgraph_ptr,
200-
/*datatype=*/datatype,
201-
/*num_dims=*/tensor_value->num_dims(),
202-
/*dims=*/dims_data.data(),
203-
/*data=*/buffer_ptr,
204-
/*external_id=*/tensor_value->external_id(),
205-
/*flags=*/tensor_value->flags(),
206-
/*id_out=*/&id);
207-
ET_CHECK_OR_RETURN_ERROR(
208-
xnn_status_success == status,
209-
Internal,
210-
"Failed to define tensor with id %i",
211-
id);
217+
if (!isQuantizedDataType(dq_datatype)) {
218+
// Define non-quantied tensor
219+
status = xnn_define_tensor_value(
220+
/*subgraph=*/subgraph_ptr,
221+
/*datatype=*/getDataType(tensor_value->datatype()),
222+
/*num_dims=*/tensor_value->num_dims(),
223+
/*dims=*/dims_data.data(),
224+
/*data=*/buffer_ptr,
225+
/*external_id=*/tensor_value->external_id(),
226+
/*flags=*/tensor_value->flags(),
227+
/*id_out=*/&id);
228+
} else if (dq_datatype != xnn_datatype::xnn_datatype_invalid) {
229+
ET_CHECK_OR_RETURN_ERROR(
230+
isQuantizedDataType(dq_datatype),
231+
Internal,
232+
"Dynamic quantization can only produce supported quantized dtypes");
233+
ET_CHECK_OR_RETURN_ERROR(
234+
tensor_value->external_id() != XNN_INVALID_VALUE_ID,
235+
Internal,
236+
"Dynamic quantization can only work with external inputs for now, got an internal ID");
237+
ET_CHECK_OR_RETURN_ERROR(
238+
buffer_ptr == nullptr,
239+
Internal,
240+
"Dynamic quantization can only work with external inputs for now, got const data");
241+
242+
switch (dq_datatype) {
243+
case xnn_datatype::xnn_datatype_qint8: {
244+
// HACK TO Maintain FC/BC for ASR this will be removed after 01/2024
245+
246+
// When encountering a dynamically quantized tensor via dq_datatype,
247+
// which is the old flow for serializing dynamically quantized linear.
248+
// We replace the definition of a single tensor with a new dynamic
249+
// Quantization pattern. We change the pattern from:
250+
// serialized_qd_input
251+
// to
252+
// (fp32_input --> convert --> qdint8_input)
253+
254+
status = xnn_define_dynamically_quantized_tensor_value(
255+
/*subgraph=*/subgraph_ptr,
256+
/*datatype=*/xnn_datatype_qdint8,
257+
/*num_dims=*/tensor_value->num_dims(),
258+
/*num_nonbatch_dims=*/1, // always do per token quantization
259+
/*dims=*/dims_data.data(),
260+
/*external_id=*/XNN_INVALID_VALUE_ID, // always internal value id
261+
/*flags=*/0, // this is netiher external input or output
262+
/*id_out=*/&id);
263+
264+
// this is the FP16 or FP32 external value that is being dynamically
265+
// quantized
266+
uint32_t float_id;
267+
enum xnn_datatype fp_datatype = getDataType(tensor_value->datatype());
268+
status = xnn_define_tensor_value(
269+
/*subgraph=*/subgraph_ptr,
270+
/*datatype=*/fp_datatype,
271+
/*num_dims=*/tensor_value->num_dims(),
272+
/*dims=*/dims_data.data(),
273+
/*data=*/buffer_ptr,
274+
/*external_id=*/tensor_value->external_id(),
275+
/*flags=*/tensor_value->flags(),
276+
/*id_out=*/&float_id);
277+
278+
// Define dynamic conversion from float to qdint8
279+
status = xnn_define_convert(
280+
/*subgraph=*/subgraph_ptr,
281+
/*input_id=*/float_id,
282+
/*output_id=*/id,
283+
/*flags=*/0);
284+
break;
285+
}
286+
default:
287+
ET_CHECK_OR_RETURN_ERROR(
288+
false,
289+
NotImplemented,
290+
"Unhandled Dyanmic Quantization dtype: %d",
291+
dq_datatype);
292+
}
293+
} else {
294+
ET_CHECK_OR_RETURN_ERROR(false, NotImplemented, "Unhandled fp32 tensor");
295+
}
212296
} else {
213297
// define tensor for quantized
214298
switch (qtensor_value->quant_params_type()) {
@@ -222,7 +306,7 @@ Error defineTensor(
222306
qparams->zero_point());
223307
status = xnn_define_quantized_tensor_value(
224308
/*subgraph=*/subgraph_ptr,
225-
/*datatype=*/datatype,
309+
/*datatype=*/getDataType(tensor_value->datatype()),
226310
/*zero_point=*/qparams->zero_point(),
227311
/*scale=*/qparams->scale(),
228312
/*num_dims=*/tensor_value->num_dims(),
@@ -235,20 +319,21 @@ Error defineTensor(
235319
}
236320
case fb_xnnpack::XNNQuantParams::PerChannelQuant: {
237321
auto qparams = qtensor_value->quant_params_as_PerChannelQuant();
322+
enum xnn_datatype dtype = getDataType(tensor_value->datatype());
238323
int32_t zero_point =
239-
(datatype == xnn_datatype::xnn_datatype_qcint4 ? 8 : 0);
324+
(dtype == xnn_datatype::xnn_datatype_qcint4 ? 8 : 0);
240325

241326
ET_LOG(
242327
Debug,
243328
"define quant tensor (per channel): buffer_ptr: %p, scale.numel(): %u, channel_dim: %u, dtype: %u, zero_point: %d\n",
244329
buffer_ptr,
245330
qparams->scale()->size(),
246331
qparams->channel_dim(),
247-
datatype,
332+
dtype,
248333
zero_point);
249334
status = xnn_define_channelwise_quantized_tensor_value_v2(
250335
/*subgraph=*/subgraph_ptr,
251-
/*datatype=*/datatype,
336+
/*datatype=*/dtype,
252337
/*zero_point=*/zero_point,
253338
/*scale=*/qparams->scale()->data(),
254339
/*num_dims=*/tensor_value->num_dims(),
@@ -261,6 +346,7 @@ Error defineTensor(
261346
break;
262347
}
263348
case fb_xnnpack::XNNQuantParams::PerChannelGroupQuant: {
349+
xnn_datatype datatype = getDataType(tensor_value->datatype());
264350
ET_CHECK_OR_RETURN_ERROR(
265351
datatype == xnn_datatype::xnn_datatype_qbint4,
266352
Internal,
@@ -324,7 +410,7 @@ Error defineTensor(
324410
"Dynamically Quantized Tensors currently only support per token quantization");
325411
status = xnn_define_dynamically_quantized_tensor_value(
326412
/*subgraph=*/subgraph_ptr,
327-
/*datatype=*/datatype,
413+
/*datatype=*/getDataType(tensor_value->datatype()),
328414
/*num_dims=*/tensor_value->num_dims(),
329415
/*num_nonbatch_dims*/ qparams->num_nonbatch_dims(),
330416
/*dims=*/dims_data.data(),
@@ -1508,24 +1594,23 @@ __ET_NODISCARD Error XNNCompiler::compileModel(
15081594
constant_data = reinterpret_cast<const uint8_t*>(buffer_pointer) +
15091595
header->constant_data_offset;
15101596
} else if (header.error() == Error::NotFound) {
1511-
ET_LOG(
1512-
Error,
1513-
"XNNHeader version mismatch: '%.4s' != expected '%.4s'",
1514-
// Header Magic and FlatbufferIdentifier are same offset and size
1515-
flatbuffers::GetBufferIdentifier(buffer_pointer),
1516-
XNNHeader::kMagic);
1517-
return header.error();
1597+
flatbuffer_data = reinterpret_cast<const uint8_t*>(buffer_pointer);
15181598
} else {
15191599
ET_LOG(Error, "XNNHeader may be corrupt");
15201600
return header.error();
15211601
}
15221602

1603+
// Temporarily support identifier XN00 and XN01
1604+
bool is_supported_version =
1605+
strncmp(flatbuffers::GetBufferIdentifier(flatbuffer_data), "XN00", 4) ==
1606+
0 ||
1607+
strncmp(flatbuffers::GetBufferIdentifier(flatbuffer_data), "XN01", 4) ==
1608+
0;
15231609
ET_CHECK_OR_RETURN_ERROR(
1524-
fb_xnnpack::XNNGraphBufferHasIdentifier(flatbuffer_data),
1610+
is_supported_version,
15251611
DelegateInvalidCompatibility,
1526-
"XNNPACK Delegate flatbuffer version mismatch: '%.4s' != expected '%.4s'",
1527-
flatbuffers::GetBufferIdentifier(flatbuffer_data),
1528-
fb_xnnpack::XNNGraphIdentifier());
1612+
"XNNPACK Delegate Serialization Format version identifier '%.4s' != expected XN00 or XN01'",
1613+
flatbuffers::GetBufferIdentifier(flatbuffer_data));
15291614

15301615
auto flatbuffer_graph = fb_xnnpack::GetXNNGraph(flatbuffer_data);
15311616
// initialize xnnpack

0 commit comments

Comments
 (0)