Introduce data schema to store raw tensors (#6919)

lucylq · web-flow · commit 19268dee3e9e · 2024-11-17T22:09:57.000-08:00
[executorch][serialization] Introduce data schema to store raw tensors Pull Request resolved: #6540 @imported-using-ghimport Differential Revision: [D65156641](https://our.internmc.facebook.com/intern/diff/D65156641/) ghstack-source-id: 253910001
diff --git a/extension/flat_tensor/README.md b/extension/flat_tensor/README.md
@@ -0,0 +1,6 @@
+## FlatTensor
+
+> [!IMPORTANT]
+> FlatTensor is still under development and is not yet ready for use.
+
+FlatTensor is a flatbuffer-based format for storing and loading tensors. The format provides a way to store tensors keyed by string.
diff --git a/extension/flat_tensor/flat_tensor.fbs b/extension/flat_tensor/flat_tensor.fbs
@@ -0,0 +1,75 @@
+// Schema for flatbuffer-serialized tensors.
+
+include "scalar_type.fbs";
+namespace flat_tensor;
+
+// Update after BC breaking changes.
+file_identifier "FT01";
+file_extension "ptd";
+
+table TensorMetadata {
+  // The unique id used to connect the data and program.
+  fully_qualified_name: string;
+  scalar_type: executorch_flatbuffer.ScalarType;
+
+  // Size of each dimension.
+  dim_sizes: [int32];
+
+  // Specifies in what order the dimensions are laid out in memory (from outer
+  // to inner).
+  //
+  // For example, given a rank 3 Tensor of size (3, 5, 2), if we name the
+  // dimensions [row, column, batch], then a dim_order of:
+  // - (2, 0, 1) represents a [batch, row, column] ordering where "column" is
+  //   the innermost dimension, then comes "row", and the outermost dimension is
+  //   "batch".
+  // - (0, 2, 1) represents a [row, batch, column] ordering where "column" is
+  //   the innermost dimension, then comes "batch", and the outermost dimension
+  //   is "row".
+  dim_order: [uint8];
+
+  // FlatTensor.segments index that the tensor data is stored in.
+  segment_index: uint32;
+
+  // Tensor offsets are relative to the start of the DataSegment they live in.
+  // To retrieve a given tensor, add together:
+  // 1. segment_base_offset: from the file header.
+  // 2. segment_offset: segments[segment_index].offset
+  // 3. tensor_offset: tensors[j].offset (this field).
+  //    Find the relevant index j by matching on tensor fqn.
+  offset: uint64;
+}
+
+// Describes a contiguous piece of data that lives outside of the flatbuffer data,
+// typically appended afterwards in the file.
+// For .ptd files, the "extended header" in the file points to the segment base offset.
+table DataSegment {
+  // Segment offsets are relative to the segment base offset provided in the
+  // extended file header. Segments are typically aligned so that they can be
+  // loaded with mmap(). TensorMetadata.offset is relative to this offset.
+  offset: uint64;
+
+  // The size in bytes of valid data starting at the offset. The segment
+  // data may be followed by padding before the segment that follows it,
+  // to make it easier to use mmap().
+  size: uint64;
+}
+
+// FlatTensor is a flatbuffer-based format for storing and loading tensors.
+table FlatTensor {
+  // Schema version.
+  version: uint32;
+
+  // Alignment for each tensor in bytes. Offsets of the tensor provided
+  // in TensorMetadata.offset are aligned to tensor_alignment.
+  tensor_alignment: uint32;
+
+  // Tensor information, including metadata and offsets to the raw tensor data.
+  tensors: [TensorMetadata];
+
+  // List of data segments that follow the FlatTensor data in this file, sorted by
+  // offset. TensorMetadata.segment_index refers to entries in this list by index.
+  segments: [DataSegment];
+}
+
+root_type FlatTensor;
diff --git a/extension/flat_tensor/flat_tensor_schema.py b/extension/flat_tensor/flat_tensor_schema.py
@@ -0,0 +1,38 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from dataclasses import dataclass
+from typing import List
+
+from executorch.exir.scalar_type import ScalarType
+
+# Note: check executorch/extension/flat_tensor/flat_tensor.fbs for explanations of these fields.
+
+
+@dataclass
+class TensorMetadata:
+    fully_qualified_name: str  # Unique id used to connect the data and program.
+    scalar_type: ScalarType
+    dim_sizes: List[int]  # Size of each dimension.
+    dim_order: List[bytes]  # NOTE(review): flat_tensor.fbs declares [uint8]; confirm List[bytes] vs List[int].
+
+    segment_index: int  # Index into FlatTensor.segments holding this tensor's data.
+    offset: int  # Offset of the tensor data, relative to the segment it is stored in.
+
+
+@dataclass
+class DataSegment:
+    offset: int  # Relative to the segment base offset from the extended file header.
+    size: int  # Size in bytes of valid data starting at offset.
+
+
+@dataclass
+class FlatTensor:
+    version: int  # Schema version.
+    tensor_alignment: int  # Alignment, in bytes, of each TensorMetadata.offset.
+    tensors: List[TensorMetadata]  # Per-tensor metadata and offsets to the raw data.
+    segments: List[DataSegment]  # Per flat_tensor.fbs, sorted by offset.
diff --git a/extension/flat_tensor/scalar_type.fbs b/extension/flat_tensor/scalar_type.fbs
@@ -0,0 +1,38 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+
+//
+// See executorch/schema/README.md before modifying this file.
+//
+
+// TODO(T207893511): sync scalar_type.fbs copies across ExecuTorch repo.
+namespace executorch_flatbuffer;
+
+// The scalar data type.
+// Must match executorch/runtime/core/portable_type/tensor_impl.h
+enum ScalarType : byte {
+  BYTE = 0,
+  CHAR = 1,
+  SHORT = 2,
+  INT = 3,
+  LONG = 4,
+  HALF = 5,
+  FLOAT = 6,
+  DOUBLE = 7,
+  BOOL = 11,
+  // TODO(jakeszwe): Verify these are unused and then remove support
+  QINT8 = 12,
+  QUINT8 = 13,
+  QINT32 = 14,
+  QUINT4X2 = 16,
+  QUINT2X4 = 17,
+  BITS16 = 22,
+  // Types currently not implemented; the gaps in the numbering above are these values.
+  // COMPLEXHALF = 8,
+  // COMPLEXFLOAT = 9,
+  // COMPLEXDOUBLE = 10,
+  // BFLOAT16 = 15,
+  // BITS1x8 = 18,
+  // BITS2x4 = 19,
+  // BITS4x2 = 20,
+  // BITS8 = 21,
+}
diff --git a/schema/README.md b/schema/README.md
@@ -2,8 +2,7 @@ The `schema.fbs` file in this directory describes the
 [Flatbuffers](https://google.github.io/flatbuffers/) schema used to serialize
 ExecuTorch programs.
 
-The `scalar_type.fbs` file contains schema for scalar types, used in both
-`schema.fbs` and `bundled_program_schema.fbs`.
+The `scalar_type.fbs` file contains schema for scalar types.
 
 ## Rules to ensure forward/backward compatibility