Commit 2f8ecf3

JacobSzwejbka authored and facebook-github-bot committed
Program.fbs change to support serialized mutable state (#4216)
Summary:
Pull Request resolved: #4216

Need a way to indicate values that have a meaningful initial state serialized in the program and that can also be mutated on device.

https://docs.google.com/document/d/1D8WpMmIiQxU_n5OYWXl3mrpBYUewz79izyAO2UknSsM/edit?usp=sharing

Reviewed By: dbort

Differential Revision: D58747605

fbshipit-source-id: 096b40443ba4ecc8044a4d397838309e8c97c8fa
1 parent a567abf commit 2f8ecf3

File tree

10 files changed (+87, -34 lines)


exir/emit/_emitter.py

Lines changed: 1 addition & 1 deletion

@@ -1090,7 +1090,7 @@ def _get_empty_tensor_evalue() -> EValue:
 dim_order=[],
 requires_grad=False,
 layout=0,
-constant_buffer_idx=0,
+data_buffer_idx=0,
 allocation_info=None,
 shape_dynamism=TensorShapeDynamism.STATIC,
 )

exir/emit/test/test_emit.py

Lines changed: 2 additions & 2 deletions

@@ -109,7 +109,7 @@ def check_tensor_buffer_loc(
 value = typing.cast(schema.Tensor, values[value_index].val)
 self.assertIsInstance(value, schema.Tensor)

-self.assertEqual(value.constant_buffer_idx, exp_buffer_idx)
+self.assertEqual(value.data_buffer_idx, exp_buffer_idx)

 if not value.allocation_info:
 self.assertIsNone(exp_mem_id)

@@ -810,7 +810,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 < non_const_buffer_size_without_const_prop_pass[1]
 )

-# cant compare plans directly with __eq__ because of the plan names, and constant_buffer_idx in tensor values
+# cant compare plans directly with __eq__ because of the plan names, and data_buffer_idx in tensor values
 def _compare_execution_plans(
 self, plan_single: ExecutionPlan, plan_merged: ExecutionPlan
 ) -> None:

exir/print_program.py

Lines changed: 1 addition & 1 deletion

@@ -79,7 +79,7 @@ def _format_evalue( # noqa: C901
 evstr = "\033[34m"
 if isinstance(evalue.val, Tensor):
 tensor = evalue.val
-if tensor.constant_buffer_idx > 0:
+if tensor.data_buffer_idx > 0:
 assert not _is_dynamic_shape_tensor(
 tensor
 ), "A constant tensor can not be dynamic shape"

exir/schema.py

Lines changed: 1 addition & 1 deletion

@@ -51,7 +51,7 @@ class Tensor:
 dim_order: List[bytes]
 requires_grad: bool
 layout: int
-constant_buffer_idx: int
+data_buffer_idx: int
 allocation_info: Optional[AllocationDetails]

 # check schema.fbs for explanations

exir/tensor.py

Lines changed: 2 additions & 2 deletions

@@ -308,7 +308,7 @@ def make_allocation_info(mem_id: int, mem_offset: int) -> schema.AllocationDetai


 def make_tensor_value(
-constant_buffer_idx: int,
+data_buffer_idx: int,
 allocation_info: Optional[schema.AllocationDetails],
 spec: TensorSpec,
 ) -> schema.Tensor:

@@ -341,7 +341,7 @@ def to_list(
 sizes=tensor_size,
 dim_order=tensor_dim_order,
 requires_grad=spec.requires_grad,
-constant_buffer_idx=constant_buffer_idx,
+data_buffer_idx=data_buffer_idx,
 allocation_info=allocation_info,
 layout=layout_enum(spec.layout),
 shape_dynamism=spec.shape_dynamism,

exir/tests/common.py

Lines changed: 1 addition & 1 deletion

@@ -49,7 +49,7 @@ def get_test_program() -> Program:
 dim_order=typing.cast(List[bytes], [0, 1]),
 requires_grad=False,
 layout=0,
-constant_buffer_idx=0,
+data_buffer_idx=0,
 allocation_info=AllocationDetails(
 memory_id=1,
 memory_offset_high=0,

exir/tests/test_verification.py

Lines changed: 3 additions & 1 deletion

@@ -4,6 +4,8 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.

+# pyre-unsafe
+
 import unittest

 import torch

@@ -47,7 +49,7 @@ def f(x: torch.Tensor) -> torch.Tensor:
 for val_idx in range(len(test.execution_plan.values)):
 val = test.execution_plan.values[val_idx].val
 if not (
-isinstance(val, Tensor) and val.constant_buffer_idx == 0
+isinstance(val, Tensor) and val.data_buffer_idx == 0
 ) and not isinstance(val, TensorList):
 test.load_value(val_idx)
 vlist = test.get_value_list()

exir/verification/interpreter.py

Lines changed: 4 additions & 4 deletions

@@ -166,11 +166,11 @@ def get_constant_tensors(self) -> List[Tensor]:
 tensors = []
 for elem in self.execution_plan.values:
 val = elem.val
-if isinstance(val, Tensor) and val.constant_buffer_idx != 0:
+if isinstance(val, Tensor) and val.data_buffer_idx != 0:
 # load val into res
 # pyre-fixme[16]
 tensor = bindings.convert_to_tensor(
-self.data_buffers[val.constant_buffer_idx],
+self.data_buffers[val.data_buffer_idx],
 val.scalar_type,
 val.sizes,
 stride_from_dim_order(val.sizes, val.dim_order),

@@ -239,7 +239,7 @@ def load_from_value_list(self, idx: int) -> None: # noqa
 tensor_list.append(self._value_list[i])
 self._value_list[idx] = tensor_list
 elif isinstance(val, Tensor):
-if val.constant_buffer_idx == 0:
+if val.data_buffer_idx == 0:
 # TODO(zhengxu) Verify that argument is actually an out variant
 self._value_list[idx] = torch.empty(
 val.sizes, dtype=get_scalar_type(val.scalar_type)

@@ -248,7 +248,7 @@ def load_from_value_list(self, idx: int) -> None: # noqa
 # Constant Tensor conversion
 # pyre-fixme [16]
 tensor = bindings.convert_to_tensor(
-self.data_buffers[val.constant_buffer_idx],
+self.data_buffers[val.data_buffer_idx],
 val.scalar_type,
 val.sizes,
 stride_from_dim_order(val.sizes, val.dim_order),
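
The interpreter hunks above all branch on the same test: a tensor value with data_buffer_idx == 0 has no serialized bytes and is given an empty placeholder, while a non-zero index is materialized from the program's data buffers. The following is a minimal standalone sketch of that dispatch, using only the fields shown in the diff; SimpleTensor and materialize are hypothetical stand-ins for schema.Tensor and bindings.convert_to_tensor, not code from this commit.

from dataclasses import dataclass
from typing import List

import torch


@dataclass
class SimpleTensor:
    # Hypothetical stand-in for schema.Tensor with just the fields used here.
    sizes: List[int]
    dtype: torch.dtype
    data_buffer_idx: int


def materialize(val: SimpleTensor, data_buffers: List[bytes]) -> torch.Tensor:
    if val.data_buffer_idx == 0:
        # No serialized data: allocate an empty tensor to be filled later
        # (for example by an out-variant op or a user-provided input).
        return torch.empty(val.sizes, dtype=val.dtype)
    # Serialized data: reinterpret the raw bytes at the given buffer index.
    raw = bytearray(data_buffers[val.data_buffer_idx])
    return torch.frombuffer(raw, dtype=val.dtype).reshape(val.sizes)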

runtime/executor/tensor_parser_exec_aten.cpp

Lines changed: 3 additions & 3 deletions

@@ -53,9 +53,9 @@ __ET_NODISCARD Result<void*> getTensorDataPtr(
 const Program* program,
 size_t nbytes,
 HierarchicalAllocator* allocator) {
-if (s_tensor->constant_buffer_idx() > 0) {
-auto data = program->get_constant_buffer_data(
-s_tensor->constant_buffer_idx(), nbytes);
+if (s_tensor->data_buffer_idx() > 0) {
+auto data =
+program->get_constant_buffer_data(s_tensor->data_buffer_idx(), nbytes);
 if (!data.ok()) {
 return data.error();
 }

schema/program.fbs

Lines changed: 69 additions & 18 deletions

@@ -53,6 +53,20 @@ enum TensorShapeDynamism : byte {
 DYNAMIC_UNBOUND = 2,
 }

+
+// Table to put additional information about tensors in that is not applicable
+// to the vast majority of tensors in the vast majority of programs.
+table ExtraTensorInfo {
+// [Optional] Specifies the SubsegmentOffsets in
+// program.mutable_data_segments that specifies where the data is located in.
+// If not present and the data is located in a segment, then the data is in
+// the first index.
+mutable_data_segments_idx:uint64;
+
+// [Optional] The unique name of the tensor. e.g. 'mod.linear.weight'
+fully_qualified_name:string;
+}
+
 table Tensor {
 scalar_type:ScalarType;

@@ -63,26 +77,47 @@ table Tensor {

 sizes:[int];

-// Specifies in what order the dimensions are laid out in memory (from outer to inner).
-// For example, given a rank 3 Tensor of size (3, 5, 2). If we name dimensions: [row, column, batch], then a dim_order of:
-// (2, 0, 1) represents a [batch, row, column] ordering where "column" is the innermost dimension, then comes "row", and the outermost dimension is "batch".
-// (0, 2, 1) represents a [row, batch, column] ordering where "column" is the innermost dimension, then comes "batch", and the outermost dimension is "row".
+// Specifies in what order the dimensions are laid out in memory (from outer
+// to inner).
+//
+// For example, given a rank 3 Tensor of size (3, 5, 2). If we name
+// dimensions: [row, column, batch], then a dim_order of:
+// - (2, 0, 1) represents a [batch, row, column] ordering where "column" is
+// the innermost dimension, then comes "row", and the outermost dimension is
+// "batch".
+// - (0, 2, 1) represents a [row, batch, column] ordering where "column" is
+// the innermost dimension, then comes "batch", and the outermost dimension
+// is "row".
 dim_order:[ubyte];

 // out of scope M1
 requires_grad:bool;

-// Overall, a Tensor is either constant or non-constant, except we differentiate 2 special
-// variants of non-constant Tensor ("input" and control-flow "placeholder") as a special
-// optimization to avoid holding unnecessary AllocationDetails.
+// Overall, a Tensor is either constant or mutable. At method load time
+// constant tensors receive a dataptr into the serialized program. Mutable
+// tensors can either receive a pointer from the heirarchical allocator or a
+// nullptr if they will receive a data pointer at execution time (inputs
+// and control flow placeholders can be like this). Mutable tensors may or
+// may not also have an initial value in the serialized program.
+//
 // In summary:
-// constant_buffer_idx > 0, allocation_info = Null: Tensor is a constant
-// constant_buffer_idx = 0, allocation_info = Non Null: Tensor is a non-constant.
-// constant_buffer_idx = 0, allocation_info = Null: Tensor is a non-constant
-// that will receive a dataptr at input time or during execution.
+// data_buffer_idx > 0, allocation_info = Null: Tensor is a constant.
+// data_buffer_idx = 0, allocation_info = Non Null: Tensor is mutable and
+// will receive a dataptr at method load time.
+// data_buffer_idx = 0, allocation_info = Null: Tensor is mutable and
+// will receive a dataptr at input time or during execution.
+// data_buffer_idx > 0, allocation_info = Non Null: Tensor is mutable and
+// will receive a dataptr at method load time, and has an initial state.
 //
-// Index to the program's constant buffer table, value 0 is reserved to indicate non constant
-constant_buffer_idx:uint;
+// Tensor data is stored inline if program.constant_buffer is null. Otherwise
+// it is in a segment. If this tensor's allocation_info is null then the
+// tensor data location is specified by program.constant_segment. If the
+// allocation_info is non_null then the data is somewhere in
+// program.mutable_data_segments. If tensor_info is Null, then the data is
+// in program.mutable_data_segments[0] otherwise if tensor_info is non-null
+// then the mutable_data_segment index is specified by
+// tensor_info.mutable_data_segments_index.
+data_buffer_idx:uint;

 // [Optional] preallocation details for non-constants (null otherwise).
 allocation_info:AllocationDetails;

@@ -102,7 +137,11 @@ table Tensor {
 //
 // 3. dynamism == DYNAMIC_UNBOUND: the stored sizes field can be ignored since
 // shape is fully dynamic.
-shape_dynamism: TensorShapeDynamism;
+shape_dynamism:TensorShapeDynamism;
+
+// [Optional] Additional information about the Tensor that is not applicable
+// to most tensors.
+extra_tensor_info:ExtraTensorInfo;
 }

 table Int {

@@ -276,9 +315,11 @@ table BackendDelegate {
 compile_specs: [CompileSpec];
 }

-// A sequence of blocking instructions to be executed in order. The abstraction is not currently leveraged,
-// all current programs are 1 chain. We are leaving chains as part of the program definition for future
-// use cases around graph level async where different threads will be represented as seperate chains.
+// A sequence of blocking instructions to be executed in order. The
+// abstraction is not currently leveraged, all current programs are 1 chain.
+// We are leaving chains as part of the program definition for future use cases
+// around graph level async where different threads will be represented as
+// seperate chains.
 table Chain {
 // Indices of the values that are (non-static) inputs into this Chain.
 inputs:[int];

@@ -401,7 +442,17 @@ table Program {
 // offset. If constant_segment.offsets field is non-empty, constant_buffer
 // must be empty. constant_segment.offsets[0] is reserved to be pointed to by
 // non-constant Tensors.
-constant_segment: SubsegmentOffsets;
+constant_segment:SubsegmentOffsets;
+
+// [Optional] Describes the offsets into various segments for each mutable
+// tensor. Only mutable tensors with a meaningful initial state are
+// serialized here (for example weights that will be trained on-device as
+// opposed to just layer activations). Seperate from the constant_segment to
+// reduce peak memory usage by letting us read directly from the PTE file
+// into the mutable tensor, as opposed to loading the .pte data into
+// constant memory, copying it over, and then being unable to release the
+// constant segment. No two elements should point to the same segment.
+mutable_data_segments:[SubsegmentOffsets];
 }

 root_type Program;
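
Taken together, the Tensor and Program comments in this schema change define four legal (data_buffer_idx, allocation_info) combinations plus a rule for locating a mutable tensor's serialized state. Below is a minimal sketch of how a consumer of the schema might interpret them; the dataclass stubs and helper names are hypothetical illustrations, not part of the generated flatbuffer API.

from dataclasses import dataclass
from typing import Optional


@dataclass
class ExtraTensorInfoStub:
    # Hypothetical stand-in for the ExtraTensorInfo table above.
    mutable_data_segments_idx: int = 0
    fully_qualified_name: Optional[str] = None


@dataclass
class TensorStub:
    # Hypothetical stand-in for the Tensor table; has_allocation_info models
    # whether allocation_info is Null or Non Null.
    data_buffer_idx: int
    has_allocation_info: bool
    extra_tensor_info: Optional[ExtraTensorInfoStub] = None


def describe(t: TensorStub) -> str:
    if t.data_buffer_idx > 0 and not t.has_allocation_info:
        return "constant: data served from the constant buffer/segment"
    if t.data_buffer_idx == 0 and t.has_allocation_info:
        return "mutable: planned memory, no serialized initial state"
    if t.data_buffer_idx > 0 and t.has_allocation_info:
        # The case this commit enables: a serialized initial state that is
        # loaded into planned mutable memory at method load time.
        return "mutable: planned memory with a serialized initial state"
    return "mutable: data pointer arrives at input time or during execution"


def mutable_segment_index(t: TensorStub) -> int:
    # Per ExtraTensorInfo: if no extra info is present, segment-based mutable
    # data lives in program.mutable_data_segments[0].
    if t.extra_tensor_info is None:
        return 0
    return t.extra_tensor_info.mutable_data_segments_idx

For example, a weight that will be trained on-device would be serialized with data_buffer_idx > 0 and a non-null allocation_info, and its initial bytes would be read from the mutable_data_segments entry selected by mutable_segment_index.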
