Skip to content

Commit cde7f43

Browse files
chunyuan-w authored and EikanWang committed
enable dil_unbind and fix dil_split
1 parent c529ac0 commit cde7f43

File tree

3 files changed

+49
-3
lines changed

3 files changed

+49
-3
lines changed

scripts/cpu/gen-dense-cpu-ops.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@
7070
'aten::slice.Tensor(Tensor(a) self, int dim=0, int start=0, int end=9223372036854775807, int step=1) -> Tensor(a)',
7171
'aten::select.int(Tensor(a) self, int dim, int index) -> Tensor(a)',
7272
'aten::select.Dimname(Tensor(a) self, Dimname dim, int index) -> Tensor(a)',
73+
'aten::unbind.int(Tensor(a) self, int dim=0) -> Tensor(a)[]',
74+
'aten::unbind.Dimname(Tensor(a) self, Dimname dim) -> Tensor(a)[]',
7375
'aten::view(Tensor(a) self, int[] size) -> Tensor(a)',
7476
'aten::index_select(Tensor self, int dim, Tensor index) -> Tensor',
7577
'aten::_unsafe_view(Tensor self, int[] size) -> Tensor',

torch_ipex/csrc/cpu/DevOPs.cpp

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1985,6 +1985,7 @@ at::Tensor AtenIpexCPUDev::dil_slice(const at::Tensor & self, int64_t dim, int64
19851985
DEBUG("AtenIpexCPUDev::dil_slice\n");
19861986
CHECK_DNNL_OP_PRE_COND(self);
19871987

1988+
// TODO use weight TAG to decide whether to reorder or not
19881989
dbl::comm::reorder_to_bf16_for_mix_prec(self, true);
19891990

19901991
// Port from aten/src/ATen/native/TensorShape.cpp
@@ -2023,6 +2024,22 @@ at::Tensor AtenIpexCPUDev::dil_slice(const at::Tensor & self, int64_t dim, int64
20232024
return result;
20242025
}
20252026

2027+
// Removes dimension `dim` and returns a vector with one slice per index
// along it (DNNL-backed analogue of at::unbind): each slice is produced
// by dil_select, so views/layout handling stays consistent with dil_select.
std::vector<at::Tensor> AtenIpexCPUDev::dil_unbind(const at::Tensor &self, int64_t dim) {
  DEBUG("AtenIpexCPUDev::dil_unbind\n");

  dim = at::maybe_wrap_dim(dim, self.dim());
  int64_t size = dil_size(self, dim);
  std::vector<at::Tensor> tensors(size);
  // int64_t counter: `size` is int64_t, so an `int` index would truncate for
  // very large dimensions and triggers signed-comparison warnings.
  for (int64_t i = 0; i < size; i++) {
    tensors[i] = dil_select(self, dim, i);
  }
  return tensors;
}
2038+
2039+
// Dimname overload: resolve the named dimension to its positional index and
// defer to the positional dil_unbind above.
std::vector<at::Tensor> AtenIpexCPUDev::dil_unbind(const at::Tensor& self, at::Dimname dim) {
  const auto pos = at::dimname_to_position(self, dim);
  return dil_unbind(self, pos);
}
2042+
20262043
at::Tensor AtenIpexCPUDev::dil_select(const at::Tensor & self, int64_t dim, int64_t index) {
20272044
DEBUG("AtenIpexCPUDev::dil_select\n");
20282045
CHECK_DNNL_OP_PRE_COND(self);
@@ -2119,19 +2136,43 @@ at::Tensor AtenIpexCPUDev::dil_select(const at::Tensor & self, at::Dimname dim,
21192136

21202137
// Splits `self` into chunks of `split_size` along `dim` (last chunk may be
// shorter). Mirrors at::split's semantics, materializing each chunk through
// _dil_narrow so slicing goes through the DNNL-aware path.
std::vector<at::Tensor> AtenIpexCPUDev::dil_split(const at::Tensor& self, int64_t split_size, int64_t dim) {
  DEBUG("AtenIpexCPUDev::dil_split\n");
  TORCH_CHECK(self.dim() != 0, "split expects at least a 1-dimensional tensor");
  TORCH_CHECK(split_size >= 0, "split expects split_size be non-negative, but got split_size=", split_size);

  CHECK_DNNL_OP_PRE_COND(self);
  dim = at::maybe_wrap_dim(dim, self.dim());
  int64_t dim_size = dil_size(self, dim);
  // Consistency fix: test the same DNNL-aware `dim_size` that the message
  // reports (the original mixed self.size(dim) into this check).
  TORCH_CHECK(split_size > 0 || dim_size == 0,
    "split_size can only be 0 if dimension size is 0, "
    "but got dimension size of ", dim_size);
  // if split_size is 0 and dimension size is 0, there is 1 split.
  int64_t num_splits = 1;
  if (split_size != 0) {
    // ensuring num_splits is at least 1 makes consistent the case where split_size > dim_size
    // (returns a single split). We might want to error here, but keep it for BC.
    num_splits = std::max<int64_t>((dim_size + split_size - 1) / split_size, 1);
  }
  std::vector<at::Tensor> splits(num_splits);
  // Length of the final chunk when dim_size is not a multiple of split_size.
  int64_t last_split_size = split_size - (split_size * num_splits - dim_size);

  for (int64_t i = 0; i < num_splits; ++i) {
    auto length = i < num_splits - 1 ? split_size : last_split_size;
    splits[i] = _dil_narrow(self, dim, i * split_size, length);
  }
  return splits;
}
2164+
2165+
// TODO only used for dil_split
2166+
at::Tensor AtenIpexCPUDev::_dil_narrow(const at::Tensor& self, int64_t dim, int64_t start, int64_t length) {
2167+
// Port from aten/src/ATen/native/TensorShape.cpp
2168+
TORCH_CHECK(self.dim() > 0, "narrow() cannot be applied to a 0-dim tensor.");
2169+
auto cur_size = self.size(dim);
2170+
if (start != cur_size) { // start being the end is valid, but not a valid dim specification.
2171+
start = at::maybe_wrap_dim(start, cur_size);
2172+
}
2173+
TORCH_CHECK(length >= 0 && start <= cur_size - length,
2174+
"start (", start, ") + length (", length, ") exceeds dimension size (", cur_size, ").");
2175+
return dil_slice(self, dim, start, start + length, 1);
21352176
}
21362177

21372178
at::Tensor AtenIpexCPUDev::dil_gelu(const at::Tensor& input) {

torch_ipex/csrc/cpu/DevOPs.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,14 @@ class AtenIpexCPUDev {
7171
static at::Tensor dil_cat(at::TensorList tensors, int64_t dim);
7272
static std::vector<at::Tensor> dil_split_with_sizes(const at::Tensor& self, at::IntArrayRef split_sizes, int64_t dim);
7373
static std::vector<at::Tensor> dil_split(const at::Tensor& self, int64_t split_size, int64_t dim);
74+
static at::Tensor _dil_narrow(const at::Tensor& self, int64_t dim, int64_t start, int64_t length);
7475
static at::Tensor dil_gelu(const at::Tensor& input);
7576
static at::Tensor dil_gelu_backward(const at::Tensor& grad_output, const at::Tensor& input);
7677
static std::tuple<at::Tensor, at::Tensor, at::Tensor> dil_native_layer_norm(const at::Tensor& X, const at::Tensor& gamma, const at::Tensor& beta, int64_t M, int64_t N, double eps);
7778
static std::tuple<at::Tensor, at::Tensor, at::Tensor> dil_native_layer_norm_backward(const at::Tensor& dY, const at::Tensor& X, const at::Tensor& mean, const at::Tensor& rstd, const at::Tensor& gamma, int64_t M, int64_t N, std::array<bool, 3> grad_input_mask);
7879
static at::Tensor dil_slice(const at::Tensor & self, int64_t dim, int64_t start, int64_t end, int64_t step);
80+
static std::vector<at::Tensor> dil_unbind(const at::Tensor &self, int64_t dim);
81+
static std::vector<at::Tensor> dil_unbind(const at::Tensor& self, at::Dimname dim);
7982
static at::Tensor dil_select(const at::Tensor & self, int64_t dim, int64_t index);
8083
static at::Tensor dil_select(const at::Tensor & self, at::Dimname dim, int64_t index);
8184
static at::Tensor dil_view(const at::Tensor & self, at::IntArrayRef size);

0 commit comments

Comments
 (0)