
Commit 3964503

chunyuan-w authored and EikanWang committed
add check on not dil and not own whole storage tensor
1 parent cde7f43 commit 3964503

File tree

5 files changed: +92 -15 lines changed


tests/cpu/test_bf16_lazy_reorder.py

Lines changed: 59 additions & 0 deletions
@@ -812,6 +812,29 @@ def test_sliced_inplace_eltwise(self):
             self._check_tensor_shape(x_cpu_slice, x_dpcpp_slice)
             self.assertEqual(x_cpu_slice, x_dpcpp_slice, 0.01)
 
+    def test_sliced_eltwise_backward(self):
+        rand_seed = int(get_rand_seed())
+        print("{} rand sed: {}".format(sys._getframe().f_code.co_name, rand_seed))
+        torch.manual_seed(rand_seed)
+
+        input = torch.rand(10, 10, 10)
+        with AutoDNNL(True), AutoMixPrecision(True, train=True):
+            x_cpu = input.clone().requires_grad_()
+            x_cpu_slice = x_cpu[3:7, 3:7, 5]
+
+            x_dpcpp = input.clone().to(device=device).requires_grad_()
+            x_dpcpp_slice = x_dpcpp[3:7, 3:7, 5]
+
+            y_cpu = F.relu(x_cpu_slice)
+            y_dpcpp = F.relu(x_dpcpp_slice)
+
+            y_cpu.sum().backward()
+            y_dpcpp.sum().backward()
+
+            self._check_tensor_shape(y_cpu, y_dpcpp)
+            self.assertEqual(y_cpu, y_dpcpp)
+            self.assertEqual(x_cpu.grad, x_dpcpp.grad)
+
     def test_linear_with_sliced_bias(self):
         bias = torch.rand(30)
         x_cpu = torch.rand(20, 30)
@@ -827,6 +850,42 @@ def test_linear_with_sliced_bias(self):
 
         self.assertEqual(y_cpu, y_dpcpp, 0.1)
 
+    def test_chunk_version_counter(self):
+        rand_seed = int(get_rand_seed())
+        print("{} rand sed: {}".format(sys._getframe().f_code.co_name, rand_seed))
+        torch.manual_seed(rand_seed)
+
+        x_dpcpp = torch.randn(32, 4096).to(device).requires_grad_()
+
+        with AutoDNNL(True), AutoMixPrecision(True, train=True):
+            x_chunked = x_dpcpp.chunk(4, 1)
+
+            output = x_chunked[0].sigmoid_()
+            version_counter = output._version
+
+            output_other = x_chunked[1].sigmoid_()
+            self.assertTrue(output._version == version_counter)
+
+    def test_unbind(self):
+        rand_seed = int(get_rand_seed())
+        print("{} rand sed: {}".format(sys._getframe().f_code.co_name, rand_seed))
+        torch.manual_seed(rand_seed)
+        x_cpu = torch.rand(2, 8, 2)
+        x_dpcpp = copy.deepcopy(x_cpu).to(device=device)
+
+        x_cpu_unbind = torch.unbind(x_cpu)
+        with AutoDNNL(True), AutoMixPrecision(True):
+            self.assertFalse(ipex.core.is_bf16_dil_tensor(x_dpcpp))
+            x_dpcpp_unbind = torch.unbind(x_dpcpp)
+            self.assertTrue(ipex.core.is_bf16_dil_tensor(x_dpcpp))
+            self.assertTrue(ipex.core.is_bf16_dil_tensor(x_dpcpp_unbind[0]))
+            self.assertTrue(ipex.core.is_bf16_dil_tensor(x_dpcpp_unbind[1]))
+
+            self._check_tensor_shape(x_cpu_unbind[0], x_dpcpp_unbind[0])
+            self._check_tensor_shape(x_cpu_unbind[1], x_dpcpp_unbind[1])
+            self.assertEqual(x_cpu_unbind[0], x_dpcpp_unbind[0], 0.01)
+            self.assertEqual(x_cpu_unbind[1], x_dpcpp_unbind[1], 0.01)
+
 class TestBinOPs(TestCase):
     def _gen_shapes(self):
         dims = torch.randint(1, 10, (1,))
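The version counter that test_chunk_version_counter inspects is standard PyTorch bookkeeping: every in-place op bumps a tensor's _version, and a view produced by slicing shares the counter with its base. A minimal plain-PyTorch illustration of that behavior (not part of the commit):

import torch

x = torch.rand(4)
print(x._version)   # 0
x.add_(1)           # an in-place op bumps the version counter
print(x._version)   # 1
x[1:3].zero_()      # an in-place op through a slice bumps the shared counter too
print(x._version)   # 2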

tests/cpu/test_lazy_reorder.py

Lines changed: 12 additions & 0 deletions
@@ -1422,6 +1422,18 @@ def test_split_backward(self):
         y2.backward()
         self.assertEqual(x1.grad, x2.grad)
 
+    def test_split_share_memory(self):
+        with AutoDNNL(True):
+            x_dpcpp = torch.FloatTensor([1, 1, 1, 1, -1, -1, -1, -1]).to(device=device)
+            other = torch.FloatTensor([-1, -1, -1, -1]).to(device=device)
+
+            x_target = torch.FloatTensor([0, 0, 0, 0, -1, -1, -1, -1]).to(device=device)
+
+            splited_x = torch.split(x_dpcpp, 4)
+            splited_x[0].add_(other)
+
+            self.assertEqual(x_dpcpp, x_target)
+
 class ConvRelu(nn.Module):
     def __init__(self):
         super(ConvRelu, self).__init__()
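test_split_share_memory pins down the aliasing the DNNL path must preserve: torch.split returns views of the input, so an in-place update through one split has to remain visible in the base tensor. The same invariant in plain CPU PyTorch, as a reference sketch (not part of the commit):

import torch

x = torch.tensor([1., 1., 1., 1., -1., -1., -1., -1.])
first, second = torch.split(x, 4)                 # both halves are views into x's storage
first.add_(torch.tensor([-1., -1., -1., -1.]))
print(x)   # tensor([ 0.,  0.,  0.,  0., -1., -1., -1., -1.])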

torch_ipex/csrc/cpu/DevOPs.cpp

Lines changed: 18 additions & 14 deletions
@@ -1929,7 +1929,11 @@ at::Tensor dil_as_strided(
 
   auto* _tensor_impl = (IPEXTensorImpl *)result.unsafeGetTensorImpl();
   _tensor_impl->copy_meta_info(self.unsafeGetTensorImpl());
-  // reset version counter for chunk
+  // When a tensor is chunked, the obtained chunked tensors do not share the version counter.
+  // We have copied the version counter in copy_meta_info and it is a workaround to reset the
+  // version counter here.
+  // Note that when a tensor is sliced, PyTorch will call as_view which will copy the version
+  // counter to the sliced tensor. We do not need to handle it here.
   _tensor_impl->set_version_counter(0);
   _tensor_impl->copy_auto_grad(self.unsafeGetTensorImpl());

@@ -2134,8 +2138,21 @@ at::Tensor AtenIpexCPUDev::dil_select(const at::Tensor & self, at::Dimname dim,
   return dil_select(self, at::dimname_to_position(self, dim), index);
 }
 
+at::Tensor _dil_narrow(const at::Tensor& self, int64_t dim, int64_t start, int64_t length) {
+  // Port from aten/src/ATen/native/TensorShape.cpp
+  TORCH_CHECK(self.dim() > 0, "narrow() cannot be applied to a 0-dim tensor.");
+  auto cur_size = self.size(dim);
+  if (start != cur_size) { // start being the end is valid, but not a valid dim specification.
+    start = at::maybe_wrap_dim(start, cur_size);
+  }
+  TORCH_CHECK(length >= 0 && start <= cur_size - length,
+    "start (", start, ") + length (", length, ") exceeds dimension size (", cur_size, ").");
+  return AtenIpexCPUDev::dil_slice(self, dim, start, start + length, 1);
+}
+
 std::vector<at::Tensor> AtenIpexCPUDev::dil_split(const at::Tensor& self, int64_t split_size, int64_t dim) {
   DEBUG("AtenIpexCPUDev::dil_split\n");
+  // Port from aten/src/ATen/native/TensorShape.cpp
   TORCH_CHECK(self.dim() != 0, "split expects at least a 1-dimensional tensor");
   TORCH_CHECK(split_size >= 0, "split expects split_size be non-negative, but got split_size=", split_size);

@@ -2162,19 +2179,6 @@ std::vector<at::Tensor> AtenIpexCPUDev::dil_split(const at::Tensor& self, int64_t split_size, int64_t dim) {
   return splits;
 }
 
-// TODO only used for dil_split
-at::Tensor AtenIpexCPUDev::_dil_narrow(const at::Tensor& self, int64_t dim, int64_t start, int64_t length) {
-  // Port from aten/src/ATen/native/TensorShape.cpp
-  TORCH_CHECK(self.dim() > 0, "narrow() cannot be applied to a 0-dim tensor.");
-  auto cur_size = self.size(dim);
-  if (start != cur_size) { // start being the end is valid, but not a valid dim specification.
-    start = at::maybe_wrap_dim(start, cur_size);
-  }
-  TORCH_CHECK(length >= 0 && start <= cur_size - length,
-    "start (", start, ") + length (", length, ") exceeds dimension size (", cur_size, ").");
-  return dil_slice(self, dim, start, start + length, 1);
-}
-
 at::Tensor AtenIpexCPUDev::dil_gelu(const at::Tensor& input) {
   DEBUG("AtenIpexCPUDev::dil_gelu\n");
   CHECK_DNNL_OP_PRE_COND(input);
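For reference, the boundary rules that the relocated _dil_narrow helper ports from at::native::narrow behave as follows in stock PyTorch; the values below are illustrative only:

import torch

x = torch.arange(10)
print(torch.narrow(x, 0, 2, 3))    # tensor([2, 3, 4]): start=2, length=3
print(torch.narrow(x, 0, 10, 0))   # empty result: start == size is valid when length == 0
# torch.narrow(x, 0, 8, 5)         # raises: start (8) + length (5) exceeds dimension size (10)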

torch_ipex/csrc/cpu/DevOPs.h

Lines changed: 0 additions & 1 deletion
@@ -71,7 +71,6 @@ class AtenIpexCPUDev {
   static at::Tensor dil_cat(at::TensorList tensors, int64_t dim);
   static std::vector<at::Tensor> dil_split_with_sizes(const at::Tensor& self, at::IntArrayRef split_sizes, int64_t dim);
   static std::vector<at::Tensor> dil_split(const at::Tensor& self, int64_t split_size, int64_t dim);
-  static at::Tensor _dil_narrow(const at::Tensor& self, int64_t dim, int64_t start, int64_t length);
   static at::Tensor dil_gelu(const at::Tensor& input);
   static at::Tensor dil_gelu_backward(const at::Tensor& grad_output, const at::Tensor& input);
   static std::tuple<at::Tensor, at::Tensor, at::Tensor> dil_native_layer_norm(const at::Tensor& X, const at::Tensor& gamma, const at::Tensor& beta, int64_t M, int64_t N, double eps);

torch_ipex/csrc/cpu/dbl/Common.cpp

Lines changed: 3 additions & 0 deletions
@@ -155,6 +155,9 @@ void reorder_to_dtype(const at::Tensor& tensor, at::ScalarType dst_scalar_type,
     // The data type of DIL tensor is same as the dst data type. DO NOTHING
     return;
   }
+  // should fallback if not dil tensor and not own whole storage
+  IPEX_CHECK(cpu::ShadeDataContext::isDilTensor(tensor) || check_tensor_own_whole_storage(tensor), "Reorder only works while tensor owns the whole storage or tensor is a dil tensor");
+
   auto dst_desc = src.get_desc().to_type(get_dil_data_type(dst_scalar_type));
   reorder_to_desc(tensor, dst_desc, scales);
 }