
Commit 6ad4be0

chunyuanw authored and EikanWang committed
enable the fusion of linear and GeLU
1 parent d759c85 commit 6ad4be0

6 files changed (+72 -7 lines)
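The change itself is compact: it teaches the IPEX JIT fusion pass to collapse a `torch_ipex::linear` node followed by `aten::gelu` into a single `ipex::linear_gelu` node, so the GeLU runs as a oneDNN eltwise post-op of the linear kernel instead of a separate pass over the output. Below is a minimal sketch, in plain PyTorch with illustrative shapes, of the eager-mode pattern the fuser targets; the fused node only appears once the model is scripted or traced and run through IPEX's auto-dnnl path:

import torch
import torch.nn as nn
import torch.nn.functional as F

class LinearGelu(nn.Module):
    """Linear whose output feeds directly into GeLU -- the pair the fuser matches."""
    def __init__(self, in_features, out_features):
        super(LinearGelu, self).__init__()
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        # aten::linear followed by aten::gelu; under auto-dnnl these two
        # nodes are rewritten into one ipex::linear_gelu node
        return F.gelu(self.linear(x))

model = LinearGelu(3, 32).eval()
y = model(torch.rand(32, 3))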

tests/cpu/test_jit.py
Lines changed: 31 additions & 0 deletions

@@ -149,6 +149,16 @@ def __init__(self, in_channels, out_channels, **kwargs):
 
     def forward(self, x):
         return F.relu(self.linear(x), inplace=True)
+
+class LinearGelu(nn.Module):
+    def __init__(self, in_channels, out_channels, **kwargs):
+        super(LinearGelu, self).__init__()
+        seed = 2018
+        torch.manual_seed(seed)
+        self.linear = nn.Linear(in_channels, out_channels, **kwargs)
+
+    def forward(self, x):
+        return F.gelu(self.linear(x))
 
 class ConvSumInDiffBlock(nn.Module):
     def __init__(self, dim, in_channels, out_channels, **kwargs):

@@ -544,6 +554,27 @@ def test_output_linear_relu(self):
             kind_in_graph="ipex::linear_relu")
 
 
+    def test_output_linear_gelu(self):
+        self._test_output(
+            LinearGelu(3, 32, bias=True),
+            torch.rand(32, 3),
+            kind_in_graph="ipex::linear_gelu")
+        self._test_output_bf16(
+            LinearGelu(3, 32, bias=True),
+            torch.rand(32, 3),
+            kind_in_graph="ipex::linear_gelu",
+            prec=5e-3)
+        self._test_output(
+            LinearGelu(3, 32, bias=False),
+            torch.rand(32, 3),
+            kind_in_graph="ipex::linear_gelu")
+        self._test_output_bf16(
+            LinearGelu(3, 32, bias=False),
+            torch.rand(32, 3),
+            kind_in_graph="ipex::linear_gelu",
+            prec=5e-3)
+
+
     def test_channel_shuffle(self):
         self._test_output(
             ChannelShuffle(10, 16, 50, 50, 4),
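The new `test_output_linear_gelu` covers Linear both with and without bias, in fp32 and bf16; the bf16 cases loosen the tolerance to `prec=5e-3`, consistent with bfloat16's roughly 8-bit mantissa. As a hedged sketch of what a `kind_in_graph` check boils down to (the real `_test_output` harness in this file also compares numerics against the eager reference; `assert_fused` is a hypothetical helper, not IPEX code):

import torch

def assert_fused(model, x, kind_in_graph):
    model = model.eval()
    traced = torch.jit.trace(model, x)
    traced(x)  # warm up so the profiling/fusion passes have run
    # graph_for is TorchScript's debug hook for the optimized graph
    graph = traced.graph_for(x)
    # The fused kernel should show up as a single node, e.g. ipex::linear_gelu
    assert any(n.kind() == kind_in_graph for n in graph.nodes()), \
        "expected %s in the optimized graph" % kind_in_graph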

torch_ipex/csrc/cpu/FusionOPs.cpp
Lines changed: 5 additions & 4 deletions

@@ -386,12 +386,13 @@ at::Tensor& AtenIpexJITDev::dil_convolution_sum_relu(
       "Convolution_Sum_Relu");
 }
 
-at::Tensor AtenIpexJITDev::dil_linear_fuse_relu(
+at::Tensor AtenIpexJITDev::dil_linear_fuse_eltwise(
     const at::Tensor& self,
     const at::Tensor& weight,
-    const at::Tensor& bias) {
+    const at::Tensor& bias,
+    const dil::attr_t& attr) {
 #if defined(IPEX_PROFILE_OP)
-  RECORD_FUNCTION("AtenIpexJITDev::dil_linear_fuse_relu", std::vector<c10::IValue>({self, weight, bias}), torch::autograd::Node::peek_at_next_sequence_nr());
+  RECORD_FUNCTION("AtenIpexJITDev::dil_linear_fuse_eltwise", std::vector<c10::IValue>({self, weight, bias}), torch::autograd::Node::peek_at_next_sequence_nr());
 #endif
   IPEX_CHECK(self.dim() >= 2,
       "dil_linear: input needs to has dim at least 2, input dim ", self.dim());

@@ -413,7 +414,7 @@ at::Tensor AtenIpexJITDev::dil_linear_fuse_relu(
     b = try_gen_dil_tensor(bias_contiguous);
   }
 
-  dil::tensor y = dbl::linear::linear_impl(x, w, b, /* dst_scale */ dil::scale_t(), dil::attr_t::fuse_relu());
+  dil::tensor y = dbl::linear::linear_impl(x, w, b, /* dst_scale */ dil::scale_t(), attr);
 
   auto input_size = self.sizes();
   std::vector<int64_t> output_size(input_size.begin(), input_size.end() - 1);

torch_ipex/csrc/cpu/FusionOPs.h
Lines changed: 2 additions & 1 deletion

@@ -18,6 +18,7 @@ namespace ipex {
 // static auto conv3d_relu_sum = Symbol::fromQualString("ipex::conv3d_relu_sum");
 static auto conv2d_sum_relu = Symbol::fromQualString("ipex::conv2d_sum_relu");
 static auto linear_relu = Symbol::fromQualString("ipex::linear_relu");
+static auto linear_gelu = Symbol::fromQualString("ipex::linear_gelu");
 
 // 3d ops
 static auto conv3d_relu = Symbol::fromQualString("ipex::conv3d_relu");

@@ -48,7 +49,7 @@ class AtenIpexJITDev {
 
   static at::Tensor& dil_convolution_sum_relu( const at::Tensor& input, const at::Tensor& weight, const at::Tensor& bias, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, int64_t groups, at::Tensor& accumu, at::Scalar alpha);
 
-  static at::Tensor dil_linear_fuse_relu(const at::Tensor& self, const at::Tensor& weight, const at::Tensor& bias);
+  static at::Tensor dil_linear_fuse_eltwise(const at::Tensor& self, const at::Tensor& weight, const at::Tensor& bias, const dil::attr_t& attr);
 
 };

torch_ipex/csrc/cpu/dil/dil/attributes.hpp
Lines changed: 9 additions & 0 deletions

@@ -69,6 +69,15 @@ struct attr_t : public dnnl::primitive_attr {
     return attr;
   }
 
+  static attr_t fuse_gelu(float scale = 1.0, float alpha = 0.f,
+                          float beta = 0.f) {
+    attr_t attr;
+    post_ops po;
+    po.append_eltwise(scale, algorithm::eltwise_gelu_tanh, alpha, beta);
+    attr.set_post_ops(po);
+    return attr;
+  }
+
   static attr_t fuse_elu(float scale = 1.0f, float alpha = 0.f, float beta = 1.0f) {
     attr_t attr;
     post_ops po;
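Note that the helper appends `algorithm::eltwise_gelu_tanh`, oneDNN's tanh approximation of GeLU, whereas eager `F.gelu` computes the exact erf form. The two agree to within a few 1e-4, well inside the bf16 test tolerance above. A quick numerical check in plain PyTorch (not part of the commit):

import math
import torch
import torch.nn.functional as F

x = torch.linspace(-5, 5, steps=1001)
exact = F.gelu(x)  # 0.5 * x * (1 + erf(x / sqrt(2)))
# tanh approximation, as used by oneDNN's eltwise_gelu_tanh
tanh_approx = 0.5 * x * (1 + torch.tanh(
    math.sqrt(2 / math.pi) * (x + 0.044715 * x.pow(3))))
print((exact - tanh_approx).abs().max())  # a few 1e-4 over this range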

torch_ipex/csrc/jit/fusion_pass.cpp
Lines changed: 1 addition & 0 deletions

@@ -288,6 +288,7 @@ OpFuser::RuleTab OpFuser::dnnlRules = {
   {{ipex::conv2d_sum, Symbol::fromQualString("aten::relu_")}, ipex::conv2d_sum_relu},
 
   {{Symbol::fromQualString("torch_ipex::linear"), aten::relu}, ipex::linear_relu},
+  {{Symbol::fromQualString("torch_ipex::linear"), aten::gelu}, ipex::linear_gelu},
   {{Symbol::fromQualString("torch_ipex::linear"), Symbol::fromQualString("aten::relu_")}, ipex::linear_relu},
 
   // 3d ops
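`dnnlRules` is a lookup table from (producer op, consumer op) pairs to the fused symbol, and the one-line addition registers linear followed by gelu alongside the existing linear/relu rules (only the out-of-place `aten::gelu` is matched, where relu also gets an `aten::relu_` entry). A toy sketch of the mechanism, with illustrative names rather than the real OpFuser classes:

# Toy rule-table fuser: scan producer/consumer pairs and rewrite matches
# into one fused op. Purely illustrative; the real pass works on the
# TorchScript graph, not a flat list.
RULES = {
    ("torch_ipex::linear", "aten::relu"): "ipex::linear_relu",
    ("torch_ipex::linear", "aten::gelu"): "ipex::linear_gelu",
}

def fuse(nodes):
    out, i = [], 0
    while i < len(nodes):
        pair = tuple(nodes[i:i + 2])
        if pair in RULES:
            out.append(RULES[pair])  # replace the pair with the fused op
            i += 2
        else:
            out.append(nodes[i])
            i += 1
    return out

print(fuse(["torch_ipex::linear", "aten::gelu"]))  # ['ipex::linear_gelu']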

torch_ipex/csrc/jit/register_dnnl_jit_ops.cpp
Lines changed: 24 additions & 2 deletions

@@ -314,10 +314,11 @@ RegisterOperators op({
       [] (const Node* node) ->Operation {
         if (torch_ipex::check_auto_dnnl()) {
           return [] (Stack& stack) {
-            auto result = AtenIpexJITDev::dil_linear_fuse_relu(
+            auto result = AtenIpexJITDev::dil_linear_fuse_eltwise(
                 (std::move(peek(stack, 0, 3))).toTensor(),
                 (std::move(peek(stack, 1, 3))).toTensor(),
-                toOptionalTensor(std::move(peek(stack, 2, 3)))
+                toOptionalTensor(std::move(peek(stack, 2, 3))),
+                dil::attr_t::fuse_relu()
             );
             drop(stack, 3);
             pack(stack, std::move(result));

@@ -328,6 +329,27 @@ RegisterOperators op({
         }
       },
       aliasAnalysisFromSchema()
+    ),
+    Operator(
+      "ipex::linear_gelu(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor",
+      [] (const Node* node) ->Operation {
+        if (torch_ipex::check_auto_dnnl()) {
+          return [] (Stack& stack) {
+            auto result = AtenIpexJITDev::dil_linear_fuse_eltwise(
+                (std::move(peek(stack, 0, 3))).toTensor(),
+                (std::move(peek(stack, 1, 3))).toTensor(),
+                toOptionalTensor(std::move(peek(stack, 2, 3))),
+                dil::attr_t::fuse_gelu()
+            );
+            drop(stack, 3);
+            pack(stack, std::move(result));
+            return 0;
+          };
+        } else {
+          TORCH_CHECK(false, "PyTorch native path not support linear gelu fusion now");
+        }
+      },
+      aliasAnalysisFromSchema()
     )
   });
 }
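The registered `ipex::linear_gelu` operator follows the interpreter's stack convention: peek the last three IValues (input, weight, optional bias), call the shared `dil_linear_fuse_eltwise` kernel with `dil::attr_t::fuse_gelu()`, drop the inputs, and push the result; if auto-dnnl is disabled, the op refuses to run rather than falling back. A reference-semantics sketch in plain PyTorch (`linear_gelu_op` is an illustrative stand-in, not the real C++ operator):

import torch
import torch.nn.functional as F

def linear_gelu_op(stack):
    # The last three stack entries are (input, weight, bias), bottom to top.
    bias = stack.pop()
    weight = stack.pop()
    x = stack.pop()
    # Reference semantics of ipex::linear_gelu; the real kernel runs the
    # GeLU as a oneDNN post-op instead of a second elementwise pass.
    stack.append(F.gelu(F.linear(x, weight, bias)))

stack = [torch.rand(32, 3), torch.rand(16, 3), torch.rand(16)]
linear_gelu_op(stack)
print(stack[0].shape)  # torch.Size([32, 16])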
