Commit d9ef0bc

Enable linear+sigmoid, linear+silu, linear+swish fusion (#541)

1 parent: f6cf30a

5 files changed: +210 −4 lines

intel_extension_for_pytorch/csrc/jit/cpu/kernels/LinearPacked.cpp

Lines changed: 19 additions & 1 deletion
@@ -57,6 +57,24 @@ at::Tensor linear_gelu_run(
   return op_context->run(input, ideep::attr_t::fuse_gelu());
 }
 
+at::Tensor linear_sigmoid_run(
+    const at::Tensor& input,
+    const c10::intrusive_ptr<LinearOpContext>& op_context) {
+  IPEX_RECORD_FUNCTION(
+      "ipex_prepack::linear_sigmoid_run", std::vector<c10::IValue>({}));
+
+  return op_context->run(input, ideep::attr_t::fuse_sigmoid());
+}
+
+at::Tensor linear_swish_run(
+    const at::Tensor& input,
+    const c10::intrusive_ptr<LinearOpContext>& op_context) {
+  IPEX_RECORD_FUNCTION(
+      "ipex_prepack::linear_swish_run", std::vector<c10::IValue>({}));
+
+  return op_context->run(input, ideep::attr_t::fuse_swish());
+}
+
 at::Tensor linear_add_run(
     const at::Tensor& input,
     at::Tensor& accumu,
@@ -125,4 +143,4 @@ at::Tensor& run(
 } // namespace linear
 } // namespace detail
 } // namespace cpu
-} // namespace torch_ipex
+} // namespace torch_ipex
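Both new kernels reuse the prepacked-linear path and differ only in the oneDNN post-op attribute passed to the op context. As a reference for what the fused ops compute, here is a minimal PyTorch sketch (an illustration of the math, not the committed code); note that silu/swish is exactly x * sigmoid(x):

import torch
import torch.nn.functional as F

# Reference semantics for the fused ops (illustration only):
#   linear_sigmoid_run(x, ctx) ~ sigmoid(linear(x))
#   linear_swish_run(x, ctx)   ~ linear(x) * sigmoid(linear(x)) == silu(linear(x))
x = torch.randn(32, 3)
linear = torch.nn.Linear(3, 32)
y = linear(x)
assert torch.allclose(y * torch.sigmoid(y), F.silu(y), atol=1e-6)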

intel_extension_for_pytorch/csrc/jit/cpu/kernels/LinearPacked.h

Lines changed: 9 additions & 1 deletion
@@ -29,6 +29,14 @@ at::Tensor linear_gelu_run(
     const at::Tensor& input,
     const c10::intrusive_ptr<LinearOpContext>& op_context);
 
+at::Tensor linear_sigmoid_run(
+    const at::Tensor& input,
+    const c10::intrusive_ptr<LinearOpContext>& op_context);
+
+at::Tensor linear_swish_run(
+    const at::Tensor& input,
+    const c10::intrusive_ptr<LinearOpContext>& op_context);
+
 at::Tensor linear_add_run(
     const at::Tensor& input,
     at::Tensor& accumu,
@@ -57,4 +65,4 @@ at::Tensor& run(
 } // namespace linear
 } // namespace detail
 } // namespace cpu
-} // namespace torch_ipex
+} // namespace torch_ipex

intel_extension_for_pytorch/csrc/jit/cpu/passes/graph_rewrite_linear.cpp

Lines changed: 60 additions & 1 deletion
@@ -98,8 +98,12 @@ void insertPrePackedLinearOp(std::shared_ptr<Graph>& graph) {
 }
 
 void fuseLinearWithEltwise(std::shared_ptr<Graph>& graph) {
-  SubgraphRewriter rewriter_relu, rewriter_gelu;
+  SubgraphRewriter rewriter_relu, rewriter_gelu, rewriter_silu,
+      rewriter_sigmoid, rewriter_swish;
   std::array<std::string, 2> relu_operators = {"relu", "relu_"};
+  std::array<std::string, 2> sigmoid_operators = {"sigmoid", "sigmoid_"};
+  std::array<std::string, 2> silu_operators = {"silu", "silu_"};
+  std::array<std::string, 2> mul_operators = {"mul", "mul_"};
 
   auto linear_relu_rstring = CodeTemplate(R"(
     graph(%input, %weight, %bias, %out_features:int, %in_features:int, %batch_size:int, %weight_is_prepacked:bool):
@@ -127,13 +131,68 @@ void fuseLinearWithEltwise(std::shared_ptr<Graph>& graph) {
       %res = ipex_prepack::linear_gelu_run(%input, %packed_weight)
       return (%res))";
 
+  auto linear_sigmoid_rstring = CodeTemplate(R"(
+    graph(%input, %weight, %bias, %out_features:int, %in_features:int, %batch_size:int, %weight_is_prepacked:bool):
+      %packed_weight = ipex_prepack::linear_prepack(%weight, %bias, %out_features, %in_features, %batch_size, %weight_is_prepacked)
+      %x = ipex_prepack::linear_run(%input, %packed_weight)
+      %res = aten::${sigmoid}(%x)
+      return (%res))");
+
+  auto linear_silu_rstring = CodeTemplate(R"(
+    graph(%input, %weight, %bias, %out_features:int, %in_features:int, %batch_size:int, %weight_is_prepacked:bool):
+      %packed_weight = ipex_prepack::linear_prepack(%weight, %bias, %out_features, %in_features, %batch_size, %weight_is_prepacked)
+      %x = ipex_prepack::linear_run(%input, %packed_weight)
+      %res = aten::${silu}(%x)
+      return (%res))");
+
+  auto linear_sigmoid_mul_rstring = CodeTemplate(R"(
+    graph(%input, %weight, %bias, %out_features:int, %in_features:int, %batch_size:int, %weight_is_prepacked:bool):
+      %packed_weight = ipex_prepack::linear_prepack(%weight, %bias, %out_features, %in_features, %batch_size, %weight_is_prepacked)
+      %x = ipex_prepack::linear_run(%input, %packed_weight)
+      %y = aten::${sigmoid}(%x)
+      %res = aten::${mul}(%x, %y)
+      return (%res))");
+
+  std::string linear_swish_fused = R"(
+    graph(%input, %weight, %bias, %out_features:int, %in_features:int, %batch_size:int, %weight_is_prepacked:bool):
+      %packed_weight = ipex_prepack::linear_prepack(%weight, %bias, %out_features, %in_features, %batch_size, %weight_is_prepacked)
+      %res = ipex_prepack::linear_swish_run(%input, %packed_weight)
+      return (%res))";
+
+  std::string linear_sigmoid_fused = R"(
+    graph(%input, %weight, %bias, %out_features:int, %in_features:int, %batch_size:int, %weight_is_prepacked:bool):
+      %packed_weight = ipex_prepack::linear_prepack(%weight, %bias, %out_features, %in_features, %batch_size, %weight_is_prepacked)
+      %res = ipex_prepack::linear_sigmoid_run(%input, %packed_weight)
+      return (%res))";
+
   for (const auto& relu : relu_operators) {
     TemplateEnv env;
     env.s("relu", relu);
     rewriter_relu.RegisterRewritePattern(
         linear_relu_rstring.format(env), linear_relu_fused);
   }
 
+  for (const auto& silu : silu_operators) {
+    TemplateEnv env;
+    env.s("silu", silu);
+    rewriter_silu.RegisterRewritePattern(
+        linear_silu_rstring.format(env), linear_swish_fused);
+  }
+
+  for (const auto& sigmoid : sigmoid_operators) {
+    TemplateEnv env;
+    env.s("sigmoid", sigmoid);
+    rewriter_sigmoid.RegisterRewritePattern(
+        linear_sigmoid_rstring.format(env), linear_sigmoid_fused);
+    for (const auto& mul : mul_operators) {
+      env.s("mul", mul);
+      rewriter_swish.RegisterRewritePattern(
+          linear_sigmoid_mul_rstring.format(env), linear_swish_fused);
+    }
+  }
+  rewriter_silu.runOnGraph(graph);
+  rewriter_sigmoid.runOnGraph(graph);
+  rewriter_swish.runOnGraph(graph);
   rewriter_gelu.RegisterRewritePattern(linear_gelu, linear_gelu_fused);
 
   rewriter_relu.runOnGraph(graph);
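Because silu(x) = x * sigmoid(x), both the explicit aten::silu pattern and the manual sigmoid-then-mul pattern rewrite to the same linear_swish_fused target. The nested loop expands ${sigmoid} x ${mul} over the out-of-place and in-place variants, registering four swish patterns in total. A rough Python analogy of that template expansion, using string.Template in place of torch::jit::CodeTemplate (illustration only, not the committed code):

from string import Template

# Stand-in for CodeTemplate/TemplateEnv: expand ${sigmoid} and ${mul}
# over both operator spellings, yielding 2 x 2 = 4 swish patterns.
pattern = Template("""graph(%input, ...):
  %y = aten::${sigmoid}(%x)
  %res = aten::${mul}(%x, %y)
  return (%res)""")

for sig in ("sigmoid", "sigmoid_"):
    for mul in ("mul", "mul_"):
        print(pattern.substitute(sigmoid=sig, mul=mul))
        # each expansion would be registered against linear_swish_fused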

intel_extension_for_pytorch/csrc/jit/cpu/passes/register_dnnl_jit_ops.cpp

Lines changed: 33 additions & 1 deletion
@@ -17,7 +17,6 @@
 #include "csrc/jit/cpu/kernels/Shuffle.h"
 #include "csrc/jit/cpu/kernels/Softmax.h"
 
-
 namespace torch {
 namespace jit {
 
@@ -365,6 +364,39 @@ RegisterOperators op({
           };
         },
         aliasAnalysisFromSchema()),
+    Operator(
+        "ipex_prepack::linear_sigmoid_run(Tensor input, "
+        "__torch__.torch.classes.ipex_prepack.LinearOpContext W_prepack) "
+        "-> Tensor",
+        [](const Node* node) -> Operation {
+          return [](Stack* stack) {
+            auto result = linear_sigmoid_run(
+                (std::move(peek(stack, 0, 2))).toTensor(),
+                (std::move(peek(stack, 1, 2)))
+                    .toCustomClass<LinearOpContext>());
+            drop(stack, 2);
+            pack(stack, std::move(result));
+            return 0;
+          };
+        },
+        aliasAnalysisFromSchema()),
+    Operator(
+        "ipex_prepack::linear_swish_run(Tensor input, "
+        "__torch__.torch.classes.ipex_prepack.LinearOpContext W_prepack) "
+        "-> Tensor",
+        [](const Node* node) -> Operation {
+          return [](Stack* stack) {
+            auto result = linear_swish_run(
+                (std::move(peek(stack, 0, 2))).toTensor(),
+                (std::move(peek(stack, 1, 2)))
+                    .toCustomClass<LinearOpContext>());
+            drop(stack, 2);
+            pack(stack, std::move(result));
+            return 0;
+          };
+        },
+        aliasAnalysisFromSchema()),
+
     Operator(
         "ipex_prepack::linear_add_run(Tensor input, Tensor(a!) accumu, *, "
         "Scalar? alpha, "

tests/cpu/test_jit.py

Lines changed: 89 additions & 0 deletions
@@ -351,6 +351,38 @@ def __init__(self, in_channels, out_channels, **kwargs):
     def forward(self, x):
         return F.gelu(self.linear(x))
 
+class LinearSigmoid(nn.Module):
+    def __init__(self, in_channels, out_channels, **kwargs):
+        super(LinearSigmoid, self).__init__()
+        seed = 2018
+        torch.manual_seed(seed)
+        self.linear = nn.Linear(in_channels, out_channels, **kwargs)
+
+    def forward(self, x):
+        return F.sigmoid(self.linear(x))
+
+class LinearSwish(nn.Module):
+    def __init__(self, in_channels, out_channels, **kwargs):
+        super(LinearSwish, self).__init__()
+        seed = 2018
+        torch.manual_seed(seed)
+        self.linear = nn.Linear(in_channels, out_channels, **kwargs)
+
+    def forward(self, x):
+        linear_res = self.linear(x)
+        return F.silu(linear_res)
+
+class LinearSwish_v1(nn.Module):
+    def __init__(self, in_channels, out_channels, **kwargs):
+        super(LinearSwish_v1, self).__init__()
+        seed = 2018
+        torch.manual_seed(seed)
+        self.linear = nn.Linear(in_channels, out_channels, **kwargs)
+
+    def forward(self, x):
+        linear_res = self.linear(x)
+        return torch.mul(linear_res, F.sigmoid(linear_res))
+
 class LinearAdd(nn.Module):
     def __init__(self, in_channels, out_channels, **kwargs):
         super(LinearAdd, self).__init__()
@@ -2152,6 +2184,63 @@ def test_output_linear_gelu(self):
             torch.rand(32, 3),
             kind_in_graph="ipex_prepack::linear_gelu_run",
             prec=5e-3)
+
+    def test_output_linear_swish(self):
+        self._test_output(
+            LinearSwish_v1(3, 32, bias=True),
+            torch.rand(32, 3),
+            kind_in_graph="aten::linear")
+        self._test_output_bf16(
+            LinearSwish_v1(3, 32, bias=True),
+            torch.rand(32, 3),
+            kind_in_graph="ipex_prepack::linear_swish_run",
+            prec=5e-3)
+        self._test_output(
+            LinearSwish_v1(3, 32, bias=False),
+            torch.rand(32, 3),
+            kind_in_graph="aten::linear")
+        self._test_output_bf16(
+            LinearSwish_v1(3, 32, bias=False),
+            torch.rand(32, 3),
+            kind_in_graph="ipex_prepack::linear_swish_run",
+            prec=5e-3)
+        self._test_output(
+            LinearSwish(3, 32, bias=True),
+            torch.rand(32, 3),
+            kind_in_graph="aten::linear")
+        self._test_output_bf16(
+            LinearSwish(3, 32, bias=True),
+            torch.rand(32, 3),
+            kind_in_graph="ipex_prepack::linear_swish_run",
+            prec=5e-3)
+        self._test_output(
+            LinearSwish(3, 32, bias=False),
+            torch.rand(32, 3),
+            kind_in_graph="aten::linear")
+        self._test_output_bf16(
+            LinearSwish(3, 32, bias=False),
+            torch.rand(32, 3),
+            kind_in_graph="ipex_prepack::linear_swish_run",
+            prec=5e-3)
+
+    def test_output_linear_sigmoid(self):
+        self._test_output(
+            LinearSigmoid(3, 32, bias=True),
+            torch.rand(32, 3),
+            kind_in_graph="aten::linear")
+        self._test_output_bf16(
+            LinearSigmoid(3, 32, bias=True),
+            torch.rand(32, 3),
+            kind_in_graph="ipex_prepack::linear_sigmoid_run",
+            prec=5e-3)
+        self._test_output(
+            LinearSigmoid(3, 32, bias=False),
+            torch.rand(32, 3),
+            kind_in_graph="aten::linear")
+        self._test_output_bf16(
+            LinearSigmoid(3, 32, bias=False),
+            torch.rand(32, 3),
+            kind_in_graph="ipex_prepack::linear_sigmoid_run",
+            prec=5e-3)
 
     def test_channel_shuffle(self):
         self._test_output(
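The tests assert that the fused ops appear only on the bf16 path (fp32 keeps aten::linear). For completeness, a hedged end-to-end sketch of how a user model would exercise these kernels, following the ipex.optimize + tracing flow the test harness is built around (exact API surface and graph output vary by IPEX release):

import torch
import intel_extension_for_pytorch as ipex

class M(torch.nn.Module):
    def __init__(self):
        super(M, self).__init__()
        self.linear = torch.nn.Linear(3, 32)

    def forward(self, x):
        return torch.sigmoid(self.linear(x))

model = M().eval()
x = torch.rand(32, 3)
# bf16 is where the prepacked fused kernels kick in, per the tests above
model = ipex.optimize(model, dtype=torch.bfloat16)
with torch.no_grad(), torch.cpu.amp.autocast():
    traced = torch.jit.trace(model, x)
    traced = torch.jit.freeze(traced)
    traced(x)  # warm-up run so the fusion passes execute
    # after fusion, the graph should contain ipex_prepack::linear_sigmoid_run
    print(traced.graph_for(x))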