X86: Remove LowerToHorizontalOp and modified test case #148477

houngkoungting · 2025-07-13T15:19:47Z

FIX #143000
Remove LowerToHorizontalOp and adjust test case ; all tests pass after the change.

@RKSimon

llvmbot · 2025-07-13T15:20:14Z

@llvm/pr-subscribers-backend-x86

Author: 黃國庭 (houngkoungting)

Changes

FIX #143000
Remove LowerToHorizontalOp and adjust test case ; all tests pass after the change.

@RKSimon

Patch is 205.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148477.diff

6 Files Affected:

(modified) llvm/lib/Target/X86/X86ISelLowering.cpp (-118)
(renamed) llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll (+182-13)
(renamed) llvm/test/Transforms/PhaseOrdering/X86/haddsub-shuf.ll (+273-12)
(renamed) llvm/test/Transforms/PhaseOrdering/X86/haddsub-undef.ll (+398-9)
(renamed) llvm/test/Transforms/PhaseOrdering/X86/haddsub.ll (+581-9)
(renamed) llvm/test/Transforms/PhaseOrdering/X86/phaddsub-undef.ll (+68-10)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f8f29b9f2cdc7..677ecf8801e2d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8569,122 +8569,6 @@ static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,
   return DAG.getNode(HOpcode, DL, VT, V0, V1);
 }
 
-/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
-static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, const SDLoc &DL,
-                                   const X86Subtarget &Subtarget,
-                                   SelectionDAG &DAG) {
-  // We need at least 2 non-undef elements to make this worthwhile by default.
-  unsigned NumNonUndefs =
-      count_if(BV->op_values(), [](SDValue V) { return !V.isUndef(); });
-  if (NumNonUndefs < 2)
-    return SDValue();
-
-  // There are 4 sets of horizontal math operations distinguished by type:
-  // int/FP at 128-bit/256-bit. Each type was introduced with a different
-  // subtarget feature. Try to match those "native" patterns first.
-  MVT VT = BV->getSimpleValueType(0);
-  if (((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) ||
-      ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3()) ||
-      ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX()) ||
-      ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())) {
-    unsigned HOpcode;
-    SDValue V0, V1;
-    if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
-      return getHopForBuildVector(BV, DL, DAG, HOpcode, V0, V1);
-  }
-
-  // Try harder to match 256-bit ops by using extract/concat.
-  if (!Subtarget.hasAVX() || !VT.is256BitVector())
-    return SDValue();
-
-  // Count the number of UNDEF operands in the build_vector in input.
-  unsigned NumElts = VT.getVectorNumElements();
-  unsigned Half = NumElts / 2;
-  unsigned NumUndefsLO = 0;
-  unsigned NumUndefsHI = 0;
-  for (unsigned i = 0, e = Half; i != e; ++i)
-    if (BV->getOperand(i)->isUndef())
-      NumUndefsLO++;
-
-  for (unsigned i = Half, e = NumElts; i != e; ++i)
-    if (BV->getOperand(i)->isUndef())
-      NumUndefsHI++;
-
-  SDValue InVec0, InVec1;
-  if (VT == MVT::v8i32 || VT == MVT::v16i16) {
-    SDValue InVec2, InVec3;
-    unsigned X86Opcode;
-    bool CanFold = true;
-
-    if (isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, 0, Half, InVec0, InVec1) &&
-        isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, Half, NumElts, InVec2,
-                              InVec3) &&
-        ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
-        ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
-      X86Opcode = X86ISD::HADD;
-    else if (isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, 0, Half, InVec0,
-                                   InVec1) &&
-             isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, Half, NumElts, InVec2,
-                                   InVec3) &&
-             ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
-             ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
-      X86Opcode = X86ISD::HSUB;
-    else
-      CanFold = false;
-
-    if (CanFold) {
-      // Do not try to expand this build_vector into a pair of horizontal
-      // add/sub if we can emit a pair of scalar add/sub.
-      if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
-        return SDValue();
-
-      // Convert this build_vector into a pair of horizontal binops followed by
-      // a concat vector. We must adjust the outputs from the partial horizontal
-      // matching calls above to account for undefined vector halves.
-      SDValue V0 = InVec0.isUndef() ? InVec2 : InVec0;
-      SDValue V1 = InVec1.isUndef() ? InVec3 : InVec1;
-      assert((!V0.isUndef() || !V1.isUndef()) && "Horizontal-op of undefs?");
-      bool isUndefLO = NumUndefsLO == Half;
-      bool isUndefHI = NumUndefsHI == Half;
-      return ExpandHorizontalBinOp(V0, V1, DL, DAG, X86Opcode, false, isUndefLO,
-                                   isUndefHI);
-    }
-  }
-
-  if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
-      VT == MVT::v16i16) {
-    unsigned X86Opcode;
-    if (isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, 0, NumElts, InVec0,
-                              InVec1))
-      X86Opcode = X86ISD::HADD;
-    else if (isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, 0, NumElts, InVec0,
-                                   InVec1))
-      X86Opcode = X86ISD::HSUB;
-    else if (isHorizontalBinOpPart(BV, ISD::FADD, DL, DAG, 0, NumElts, InVec0,
-                                   InVec1))
-      X86Opcode = X86ISD::FHADD;
-    else if (isHorizontalBinOpPart(BV, ISD::FSUB, DL, DAG, 0, NumElts, InVec0,
-                                   InVec1))
-      X86Opcode = X86ISD::FHSUB;
-    else
-      return SDValue();
-
-    // Don't try to expand this build_vector into a pair of horizontal add/sub
-    // if we can simply emit a pair of scalar add/sub.
-    if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
-      return SDValue();
-
-    // Convert this build_vector into two horizontal add/sub followed by
-    // a concat vector.
-    bool isUndefLO = NumUndefsLO == Half;
-    bool isUndefHI = NumUndefsHI == Half;
-    return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true,
-                                 isUndefLO, isUndefHI);
-  }
-
-  return SDValue();
-}
-
 static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG);
 
@@ -9270,8 +9154,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
 
   if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, dl, Subtarget, DAG))
     return AddSub;
-  if (SDValue HorizontalOp = LowerToHorizontalOp(BV, dl, Subtarget, DAG))
-    return HorizontalOp;
   if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, dl, Subtarget, DAG))
     return Broadcast;
   if (SDValue BitOp = lowerBuildVectorToBitOp(BV, dl, Subtarget, DAG))
diff --git a/llvm/test/CodeGen/X86/haddsub-2.ll b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll
similarity index 81%
rename from llvm/test/CodeGen/X86/haddsub-2.ll
rename to llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll
index bca446fa8fb56..4eb5bdba9edb6 100644
--- a/llvm/test/CodeGen/X86/haddsub-2.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll
@@ -1,38 +1,39 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+sse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSE3
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+sse3,+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes="default<O3>" -S %s | FileCheck %s
 
 define <4 x float> @hadd_ps_test1(<4 x float> %A, <4 x float> %B) {
-; SSE-LABEL: hadd_ps_test1:
-; SSE:       # %bb.0:
-; SSE-NEXT:    haddps %xmm1, %xmm0
-; SSE-NEXT:    retq
+; CHECK-LABEL: define <4 x float> @hadd_ps_test1(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x float> [[TMP3]]
 ;
-; AVX-LABEL: hadd_ps_test1:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+
   %vecext = extractelement <4 x float> %A, i32 0
   %vecext1 = extractelement <4 x float> %A, i32 1
   %add = fadd float %vecext, %vecext1
   %vecinit = insertelement <4 x float> undef, float %add, i32 0
+
   %vecext2 = extractelement <4 x float> %A, i32 2
   %vecext3 = extractelement <4 x float> %A, i32 3
   %add4 = fadd float %vecext2, %vecext3
   %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1
+
   %vecext6 = extractelement <4 x float> %B, i32 0
   %vecext7 = extractelement <4 x float> %B, i32 1
   %add8 = fadd float %vecext6, %vecext7
   %vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 2
+
   %vecext10 = extractelement <4 x float> %B, i32 2
   %vecext11 = extractelement <4 x float> %B, i32 3
   %add12 = fadd float %vecext10, %vecext11
   %vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 3
+
   ret <4 x float> %vecinit13
 }
 
+
 define <4 x float> @hadd_ps_test2(<4 x float> %A, <4 x float> %B) {
 ; SSE-LABEL: hadd_ps_test2:
 ; SSE:       # %bb.0:
@@ -43,6 +44,13 @@ define <4 x float> @hadd_ps_test2(<4 x float> %A, <4 x float> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <4 x float> @hadd_ps_test2(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x float> [[TMP3]]
+;
   %vecext = extractelement <4 x float> %A, i32 2
   %vecext1 = extractelement <4 x float> %A, i32 3
   %add = fadd float %vecext, %vecext1
@@ -72,6 +80,13 @@ define <4 x float> @hsub_ps_test1(<4 x float> %A, <4 x float> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <4 x float> @hsub_ps_test1(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x float> [[TMP3]]
+;
   %vecext = extractelement <4 x float> %A, i32 0
   %vecext1 = extractelement <4 x float> %A, i32 1
   %sub = fsub float %vecext, %vecext1
@@ -101,6 +116,13 @@ define <4 x float> @hsub_ps_test2(<4 x float> %A, <4 x float> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <4 x float> @hsub_ps_test2(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x float> [[TMP3]]
+;
   %vecext = extractelement <4 x float> %A, i32 2
   %vecext1 = extractelement <4 x float> %A, i32 3
   %sub = fsub float %vecext, %vecext1
@@ -159,6 +181,13 @@ define <4 x i32> @phadd_d_test1(<4 x i32> %A, <4 x i32> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <4 x i32> @phadd_d_test1(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+;
   %vecext = extractelement <4 x i32> %A, i32 0
   %vecext1 = extractelement <4 x i32> %A, i32 1
   %add = add i32 %vecext, %vecext1
@@ -217,6 +246,13 @@ define <4 x i32> @phadd_d_test2(<4 x i32> %A, <4 x i32> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <4 x i32> @phadd_d_test2(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+;
   %vecext = extractelement <4 x i32> %A, i32 2
   %vecext1 = extractelement <4 x i32> %A, i32 3
   %add = add i32 %vecext, %vecext1
@@ -275,6 +311,13 @@ define <4 x i32> @phsub_d_test1(<4 x i32> %A, <4 x i32> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <4 x i32> @phsub_d_test1(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+;
   %vecext = extractelement <4 x i32> %A, i32 0
   %vecext1 = extractelement <4 x i32> %A, i32 1
   %sub = sub i32 %vecext, %vecext1
@@ -333,6 +376,13 @@ define <4 x i32> @phsub_d_test2(<4 x i32> %A, <4 x i32> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <4 x i32> @phsub_d_test2(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+;
   %vecext = extractelement <4 x i32> %A, i32 2
   %vecext1 = extractelement <4 x i32> %A, i32 3
   %sub = sub i32 %vecext, %vecext1
@@ -362,6 +412,13 @@ define <2 x double> @hadd_pd_test1(<2 x double> %A, <2 x double> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <2 x double> @hadd_pd_test1(
+; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[VECINIT2:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <2 x double> [[VECINIT2]]
+;
   %vecext = extractelement <2 x double> %A, i32 0
   %vecext1 = extractelement <2 x double> %A, i32 1
   %add = fadd double %vecext, %vecext1
@@ -383,6 +440,13 @@ define <2 x double> @hadd_pd_test2(<2 x double> %A, <2 x double> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <2 x double> @hadd_pd_test2(
+; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[VECINIT2:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <2 x double> [[VECINIT2]]
+;
   %vecext = extractelement <2 x double> %A, i32 1
   %vecext1 = extractelement <2 x double> %A, i32 0
   %add = fadd double %vecext, %vecext1
@@ -404,6 +468,13 @@ define <2 x double> @hsub_pd_test1(<2 x double> %A, <2 x double> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <2 x double> @hsub_pd_test1(
+; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[VECINIT2:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <2 x double> [[VECINIT2]]
+;
   %vecext = extractelement <2 x double> %A, i32 0
   %vecext1 = extractelement <2 x double> %A, i32 1
   %sub = fsub double %vecext, %vecext1
@@ -425,6 +496,13 @@ define <2 x double> @hsub_pd_test2(<2 x double> %A, <2 x double> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <2 x double> @hsub_pd_test2(
+; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[VECINIT2:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <2 x double> [[VECINIT2]]
+;
   %vecext = extractelement <2 x double> %B, i32 0
   %vecext1 = extractelement <2 x double> %B, i32 1
   %sub = fsub double %vecext, %vecext1
@@ -456,6 +534,13 @@ define <4 x double> @avx_vhadd_pd_test(<4 x double> %A, <4 x double> %B) {
 ; AVX2-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; AVX2-NEXT:    retq
+; CHECK-LABEL: define <4 x double> @avx_vhadd_pd_test(
+; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x double> [[TMP3]]
+;
   %vecext = extractelement <4 x double> %A, i32 0
   %vecext1 = extractelement <4 x double> %A, i32 1
   %add = fadd double %vecext, %vecext1
@@ -495,6 +580,13 @@ define <4 x double> @avx_vhsub_pd_test(<4 x double> %A, <4 x double> %B) {
 ; AVX2-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; AVX2-NEXT:    retq
+; CHECK-LABEL: define <4 x double> @avx_vhsub_pd_test(
+; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x double> [[TMP3]]
+;
   %vecext = extractelement <4 x double> %A, i32 0
   %vecext1 = extractelement <4 x double> %A, i32 1
   %sub = fsub double %vecext, %vecext1
@@ -590,6 +682,13 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) {
 ; AVX2-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; AVX2-NEXT:    retq
+; CHECK-LABEL: define <8 x i32> @avx2_vphadd_d_test(
+; CHECK-SAME: <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <8 x i32> [[TMP3]]
+;
   %vecext = extractelement <8 x i32> %A, i32 0
   %vecext1 = extractelement <8 x i32> %A, i32 1
  ...
[truncated]

llvmbot · 2025-07-13T15:20:14Z

@llvm/pr-subscribers-llvm-transforms

Author: 黃國庭 (houngkoungting)

Changes

FIX #143000
Remove LowerToHorizontalOp and adjust test case ; all tests pass after the change.

@RKSimon

Patch is 205.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148477.diff

6 Files Affected:

(modified) llvm/lib/Target/X86/X86ISelLowering.cpp (-118)
(renamed) llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll (+182-13)
(renamed) llvm/test/Transforms/PhaseOrdering/X86/haddsub-shuf.ll (+273-12)
(renamed) llvm/test/Transforms/PhaseOrdering/X86/haddsub-undef.ll (+398-9)
(renamed) llvm/test/Transforms/PhaseOrdering/X86/haddsub.ll (+581-9)
(renamed) llvm/test/Transforms/PhaseOrdering/X86/phaddsub-undef.ll (+68-10)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f8f29b9f2cdc7..677ecf8801e2d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8569,122 +8569,6 @@ static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,
   return DAG.getNode(HOpcode, DL, VT, V0, V1);
 }
 
-/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
-static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, const SDLoc &DL,
-                                   const X86Subtarget &Subtarget,
-                                   SelectionDAG &DAG) {
-  // We need at least 2 non-undef elements to make this worthwhile by default.
-  unsigned NumNonUndefs =
-      count_if(BV->op_values(), [](SDValue V) { return !V.isUndef(); });
-  if (NumNonUndefs < 2)
-    return SDValue();
-
-  // There are 4 sets of horizontal math operations distinguished by type:
-  // int/FP at 128-bit/256-bit. Each type was introduced with a different
-  // subtarget feature. Try to match those "native" patterns first.
-  MVT VT = BV->getSimpleValueType(0);
-  if (((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) ||
-      ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3()) ||
-      ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX()) ||
-      ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())) {
-    unsigned HOpcode;
-    SDValue V0, V1;
-    if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
-      return getHopForBuildVector(BV, DL, DAG, HOpcode, V0, V1);
-  }
-
-  // Try harder to match 256-bit ops by using extract/concat.
-  if (!Subtarget.hasAVX() || !VT.is256BitVector())
-    return SDValue();
-
-  // Count the number of UNDEF operands in the build_vector in input.
-  unsigned NumElts = VT.getVectorNumElements();
-  unsigned Half = NumElts / 2;
-  unsigned NumUndefsLO = 0;
-  unsigned NumUndefsHI = 0;
-  for (unsigned i = 0, e = Half; i != e; ++i)
-    if (BV->getOperand(i)->isUndef())
-      NumUndefsLO++;
-
-  for (unsigned i = Half, e = NumElts; i != e; ++i)
-    if (BV->getOperand(i)->isUndef())
-      NumUndefsHI++;
-
-  SDValue InVec0, InVec1;
-  if (VT == MVT::v8i32 || VT == MVT::v16i16) {
-    SDValue InVec2, InVec3;
-    unsigned X86Opcode;
-    bool CanFold = true;
-
-    if (isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, 0, Half, InVec0, InVec1) &&
-        isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, Half, NumElts, InVec2,
-                              InVec3) &&
-        ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
-        ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
-      X86Opcode = X86ISD::HADD;
-    else if (isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, 0, Half, InVec0,
-                                   InVec1) &&
-             isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, Half, NumElts, InVec2,
-                                   InVec3) &&
-             ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
-             ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
-      X86Opcode = X86ISD::HSUB;
-    else
-      CanFold = false;
-
-    if (CanFold) {
-      // Do not try to expand this build_vector into a pair of horizontal
-      // add/sub if we can emit a pair of scalar add/sub.
-      if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
-        return SDValue();
-
-      // Convert this build_vector into a pair of horizontal binops followed by
-      // a concat vector. We must adjust the outputs from the partial horizontal
-      // matching calls above to account for undefined vector halves.
-      SDValue V0 = InVec0.isUndef() ? InVec2 : InVec0;
-      SDValue V1 = InVec1.isUndef() ? InVec3 : InVec1;
-      assert((!V0.isUndef() || !V1.isUndef()) && "Horizontal-op of undefs?");
-      bool isUndefLO = NumUndefsLO == Half;
-      bool isUndefHI = NumUndefsHI == Half;
-      return ExpandHorizontalBinOp(V0, V1, DL, DAG, X86Opcode, false, isUndefLO,
-                                   isUndefHI);
-    }
-  }
-
-  if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
-      VT == MVT::v16i16) {
-    unsigned X86Opcode;
-    if (isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, 0, NumElts, InVec0,
-                              InVec1))
-      X86Opcode = X86ISD::HADD;
-    else if (isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, 0, NumElts, InVec0,
-                                   InVec1))
-      X86Opcode = X86ISD::HSUB;
-    else if (isHorizontalBinOpPart(BV, ISD::FADD, DL, DAG, 0, NumElts, InVec0,
-                                   InVec1))
-      X86Opcode = X86ISD::FHADD;
-    else if (isHorizontalBinOpPart(BV, ISD::FSUB, DL, DAG, 0, NumElts, InVec0,
-                                   InVec1))
-      X86Opcode = X86ISD::FHSUB;
-    else
-      return SDValue();
-
-    // Don't try to expand this build_vector into a pair of horizontal add/sub
-    // if we can simply emit a pair of scalar add/sub.
-    if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
-      return SDValue();
-
-    // Convert this build_vector into two horizontal add/sub followed by
-    // a concat vector.
-    bool isUndefLO = NumUndefsLO == Half;
-    bool isUndefHI = NumUndefsHI == Half;
-    return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true,
-                                 isUndefLO, isUndefHI);
-  }
-
-  return SDValue();
-}
-
 static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG);
 
@@ -9270,8 +9154,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
 
   if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, dl, Subtarget, DAG))
     return AddSub;
-  if (SDValue HorizontalOp = LowerToHorizontalOp(BV, dl, Subtarget, DAG))
-    return HorizontalOp;
   if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, dl, Subtarget, DAG))
     return Broadcast;
   if (SDValue BitOp = lowerBuildVectorToBitOp(BV, dl, Subtarget, DAG))
diff --git a/llvm/test/CodeGen/X86/haddsub-2.ll b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll
similarity index 81%
rename from llvm/test/CodeGen/X86/haddsub-2.ll
rename to llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll
index bca446fa8fb56..4eb5bdba9edb6 100644
--- a/llvm/test/CodeGen/X86/haddsub-2.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll
@@ -1,38 +1,39 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+sse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSE3
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+sse3,+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes="default<O3>" -S %s | FileCheck %s
 
 define <4 x float> @hadd_ps_test1(<4 x float> %A, <4 x float> %B) {
-; SSE-LABEL: hadd_ps_test1:
-; SSE:       # %bb.0:
-; SSE-NEXT:    haddps %xmm1, %xmm0
-; SSE-NEXT:    retq
+; CHECK-LABEL: define <4 x float> @hadd_ps_test1(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x float> [[TMP3]]
 ;
-; AVX-LABEL: hadd_ps_test1:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+
   %vecext = extractelement <4 x float> %A, i32 0
   %vecext1 = extractelement <4 x float> %A, i32 1
   %add = fadd float %vecext, %vecext1
   %vecinit = insertelement <4 x float> undef, float %add, i32 0
+
   %vecext2 = extractelement <4 x float> %A, i32 2
   %vecext3 = extractelement <4 x float> %A, i32 3
   %add4 = fadd float %vecext2, %vecext3
   %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1
+
   %vecext6 = extractelement <4 x float> %B, i32 0
   %vecext7 = extractelement <4 x float> %B, i32 1
   %add8 = fadd float %vecext6, %vecext7
   %vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 2
+
   %vecext10 = extractelement <4 x float> %B, i32 2
   %vecext11 = extractelement <4 x float> %B, i32 3
   %add12 = fadd float %vecext10, %vecext11
   %vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 3
+
   ret <4 x float> %vecinit13
 }
 
+
 define <4 x float> @hadd_ps_test2(<4 x float> %A, <4 x float> %B) {
 ; SSE-LABEL: hadd_ps_test2:
 ; SSE:       # %bb.0:
@@ -43,6 +44,13 @@ define <4 x float> @hadd_ps_test2(<4 x float> %A, <4 x float> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <4 x float> @hadd_ps_test2(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x float> [[TMP3]]
+;
   %vecext = extractelement <4 x float> %A, i32 2
   %vecext1 = extractelement <4 x float> %A, i32 3
   %add = fadd float %vecext, %vecext1
@@ -72,6 +80,13 @@ define <4 x float> @hsub_ps_test1(<4 x float> %A, <4 x float> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <4 x float> @hsub_ps_test1(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x float> [[TMP3]]
+;
   %vecext = extractelement <4 x float> %A, i32 0
   %vecext1 = extractelement <4 x float> %A, i32 1
   %sub = fsub float %vecext, %vecext1
@@ -101,6 +116,13 @@ define <4 x float> @hsub_ps_test2(<4 x float> %A, <4 x float> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <4 x float> @hsub_ps_test2(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x float> [[TMP3]]
+;
   %vecext = extractelement <4 x float> %A, i32 2
   %vecext1 = extractelement <4 x float> %A, i32 3
   %sub = fsub float %vecext, %vecext1
@@ -159,6 +181,13 @@ define <4 x i32> @phadd_d_test1(<4 x i32> %A, <4 x i32> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <4 x i32> @phadd_d_test1(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+;
   %vecext = extractelement <4 x i32> %A, i32 0
   %vecext1 = extractelement <4 x i32> %A, i32 1
   %add = add i32 %vecext, %vecext1
@@ -217,6 +246,13 @@ define <4 x i32> @phadd_d_test2(<4 x i32> %A, <4 x i32> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <4 x i32> @phadd_d_test2(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+;
   %vecext = extractelement <4 x i32> %A, i32 2
   %vecext1 = extractelement <4 x i32> %A, i32 3
   %add = add i32 %vecext, %vecext1
@@ -275,6 +311,13 @@ define <4 x i32> @phsub_d_test1(<4 x i32> %A, <4 x i32> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <4 x i32> @phsub_d_test1(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+;
   %vecext = extractelement <4 x i32> %A, i32 0
   %vecext1 = extractelement <4 x i32> %A, i32 1
   %sub = sub i32 %vecext, %vecext1
@@ -333,6 +376,13 @@ define <4 x i32> @phsub_d_test2(<4 x i32> %A, <4 x i32> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <4 x i32> @phsub_d_test2(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+;
   %vecext = extractelement <4 x i32> %A, i32 2
   %vecext1 = extractelement <4 x i32> %A, i32 3
   %sub = sub i32 %vecext, %vecext1
@@ -362,6 +412,13 @@ define <2 x double> @hadd_pd_test1(<2 x double> %A, <2 x double> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <2 x double> @hadd_pd_test1(
+; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[VECINIT2:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <2 x double> [[VECINIT2]]
+;
   %vecext = extractelement <2 x double> %A, i32 0
   %vecext1 = extractelement <2 x double> %A, i32 1
   %add = fadd double %vecext, %vecext1
@@ -383,6 +440,13 @@ define <2 x double> @hadd_pd_test2(<2 x double> %A, <2 x double> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <2 x double> @hadd_pd_test2(
+; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[VECINIT2:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <2 x double> [[VECINIT2]]
+;
   %vecext = extractelement <2 x double> %A, i32 1
   %vecext1 = extractelement <2 x double> %A, i32 0
   %add = fadd double %vecext, %vecext1
@@ -404,6 +468,13 @@ define <2 x double> @hsub_pd_test1(<2 x double> %A, <2 x double> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <2 x double> @hsub_pd_test1(
+; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[VECINIT2:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <2 x double> [[VECINIT2]]
+;
   %vecext = extractelement <2 x double> %A, i32 0
   %vecext1 = extractelement <2 x double> %A, i32 1
   %sub = fsub double %vecext, %vecext1
@@ -425,6 +496,13 @@ define <2 x double> @hsub_pd_test2(<2 x double> %A, <2 x double> %B) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+; CHECK-LABEL: define <2 x double> @hsub_pd_test2(
+; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[VECINIT2:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <2 x double> [[VECINIT2]]
+;
   %vecext = extractelement <2 x double> %B, i32 0
   %vecext1 = extractelement <2 x double> %B, i32 1
   %sub = fsub double %vecext, %vecext1
@@ -456,6 +534,13 @@ define <4 x double> @avx_vhadd_pd_test(<4 x double> %A, <4 x double> %B) {
 ; AVX2-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; AVX2-NEXT:    retq
+; CHECK-LABEL: define <4 x double> @avx_vhadd_pd_test(
+; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x double> [[TMP3]]
+;
   %vecext = extractelement <4 x double> %A, i32 0
   %vecext1 = extractelement <4 x double> %A, i32 1
   %add = fadd double %vecext, %vecext1
@@ -495,6 +580,13 @@ define <4 x double> @avx_vhsub_pd_test(<4 x double> %A, <4 x double> %B) {
 ; AVX2-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; AVX2-NEXT:    retq
+; CHECK-LABEL: define <4 x double> @avx_vhsub_pd_test(
+; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <4 x double> [[TMP3]]
+;
   %vecext = extractelement <4 x double> %A, i32 0
   %vecext1 = extractelement <4 x double> %A, i32 1
   %sub = fsub double %vecext, %vecext1
@@ -590,6 +682,13 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) {
 ; AVX2-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; AVX2-NEXT:    retq
+; CHECK-LABEL: define <8 x i32> @avx2_vphadd_d_test(
+; CHECK-SAME: <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret <8 x i32> [[TMP3]]
+;
   %vecext = extractelement <8 x i32> %A, i32 0
   %vecext1 = extractelement <8 x i32> %A, i32 1
  ...
[truncated]

github-actions · 2025-07-13T15:22:26Z

⚠️ undef deprecator found issues in your code. ⚠️

You can test this locally with the following command:

git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/lib/Target/X86/X86ISelLowering.cpp llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll llvm/test/Transforms/PhaseOrdering/X86/haddsub-shuf.ll llvm/test/Transforms/PhaseOrdering/X86/haddsub-undef.ll llvm/test/Transforms/PhaseOrdering/X86/haddsub.ll llvm/test/Transforms/PhaseOrdering/X86/phaddsub-undef.ll

The following files introduce new uses of undef:

llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll
llvm/test/Transforms/PhaseOrdering/X86/haddsub-shuf.ll
llvm/test/Transforms/PhaseOrdering/X86/haddsub-undef.ll
llvm/test/Transforms/PhaseOrdering/X86/haddsub.ll
llvm/test/Transforms/PhaseOrdering/X86/phaddsub-undef.ll

Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. You should use poison values for placeholders instead.

In tests, avoid using undef and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead.

For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.

X86: Remove LowerToHorizontalOp and modified test case

c4b205c

llvmbot added backend:X86 llvm:transforms labels Jul 13, 2025

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

X86: Remove LowerToHorizontalOp and modified test case #148477

X86: Remove LowerToHorizontalOp and modified test case #148477

houngkoungting commented Jul 13, 2025

Uh oh!

llvmbot commented Jul 13, 2025

Uh oh!

llvmbot commented Jul 13, 2025

Uh oh!

github-actions bot commented Jul 13, 2025

Uh oh!

Uh oh!

X86: Remove LowerToHorizontalOp and modified test case #148477

Are you sure you want to change the base?

X86: Remove LowerToHorizontalOp and modified test case #148477

Conversation

houngkoungting commented Jul 13, 2025

Uh oh!

llvmbot commented Jul 13, 2025

Uh oh!

llvmbot commented Jul 13, 2025

Uh oh!

github-actions bot commented Jul 13, 2025

Uh oh!

Uh oh!