-
Notifications
You must be signed in to change notification settings - Fork 14.5k
X86: Remove LowerToHorizontalOp and modify test case #148477
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-x86 Author: 黃國庭 (houngkoungting) Changes: Fixes #143000. @RKSimon Patch is 205.94 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/148477.diff 6 files affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f8f29b9f2cdc7..677ecf8801e2d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8569,122 +8569,6 @@ static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,
return DAG.getNode(HOpcode, DL, VT, V0, V1);
}
-/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
-static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, const SDLoc &DL,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- // We need at least 2 non-undef elements to make this worthwhile by default.
- unsigned NumNonUndefs =
- count_if(BV->op_values(), [](SDValue V) { return !V.isUndef(); });
- if (NumNonUndefs < 2)
- return SDValue();
-
- // There are 4 sets of horizontal math operations distinguished by type:
- // int/FP at 128-bit/256-bit. Each type was introduced with a different
- // subtarget feature. Try to match those "native" patterns first.
- MVT VT = BV->getSimpleValueType(0);
- if (((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) ||
- ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3()) ||
- ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX()) ||
- ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())) {
- unsigned HOpcode;
- SDValue V0, V1;
- if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
- return getHopForBuildVector(BV, DL, DAG, HOpcode, V0, V1);
- }
-
- // Try harder to match 256-bit ops by using extract/concat.
- if (!Subtarget.hasAVX() || !VT.is256BitVector())
- return SDValue();
-
- // Count the number of UNDEF operands in the build_vector in input.
- unsigned NumElts = VT.getVectorNumElements();
- unsigned Half = NumElts / 2;
- unsigned NumUndefsLO = 0;
- unsigned NumUndefsHI = 0;
- for (unsigned i = 0, e = Half; i != e; ++i)
- if (BV->getOperand(i)->isUndef())
- NumUndefsLO++;
-
- for (unsigned i = Half, e = NumElts; i != e; ++i)
- if (BV->getOperand(i)->isUndef())
- NumUndefsHI++;
-
- SDValue InVec0, InVec1;
- if (VT == MVT::v8i32 || VT == MVT::v16i16) {
- SDValue InVec2, InVec3;
- unsigned X86Opcode;
- bool CanFold = true;
-
- if (isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, 0, Half, InVec0, InVec1) &&
- isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, Half, NumElts, InVec2,
- InVec3) &&
- ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
- ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
- X86Opcode = X86ISD::HADD;
- else if (isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, 0, Half, InVec0,
- InVec1) &&
- isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, Half, NumElts, InVec2,
- InVec3) &&
- ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
- ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
- X86Opcode = X86ISD::HSUB;
- else
- CanFold = false;
-
- if (CanFold) {
- // Do not try to expand this build_vector into a pair of horizontal
- // add/sub if we can emit a pair of scalar add/sub.
- if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
- return SDValue();
-
- // Convert this build_vector into a pair of horizontal binops followed by
- // a concat vector. We must adjust the outputs from the partial horizontal
- // matching calls above to account for undefined vector halves.
- SDValue V0 = InVec0.isUndef() ? InVec2 : InVec0;
- SDValue V1 = InVec1.isUndef() ? InVec3 : InVec1;
- assert((!V0.isUndef() || !V1.isUndef()) && "Horizontal-op of undefs?");
- bool isUndefLO = NumUndefsLO == Half;
- bool isUndefHI = NumUndefsHI == Half;
- return ExpandHorizontalBinOp(V0, V1, DL, DAG, X86Opcode, false, isUndefLO,
- isUndefHI);
- }
- }
-
- if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
- VT == MVT::v16i16) {
- unsigned X86Opcode;
- if (isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, 0, NumElts, InVec0,
- InVec1))
- X86Opcode = X86ISD::HADD;
- else if (isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, 0, NumElts, InVec0,
- InVec1))
- X86Opcode = X86ISD::HSUB;
- else if (isHorizontalBinOpPart(BV, ISD::FADD, DL, DAG, 0, NumElts, InVec0,
- InVec1))
- X86Opcode = X86ISD::FHADD;
- else if (isHorizontalBinOpPart(BV, ISD::FSUB, DL, DAG, 0, NumElts, InVec0,
- InVec1))
- X86Opcode = X86ISD::FHSUB;
- else
- return SDValue();
-
- // Don't try to expand this build_vector into a pair of horizontal add/sub
- // if we can simply emit a pair of scalar add/sub.
- if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
- return SDValue();
-
- // Convert this build_vector into two horizontal add/sub followed by
- // a concat vector.
- bool isUndefLO = NumUndefsLO == Half;
- bool isUndefHI = NumUndefsHI == Half;
- return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true,
- isUndefLO, isUndefHI);
- }
-
- return SDValue();
-}
-
static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG);
@@ -9270,8 +9154,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, dl, Subtarget, DAG))
return AddSub;
- if (SDValue HorizontalOp = LowerToHorizontalOp(BV, dl, Subtarget, DAG))
- return HorizontalOp;
if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, dl, Subtarget, DAG))
return Broadcast;
if (SDValue BitOp = lowerBuildVectorToBitOp(BV, dl, Subtarget, DAG))
diff --git a/llvm/test/CodeGen/X86/haddsub-2.ll b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll
similarity index 81%
rename from llvm/test/CodeGen/X86/haddsub-2.ll
rename to llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll
index bca446fa8fb56..4eb5bdba9edb6 100644
--- a/llvm/test/CodeGen/X86/haddsub-2.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll
@@ -1,38 +1,39 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+sse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSE3
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+sse3,+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes="default<O3>" -S %s | FileCheck %s
define <4 x float> @hadd_ps_test1(<4 x float> %A, <4 x float> %B) {
-; SSE-LABEL: hadd_ps_test1:
-; SSE: # %bb.0:
-; SSE-NEXT: haddps %xmm1, %xmm0
-; SSE-NEXT: retq
+; CHECK-LABEL: define <4 x float> @hadd_ps_test1(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
;
-; AVX-LABEL: hadd_ps_test1:
-; AVX: # %bb.0:
-; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+
%vecext = extractelement <4 x float> %A, i32 0
%vecext1 = extractelement <4 x float> %A, i32 1
%add = fadd float %vecext, %vecext1
%vecinit = insertelement <4 x float> undef, float %add, i32 0
+
%vecext2 = extractelement <4 x float> %A, i32 2
%vecext3 = extractelement <4 x float> %A, i32 3
%add4 = fadd float %vecext2, %vecext3
%vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1
+
%vecext6 = extractelement <4 x float> %B, i32 0
%vecext7 = extractelement <4 x float> %B, i32 1
%add8 = fadd float %vecext6, %vecext7
%vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 2
+
%vecext10 = extractelement <4 x float> %B, i32 2
%vecext11 = extractelement <4 x float> %B, i32 3
%add12 = fadd float %vecext10, %vecext11
%vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 3
+
ret <4 x float> %vecinit13
}
+
define <4 x float> @hadd_ps_test2(<4 x float> %A, <4 x float> %B) {
; SSE-LABEL: hadd_ps_test2:
; SSE: # %bb.0:
@@ -43,6 +44,13 @@ define <4 x float> @hadd_ps_test2(<4 x float> %A, <4 x float> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x float> @hadd_ps_test2(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
+;
%vecext = extractelement <4 x float> %A, i32 2
%vecext1 = extractelement <4 x float> %A, i32 3
%add = fadd float %vecext, %vecext1
@@ -72,6 +80,13 @@ define <4 x float> @hsub_ps_test1(<4 x float> %A, <4 x float> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x float> @hsub_ps_test1(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
+;
%vecext = extractelement <4 x float> %A, i32 0
%vecext1 = extractelement <4 x float> %A, i32 1
%sub = fsub float %vecext, %vecext1
@@ -101,6 +116,13 @@ define <4 x float> @hsub_ps_test2(<4 x float> %A, <4 x float> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x float> @hsub_ps_test2(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
+;
%vecext = extractelement <4 x float> %A, i32 2
%vecext1 = extractelement <4 x float> %A, i32 3
%sub = fsub float %vecext, %vecext1
@@ -159,6 +181,13 @@ define <4 x i32> @phadd_d_test1(<4 x i32> %A, <4 x i32> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vphaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x i32> @phadd_d_test1(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
%vecext = extractelement <4 x i32> %A, i32 0
%vecext1 = extractelement <4 x i32> %A, i32 1
%add = add i32 %vecext, %vecext1
@@ -217,6 +246,13 @@ define <4 x i32> @phadd_d_test2(<4 x i32> %A, <4 x i32> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vphaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x i32> @phadd_d_test2(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 5, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 4, i32 6>
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
%vecext = extractelement <4 x i32> %A, i32 2
%vecext1 = extractelement <4 x i32> %A, i32 3
%add = add i32 %vecext, %vecext1
@@ -275,6 +311,13 @@ define <4 x i32> @phsub_d_test1(<4 x i32> %A, <4 x i32> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vphsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x i32> @phsub_d_test1(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
%vecext = extractelement <4 x i32> %A, i32 0
%vecext1 = extractelement <4 x i32> %A, i32 1
%sub = sub i32 %vecext, %vecext1
@@ -333,6 +376,13 @@ define <4 x i32> @phsub_d_test2(<4 x i32> %A, <4 x i32> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vphsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x i32> @phsub_d_test2(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
%vecext = extractelement <4 x i32> %A, i32 2
%vecext1 = extractelement <4 x i32> %A, i32 3
%sub = sub i32 %vecext, %vecext1
@@ -362,6 +412,13 @@ define <2 x double> @hadd_pd_test1(<2 x double> %A, <2 x double> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <2 x double> @hadd_pd_test1(
+; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[VECINIT2:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x double> [[VECINIT2]]
+;
%vecext = extractelement <2 x double> %A, i32 0
%vecext1 = extractelement <2 x double> %A, i32 1
%add = fadd double %vecext, %vecext1
@@ -383,6 +440,13 @@ define <2 x double> @hadd_pd_test2(<2 x double> %A, <2 x double> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <2 x double> @hadd_pd_test2(
+; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[VECINIT2:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x double> [[VECINIT2]]
+;
%vecext = extractelement <2 x double> %A, i32 1
%vecext1 = extractelement <2 x double> %A, i32 0
%add = fadd double %vecext, %vecext1
@@ -404,6 +468,13 @@ define <2 x double> @hsub_pd_test1(<2 x double> %A, <2 x double> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <2 x double> @hsub_pd_test1(
+; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[VECINIT2:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x double> [[VECINIT2]]
+;
%vecext = extractelement <2 x double> %A, i32 0
%vecext1 = extractelement <2 x double> %A, i32 1
%sub = fsub double %vecext, %vecext1
@@ -425,6 +496,13 @@ define <2 x double> @hsub_pd_test2(<2 x double> %A, <2 x double> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <2 x double> @hsub_pd_test2(
+; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[VECINIT2:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x double> [[VECINIT2]]
+;
%vecext = extractelement <2 x double> %B, i32 0
%vecext1 = extractelement <2 x double> %B, i32 1
%sub = fsub double %vecext, %vecext1
@@ -456,6 +534,13 @@ define <4 x double> @avx_vhadd_pd_test(<4 x double> %A, <4 x double> %B) {
; AVX2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
+; CHECK-LABEL: define <4 x double> @avx_vhadd_pd_test(
+; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x double> [[TMP3]]
+;
%vecext = extractelement <4 x double> %A, i32 0
%vecext1 = extractelement <4 x double> %A, i32 1
%add = fadd double %vecext, %vecext1
@@ -495,6 +580,13 @@ define <4 x double> @avx_vhsub_pd_test(<4 x double> %A, <4 x double> %B) {
; AVX2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
+; CHECK-LABEL: define <4 x double> @avx_vhsub_pd_test(
+; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x double> [[TMP3]]
+;
%vecext = extractelement <4 x double> %A, i32 0
%vecext1 = extractelement <4 x double> %A, i32 1
%sub = fsub double %vecext, %vecext1
@@ -590,6 +682,13 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) {
; AVX2-NEXT: vphaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
+; CHECK-LABEL: define <8 x i32> @avx2_vphadd_d_test(
+; CHECK-SAME: <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <8 x i32> [[TMP3]]
+;
%vecext = extractelement <8 x i32> %A, i32 0
%vecext1 = extractelement <8 x i32> %A, i32 1
...
[truncated]
|
@llvm/pr-subscribers-llvm-transforms Author: 黃國庭 (houngkoungting) Changes: Fixes #143000. @RKSimon Patch is 205.94 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/148477.diff 6 files affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f8f29b9f2cdc7..677ecf8801e2d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8569,122 +8569,6 @@ static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,
return DAG.getNode(HOpcode, DL, VT, V0, V1);
}
-/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
-static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, const SDLoc &DL,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- // We need at least 2 non-undef elements to make this worthwhile by default.
- unsigned NumNonUndefs =
- count_if(BV->op_values(), [](SDValue V) { return !V.isUndef(); });
- if (NumNonUndefs < 2)
- return SDValue();
-
- // There are 4 sets of horizontal math operations distinguished by type:
- // int/FP at 128-bit/256-bit. Each type was introduced with a different
- // subtarget feature. Try to match those "native" patterns first.
- MVT VT = BV->getSimpleValueType(0);
- if (((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) ||
- ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3()) ||
- ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX()) ||
- ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())) {
- unsigned HOpcode;
- SDValue V0, V1;
- if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
- return getHopForBuildVector(BV, DL, DAG, HOpcode, V0, V1);
- }
-
- // Try harder to match 256-bit ops by using extract/concat.
- if (!Subtarget.hasAVX() || !VT.is256BitVector())
- return SDValue();
-
- // Count the number of UNDEF operands in the build_vector in input.
- unsigned NumElts = VT.getVectorNumElements();
- unsigned Half = NumElts / 2;
- unsigned NumUndefsLO = 0;
- unsigned NumUndefsHI = 0;
- for (unsigned i = 0, e = Half; i != e; ++i)
- if (BV->getOperand(i)->isUndef())
- NumUndefsLO++;
-
- for (unsigned i = Half, e = NumElts; i != e; ++i)
- if (BV->getOperand(i)->isUndef())
- NumUndefsHI++;
-
- SDValue InVec0, InVec1;
- if (VT == MVT::v8i32 || VT == MVT::v16i16) {
- SDValue InVec2, InVec3;
- unsigned X86Opcode;
- bool CanFold = true;
-
- if (isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, 0, Half, InVec0, InVec1) &&
- isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, Half, NumElts, InVec2,
- InVec3) &&
- ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
- ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
- X86Opcode = X86ISD::HADD;
- else if (isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, 0, Half, InVec0,
- InVec1) &&
- isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, Half, NumElts, InVec2,
- InVec3) &&
- ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
- ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
- X86Opcode = X86ISD::HSUB;
- else
- CanFold = false;
-
- if (CanFold) {
- // Do not try to expand this build_vector into a pair of horizontal
- // add/sub if we can emit a pair of scalar add/sub.
- if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
- return SDValue();
-
- // Convert this build_vector into a pair of horizontal binops followed by
- // a concat vector. We must adjust the outputs from the partial horizontal
- // matching calls above to account for undefined vector halves.
- SDValue V0 = InVec0.isUndef() ? InVec2 : InVec0;
- SDValue V1 = InVec1.isUndef() ? InVec3 : InVec1;
- assert((!V0.isUndef() || !V1.isUndef()) && "Horizontal-op of undefs?");
- bool isUndefLO = NumUndefsLO == Half;
- bool isUndefHI = NumUndefsHI == Half;
- return ExpandHorizontalBinOp(V0, V1, DL, DAG, X86Opcode, false, isUndefLO,
- isUndefHI);
- }
- }
-
- if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
- VT == MVT::v16i16) {
- unsigned X86Opcode;
- if (isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, 0, NumElts, InVec0,
- InVec1))
- X86Opcode = X86ISD::HADD;
- else if (isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, 0, NumElts, InVec0,
- InVec1))
- X86Opcode = X86ISD::HSUB;
- else if (isHorizontalBinOpPart(BV, ISD::FADD, DL, DAG, 0, NumElts, InVec0,
- InVec1))
- X86Opcode = X86ISD::FHADD;
- else if (isHorizontalBinOpPart(BV, ISD::FSUB, DL, DAG, 0, NumElts, InVec0,
- InVec1))
- X86Opcode = X86ISD::FHSUB;
- else
- return SDValue();
-
- // Don't try to expand this build_vector into a pair of horizontal add/sub
- // if we can simply emit a pair of scalar add/sub.
- if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
- return SDValue();
-
- // Convert this build_vector into two horizontal add/sub followed by
- // a concat vector.
- bool isUndefLO = NumUndefsLO == Half;
- bool isUndefHI = NumUndefsHI == Half;
- return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true,
- isUndefLO, isUndefHI);
- }
-
- return SDValue();
-}
-
static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG);
@@ -9270,8 +9154,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, dl, Subtarget, DAG))
return AddSub;
- if (SDValue HorizontalOp = LowerToHorizontalOp(BV, dl, Subtarget, DAG))
- return HorizontalOp;
if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, dl, Subtarget, DAG))
return Broadcast;
if (SDValue BitOp = lowerBuildVectorToBitOp(BV, dl, Subtarget, DAG))
diff --git a/llvm/test/CodeGen/X86/haddsub-2.ll b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll
similarity index 81%
rename from llvm/test/CodeGen/X86/haddsub-2.ll
rename to llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll
index bca446fa8fb56..4eb5bdba9edb6 100644
--- a/llvm/test/CodeGen/X86/haddsub-2.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll
@@ -1,38 +1,39 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+sse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSE3
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+sse3,+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes="default<O3>" -S %s | FileCheck %s
define <4 x float> @hadd_ps_test1(<4 x float> %A, <4 x float> %B) {
-; SSE-LABEL: hadd_ps_test1:
-; SSE: # %bb.0:
-; SSE-NEXT: haddps %xmm1, %xmm0
-; SSE-NEXT: retq
+; CHECK-LABEL: define <4 x float> @hadd_ps_test1(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
;
-; AVX-LABEL: hadd_ps_test1:
-; AVX: # %bb.0:
-; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+
%vecext = extractelement <4 x float> %A, i32 0
%vecext1 = extractelement <4 x float> %A, i32 1
%add = fadd float %vecext, %vecext1
%vecinit = insertelement <4 x float> undef, float %add, i32 0
+
%vecext2 = extractelement <4 x float> %A, i32 2
%vecext3 = extractelement <4 x float> %A, i32 3
%add4 = fadd float %vecext2, %vecext3
%vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1
+
%vecext6 = extractelement <4 x float> %B, i32 0
%vecext7 = extractelement <4 x float> %B, i32 1
%add8 = fadd float %vecext6, %vecext7
%vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 2
+
%vecext10 = extractelement <4 x float> %B, i32 2
%vecext11 = extractelement <4 x float> %B, i32 3
%add12 = fadd float %vecext10, %vecext11
%vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 3
+
ret <4 x float> %vecinit13
}
+
define <4 x float> @hadd_ps_test2(<4 x float> %A, <4 x float> %B) {
; SSE-LABEL: hadd_ps_test2:
; SSE: # %bb.0:
@@ -43,6 +44,13 @@ define <4 x float> @hadd_ps_test2(<4 x float> %A, <4 x float> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x float> @hadd_ps_test2(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
+;
%vecext = extractelement <4 x float> %A, i32 2
%vecext1 = extractelement <4 x float> %A, i32 3
%add = fadd float %vecext, %vecext1
@@ -72,6 +80,13 @@ define <4 x float> @hsub_ps_test1(<4 x float> %A, <4 x float> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x float> @hsub_ps_test1(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
+;
%vecext = extractelement <4 x float> %A, i32 0
%vecext1 = extractelement <4 x float> %A, i32 1
%sub = fsub float %vecext, %vecext1
@@ -101,6 +116,13 @@ define <4 x float> @hsub_ps_test2(<4 x float> %A, <4 x float> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x float> @hsub_ps_test2(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
+;
%vecext = extractelement <4 x float> %A, i32 2
%vecext1 = extractelement <4 x float> %A, i32 3
%sub = fsub float %vecext, %vecext1
@@ -159,6 +181,13 @@ define <4 x i32> @phadd_d_test1(<4 x i32> %A, <4 x i32> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vphaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x i32> @phadd_d_test1(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
%vecext = extractelement <4 x i32> %A, i32 0
%vecext1 = extractelement <4 x i32> %A, i32 1
%add = add i32 %vecext, %vecext1
@@ -217,6 +246,13 @@ define <4 x i32> @phadd_d_test2(<4 x i32> %A, <4 x i32> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vphaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x i32> @phadd_d_test2(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 5, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 4, i32 6>
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
%vecext = extractelement <4 x i32> %A, i32 2
%vecext1 = extractelement <4 x i32> %A, i32 3
%add = add i32 %vecext, %vecext1
@@ -275,6 +311,13 @@ define <4 x i32> @phsub_d_test1(<4 x i32> %A, <4 x i32> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vphsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x i32> @phsub_d_test1(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
%vecext = extractelement <4 x i32> %A, i32 0
%vecext1 = extractelement <4 x i32> %A, i32 1
%sub = sub i32 %vecext, %vecext1
@@ -333,6 +376,13 @@ define <4 x i32> @phsub_d_test2(<4 x i32> %A, <4 x i32> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vphsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x i32> @phsub_d_test2(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
%vecext = extractelement <4 x i32> %A, i32 2
%vecext1 = extractelement <4 x i32> %A, i32 3
%sub = sub i32 %vecext, %vecext1
@@ -362,6 +412,13 @@ define <2 x double> @hadd_pd_test1(<2 x double> %A, <2 x double> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <2 x double> @hadd_pd_test1(
+; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[VECINIT2:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x double> [[VECINIT2]]
+;
%vecext = extractelement <2 x double> %A, i32 0
%vecext1 = extractelement <2 x double> %A, i32 1
%add = fadd double %vecext, %vecext1
@@ -383,6 +440,13 @@ define <2 x double> @hadd_pd_test2(<2 x double> %A, <2 x double> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <2 x double> @hadd_pd_test2(
+; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[VECINIT2:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x double> [[VECINIT2]]
+;
%vecext = extractelement <2 x double> %A, i32 1
%vecext1 = extractelement <2 x double> %A, i32 0
%add = fadd double %vecext, %vecext1
@@ -404,6 +468,13 @@ define <2 x double> @hsub_pd_test1(<2 x double> %A, <2 x double> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <2 x double> @hsub_pd_test1(
+; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[VECINIT2:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x double> [[VECINIT2]]
+;
%vecext = extractelement <2 x double> %A, i32 0
%vecext1 = extractelement <2 x double> %A, i32 1
%sub = fsub double %vecext, %vecext1
@@ -425,6 +496,13 @@ define <2 x double> @hsub_pd_test2(<2 x double> %A, <2 x double> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <2 x double> @hsub_pd_test2(
+; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[VECINIT2:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x double> [[VECINIT2]]
+;
%vecext = extractelement <2 x double> %B, i32 0
%vecext1 = extractelement <2 x double> %B, i32 1
%sub = fsub double %vecext, %vecext1
@@ -456,6 +534,13 @@ define <4 x double> @avx_vhadd_pd_test(<4 x double> %A, <4 x double> %B) {
; AVX2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
+; CHECK-LABEL: define <4 x double> @avx_vhadd_pd_test(
+; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x double> [[TMP3]]
+;
%vecext = extractelement <4 x double> %A, i32 0
%vecext1 = extractelement <4 x double> %A, i32 1
%add = fadd double %vecext, %vecext1
@@ -495,6 +580,13 @@ define <4 x double> @avx_vhsub_pd_test(<4 x double> %A, <4 x double> %B) {
; AVX2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
+; CHECK-LABEL: define <4 x double> @avx_vhsub_pd_test(
+; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x double> [[TMP3]]
+;
%vecext = extractelement <4 x double> %A, i32 0
%vecext1 = extractelement <4 x double> %A, i32 1
%sub = fsub double %vecext, %vecext1
@@ -590,6 +682,13 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) {
; AVX2-NEXT: vphaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
+; CHECK-LABEL: define <8 x i32> @avx2_vphadd_d_test(
+; CHECK-SAME: <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <8 x i32> [[TMP3]]
+;
%vecext = extractelement <8 x i32> %A, i32 0
%vecext1 = extractelement <8 x i32> %A, i32 1
...
[truncated]
|
You can test this locally with the following command:
git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/lib/Target/X86/X86ISelLowering.cpp llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll llvm/test/Transforms/PhaseOrdering/X86/haddsub-shuf.ll llvm/test/Transforms/PhaseOrdering/X86/haddsub-undef.ll llvm/test/Transforms/PhaseOrdering/X86/haddsub.ll llvm/test/Transforms/PhaseOrdering/X86/phaddsub-undef.ll
The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. In tests, avoid using `undef`. For example, this is considered a bad practice:
define void @fn() {
...
br i1 undef, ...
}
Please use the following instead:
define void @fn(i1 %cond) {
...
br i1 %cond, ...
}
Please refer to the Undefined Behavior Manual for more information. |
FIX #143000
Remove LowerToHorizontalOp and adjust the test cases; all tests pass after the change.
@RKSimon