diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f8f29b9f2cdc7..677ecf8801e2d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -8569,122 +8569,6 @@ static SDValue getHopForBuildVector(const BuildVectorSDNode *BV, return DAG.getNode(HOpcode, DL, VT, V0, V1); } -/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible. -static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, const SDLoc &DL, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { - // We need at least 2 non-undef elements to make this worthwhile by default. - unsigned NumNonUndefs = - count_if(BV->op_values(), [](SDValue V) { return !V.isUndef(); }); - if (NumNonUndefs < 2) - return SDValue(); - - // There are 4 sets of horizontal math operations distinguished by type: - // int/FP at 128-bit/256-bit. Each type was introduced with a different - // subtarget feature. Try to match those "native" patterns first. - MVT VT = BV->getSimpleValueType(0); - if (((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) || - ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3()) || - ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX()) || - ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())) { - unsigned HOpcode; - SDValue V0, V1; - if (isHopBuildVector(BV, DAG, HOpcode, V0, V1)) - return getHopForBuildVector(BV, DL, DAG, HOpcode, V0, V1); - } - - // Try harder to match 256-bit ops by using extract/concat. - if (!Subtarget.hasAVX() || !VT.is256BitVector()) - return SDValue(); - - // Count the number of UNDEF operands in the build_vector in input. - unsigned NumElts = VT.getVectorNumElements(); - unsigned Half = NumElts / 2; - unsigned NumUndefsLO = 0; - unsigned NumUndefsHI = 0; - for (unsigned i = 0, e = Half; i != e; ++i) - if (BV->getOperand(i)->isUndef()) - NumUndefsLO++; - - for (unsigned i = Half, e = NumElts; i != e; ++i) - if (BV->getOperand(i)->isUndef()) - NumUndefsHI++; - - SDValue InVec0, InVec1; - if (VT == MVT::v8i32 || VT == MVT::v16i16) { - SDValue InVec2, InVec3; - unsigned X86Opcode; - bool CanFold = true; - - if (isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, 0, Half, InVec0, InVec1) && - isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, Half, NumElts, InVec2, - InVec3) && - ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) && - ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3)) - X86Opcode = X86ISD::HADD; - else if (isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, 0, Half, InVec0, - InVec1) && - isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, Half, NumElts, InVec2, - InVec3) && - ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) && - ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3)) - X86Opcode = X86ISD::HSUB; - else - CanFold = false; - - if (CanFold) { - // Do not try to expand this build_vector into a pair of horizontal - // add/sub if we can emit a pair of scalar add/sub. - if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half) - return SDValue(); - - // Convert this build_vector into a pair of horizontal binops followed by - // a concat vector. We must adjust the outputs from the partial horizontal - // matching calls above to account for undefined vector halves. - SDValue V0 = InVec0.isUndef() ? InVec2 : InVec0; - SDValue V1 = InVec1.isUndef() ? InVec3 : InVec1; - assert((!V0.isUndef() || !V1.isUndef()) && "Horizontal-op of undefs?"); - bool isUndefLO = NumUndefsLO == Half; - bool isUndefHI = NumUndefsHI == Half; - return ExpandHorizontalBinOp(V0, V1, DL, DAG, X86Opcode, false, isUndefLO, - isUndefHI); - } - } - - if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 || - VT == MVT::v16i16) { - unsigned X86Opcode; - if (isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, 0, NumElts, InVec0, - InVec1)) - X86Opcode = X86ISD::HADD; - else if (isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, 0, NumElts, InVec0, - InVec1)) - X86Opcode = X86ISD::HSUB; - else if (isHorizontalBinOpPart(BV, ISD::FADD, DL, DAG, 0, NumElts, InVec0, - InVec1)) - X86Opcode = X86ISD::FHADD; - else if (isHorizontalBinOpPart(BV, ISD::FSUB, DL, DAG, 0, NumElts, InVec0, - InVec1)) - X86Opcode = X86ISD::FHSUB; - else - return SDValue(); - - // Don't try to expand this build_vector into a pair of horizontal add/sub - // if we can simply emit a pair of scalar add/sub. - if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half) - return SDValue(); - - // Convert this build_vector into two horizontal add/sub followed by - // a concat vector. - bool isUndefLO = NumUndefsLO == Half; - bool isUndefHI = NumUndefsHI == Half; - return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true, - isUndefLO, isUndefHI); - } - - return SDValue(); -} - static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG); @@ -9270,8 +9154,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, dl, Subtarget, DAG)) return AddSub; - if (SDValue HorizontalOp = LowerToHorizontalOp(BV, dl, Subtarget, DAG)) - return HorizontalOp; if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, dl, Subtarget, DAG)) return Broadcast; if (SDValue BitOp = lowerBuildVectorToBitOp(BV, dl, Subtarget, DAG)) diff --git a/llvm/test/CodeGen/X86/haddsub-2.ll b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll similarity index 81% rename from llvm/test/CodeGen/X86/haddsub-2.ll rename to llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll index bca446fa8fb56..4eb5bdba9edb6 100644 --- a/llvm/test/CodeGen/X86/haddsub-2.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll @@ -1,38 +1,39 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+sse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSE3 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+sse3,+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes="default" -S %s | FileCheck %s define <4 x float> @hadd_ps_test1(<4 x float> %A, <4 x float> %B) { -; SSE-LABEL: hadd_ps_test1: -; SSE: # %bb.0: -; SSE-NEXT: haddps %xmm1, %xmm0 -; SSE-NEXT: retq +; CHECK-LABEL: define <4 x float> @hadd_ps_test1( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[TMP3]] ; -; AVX-LABEL: hadd_ps_test1: -; AVX: # %bb.0: -; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: retq + %vecext = extractelement <4 x float> %A, i32 0 %vecext1 = extractelement <4 x float> %A, i32 1 %add = fadd float %vecext, %vecext1 %vecinit = insertelement <4 x float> undef, float %add, i32 0 + %vecext2 = extractelement <4 x float> %A, i32 2 %vecext3 = extractelement <4 x float> %A, i32 3 %add4 = fadd float %vecext2, %vecext3 %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1 + %vecext6 = extractelement <4 x float> %B, i32 0 %vecext7 = extractelement <4 x float> %B, i32 1 %add8 = fadd float %vecext6, %vecext7 %vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 2 + %vecext10 = extractelement <4 x float> %B, i32 2 %vecext11 = extractelement <4 x float> %B, i32 3 %add12 = fadd float %vecext10, %vecext11 %vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 3 + ret <4 x float> %vecinit13 } + define <4 x float> @hadd_ps_test2(<4 x float> %A, <4 x float> %B) { ; SSE-LABEL: hadd_ps_test2: ; SSE: # %bb.0: @@ -43,6 +44,13 @@ define <4 x float> @hadd_ps_test2(<4 x float> %A, <4 x float> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @hadd_ps_test2( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[TMP3]] +; %vecext = extractelement <4 x float> %A, i32 2 %vecext1 = extractelement <4 x float> %A, i32 3 %add = fadd float %vecext, %vecext1 @@ -72,6 +80,13 @@ define <4 x float> @hsub_ps_test1(<4 x float> %A, <4 x float> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @hsub_ps_test1( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[TMP3]] +; %vecext = extractelement <4 x float> %A, i32 0 %vecext1 = extractelement <4 x float> %A, i32 1 %sub = fsub float %vecext, %vecext1 @@ -101,6 +116,13 @@ define <4 x float> @hsub_ps_test2(<4 x float> %A, <4 x float> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @hsub_ps_test2( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[TMP3]] +; %vecext = extractelement <4 x float> %A, i32 2 %vecext1 = extractelement <4 x float> %A, i32 3 %sub = fsub float %vecext, %vecext1 @@ -159,6 +181,13 @@ define <4 x i32> @phadd_d_test1(<4 x i32> %A, <4 x i32> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vphaddd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x i32> @phadd_d_test1( +; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; %vecext = extractelement <4 x i32> %A, i32 0 %vecext1 = extractelement <4 x i32> %A, i32 1 %add = add i32 %vecext, %vecext1 @@ -217,6 +246,13 @@ define <4 x i32> @phadd_d_test2(<4 x i32> %A, <4 x i32> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vphaddd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x i32> @phadd_d_test2( +; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; %vecext = extractelement <4 x i32> %A, i32 2 %vecext1 = extractelement <4 x i32> %A, i32 3 %add = add i32 %vecext, %vecext1 @@ -275,6 +311,13 @@ define <4 x i32> @phsub_d_test1(<4 x i32> %A, <4 x i32> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vphsubd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x i32> @phsub_d_test1( +; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; %vecext = extractelement <4 x i32> %A, i32 0 %vecext1 = extractelement <4 x i32> %A, i32 1 %sub = sub i32 %vecext, %vecext1 @@ -333,6 +376,13 @@ define <4 x i32> @phsub_d_test2(<4 x i32> %A, <4 x i32> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vphsubd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x i32> @phsub_d_test2( +; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; %vecext = extractelement <4 x i32> %A, i32 2 %vecext1 = extractelement <4 x i32> %A, i32 3 %sub = sub i32 %vecext, %vecext1 @@ -362,6 +412,13 @@ define <2 x double> @hadd_pd_test1(<2 x double> %A, <2 x double> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <2 x double> @hadd_pd_test1( +; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[VECINIT2:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <2 x double> [[VECINIT2]] +; %vecext = extractelement <2 x double> %A, i32 0 %vecext1 = extractelement <2 x double> %A, i32 1 %add = fadd double %vecext, %vecext1 @@ -383,6 +440,13 @@ define <2 x double> @hadd_pd_test2(<2 x double> %A, <2 x double> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <2 x double> @hadd_pd_test2( +; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[VECINIT2:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <2 x double> [[VECINIT2]] +; %vecext = extractelement <2 x double> %A, i32 1 %vecext1 = extractelement <2 x double> %A, i32 0 %add = fadd double %vecext, %vecext1 @@ -404,6 +468,13 @@ define <2 x double> @hsub_pd_test1(<2 x double> %A, <2 x double> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <2 x double> @hsub_pd_test1( +; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[VECINIT2:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <2 x double> [[VECINIT2]] +; %vecext = extractelement <2 x double> %A, i32 0 %vecext1 = extractelement <2 x double> %A, i32 1 %sub = fsub double %vecext, %vecext1 @@ -425,6 +496,13 @@ define <2 x double> @hsub_pd_test2(<2 x double> %A, <2 x double> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <2 x double> @hsub_pd_test2( +; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[VECINIT2:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <2 x double> [[VECINIT2]] +; %vecext = extractelement <2 x double> %B, i32 0 %vecext1 = extractelement <2 x double> %B, i32 1 %sub = fsub double %vecext, %vecext1 @@ -456,6 +534,13 @@ define <4 x double> @avx_vhadd_pd_test(<4 x double> %A, <4 x double> %B) { ; AVX2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <4 x double> @avx_vhadd_pd_test( +; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x double> [[TMP3]] +; %vecext = extractelement <4 x double> %A, i32 0 %vecext1 = extractelement <4 x double> %A, i32 1 %add = fadd double %vecext, %vecext1 @@ -495,6 +580,13 @@ define <4 x double> @avx_vhsub_pd_test(<4 x double> %A, <4 x double> %B) { ; AVX2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <4 x double> @avx_vhsub_pd_test( +; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x double> [[TMP3]] +; %vecext = extractelement <4 x double> %A, i32 0 %vecext1 = extractelement <4 x double> %A, i32 1 %sub = fsub double %vecext, %vecext1 @@ -590,6 +682,13 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) { ; AVX2-NEXT: vphaddd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x i32> @avx2_vphadd_d_test( +; CHECK-SAME: <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x i32> [[TMP3]] +; %vecext = extractelement <8 x i32> %A, i32 0 %vecext1 = extractelement <8 x i32> %A, i32 1 %add = add i32 %vecext, %vecext1 @@ -745,6 +844,13 @@ define <16 x i16> @avx2_vphadd_w_test(<16 x i16> %a, <16 x i16> %b) nounwind { ; AVX2-NEXT: vphaddw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <16 x i16> @avx2_vphadd_w_test( +; CHECK-SAME: <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <16 x i16> [[TMP3]] +; %vecext = extractelement <16 x i16> %a, i32 0 %vecext1 = extractelement <16 x i16> %a, i32 1 %add = add i16 %vecext, %vecext1 @@ -863,6 +969,13 @@ define <4 x i32> @not_a_hsub_1(<4 x i32> %A, <4 x i32> %B) { ; AVX-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ; AVX-NEXT: vpinsrd $3, %esi, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x i32> @not_a_hsub_1( +; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; %vecext = extractelement <4 x i32> %A, i32 0 %vecext1 = extractelement <4 x i32> %A, i32 1 %sub = sub i32 %vecext, %vecext1 @@ -920,6 +1033,13 @@ define <4 x float> @not_a_hsub_2(<4 x float> %A, <4 x float> %B) { ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm2[0] ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @not_a_hsub_2( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[TMP3]] +; %vecext = extractelement <4 x float> %A, i32 2 %vecext1 = extractelement <4 x float> %A, i32 3 %sub = fsub float %vecext, %vecext1 @@ -960,6 +1080,13 @@ define <2 x double> @not_a_hsub_3(<2 x double> %A, <2 x double> %B) { ; AVX-NEXT: vsubsd %xmm0, %xmm2, %xmm0 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq +; CHECK-LABEL: define <2 x double> @not_a_hsub_3( +; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[VECINIT2:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <2 x double> [[VECINIT2]] +; %vecext = extractelement <2 x double> %B, i32 0 %vecext1 = extractelement <2 x double> %B, i32 1 %sub = fsub double %vecext, %vecext1 @@ -985,6 +1112,13 @@ define <8 x float> @avx_vhadd_ps(<8 x float> %a, <8 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @avx_vhadd_ps( +; CHECK-SAME: <8 x float> [[A:%.*]], <8 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x float> [[TMP3]] +; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -1031,6 +1165,13 @@ define <8 x float> @avx_vhsub_ps(<8 x float> %a, <8 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @avx_vhsub_ps( +; CHECK-SAME: <8 x float> [[A:%.*]], <8 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fsub <8 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x float> [[TMP3]] +; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %a, i32 1 %sub = fsub float %vecext, %vecext1 @@ -1077,6 +1218,13 @@ define <4 x double> @avx_hadd_pd(<4 x double> %a, <4 x double> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x double> @avx_hadd_pd( +; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x double> [[TMP3]] +; %vecext = extractelement <4 x double> %a, i32 0 %vecext1 = extractelement <4 x double> %a, i32 1 %add = fadd double %vecext, %vecext1 @@ -1107,6 +1255,13 @@ define <4 x double> @avx_hsub_pd(<4 x double> %a, <4 x double> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x double> @avx_hsub_pd( +; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x double> [[TMP3]] +; %vecext = extractelement <4 x double> %a, i32 0 %vecext1 = extractelement <4 x double> %a, i32 1 %sub = fsub double %vecext, %vecext1 @@ -1202,6 +1357,13 @@ define <8 x i32> @avx2_hadd_d(<8 x i32> %a, <8 x i32> %b) { ; AVX2: # %bb.0: ; AVX2-NEXT: vphaddd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x i32> @avx2_hadd_d( +; CHECK-SAME: <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x i32> [[TMP3]] +; %vecext = extractelement <8 x i32> %a, i32 0 %vecext1 = extractelement <8 x i32> %a, i32 1 %add = add i32 %vecext, %vecext1 @@ -1355,6 +1517,13 @@ define <16 x i16> @avx2_hadd_w(<16 x i16> %a, <16 x i16> %b) nounwind { ; AVX2: # %bb.0: ; AVX2-NEXT: vphaddw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq +; CHECK-LABEL: define <16 x i16> @avx2_hadd_w( +; CHECK-SAME: <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <16 x i16> [[TMP3]] +; %vecext = extractelement <16 x i16> %a, i32 0 %vecext1 = extractelement <16 x i16> %a, i32 1 %add = add i16 %vecext, %vecext1 diff --git a/llvm/test/CodeGen/X86/haddsub-shuf.ll b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-shuf.ll similarity index 73% rename from llvm/test/CodeGen/X86/haddsub-shuf.ll rename to llvm/test/Transforms/PhaseOrdering/X86/haddsub-shuf.ll index 364ad953a11d4..f425550c1c6df 100644 --- a/llvm/test/CodeGen/X86/haddsub-shuf.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-shuf.ll @@ -1,15 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE,SSE_SLOW,SSE3 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3,fast-hops | FileCheck %s --check-prefixes=SSE,SSE_FAST,SSE3 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE_SLOW,SSSE3,SSSE3_SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3,fast-hops | FileCheck %s --check-prefixes=SSE,SSE_FAST,SSSE3,SSSE3_FAST -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1,AVX1_SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefixes=AVX,AVX1,AVX1_FAST -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2_SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,fast-hops | FileCheck %s --check-prefixes=AVX,AVX2,AVX2_FAST - -; The next 8 tests check for matching the horizontal op and eliminating the shuffle. -; PR34111 - https://bugs.llvm.org/show_bug.cgi?id=34111 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes="default" -S %s | FileCheck %s define <4 x float> @hadd_v4f32(<4 x float> %a) { ; SSE-LABEL: hadd_v4f32: @@ -21,6 +11,13 @@ define <4 x float> @hadd_v4f32(<4 x float> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @hadd_v4f32( +; CHECK-SAME: <4 x float> [[A:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[SHUF]] +; %a02 = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> %a13 = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> %hop = fadd <2 x float> %a02, %a13 @@ -65,6 +62,13 @@ define <8 x float> @hadd_v8f32a(<8 x float> %a) { ; AVX2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,1] ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x float> @hadd_v8f32a( +; CHECK-SAME: <8 x float> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x float> [[SHUF]] +; %a0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> %a1 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> %hop = fadd <4 x float> %a0, %a1 @@ -83,6 +87,13 @@ define <8 x float> @hadd_v8f32b(<8 x float> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %ymm0, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @hadd_v8f32b( +; CHECK-SAME: <8 x float> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x float> [[SHUF]] +; %a0 = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> %a1 = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> %hop = fadd <8 x float> %a0, %a1 @@ -100,6 +111,13 @@ define <4 x float> @hsub_v4f32(<4 x float> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @hsub_v4f32( +; CHECK-SAME: <4 x float> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[SHUF]] +; %a02 = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> %a13 = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> %hop = fsub <2 x float> %a02, %a13 @@ -144,6 +162,13 @@ define <8 x float> @hsub_v8f32a(<8 x float> %a) { ; AVX2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,1] ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x float> @hsub_v8f32a( +; CHECK-SAME: <8 x float> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fsub <8 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x float> [[SHUF]] +; %a0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> %a1 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> %hop = fsub <4 x float> %a0, %a1 @@ -162,6 +187,13 @@ define <8 x float> @hsub_v8f32b(<8 x float> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubps %ymm0, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @hsub_v8f32b( +; CHECK-SAME: <8 x float> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fsub <8 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x float> [[SHUF]] +; %a0 = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> %a1 = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> %hop = fsub <8 x float> %a0, %a1 @@ -206,6 +238,13 @@ define <2 x double> @hadd_v2f64(<2 x double> %a) { ; AVX2_FAST: # %bb.0: ; AVX2_FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX2_FAST-NEXT: retq +; CHECK-LABEL: define <2 x double> @hadd_v2f64( +; CHECK-SAME: <2 x double> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <2 x double> [[SHUF]] +; %a0 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> %a1 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> %hop = fadd <2 x double> %a0, %a1 @@ -250,6 +289,13 @@ define <2 x double> @hadd_v2f64_scalar_splat(<2 x double> %a) { ; AVX2_FAST: # %bb.0: ; AVX2_FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX2_FAST-NEXT: retq +; CHECK-LABEL: define <2 x double> @hadd_v2f64_scalar_splat( +; CHECK-SAME: <2 x double> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <2 x double> [[SHUF]] +; %a0 = extractelement <2 x double> %a, i32 0 %a1 = extractelement <2 x double> %a, i32 1 %hop = fadd double %a0, %a1 @@ -281,6 +327,13 @@ define <4 x double> @hadd_v4f64_scalar_splat(<4 x double> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddpd %ymm0, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x double> @hadd_v4f64_scalar_splat( +; CHECK-SAME: <4 x double> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x double> [[SHUF]] +; %a0 = extractelement <4 x double> %a, i32 0 %a1 = extractelement <4 x double> %a, i32 1 %hop0 = fadd double %a0, %a1 @@ -335,6 +388,13 @@ define <4 x double> @hadd_v4f64_scalar_broadcast(<4 x double> %a) { ; AVX2_FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX2_FAST-NEXT: vbroadcastsd %xmm0, %ymm0 ; AVX2_FAST-NEXT: retq +; CHECK-LABEL: define <4 x double> @hadd_v4f64_scalar_broadcast( +; CHECK-SAME: <4 x double> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x double> [[SHUF]] +; %a0 = extractelement <4 x double> %a, i32 0 %a1 = extractelement <4 x double> %a, i32 1 %hop0 = fadd double %a0, %a1 @@ -370,6 +430,13 @@ define <4 x double> @hadd_v4f64(<4 x double> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddpd %ymm0, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x double> @hadd_v4f64( +; CHECK-SAME: <4 x double> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x double> [[SHUF]] +; %a0 = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> %a1 = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> %hop = fadd <4 x double> %a0, %a1 @@ -414,6 +481,12 @@ define <2 x double> @hsub_v2f64(<2 x double> %a) { ; AVX2_FAST: # %bb.0: ; AVX2_FAST-NEXT: vhsubpd %xmm0, %xmm0, %xmm0 ; AVX2_FAST-NEXT: retq +; CHECK-LABEL: define <2 x double> @hsub_v2f64( +; CHECK-SAME: <2 x double> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fsub <2 x double> [[TMP1]], [[A]] +; CHECK-NEXT: ret <2 x double> [[SHUF]] +; %a0 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> %a1 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> %hop = fsub <2 x double> %a0, %a1 @@ -444,6 +517,13 @@ define <4 x double> @hsub_v4f64(<4 x double> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubpd %ymm0, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x double> @hsub_v4f64( +; CHECK-SAME: <4 x double> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x double> [[SHUF]] +; %a0 = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> %a1 = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> %hop = fsub <4 x double> %a0, %a1 @@ -468,6 +548,13 @@ define <4 x i32> @hadd_v4i32(<4 x i32> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x i32> @hadd_v4i32( +; CHECK-SAME: <4 x i32> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x i32> [[SHUF]] +; %a02 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %a13 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %hop = add <4 x i32> %a02, %a13 @@ -524,6 +611,13 @@ define <8 x i32> @hadd_v8i32a(<8 x i32> %a) { ; AVX2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1] ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x i32> @hadd_v8i32a( +; CHECK-SAME: <8 x i32> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x i32> [[SHUF]] +; %a0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> %a1 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> %hop = add <4 x i32> %a0, %a1 @@ -560,6 +654,13 @@ define <8 x i32> @hadd_v8i32b(<8 x i32> %a) { ; AVX2: # %bb.0: ; AVX2-NEXT: vphaddd %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x i32> @hadd_v8i32b( +; CHECK-SAME: <8 x i32> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x i32> [[SHUF]] +; %a0 = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> %a1 = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> %hop = add <8 x i32> %a0, %a1 @@ -584,6 +685,13 @@ define <4 x i32> @hsub_v4i32(<4 x i32> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vphsubd %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x i32> @hsub_v4i32( +; CHECK-SAME: <4 x i32> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x i32> [[SHUF]] +; %a02 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %a13 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %hop = sub <4 x i32> %a02, %a13 @@ -640,6 +748,13 @@ define <8 x i32> @hsub_v8i32a(<8 x i32> %a) { ; AVX2-NEXT: vphsubd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1] ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x i32> @hsub_v8i32a( +; CHECK-SAME: <8 x i32> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x i32> [[SHUF]] +; %a0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> %a1 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> %hop = sub <4 x i32> %a0, %a1 @@ -676,6 +791,13 @@ define <8 x i32> @hsub_v8i32b(<8 x i32> %a) { ; AVX2: # %bb.0: ; AVX2-NEXT: vphsubd %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x i32> @hsub_v8i32b( +; CHECK-SAME: <8 x i32> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x i32> [[SHUF]] +; %a0 = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> %a1 = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> %hop = sub <8 x i32> %a0, %a1 @@ -705,6 +827,13 @@ define <8 x i16> @hadd_v8i16(<8 x i16> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vphaddw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x i16> @hadd_v8i16( +; CHECK-SAME: <8 x i16> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x i16> [[SHUF]] +; %a0246 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %a1357 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %hop = add <8 x i16> %a0246, %a1357 @@ -768,6 +897,13 @@ define <16 x i16> @hadd_v16i16a(<16 x i16> %a) { ; AVX2-NEXT: vphaddw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1] ; AVX2-NEXT: retq +; CHECK-LABEL: define <16 x i16> @hadd_v16i16a( +; CHECK-SAME: <16 x i16> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <16 x i16> [[SHUF]] +; %a0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> %a1 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> %hop = add <8 x i16> %a0, %a1 @@ -820,6 +956,13 @@ define <16 x i16> @hadd_v16i16b(<16 x i16> %a) { ; AVX2: # %bb.0: ; AVX2-NEXT: vphaddw %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: retq +; CHECK-LABEL: define <16 x i16> @hadd_v16i16b( +; CHECK-SAME: <16 x i16> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <16 x i16> [[SHUF]] +; %a0 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> %a1 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> %hop = add <16 x i16> %a0, %a1 @@ -845,6 +988,13 @@ define <8 x i16> @hsub_v8i16(<8 x i16> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vphsubw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x i16> @hsub_v8i16( +; CHECK-SAME: <8 x i16> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x i16> [[SHUF]] +; %a0246 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %a1357 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %hop = sub <8 x i16> %a0246, %a1357 @@ -908,6 +1058,13 @@ define <16 x i16> @hsub_v16i16a(<16 x i16> %a) { ; AVX2-NEXT: vphsubw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1] ; AVX2-NEXT: retq +; CHECK-LABEL: define <16 x i16> @hsub_v16i16a( +; CHECK-SAME: <16 x i16> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <16 x i16> [[SHUF]] +; %a0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> %a1 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> %hop = sub <8 x i16> %a0, %a1 @@ -960,6 +1117,13 @@ define <16 x i16> @hsub_v16i16b(<16 x i16> %a) { ; AVX2: # %bb.0: ; AVX2-NEXT: vphsubw %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: retq +; CHECK-LABEL: define <16 x i16> @hsub_v16i16b( +; CHECK-SAME: <16 x i16> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <16 x i16> [[SHUF]] +; %a0 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> %a1 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> %hop = sub <16 x i16> %a0, %a1 @@ -985,6 +1149,12 @@ define <4 x float> @broadcast_haddps_v4f32(<4 x float> %a0) { ; AVX2-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX2-NEXT: vbroadcastss %xmm0, %xmm0 ; AVX2-NEXT: retq +; CHECK-LABEL: define <4 x float> @broadcast_haddps_v4f32( +; CHECK-SAME: <4 x float> [[A0:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> [[A0]], <4 x float> [[A0]]) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: ret <4 x float> [[TMP2]] +; %1 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a0) %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer ret <4 x float> %2 @@ -1002,6 +1172,13 @@ define <4 x float> @PR34724_1(<4 x float> %a, <4 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @PR34724_1( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[VECINIT13:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[VECINIT13]] +; %t0 = shufflevector <4 x float> %a, <4 x float> %b, <2 x i32> %t1 = shufflevector <4 x float> %a, <4 x float> %b, <2 x i32> %t2 = fadd <2 x float> %t0, %t1 @@ -1022,6 +1199,13 @@ define <4 x float> @PR34724_2(<4 x float> %a, <4 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @PR34724_2( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[VECINIT13:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[VECINIT13]] +; %t0 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> %t1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> %t2 = fadd <4 x float> %t0, %t1 @@ -1051,6 +1235,13 @@ define <4 x float> @hadd_4f32_v8f32_shuffle(<8 x float> %a0) { ; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @hadd_4f32_v8f32_shuffle( +; CHECK-SAME: <8 x float> [[A0:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HADD0:%.*]] = shufflevector <8 x float> [[A0]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[HADD1:%.*]] = shufflevector <8 x float> [[A0]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[HADD:%.*]] = fadd <4 x float> [[HADD0]], [[HADD1]] +; CHECK-NEXT: ret <4 x float> [[HADD]] +; %shuf256 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> %lo = shufflevector <8 x float> %shuf256, <8 x float> undef, <4 x i32> %hi = shufflevector <8 x float> %shuf256, <8 x float> undef, <4 x i32> @@ -1074,6 +1265,13 @@ define <4 x float> @hsub_4f32_v8f32_shuffle(<8 x float> %a0) { ; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @hsub_4f32_v8f32_shuffle( +; CHECK-SAME: <8 x float> [[A0:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HSUB0:%.*]] = shufflevector <8 x float> [[A0]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[HSUB1:%.*]] = shufflevector <8 x float> [[A0]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[HSUB:%.*]] = fadd <4 x float> [[HSUB0]], [[HSUB1]] +; CHECK-NEXT: ret <4 x float> [[HSUB]] +; %shuf256 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> %lo = shufflevector <8 x float> %shuf256, <8 x float> undef, <4 x i32> %hi = shufflevector <8 x float> %shuf256, <8 x float> undef, <4 x i32> @@ -1113,6 +1311,13 @@ define <4 x i32> @hadd_4i32_v8i32_shuffle(<8 x i32> %a0) { ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq +; CHECK-LABEL: define <4 x i32> @hadd_4i32_v8i32_shuffle( +; CHECK-SAME: <8 x i32> [[A0:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HADD0:%.*]] = shufflevector <8 x i32> [[A0]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[HADD1:%.*]] = shufflevector <8 x i32> [[A0]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[HADD:%.*]] = add <4 x i32> [[HADD0]], [[HADD1]] +; CHECK-NEXT: ret <4 x i32> [[HADD]] +; %shuf256 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> %lo = shufflevector <8 x i32> %shuf256, <8 x i32> undef, <4 x i32> %hi = shufflevector <8 x i32> %shuf256, <8 x i32> undef, <4 x i32> @@ -1152,6 +1357,13 @@ define <4 x i32> @hsub_4i32_v8i32_shuffle(<8 x i32> %a0) { ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq +; CHECK-LABEL: define <4 x i32> @hsub_4i32_v8i32_shuffle( +; CHECK-SAME: <8 x i32> [[A0:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HSUB0:%.*]] = shufflevector <8 x i32> [[A0]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[HSUB1:%.*]] = shufflevector <8 x i32> [[A0]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[HSUB:%.*]] = add <4 x i32> [[HSUB0]], [[HSUB1]] +; CHECK-NEXT: ret <4 x i32> [[HSUB]] +; %shuf256 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> %lo = shufflevector <8 x i32> %shuf256, <8 x i32> undef, <4 x i32> %hi = shufflevector <8 x i32> %shuf256, <8 x i32> undef, <4 x i32> @@ -1185,6 +1397,13 @@ define <4 x double> @hadd_4f64_v4f64_shuffle(<4 x double> %a0, <4 x double> %a1) ; AVX2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <4 x double> @hadd_4f64_v4f64_shuffle( +; CHECK-SAME: <4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HADD0:%.*]] = shufflevector <4 x double> [[A0]], <4 x double> [[A1]], <4 x i32> +; CHECK-NEXT: [[HADD1:%.*]] = shufflevector <4 x double> [[A0]], <4 x double> [[A1]], <4 x i32> +; CHECK-NEXT: [[HADD:%.*]] = fadd <4 x double> [[HADD0]], [[HADD1]] +; CHECK-NEXT: ret <4 x double> [[HADD]] +; %shuf0 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> %shuf1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> %hadd0 = shufflevector <4 x double> %shuf0, <4 x double> %shuf1, <4 x i32> @@ -1213,6 +1432,13 @@ define <4 x double> @hsub_4f64_v4f64_shuffle(<4 x double> %a0, <4 x double> %a1) ; AVX2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <4 x double> @hsub_4f64_v4f64_shuffle( +; CHECK-SAME: <4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HADD0:%.*]] = shufflevector <4 x double> [[A0]], <4 x double> [[A1]], <4 x i32> +; CHECK-NEXT: [[HADD1:%.*]] = shufflevector <4 x double> [[A0]], <4 x double> [[A1]], <4 x i32> +; CHECK-NEXT: [[HADD:%.*]] = fsub <4 x double> [[HADD0]], [[HADD1]] +; CHECK-NEXT: ret <4 x double> [[HADD]] +; %shuf0 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> %shuf1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> %hadd0 = shufflevector <4 x double> %shuf0, <4 x double> %shuf1, <4 x i32> @@ -1241,6 +1467,13 @@ define <8 x float> @hadd_8f32_v8f32_shuffle(<8 x float> %a0, <8 x float> %a1) { ; AVX2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x float> @hadd_8f32_v8f32_shuffle( +; CHECK-SAME: <8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HADD0:%.*]] = shufflevector <8 x float> [[A0]], <8 x float> [[A1]], <8 x i32> +; CHECK-NEXT: [[HADD1:%.*]] = shufflevector <8 x float> [[A0]], <8 x float> [[A1]], <8 x i32> +; CHECK-NEXT: [[HADD:%.*]] = fadd <8 x float> [[HADD0]], [[HADD1]] +; CHECK-NEXT: ret <8 x float> [[HADD]] +; %shuf0 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> %shuf1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> %hadd0 = shufflevector <8 x float> %shuf0, <8 x float> %shuf1, <8 x i32> @@ -1269,6 +1502,13 @@ define <8 x float> @hsub_8f32_v8f32_shuffle(<8 x float> %a0, <8 x float> %a1) { ; AVX2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x float> @hsub_8f32_v8f32_shuffle( +; CHECK-SAME: <8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HSUB0:%.*]] = shufflevector <8 x float> [[A0]], <8 x float> [[A1]], <8 x i32> +; CHECK-NEXT: [[HSUB1:%.*]] = shufflevector <8 x float> [[A0]], <8 x float> [[A1]], <8 x i32> +; CHECK-NEXT: [[HSUB:%.*]] = fadd <8 x float> [[HSUB0]], [[HSUB1]] +; CHECK-NEXT: ret <8 x float> [[HSUB]] +; %shuf0 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> %shuf1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> %hsub0 = shufflevector <8 x float> %shuf0, <8 x float> %shuf1, <8 x i32> @@ -1312,6 +1552,13 @@ define <8 x i32> @hadd_8i32_v8i32_shuffle(<8 x i32> %a0, <8 x i32> %a1) { ; AVX2-NEXT: vphaddd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x i32> @hadd_8i32_v8i32_shuffle( +; CHECK-SAME: <8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HADD0:%.*]] = shufflevector <8 x i32> [[A0]], <8 x i32> [[A1]], <8 x i32> +; CHECK-NEXT: [[HADD1:%.*]] = shufflevector <8 x i32> [[A0]], <8 x i32> [[A1]], <8 x i32> +; CHECK-NEXT: [[HADD:%.*]] = add <8 x i32> [[HADD0]], [[HADD1]] +; CHECK-NEXT: ret <8 x i32> [[HADD]] +; %shuf0 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> %shuf1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> %hadd0 = shufflevector <8 x i32> %shuf0, <8 x i32> %shuf1, <8 x i32> @@ -1356,6 +1603,13 @@ define <8 x i32> @hsub_8i32_v8i32_shuffle(<8 x i32> %a0, <8 x i32> %a1) { ; AVX2-NEXT: vphsubd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x i32> @hsub_8i32_v8i32_shuffle( +; CHECK-SAME: <8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HADD0:%.*]] = shufflevector <8 x i32> [[A0]], <8 x i32> [[A1]], <8 x i32> +; CHECK-NEXT: [[HADD1:%.*]] = shufflevector <8 x i32> [[A0]], <8 x i32> [[A1]], <8 x i32> +; CHECK-NEXT: [[HADD:%.*]] = sub <8 x i32> [[HADD0]], [[HADD1]] +; CHECK-NEXT: ret <8 x i32> [[HADD]] +; %shuf0 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> %shuf1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> %hadd0 = shufflevector <8 x i32> %shuf0, <8 x i32> %shuf1, <8 x i32> @@ -1413,6 +1667,13 @@ define <16 x i16> @hadd_16i16_16i16_shuffle(<16 x i16> %a0, <16 x i16> %a1) { ; AVX2-NEXT: vphaddw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <16 x i16> @hadd_16i16_16i16_shuffle( +; CHECK-SAME: <16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HADD0:%.*]] = shufflevector <16 x i16> [[A0]], <16 x i16> [[A1]], <16 x i32> +; CHECK-NEXT: [[HADD1:%.*]] = shufflevector <16 x i16> [[A0]], <16 x i16> [[A1]], <16 x i32> +; CHECK-NEXT: [[HADD:%.*]] = add <16 x i16> [[HADD0]], [[HADD1]] +; CHECK-NEXT: ret <16 x i16> [[HADD]] +; %shuf0 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> %shuf1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> %hadd0 = shufflevector <16 x i16> %shuf0, <16 x i16> %shuf1, <16 x i32> diff --git a/llvm/test/CodeGen/X86/haddsub-undef.ll b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-undef.ll similarity index 57% rename from llvm/test/CodeGen/X86/haddsub-undef.ll rename to llvm/test/Transforms/PhaseOrdering/X86/haddsub-undef.ll index 94fa81742ba71..678b0a10717ac 100644 --- a/llvm/test/CodeGen/X86/haddsub-undef.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-undef.ll @@ -1,12 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE,SSE-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3,fast-hops | FileCheck %s --check-prefixes=SSE,SSE-FAST -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX1-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX1-FAST -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX512,AVX512-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX512,AVX512-FAST - -; Verify that we correctly fold horizontal binop even in the presence of UNDEFs. +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes="default" -S %s | FileCheck %s define <4 x float> @test1_undef(<4 x float> %a, <4 x float> %b) { ; SSE-LABEL: test1_undef: @@ -18,6 +11,19 @@ define <4 x float> @test1_undef(<4 x float> %a, <4 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @test1_undef( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x float> [[TMP1]], float undef, i64 2 +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[SHIFT1]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <4 x float> [[VECINIT3]], <4 x float> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]] +; CHECK-NEXT: [[VECINIT13:%.*]] = shufflevector <4 x float> [[VECINIT5]], <4 x float> [[TMP3]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINIT13]] +; %vecext = extractelement <4 x float> %a, i32 0 %vecext1 = extractelement <4 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -43,6 +49,19 @@ define <4 x float> @test2_undef(<4 x float> %a, <4 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @test2_undef( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x float> [[TMP1]], float undef, i64 1 +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]] +; CHECK-NEXT: [[VECINIT9:%.*]] = shufflevector <4 x float> [[VECINIT3]], <4 x float> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]] +; CHECK-NEXT: [[VECINIT13:%.*]] = shufflevector <4 x float> [[VECINIT9]], <4 x float> [[TMP3]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINIT13]] +; %vecext = extractelement <4 x float> %a, i32 0 %vecext1 = extractelement <4 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -68,6 +87,19 @@ define <4 x float> @test3_undef(<4 x float> %a, <4 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @test3_undef( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x float> [[TMP1]], float undef, i64 3 +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[SHIFT1]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <4 x float> [[VECINIT3]], <4 x float> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[B]], [[SHIFT2]] +; CHECK-NEXT: [[VECINIT9:%.*]] = shufflevector <4 x float> [[VECINIT5]], <4 x float> [[TMP3]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINIT9]] +; %vecext = extractelement <4 x float> %a, i32 0 %vecext1 = extractelement <4 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -105,6 +137,13 @@ define <4 x float> @test4_undef(<4 x float> %a, <4 x float> %b) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x float> @test4_undef( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> , <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINIT]] +; %vecext = extractelement <4 x float> %a, i32 0 %vecext1 = extractelement <4 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -135,6 +174,13 @@ define <2 x double> @test5_undef(<2 x double> %a, <2 x double> %b) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <2 x double> @test5_undef( +; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <2 x double> [[TMP1]], double undef, i64 1 +; CHECK-NEXT: ret <2 x double> [[VECINIT1]] +; %vecext = extractelement <2 x double> %a, i32 0 %vecext1 = extractelement <2 x double> %a, i32 1 %add = fadd double %vecext, %vecext1 @@ -152,6 +198,16 @@ define <4 x float> @test6_undef(<4 x float> %a, <4 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @test6_undef( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[SHIFT1]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <4 x float> [[VECINIT]], <4 x float> [[TMP2]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINIT5]] +; %vecext = extractelement <4 x float> %a, i32 0 %vecext1 = extractelement <4 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -173,6 +229,16 @@ define <4 x float> @test7_undef(<4 x float> %a, <4 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @test7_undef( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[B]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[SHIFT1]], [[B]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <4 x float> [[VECINIT]], <4 x float> [[TMP2]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINIT5]] +; %vecext = extractelement <4 x float> %b, i32 0 %vecext1 = extractelement <4 x float> %b, i32 1 %add = fadd float %vecext, %vecext1 @@ -218,6 +284,16 @@ define <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1,1,3] ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x float> @test8_undef( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[SHIFT1]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <4 x float> [[VECINIT]], <4 x float> [[TMP2]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINIT5]] +; %vecext = extractelement <4 x float> %a, i32 0 %vecext1 = extractelement <4 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -239,6 +315,16 @@ define <4 x float> @test9_undef(<4 x float> %a, <4 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @test9_undef( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[SHIFT1]], [[B]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <4 x float> [[VECINIT]], <4 x float> [[TMP2]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINIT5]] +; %vecext = extractelement <4 x float> %a, i32 0 %vecext1 = extractelement <4 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -260,6 +346,16 @@ define <8 x float> @test10_undef(<8 x float> %a, <8 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @test10_undef( +; CHECK-SAME: <8 x float> [[A:%.*]], <8 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> , <8 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[SHIFT1]], [[B]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <8 x float> [[VECINIT]], <8 x float> [[TMP2]], <8 x i32> +; CHECK-NEXT: ret <8 x float> [[VECINIT5]] +; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -292,6 +388,16 @@ define <8 x float> @test11_undef(<8 x float> %a, <8 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @test11_undef( +; CHECK-SAME: <8 x float> [[A:%.*]], <8 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> , <8 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[B]], [[SHIFT1]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <8 x float> [[VECINIT]], <8 x float> [[TMP2]], <8 x i32> +; CHECK-NEXT: ret <8 x float> [[VECINIT5]] +; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -313,6 +419,16 @@ define <8 x float> @test12_undef(<8 x float> %a, <8 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @test12_undef( +; CHECK-SAME: <8 x float> [[A:%.*]], <8 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> , <8 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[SHIFT1]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <8 x float> [[VECINIT]], <8 x float> [[TMP2]], <8 x i32> +; CHECK-NEXT: ret <8 x float> [[VECINIT5]] +; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -335,6 +451,14 @@ define <8 x float> @test13_undef(<8 x float> %a, <8 x float> %b) { ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @test13_undef( +; CHECK-SAME: <8 x float> [[A:%.*]], <8 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <8 x i32> +; CHECK-NEXT: ret <8 x float> [[TMP4]] +; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %a, i32 1 %add1 = fadd float %vecext, %vecext1 @@ -389,6 +513,14 @@ define <16 x float> @test13_v16f32_undef(<16 x float> %a, <16 x float> %b) { ; AVX512-SLOW-NEXT: vaddss %xmm0, %xmm2, %xmm0 ; AVX512-SLOW-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] ; AVX512-SLOW-NEXT: retq +; CHECK-LABEL: define <16 x float> @test13_v16f32_undef( +; CHECK-SAME: <16 x float> [[A:%.*]], <16 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <16 x i32> +; CHECK-NEXT: ret <16 x float> [[TMP4]] +; %vecext = extractelement <16 x float> %a, i32 0 %vecext1 = extractelement <16 x float> %a, i32 1 %add1 = fadd float %vecext, %vecext1 @@ -429,6 +561,12 @@ define <2 x double> @add_pd_003(<2 x double> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <2 x double> @add_pd_003( +; CHECK-SAME: <2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[L:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[ADD:%.*]] = fadd <2 x double> [[X]], [[L]] +; CHECK-NEXT: ret <2 x double> [[ADD]] +; %l = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> %add = fadd <2 x double> %l, %x ret <2 x double> %add @@ -459,6 +597,12 @@ define <2 x double> @add_pd_003_2(<2 x double> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <2 x double> @add_pd_003_2( +; CHECK-SAME: <2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[L:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[ADD:%.*]] = fadd <2 x double> [[X]], [[L]] +; CHECK-NEXT: ret <2 x double> [[ADD]] +; %l = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> %add = fadd <2 x double> %l, %x ret <2 x double> %add @@ -481,6 +625,12 @@ define <2 x double> @add_pd_010(<2 x double> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <2 x double> @add_pd_010( +; CHECK-SAME: <2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[SHUFFLE2:%.*]] = fadd <2 x double> [[TMP1]], [[X]] +; CHECK-NEXT: ret <2 x double> [[SHUFFLE2]] +; %l = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> %add = fadd <2 x double> %l, %x %shuffle2 = shufflevector <2 x double> %add, <2 x double> undef, <2 x i32> @@ -497,6 +647,13 @@ define <4 x float> @add_ps_007(<4 x float> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @add_ps_007( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[L:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[L]], [[R]] +; CHECK-NEXT: ret <4 x float> [[ADD]] +; %l = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %add = fadd <4 x float> %l, %r @@ -530,6 +687,13 @@ define <4 x float> @add_ps_030(<4 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,0,2,3] ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x float> @add_ps_030( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE2:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[SHUFFLE2]] +; %l = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %add = fadd <4 x float> %l, %r @@ -547,6 +711,13 @@ define <4 x float> @add_ps_007_2(<4 x float> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @add_ps_007_2( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[L:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[L]], [[R]] +; CHECK-NEXT: ret <4 x float> [[ADD]] +; %l = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %add = fadd <4 x float> %l, %r @@ -575,6 +746,12 @@ define <4 x float> @add_ps_008(<4 x float> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x float> @add_ps_008( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[L:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[X]], [[L]] +; CHECK-NEXT: ret <4 x float> [[ADD]] +; %l = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %add = fadd <4 x float> %l, %x ret <4 x float> %add @@ -593,6 +770,13 @@ define <4 x float> @add_ps_016(<4 x float> %0, <4 x float> %1) { ; AVX-NEXT: vhaddps %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,0,3,3] ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @add_ps_016( +; CHECK-SAME: <4 x float> [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP0]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP0]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret <4 x float> [[TMP5]] +; %3 = shufflevector <4 x float> %1, <4 x float> %0, <2 x i32> %4 = shufflevector <4 x float> %1, <4 x float> %0, <2 x i32> %5 = fadd <2 x float> %3, %4 @@ -630,6 +814,13 @@ define <4 x float> @add_ps_017(<4 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x float> @add_ps_017( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE2:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[SHUFFLE2]] +; %l = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %add = fadd <4 x float> %l, %x %shuffle2 = shufflevector <4 x float> %add, <4 x float> undef, <4 x i32> @@ -660,6 +851,12 @@ define <4 x float> @add_ps_018(<4 x float> %x) { ; AVX512-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vbroadcastss %xmm0, %xmm0 ; AVX512-NEXT: retq +; CHECK-LABEL: define <4 x float> @add_ps_018( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE2:%.*]] = fadd <4 x float> [[TMP1]], [[X]] +; CHECK-NEXT: ret <4 x float> [[SHUFFLE2]] +; %l = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %add = fadd <4 x float> %l, %r @@ -704,6 +901,13 @@ define <4 x double> @add_pd_011(<4 x double> %0, <4 x double> %1) { ; AVX512-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3] ; AVX512-NEXT: retq +; CHECK-LABEL: define <4 x double> @add_pd_011( +; CHECK-SAME: <4 x double> [[TMP0:%.*]], <4 x double> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret <4 x double> [[TMP5]] +; %3 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> %4 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> %5 = fadd <4 x double> %3, %4 @@ -722,6 +926,18 @@ define <4 x float> @v8f32_inputs_v4f32_output_0101(<8 x float> %a, <8 x float> % ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @v8f32_inputs_v4f32_output_0101( +; CHECK-SAME: <8 x float> [[A:%.*]], <8 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[R0:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[R0]], <4 x float> [[TMP6]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[R]] +; %a0 = extractelement <8 x float> %a, i32 0 %a1 = extractelement <8 x float> %a, i32 1 %b0 = extractelement <8 x float> %b, i32 0 @@ -744,6 +960,17 @@ define <4 x float> @v8f32_input0_v4f32_output_0123(<8 x float> %a, <4 x float> % ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @v8f32_input0_v4f32_output_0123( +; CHECK-SAME: <8 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT1]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[R0:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[R0]], <4 x float> [[TMP1]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[R]] +; %a0 = extractelement <8 x float> %a, i32 0 %a1 = extractelement <8 x float> %a, i32 1 %b2 = extractelement <4 x float> %b, i32 2 @@ -766,6 +993,17 @@ define <4 x float> @v8f32_input1_v4f32_output_2301(<4 x float> %a, <8 x float> % ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @v8f32_input1_v4f32_output_2301( +; CHECK-SAME: <4 x float> [[A:%.*]], <8 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[R1:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[R1]], <4 x float> [[TMP4]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[R]] +; %a2 = extractelement <4 x float> %a, i32 2 %a3 = extractelement <4 x float> %a, i32 3 %b0 = extractelement <8 x float> %b, i32 0 @@ -788,6 +1026,18 @@ define <4 x float> @v8f32_inputs_v4f32_output_2323(<8 x float> %a, <8 x float> % ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @v8f32_inputs_v4f32_output_2323( +; CHECK-SAME: <8 x float> [[A:%.*]], <8 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[R1:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[R1]], <4 x float> [[TMP6]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[R]] +; %a2 = extractelement <8 x float> %a, i32 2 %a3 = extractelement <8 x float> %a, i32 3 %b2 = extractelement <8 x float> %b, i32 2 @@ -822,6 +1072,18 @@ define <4 x float> @v16f32_inputs_v4f32_output_0123(<16 x float> %a, <16 x float ; AVX512-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq +; CHECK-LABEL: define <4 x float> @v16f32_inputs_v4f32_output_0123( +; CHECK-SAME: <16 x float> [[A:%.*]], <16 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[R0:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[B]], <16 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[B]], <16 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[R0]], <4 x float> [[TMP6]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[R]] +; %a0 = extractelement <16 x float> %a, i32 0 %a1 = extractelement <16 x float> %a, i32 1 %b2 = extractelement <16 x float> %b, i32 2 @@ -853,6 +1115,18 @@ define <8 x float> @v16f32_inputs_v8f32_output_4567(<16 x float> %a, <16 x float ; AVX512: # %bb.0: ; AVX512-NEXT: vhaddps %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: retq +; CHECK-LABEL: define <8 x float> @v16f32_inputs_v8f32_output_4567( +; CHECK-SAME: <16 x float> [[A:%.*]], <16 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[R4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> , <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[B]], <16 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[B]], <16 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fadd <8 x float> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x float> [[R4]], <8 x float> [[TMP6]], <8 x i32> +; CHECK-NEXT: ret <8 x float> [[R]] +; %a4 = extractelement <16 x float> %a, i32 4 %a5 = extractelement <16 x float> %a, i32 5 %b6 = extractelement <16 x float> %b, i32 6 @@ -874,6 +1148,16 @@ define <8 x float> @PR40243(<8 x float> %a, <8 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @PR40243( +; CHECK-SAME: <8 x float> [[A:%.*]], <8 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[SHIFT1]], [[B]] +; CHECK-NEXT: [[R4:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> , <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x float> [[R4]], <8 x float> [[TMP2]], <8 x i32> +; CHECK-NEXT: ret <8 x float> [[R]] +; %a4 = extractelement <8 x float> %a, i32 4 %a5 = extractelement <8 x float> %a, i32 5 %add4 = fadd float %a4, %a5 @@ -921,6 +1205,13 @@ define <4 x double> @PR44694(<4 x double> %0, <4 x double> %1) { ; AVX512-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX512-NEXT: retq +; CHECK-LABEL: define <4 x double> @PR44694( +; CHECK-SAME: <4 x double> [[TMP0:%.*]], <4 x double> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret <4 x double> [[TMP5]] +; %3 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> %4 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> %5 = fadd <4 x double> %3, %4 @@ -952,6 +1243,13 @@ define <4 x float> @PR45747_1(<4 x float> %a, <4 x float> %b) nounwind { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x float> @PR45747_1( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[SHUFFLE]] +; %t0 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> %t1 = fadd <4 x float> %t0, %a %shuffle = shufflevector <4 x float> %t1, <4 x float> undef, <4 x i32> @@ -985,6 +1283,13 @@ define <4 x float> @PR45747_2(<4 x float> %a, <4 x float> %b) nounwind { ; AVX-FAST-NEXT: vhaddps %xmm1, %xmm1, %xmm0 ; AVX-FAST-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x float> @PR45747_2( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[SHUFFLE]] +; %t0 = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> %t1 = fadd <4 x float> %t0, %b %shuffle = shufflevector <4 x float> %t1, <4 x float> undef, <4 x i32> @@ -1001,6 +1306,13 @@ define <4 x float> @PR34724_add_v4f32_u123(<4 x float> %0, <4 x float> %1) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @PR34724_add_v4f32_u123( +; CHECK-SAME: <4 x float> [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret <4 x float> [[TMP5]] +; %3 = shufflevector <4 x float> %0, <4 x float> %1, <2 x i32> %4 = shufflevector <4 x float> %0, <4 x float> %1, <2 x i32> %5 = fadd <2 x float> %3, %4 @@ -1040,6 +1352,13 @@ define <4 x float> @PR34724_add_v4f32_0u23(<4 x float> %0, <4 x float> %1) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x float> @PR34724_add_v4f32_0u23( +; CHECK-SAME: <4 x float> [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret <4 x float> [[TMP5]] +; %3 = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> %4 = fadd <4 x float> %3, %0 %5 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> @@ -1061,6 +1380,13 @@ define <4 x float> @PR34724_add_v4f32_01u3(<4 x float> %0, <4 x float> %1) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @PR34724_add_v4f32_01u3( +; CHECK-SAME: <4 x float> [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret <4 x float> [[TMP5]] +; %3 = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> %4 = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> %5 = fadd <2 x float> %3, %4 @@ -1081,6 +1407,13 @@ define <4 x float> @PR34724_add_v4f32_012u(<4 x float> %0, <4 x float> %1) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @PR34724_add_v4f32_012u( +; CHECK-SAME: <4 x float> [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret <4 x float> [[TMP5]] +; %3 = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> %4 = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> %5 = fadd <2 x float> %3, %4 @@ -1129,6 +1462,20 @@ define <4 x double> @PR34724_add_v4f64_u123(<4 x double> %0, <4 x double> %1) { ; AVX-FAST-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; AVX-FAST-NEXT: vhaddpd %ymm0, %ymm1, %ymm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x double> @PR34724_add_v4f64_u123( +; CHECK-SAME: <4 x double> [[TMP0:%.*]], <4 x double> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> , <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x double> [[SHIFT]], [[TMP1]] +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP10]], <4 x i32> +; CHECK-NEXT: ret <4 x double> [[TMP11]] +; %3 = shufflevector <4 x double> %0, <4 x double> %1, <2 x i32> %4 = shufflevector <4 x double> %0, <4 x double> %1, <2 x i32> %5 = fadd <2 x double> %3, %4 @@ -1176,6 +1523,20 @@ define <4 x double> @PR34724_add_v4f64_0u23(<4 x double> %0, <4 x double> %1) { ; AVX-FAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX-FAST-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x double> @PR34724_add_v4f64_0u23( +; CHECK-SAME: <4 x double> [[TMP0:%.*]], <4 x double> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x double> [[TMP6]], double undef, i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x double> [[SHIFT]], [[TMP1]] +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP10]], <4 x i32> +; CHECK-NEXT: ret <4 x double> [[TMP11]] +; %3 = shufflevector <4 x double> %0, <4 x double> %1, <2 x i32> %4 = shufflevector <4 x double> %0, <4 x double> %1, <2 x i32> %5 = fadd <2 x double> %3, %4 @@ -1230,6 +1591,20 @@ define <4 x double> @PR34724_add_v4f64_01u3(<4 x double> %0, <4 x double> %1) { ; AVX512-FAST-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 ; AVX512-FAST-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,3,3] ; AVX512-FAST-NEXT: retq +; CHECK-LABEL: define <4 x double> @PR34724_add_v4f64_01u3( +; CHECK-SAME: <4 x double> [[TMP0:%.*]], <4 x double> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x double> [[TMP6]], double undef, i64 2 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x double> [[SHIFT]], [[TMP1]] +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP10]], <4 x i32> +; CHECK-NEXT: ret <4 x double> [[TMP11]] +; %3 = shufflevector <4 x double> %0, <4 x double> undef, <2 x i32> %4 = shufflevector <4 x double> %0, <4 x double> undef, <2 x i32> %5 = fadd <2 x double> %3, %4 @@ -1276,6 +1651,20 @@ define <4 x double> @PR34724_add_v4f64_012u(<4 x double> %0, <4 x double> %1) { ; AVX-FAST-NEXT: vhaddpd %xmm1, %xmm1, %xmm1 ; AVX-FAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x double> @PR34724_add_v4f64_012u( +; CHECK-SAME: <4 x double> [[TMP0:%.*]], <4 x double> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x double> [[TMP6]], double undef, i64 3 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x double> [[TMP1]], [[SHIFT]] +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP10]], <4 x i32> +; CHECK-NEXT: ret <4 x double> [[TMP11]] +; %3 = shufflevector <4 x double> %0, <4 x double> undef, <2 x i32> %4 = shufflevector <4 x double> %0, <4 x double> undef, <2 x i32> %5 = fadd <2 x double> %3, %4 diff --git a/llvm/test/CodeGen/X86/haddsub.ll b/llvm/test/Transforms/PhaseOrdering/X86/haddsub.ll similarity index 64% rename from llvm/test/CodeGen/X86/haddsub.ll rename to llvm/test/Transforms/PhaseOrdering/X86/haddsub.ll index a0778195b5c73..91289087689ef 100644 --- a/llvm/test/CodeGen/X86/haddsub.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/haddsub.ll @@ -1,12 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3,SSE3-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3,fast-hops | FileCheck %s --check-prefixes=SSE3,SSE3-FAST -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes="default" -S %s | FileCheck %s define <2 x double> @haddpd1(<2 x double> %x, <2 x double> %y) { ; SSE3-LABEL: haddpd1: @@ -18,6 +11,13 @@ define <2 x double> @haddpd1(<2 x double> %x, <2 x double> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <2 x double> @haddpd1( +; CHECK-SAME: <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <2 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <2 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <2 x double> [[A]], [[B]] +; CHECK-NEXT: ret <2 x double> [[R]] +; %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> %b = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> %r = fadd <2 x double> %a, %b @@ -34,6 +34,13 @@ define <2 x double> @haddpd2(<2 x double> %x, <2 x double> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <2 x double> @haddpd2( +; CHECK-SAME: <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <2 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <2 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <2 x double> [[A]], [[B]] +; CHECK-NEXT: ret <2 x double> [[R]] +; %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> %b = shufflevector <2 x double> %y, <2 x double> %x, <2 x i32> %r = fadd <2 x double> %a, %b @@ -63,6 +70,13 @@ define <2 x double> @haddpd3(<2 x double> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <2 x double> @haddpd3( +; CHECK-SAME: <2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <2 x double> [[A]], [[B]] +; CHECK-NEXT: ret <2 x double> [[R]] +; %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> %b = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> %r = fadd <2 x double> %a, %b @@ -79,6 +93,13 @@ define <4 x float> @haddps1(<4 x float> %x, <4 x float> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @haddps1( +; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> %r = fadd <4 x float> %a, %b @@ -95,6 +116,13 @@ define <4 x float> @haddps2(<4 x float> %x, <4 x float> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @haddps2( +; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[X]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> %b = shufflevector <4 x float> %y, <4 x float> %x, <4 x i32> %r = fadd <4 x float> %a, %b @@ -111,6 +139,13 @@ define <4 x float> @haddps3(<4 x float> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @haddps3( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[X]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = fadd <4 x float> %a, %b @@ -127,6 +162,13 @@ define <4 x float> @haddps4(<4 x float> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @haddps4( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = fadd <4 x float> %a, %b @@ -143,6 +185,13 @@ define <4 x float> @haddps5(<4 x float> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @haddps5( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = fadd <4 x float> %a, %b @@ -171,6 +220,13 @@ define <4 x float> @haddps6(<4 x float> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x float> @haddps6( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = fadd <4 x float> %a, %b @@ -187,6 +243,13 @@ define <4 x float> @haddps7(<4 x float> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @haddps7( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = fadd <4 x float> %a, %b @@ -203,6 +266,13 @@ define <2 x double> @hsubpd1(<2 x double> %x, <2 x double> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <2 x double> @hsubpd1( +; CHECK-SAME: <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <2 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <2 x i32> +; CHECK-NEXT: [[R:%.*]] = fsub <2 x double> [[A]], [[B]] +; CHECK-NEXT: ret <2 x double> [[R]] +; %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> %b = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> %r = fsub <2 x double> %a, %b @@ -232,6 +302,13 @@ define <2 x double> @hsubpd2(<2 x double> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhsubpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <2 x double> @hsubpd2( +; CHECK-SAME: <2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[R:%.*]] = fsub <2 x double> [[A]], [[B]] +; CHECK-NEXT: ret <2 x double> [[R]] +; %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> %b = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> %r = fsub <2 x double> %a, %b @@ -248,6 +325,13 @@ define <4 x float> @hsubps1(<4 x float> %x, <4 x float> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @hsubps1( +; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fsub <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> %r = fsub <4 x float> %a, %b @@ -264,6 +348,13 @@ define <4 x float> @hsubps2(<4 x float> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @hsubps2( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[X]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fsub <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = fsub <4 x float> %a, %b @@ -280,6 +371,13 @@ define <4 x float> @hsubps3(<4 x float> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @hsubps3( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fsub <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = fsub <4 x float> %a, %b @@ -308,6 +406,13 @@ define <4 x float> @hsubps4(<4 x float> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhsubps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x float> @hsubps4( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fsub <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = fsub <4 x float> %a, %b @@ -325,6 +430,13 @@ define <8 x float> @vhaddps1(<8 x float> %x, <8 x float> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @vhaddps1( +; CHECK-SAME: <8 x float> [[X:%.*]], <8 x float> [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <8 x float> [[X]], <8 x float> [[Y]], <8 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <8 x float> [[X]], <8 x float> [[Y]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <8 x float> [[A]], [[B]] +; CHECK-NEXT: ret <8 x float> [[R]] +; %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> %r = fadd <8 x float> %a, %b @@ -342,6 +454,13 @@ define <8 x float> @vhaddps2(<8 x float> %x, <8 x float> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @vhaddps2( +; CHECK-SAME: <8 x float> [[X:%.*]], <8 x float> [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <8 x float> [[X]], <8 x float> [[Y]], <8 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <8 x float> [[Y]], <8 x float> [[X]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <8 x float> [[A]], [[B]] +; CHECK-NEXT: ret <8 x float> [[R]] +; %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> %b = shufflevector <8 x float> %y, <8 x float> %x, <8 x i32> %r = fadd <8 x float> %a, %b @@ -359,6 +478,13 @@ define <8 x float> @vhaddps3(<8 x float> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %ymm0, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @vhaddps3( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <8 x float> [[X]], <8 x float> , <8 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <8 x float> [[X]], <8 x float> , <8 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <8 x float> [[A]], [[B]] +; CHECK-NEXT: ret <8 x float> [[R]] +; %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> %r = fadd <8 x float> %a, %b @@ -376,6 +502,13 @@ define <8 x float> @vhsubps1(<8 x float> %x, <8 x float> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @vhsubps1( +; CHECK-SAME: <8 x float> [[X:%.*]], <8 x float> [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <8 x float> [[X]], <8 x float> [[Y]], <8 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <8 x float> [[X]], <8 x float> [[Y]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = fsub <8 x float> [[A]], [[B]] +; CHECK-NEXT: ret <8 x float> [[R]] +; %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> %r = fsub <8 x float> %a, %b @@ -393,6 +526,13 @@ define <8 x float> @vhsubps3(<8 x float> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubps %ymm0, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @vhsubps3( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <8 x float> [[X]], <8 x float> , <8 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <8 x float> [[X]], <8 x float> , <8 x i32> +; CHECK-NEXT: [[R:%.*]] = fsub <8 x float> [[A]], [[B]] +; CHECK-NEXT: ret <8 x float> [[R]] +; %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> %r = fsub <8 x float> %a, %b @@ -410,6 +550,13 @@ define <4 x double> @vhaddpd1(<4 x double> %x, <4 x double> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x double> @vhaddpd1( +; CHECK-SAME: <4 x double> [[X:%.*]], <4 x double> [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x double> [[X]], <4 x double> [[Y]], <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x double> [[X]], <4 x double> [[Y]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <4 x double> [[A]], [[B]] +; CHECK-NEXT: ret <4 x double> [[R]] +; %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> %r = fadd <4 x double> %a, %b @@ -427,6 +574,13 @@ define <4 x double> @vhsubpd1(<4 x double> %x, <4 x double> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x double> @vhsubpd1( +; CHECK-SAME: <4 x double> [[X:%.*]], <4 x double> [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x double> [[X]], <4 x double> [[Y]], <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x double> [[X]], <4 x double> [[Y]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fsub <4 x double> [[A]], [[B]] +; CHECK-NEXT: ret <4 x double> [[R]] +; %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> %r = fsub <4 x double> %a, %b @@ -443,6 +597,13 @@ define <2 x float> @haddps_v2f32(<4 x float> %v0) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <2 x float> @haddps_v2f32( +; CHECK-SAME: <4 x float> [[V0:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[V0]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[V0]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[TMP4]], [[TMP5]] +; CHECK-NEXT: ret <2 x float> [[TMP6]] +; %v0.0 = extractelement <4 x float> %v0, i32 0 %v0.1 = extractelement <4 x float> %v0, i32 1 %v0.2 = extractelement <4 x float> %v0, i32 2 @@ -478,6 +639,13 @@ define float @extract_extract01_v4f32_fadd_f32(<4 x float> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v4f32_fadd_f32( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 0 %x1 = extractelement <4 x float> %x, i32 1 %x01 = fadd float %x0, %x1 @@ -511,6 +679,13 @@ define float @extract_extract23_v4f32_fadd_f32(<4 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract23_v4f32_fadd_f32( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 2 %x1 = extractelement <4 x float> %x, i32 3 %x01 = fadd float %x0, %x1 @@ -539,6 +714,13 @@ define float @extract_extract01_v4f32_fadd_f32_commute(<4 x float> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v4f32_fadd_f32_commute( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 0 %x1 = extractelement <4 x float> %x, i32 1 %x01 = fadd float %x1, %x0 @@ -572,6 +754,13 @@ define float @extract_extract23_v4f32_fadd_f32_commute(<4 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract23_v4f32_fadd_f32_commute( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 2 %x1 = extractelement <4 x float> %x, i32 3 %x01 = fadd float %x1, %x0 @@ -601,6 +790,13 @@ define double @extract_extract01_v2f64_fadd_f64(<2 x double> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v2f64_fadd_f64( +; CHECK-SAME: <2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <2 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <2 x double> %x, i32 0 %x1 = extractelement <2 x double> %x, i32 1 %x01 = fadd double %x0, %x1 @@ -630,6 +826,13 @@ define double @extract_extract01_v2f64_fadd_f64_commute(<2 x double> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v2f64_fadd_f64_commute( +; CHECK-SAME: <2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <2 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <2 x double> %x, i32 0 %x1 = extractelement <2 x double> %x, i32 1 %x01 = fadd double %x1, %x0 @@ -658,6 +861,13 @@ define float @extract_extract01_v4f32_fsub_f32(<4 x float> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhsubps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v4f32_fsub_f32( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 0 %x1 = extractelement <4 x float> %x, i32 1 %x01 = fsub float %x0, %x1 @@ -692,6 +902,13 @@ define float @extract_extract23_v4f32_fsub_f32(<4 x float> %x) { ; AVX-FAST-NEXT: vhsubps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract23_v4f32_fsub_f32( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 2 %x1 = extractelement <4 x float> %x, i32 3 %x01 = fsub float %x0, %x1 @@ -711,6 +928,13 @@ define float @extract_extract01_v4f32_fsub_f32_commute(<4 x float> %x) { ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v4f32_fsub_f32_commute( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 0 %x1 = extractelement <4 x float> %x, i32 1 %x01 = fsub float %x1, %x0 @@ -732,6 +956,13 @@ define float @extract_extract23_v4f32_fsub_f32_commute(<4 x float> %x) { ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3] ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define float @extract_extract23_v4f32_fsub_f32_commute( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 2 %x1 = extractelement <4 x float> %x, i32 3 %x01 = fsub float %x1, %x0 @@ -761,6 +992,13 @@ define double @extract_extract01_v2f64_fsub_f64(<2 x double> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhsubpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v2f64_fsub_f64( +; CHECK-SAME: <2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <2 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <2 x double> %x, i32 0 %x1 = extractelement <2 x double> %x, i32 1 %x01 = fsub double %x0, %x1 @@ -781,6 +1019,13 @@ define double @extract_extract01_v2f64_fsub_f64_commute(<2 x double> %x) { ; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v2f64_fsub_f64_commute( +; CHECK-SAME: <2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <2 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <2 x double> %x, i32 0 %x1 = extractelement <2 x double> %x, i32 1 %x01 = fsub double %x1, %x0 @@ -813,6 +1058,13 @@ define float @extract_extract01_v8f32_fadd_f32(<8 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v8f32_fadd_f32( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <8 x float> %x, i32 0 %x1 = extractelement <8 x float> %x, i32 1 %x01 = fadd float %x0, %x1 @@ -848,6 +1100,13 @@ define float @extract_extract23_v8f32_fadd_f32(<8 x float> %x) { ; AVX-FAST-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract23_v8f32_fadd_f32( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x float> [[TMP1]], i64 2 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <8 x float> %x, i32 2 %x1 = extractelement <8 x float> %x, i32 3 %x01 = fadd float %x0, %x1 @@ -885,6 +1144,13 @@ define float @extract_extract67_v8f32_fadd_f32(<8 x float> %x) { ; AVX-FAST-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract67_v8f32_fadd_f32( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x float> [[TMP1]], i64 6 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <8 x float> %x, i32 6 %x1 = extractelement <8 x float> %x, i32 7 %x01 = fadd float %x0, %x1 @@ -915,6 +1181,13 @@ define float @extract_extract01_v8f32_fadd_f32_commute(<8 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v8f32_fadd_f32_commute( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <8 x float> %x, i32 0 %x1 = extractelement <8 x float> %x, i32 1 %x01 = fadd float %x1, %x0 @@ -950,6 +1223,13 @@ define float @extract_extract23_v8f32_fadd_f32_commute(<8 x float> %x) { ; AVX-FAST-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract23_v8f32_fadd_f32_commute( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x float> [[TMP1]], i64 2 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <8 x float> %x, i32 2 %x1 = extractelement <8 x float> %x, i32 3 %x01 = fadd float %x1, %x0 @@ -987,6 +1267,13 @@ define float @extract_extract67_v8f32_fadd_f32_commute(<8 x float> %x) { ; AVX-FAST-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract67_v8f32_fadd_f32_commute( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x float> [[TMP1]], i64 6 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <8 x float> %x, i32 6 %x1 = extractelement <8 x float> %x, i32 7 %x01 = fadd float %x1, %x0 @@ -1018,6 +1305,13 @@ define double @extract_extract01_v4f64_fadd_f64(<4 x double> %x) { ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v4f64_fadd_f64( +; CHECK-SAME: <4 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[X]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x double> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <4 x double> %x, i32 0 %x1 = extractelement <4 x double> %x, i32 1 %x01 = fadd double %x0, %x1 @@ -1052,6 +1346,13 @@ define double @extract_extract23_v4f64_fadd_f64(<4 x double> %x) { ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract23_v4f64_fadd_f64( +; CHECK-SAME: <4 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[X]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x double> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x double> [[TMP1]], i64 2 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <4 x double> %x, i32 2 %x1 = extractelement <4 x double> %x, i32 3 %x01 = fadd double %x0, %x1 @@ -1083,6 +1384,13 @@ define double @extract_extract01_v4f64_fadd_f64_commute(<4 x double> %x) { ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v4f64_fadd_f64_commute( +; CHECK-SAME: <4 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[X]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x double> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <4 x double> %x, i32 0 %x1 = extractelement <4 x double> %x, i32 1 %x01 = fadd double %x1, %x0 @@ -1117,6 +1425,13 @@ define double @extract_extract23_v4f64_fadd_f64_commute(<4 x double> %x) { ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract23_v4f64_fadd_f64_commute( +; CHECK-SAME: <4 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[X]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x double> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x double> [[TMP1]], i64 2 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <4 x double> %x, i32 2 %x1 = extractelement <4 x double> %x, i32 3 %x01 = fadd double %x1, %x0 @@ -1147,6 +1462,13 @@ define float @extract_extract01_v8f32_fsub_f32(<8 x float> %x) { ; AVX-FAST-NEXT: vhsubps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v8f32_fsub_f32( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <8 x float> %x, i32 0 %x1 = extractelement <8 x float> %x, i32 1 %x01 = fsub float %x0, %x1 @@ -1183,6 +1505,13 @@ define float @extract_extract23_v8f32_fsub_f32(<8 x float> %x) { ; AVX-FAST-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract23_v8f32_fsub_f32( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x float> [[TMP1]], i64 2 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <8 x float> %x, i32 2 %x1 = extractelement <8 x float> %x, i32 3 %x01 = fsub float %x0, %x1 @@ -1217,6 +1546,13 @@ define float @extract_extract45_v8f32_fsub_f32(<8 x float> %x) { ; AVX-FAST-NEXT: vhsubps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract45_v8f32_fsub_f32( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x float> [[TMP1]], i64 4 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <8 x float> %x, i32 4 %x1 = extractelement <8 x float> %x, i32 5 %x01 = fsub float %x0, %x1 @@ -1239,6 +1575,13 @@ define float @extract_extract01_v8f32_fsub_f32_commute(<8 x float> %x) { ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v8f32_fsub_f32_commute( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <8 x float> %x, i32 0 %x1 = extractelement <8 x float> %x, i32 1 %x01 = fsub float %x1, %x0 @@ -1270,6 +1613,13 @@ define double @extract_extract01_v4f64_fsub_f64(<4 x double> %x) { ; AVX-FAST-NEXT: vhsubpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v4f64_fsub_f64( +; CHECK-SAME: <4 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[X]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x double> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <4 x double> %x, i32 0 %x1 = extractelement <4 x double> %x, i32 1 %x01 = fsub double %x0, %x1 @@ -1293,6 +1643,13 @@ define double @extract_extract01_v4f64_fsub_f64_commute(<4 x double> %x) { ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v4f64_fsub_f64_commute( +; CHECK-SAME: <4 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[X]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x double> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <4 x double> %x, i32 0 %x1 = extractelement <4 x double> %x, i32 1 %x01 = fsub double %x1, %x0 @@ -1325,6 +1682,13 @@ define float @extract_extract01_v16f32_fadd_f32(<16 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v16f32_fadd_f32( +; CHECK-SAME: <16 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <16 x float> [[X]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <16 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <16 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <16 x float> %x, i32 0 %x1 = extractelement <16 x float> %x, i32 1 %x01 = fadd float %x0, %x1 @@ -1355,6 +1719,13 @@ define float @extract_extract01_v16f32_fadd_f32_commute(<16 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v16f32_fadd_f32_commute( +; CHECK-SAME: <16 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <16 x float> [[X]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <16 x float> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <16 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <16 x float> %x, i32 0 %x1 = extractelement <16 x float> %x, i32 1 %x01 = fadd float %x1, %x0 @@ -1386,6 +1757,13 @@ define double @extract_extract01_v8f64_fadd_f64(<8 x double> %x) { ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v8f64_fadd_f64( +; CHECK-SAME: <8 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x double> [[X]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x double> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <8 x double> %x, i32 0 %x1 = extractelement <8 x double> %x, i32 1 %x01 = fadd double %x0, %x1 @@ -1417,6 +1795,13 @@ define double @extract_extract01_v8f64_fadd_f64_commute(<8 x double> %x) { ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v8f64_fadd_f64_commute( +; CHECK-SAME: <8 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x double> [[X]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x double> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <8 x double> %x, i32 0 %x1 = extractelement <8 x double> %x, i32 1 %x01 = fadd double %x1, %x0 @@ -1447,6 +1832,13 @@ define float @extract_extract01_v16f32_fsub_f32(<16 x float> %x) { ; AVX-FAST-NEXT: vhsubps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v16f32_fsub_f32( +; CHECK-SAME: <16 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <16 x float> [[X]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <16 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <16 x float> %x, i32 0 %x1 = extractelement <16 x float> %x, i32 1 %x01 = fsub float %x0, %x1 @@ -1467,6 +1859,13 @@ define float @extract_extract01_v16f32_fsub_f32_commute(<16 x float> %x) { ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v16f32_fsub_f32_commute( +; CHECK-SAME: <16 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <16 x float> [[X]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <16 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <16 x float> %x, i32 0 %x1 = extractelement <16 x float> %x, i32 1 %x01 = fsub float %x1, %x0 @@ -1498,6 +1897,13 @@ define double @extract_extract01_v8f64_fsub_f64(<8 x double> %x) { ; AVX-FAST-NEXT: vhsubpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v8f64_fsub_f64( +; CHECK-SAME: <8 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x double> [[X]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <8 x double> %x, i32 0 %x1 = extractelement <8 x double> %x, i32 1 %x01 = fsub double %x0, %x1 @@ -1519,6 +1925,13 @@ define double @extract_extract01_v8f64_fsub_f64_commute(<8 x double> %x) { ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v8f64_fsub_f64_commute( +; CHECK-SAME: <8 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x double> [[X]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <8 x double> %x, i32 0 %x1 = extractelement <8 x double> %x, i32 1 %x01 = fsub double %x1, %x0 @@ -1553,6 +1966,14 @@ define float @extract_extract01_v4f32_fadd_f32_uses1(<4 x float> %x, ptr %p) { ; AVX-FAST-NEXT: vmovss %xmm0, (%rdi) ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v4f32_fadd_f32_uses1( +; CHECK-SAME: <4 x float> [[X:%.*]], ptr writeonly captures(none) initializes((0, 4)) [[P:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[X]], i64 0 +; CHECK-NEXT: store float [[X0]], ptr [[P]], align 4 +; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[X]], i64 1 +; CHECK-NEXT: [[X01:%.*]] = fadd float [[X0]], [[X1]] +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 0 store float %x0, ptr %p %x1 = extractelement <4 x float> %x, i32 1 @@ -1587,6 +2008,14 @@ define float @extract_extract01_v4f32_fadd_f32_uses2(<4 x float> %x, ptr %p) { ; AVX-FAST-NEXT: vextractps $1, %xmm0, (%rdi) ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v4f32_fadd_f32_uses2( +; CHECK-SAME: <4 x float> [[X:%.*]], ptr writeonly captures(none) initializes((0, 4)) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { +; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[X]], i64 0 +; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[X]], i64 1 +; CHECK-NEXT: store float [[X1]], ptr [[P]], align 4 +; CHECK-NEXT: [[X01:%.*]] = fadd float [[X0]], [[X1]] +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 0 %x1 = extractelement <4 x float> %x, i32 1 store float %x1, ptr %p @@ -1610,6 +2039,15 @@ define float @extract_extract01_v4f32_fadd_f32_uses3(<4 x float> %x, ptr %p1, pt ; AVX-NEXT: vmovss %xmm1, (%rsi) ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v4f32_fadd_f32_uses3( +; CHECK-SAME: <4 x float> [[X:%.*]], ptr writeonly captures(none) initializes((0, 4)) [[P1:%.*]], ptr writeonly captures(none) initializes((0, 4)) [[P2:%.*]]) local_unnamed_addr #[[ATTR1]] { +; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[X]], i64 0 +; CHECK-NEXT: store float [[X0]], ptr [[P1]], align 4 +; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[X]], i64 1 +; CHECK-NEXT: store float [[X1]], ptr [[P2]], align 4 +; CHECK-NEXT: [[X01:%.*]] = fadd float [[X0]], [[X1]] +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 0 store float %x0, ptr %p1 %x1 = extractelement <4 x float> %x, i32 1 @@ -1665,6 +2103,11 @@ define float @fadd_reduce_v8f32(float %a0, <8 x float> %a1) { ; AVX-FAST-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @fadd_reduce_v8f32( +; CHECK-SAME: float [[A0:%.*]], <8 x float> [[A1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[R:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v8f32(float [[A0]], <8 x float> [[A1]]) +; CHECK-NEXT: ret float [[R]] +; %r = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float %a0, <8 x float> %a1) ret float %r } @@ -1704,6 +2147,11 @@ define double @fadd_reduce_v4f64(double %a0, <4 x double> %a1) { ; AVX-FAST-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @fadd_reduce_v4f64( +; CHECK-SAME: double [[A0:%.*]], <4 x double> [[A1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[R:%.*]] = tail call fast double @llvm.vector.reduce.fadd.v4f64(double [[A0]], <4 x double> [[A1]]) +; CHECK-NEXT: ret double [[R]] +; %r = call fast double @llvm.vector.reduce.fadd.f64.v4f64(double %a0, <4 x double> %a1) ret double %r } @@ -1760,6 +2208,19 @@ define float @PR39936_v8f32(<8 x float>) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @PR39936_v8f32( +; CHECK-SAME: <8 x float> [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = fadd <8 x float> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = fadd <8 x float> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = fadd <8 x float> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x float> [[TMP9]], i64 0 +; CHECK-NEXT: ret float [[TMP10]] +; %2 = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> %3 = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> %4 = fadd <8 x float> %2, %3 @@ -1804,6 +2265,15 @@ define float @hadd32_4(<4 x float> %x225) { ; AVX-FAST-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @hadd32_4( +; CHECK-SAME: <4 x float> [[X225:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[X226:%.*]] = shufflevector <4 x float> [[X225]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[X227:%.*]] = fadd <4 x float> [[X225]], [[X226]] +; CHECK-NEXT: [[X228:%.*]] = shufflevector <4 x float> [[X227]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[X229:%.*]] = fadd <4 x float> [[X227]], [[X228]] +; CHECK-NEXT: [[X230:%.*]] = extractelement <4 x float> [[X229]], i64 0 +; CHECK-NEXT: ret float [[X230]] +; %x226 = shufflevector <4 x float> %x225, <4 x float> undef, <4 x i32> %x227 = fadd <4 x float> %x225, %x226 %x228 = shufflevector <4 x float> %x227, <4 x float> undef, <4 x i32> @@ -1846,6 +2316,15 @@ define float @hadd32_8(<8 x float> %x225) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @hadd32_8( +; CHECK-SAME: <8 x float> [[X225:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[X226:%.*]] = shufflevector <8 x float> [[X225]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[X227:%.*]] = fadd <8 x float> [[X225]], [[X226]] +; CHECK-NEXT: [[X228:%.*]] = shufflevector <8 x float> [[X227]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[X229:%.*]] = fadd <8 x float> [[X227]], [[X228]] +; CHECK-NEXT: [[X230:%.*]] = extractelement <8 x float> [[X229]], i64 0 +; CHECK-NEXT: ret float [[X230]] +; %x226 = shufflevector <8 x float> %x225, <8 x float> undef, <8 x i32> %x227 = fadd <8 x float> %x225, %x226 %x228 = shufflevector <8 x float> %x227, <8 x float> undef, <8 x i32> @@ -1888,6 +2367,15 @@ define float @hadd32_16(<16 x float> %x225) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @hadd32_16( +; CHECK-SAME: <16 x float> [[X225:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[X226:%.*]] = shufflevector <16 x float> [[X225]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[X227:%.*]] = fadd <16 x float> [[X225]], [[X226]] +; CHECK-NEXT: [[X228:%.*]] = shufflevector <16 x float> [[X227]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[X229:%.*]] = fadd <16 x float> [[X227]], [[X228]] +; CHECK-NEXT: [[X230:%.*]] = extractelement <16 x float> [[X229]], i64 0 +; CHECK-NEXT: ret float [[X230]] +; %x226 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> %x227 = fadd <16 x float> %x225, %x226 %x228 = shufflevector <16 x float> %x227, <16 x float> undef, <16 x i32> @@ -1911,6 +2399,15 @@ define float @hadd32_4_optsize(<4 x float> %x225) optsize { ; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define float @hadd32_4_optsize( +; CHECK-SAME: <4 x float> [[X225:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[X226:%.*]] = shufflevector <4 x float> [[X225]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[X227:%.*]] = fadd <4 x float> [[X225]], [[X226]] +; CHECK-NEXT: [[X228:%.*]] = shufflevector <4 x float> [[X227]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[X229:%.*]] = fadd <4 x float> [[X227]], [[X228]] +; CHECK-NEXT: [[X230:%.*]] = extractelement <4 x float> [[X229]], i64 0 +; CHECK-NEXT: ret float [[X230]] +; %x226 = shufflevector <4 x float> %x225, <4 x float> undef, <4 x i32> %x227 = fadd <4 x float> %x225, %x226 %x228 = shufflevector <4 x float> %x227, <4 x float> undef, <4 x i32> @@ -1935,6 +2432,15 @@ define float @hadd32_8_optsize(<8 x float> %x225) optsize { ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define float @hadd32_8_optsize( +; CHECK-SAME: <8 x float> [[X225:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-NEXT: [[X226:%.*]] = shufflevector <8 x float> [[X225]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[X227:%.*]] = fadd <8 x float> [[X225]], [[X226]] +; CHECK-NEXT: [[X228:%.*]] = shufflevector <8 x float> [[X227]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[X229:%.*]] = fadd <8 x float> [[X227]], [[X228]] +; CHECK-NEXT: [[X230:%.*]] = extractelement <8 x float> [[X229]], i64 0 +; CHECK-NEXT: ret float [[X230]] +; %x226 = shufflevector <8 x float> %x225, <8 x float> undef, <8 x i32> %x227 = fadd <8 x float> %x225, %x226 %x228 = shufflevector <8 x float> %x227, <8 x float> undef, <8 x i32> @@ -1959,6 +2465,15 @@ define float @hadd32_16_optsize(<16 x float> %x225) optsize { ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define float @hadd32_16_optsize( +; CHECK-SAME: <16 x float> [[X225:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-NEXT: [[X226:%.*]] = shufflevector <16 x float> [[X225]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[X227:%.*]] = fadd <16 x float> [[X225]], [[X226]] +; CHECK-NEXT: [[X228:%.*]] = shufflevector <16 x float> [[X227]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[X229:%.*]] = fadd <16 x float> [[X227]], [[X228]] +; CHECK-NEXT: [[X230:%.*]] = extractelement <16 x float> [[X229]], i64 0 +; CHECK-NEXT: ret float [[X230]] +; %x226 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> %x227 = fadd <16 x float> %x225, %x226 %x228 = shufflevector <16 x float> %x227, <16 x float> undef, <16 x i32> @@ -1982,6 +2497,15 @@ define float @hadd32_4_pgso(<4 x float> %x225) !prof !14 { ; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define float @hadd32_4_pgso( +; CHECK-SAME: <4 x float> [[X225:%.*]]) local_unnamed_addr #[[ATTR0]] !prof [[PROF14:![0-9]+]] { +; CHECK-NEXT: [[X226:%.*]] = shufflevector <4 x float> [[X225]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[X227:%.*]] = fadd <4 x float> [[X225]], [[X226]] +; CHECK-NEXT: [[X228:%.*]] = shufflevector <4 x float> [[X227]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[X229:%.*]] = fadd <4 x float> [[X227]], [[X228]] +; CHECK-NEXT: [[X230:%.*]] = extractelement <4 x float> [[X229]], i64 0 +; CHECK-NEXT: ret float [[X230]] +; %x226 = shufflevector <4 x float> %x225, <4 x float> undef, <4 x i32> %x227 = fadd <4 x float> %x225, %x226 %x228 = shufflevector <4 x float> %x227, <4 x float> undef, <4 x i32> @@ -2006,6 +2530,15 @@ define float @hadd32_8_pgso(<8 x float> %x225) !prof !14 { ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define float @hadd32_8_pgso( +; CHECK-SAME: <8 x float> [[X225:%.*]]) local_unnamed_addr #[[ATTR0]] !prof [[PROF14]] { +; CHECK-NEXT: [[X226:%.*]] = shufflevector <8 x float> [[X225]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[X227:%.*]] = fadd <8 x float> [[X225]], [[X226]] +; CHECK-NEXT: [[X228:%.*]] = shufflevector <8 x float> [[X227]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[X229:%.*]] = fadd <8 x float> [[X227]], [[X228]] +; CHECK-NEXT: [[X230:%.*]] = extractelement <8 x float> [[X229]], i64 0 +; CHECK-NEXT: ret float [[X230]] +; %x226 = shufflevector <8 x float> %x225, <8 x float> undef, <8 x i32> %x227 = fadd <8 x float> %x225, %x226 %x228 = shufflevector <8 x float> %x227, <8 x float> undef, <8 x i32> @@ -2030,6 +2563,15 @@ define float @hadd32_16_pgso(<16 x float> %x225) !prof !14 { ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define float @hadd32_16_pgso( +; CHECK-SAME: <16 x float> [[X225:%.*]]) local_unnamed_addr #[[ATTR0]] !prof [[PROF14]] { +; CHECK-NEXT: [[X226:%.*]] = shufflevector <16 x float> [[X225]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[X227:%.*]] = fadd <16 x float> [[X225]], [[X226]] +; CHECK-NEXT: [[X228:%.*]] = shufflevector <16 x float> [[X227]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[X229:%.*]] = fadd <16 x float> [[X227]], [[X228]] +; CHECK-NEXT: [[X230:%.*]] = extractelement <16 x float> [[X229]], i64 0 +; CHECK-NEXT: ret float [[X230]] +; %x226 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> %x227 = fadd <16 x float> %x225, %x226 %x228 = shufflevector <16 x float> %x227, <16 x float> undef, <16 x i32> @@ -2071,6 +2613,15 @@ define float @partial_reduction_fadd_v8f32(<8 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @partial_reduction_fadd_v8f32( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[X23:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[X0213:%.*]] = fadd <8 x float> [[X]], [[X23]] +; CHECK-NEXT: [[X13:%.*]] = shufflevector <8 x float> [[X0213]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[X0123:%.*]] = fadd reassoc nsz <8 x float> [[X13]], [[X0213]] +; CHECK-NEXT: [[R:%.*]] = extractelement <8 x float> [[X0123]], i64 0 +; CHECK-NEXT: ret float [[R]] +; %x23 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> %x0213 = fadd <8 x float> %x, %x23 %x13 = shufflevector <8 x float> %x0213, <8 x float> undef, <8 x i32> @@ -2116,6 +2667,15 @@ define float @partial_reduction_fadd_v8f32_wrong_flags(<8 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @partial_reduction_fadd_v8f32_wrong_flags( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[X23:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[X0213:%.*]] = fadd fast <8 x float> [[X23]], [[X]] +; CHECK-NEXT: [[X13:%.*]] = shufflevector <8 x float> [[X0213]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[X0123:%.*]] = fadd nnan ninf <8 x float> [[X0213]], [[X13]] +; CHECK-NEXT: [[R:%.*]] = extractelement <8 x float> [[X0123]], i64 0 +; CHECK-NEXT: ret float [[R]] +; %x23 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> %x0213 = fadd fast <8 x float> %x, %x23 %x13 = shufflevector <8 x float> %x0213, <8 x float> undef, <8 x i32> @@ -2157,6 +2717,15 @@ define float @partial_reduction_fadd_v16f32(<16 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @partial_reduction_fadd_v16f32( +; CHECK-SAME: <16 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[X23:%.*]] = shufflevector <16 x float> [[X]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[X0213:%.*]] = fadd <16 x float> [[X]], [[X23]] +; CHECK-NEXT: [[X13:%.*]] = shufflevector <16 x float> [[X0213]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[X0123:%.*]] = fadd reassoc nsz <16 x float> [[X13]], [[X0213]] +; CHECK-NEXT: [[R:%.*]] = extractelement <16 x float> [[X0123]], i64 0 +; CHECK-NEXT: ret float [[R]] +; %x23 = shufflevector <16 x float> %x, <16 x float> undef, <16 x i32> %x0213 = fadd <16 x float> %x, %x23 %x13 = shufflevector <16 x float> %x0213, <16 x float> undef, <16 x i32> @@ -2181,3 +2750,6 @@ define float @partial_reduction_fadd_v16f32(<16 x float> %x) { !12 = !{i32 999000, i64 100, i32 1} !13 = !{i32 999999, i64 1, i32 2} !14 = !{!"function_entry_count", i64 0} +;. +; CHECK: [[PROF14]] = !{!"function_entry_count", i64 0} +;. diff --git a/llvm/test/CodeGen/X86/phaddsub-undef.ll b/llvm/test/Transforms/PhaseOrdering/X86/phaddsub-undef.ll similarity index 53% rename from llvm/test/CodeGen/X86/phaddsub-undef.ll rename to llvm/test/Transforms/PhaseOrdering/X86/phaddsub-undef.ll index 8aa40939994fd..3cfd1b797209c 100644 --- a/llvm/test/CodeGen/X86/phaddsub-undef.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/phaddsub-undef.ll @@ -1,14 +1,6 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3,fast-hops | FileCheck %s --check-prefixes=SSE,SSE-FAST -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1,AVX1-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefixes=AVX,AVX1,AVX1-FAST -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,fast-hops | FileCheck %s --check-prefixes=AVX,AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl,fast-hops | FileCheck %s --check-prefixes=AVX,AVX512 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes="default" -S %s | FileCheck %s -; Verify that we correctly fold horizontal binop even in the presence of UNDEFs. define <8 x i32> @test14_undef(<8 x i32> %a, <8 x i32> %b) { ; SSE-LABEL: test14_undef: @@ -20,6 +12,16 @@ define <8 x i32> @test14_undef(<8 x i32> %a, <8 x i32> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vphaddd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x i32> @test14_undef( +; CHECK-SAME: <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> , <8 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> [[SHIFT1]], [[B]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <8 x i32> [[VECINIT]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[VECINIT5]] +; %vecext = extractelement <8 x i32> %a, i32 0 %vecext1 = extractelement <8 x i32> %a, i32 1 %add = add i32 %vecext, %vecext1 @@ -87,6 +89,16 @@ define <8 x i32> @test15_undef(<8 x i32> %a, <8 x i32> %b) { ; AVX512: # %bb.0: ; AVX512-NEXT: vphaddd %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: retq +; CHECK-LABEL: define <8 x i32> @test15_undef( +; CHECK-SAME: <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> , <8 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> [[B]], [[SHIFT1]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <8 x i32> [[VECINIT]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[VECINIT5]] +; %vecext = extractelement <8 x i32> %a, i32 0 %vecext1 = extractelement <8 x i32> %a, i32 1 %add = add i32 %vecext, %vecext1 @@ -121,6 +133,16 @@ define <8 x i32> @PR40243_alt(<8 x i32> %a, <8 x i32> %b) { ; AVX512: # %bb.0: ; AVX512-NEXT: vphaddd %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: retq +; CHECK-LABEL: define <8 x i32> @PR40243_alt( +; CHECK-SAME: <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A]], [[SHIFT]] +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> [[SHIFT1]], [[B]] +; CHECK-NEXT: [[R4:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> , <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[R4]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[R]] +; %a4 = extractelement <8 x i32> %a, i32 4 %a5 = extractelement <8 x i32> %a, i32 5 %add4 = add i32 %a4, %a5 @@ -142,6 +164,16 @@ define <8 x i32> @test16_undef(<8 x i32> %a, <8 x i32> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x i32> @test16_undef( +; CHECK-SAME: <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> , <8 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> [[A]], [[SHIFT1]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <8 x i32> [[VECINIT]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[VECINIT5]] +; %vecext = extractelement <8 x i32> %a, i32 0 %vecext1 = extractelement <8 x i32> %a, i32 1 %add = add i32 %vecext, %vecext1 @@ -163,6 +195,16 @@ define <16 x i32> @test16_v16i32_undef(<16 x i32> %a, <16 x i32> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <16 x i32> @test16_v16i32_undef( +; CHECK-SAME: <16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i32> [[A]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i32> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> , <16 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <16 x i32> [[A]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add <16 x i32> [[A]], [[SHIFT1]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <16 x i32> [[VECINIT]], <16 x i32> [[TMP2]], <16 x i32> +; CHECK-NEXT: ret <16 x i32> [[VECINIT5]] +; %vecext = extractelement <16 x i32> %a, i32 0 %vecext1 = extractelement <16 x i32> %a, i32 1 %add = add i32 %vecext, %vecext1 @@ -197,6 +239,14 @@ define <8 x i32> @test17_undef(<8 x i32> %a, <8 x i32> %b) { ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vphaddd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq +; CHECK-LABEL: define <8 x i32> @test17_undef( +; CHECK-SAME: <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP4]] +; %vecext = extractelement <8 x i32> %a, i32 0 %vecext1 = extractelement <8 x i32> %a, i32 1 %add1 = add i32 %vecext, %vecext1 @@ -239,6 +289,14 @@ define <16 x i32> @test17_v16i32_undef(<16 x i32> %a, <16 x i32> %b) { ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vphaddd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq +; CHECK-LABEL: define <16 x i32> @test17_v16i32_undef( +; CHECK-SAME: <16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A]], <16 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[A]], <16 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <16 x i32> +; CHECK-NEXT: ret <16 x i32> [[TMP4]] +; %vecext = extractelement <16 x i32> %a, i32 0 %vecext1 = extractelement <16 x i32> %a, i32 1 %add1 = add i32 %vecext, %vecext1