@@ -11721,29 +11721,63 @@ SDValue SITargetLowering::performFCopySignCombine(SDNode *N,
11721
11721
DAGCombinerInfo &DCI) const {
11722
11722
SDValue MagnitudeOp = N->getOperand(0);
11723
11723
SDValue SignOp = N->getOperand(1);
11724
+
11725
+ // The generic combine for fcopysign + fp cast is too conservative with
11726
+ // vectors, and also gets confused by the splitting we will perform here, so
11727
+ // peek through FP casts.
11728
+ if (SignOp.getOpcode() == ISD::FP_EXTEND ||
11729
+ SignOp.getOpcode() == ISD::FP_ROUND)
11730
+ SignOp = SignOp.getOperand(0);
11731
+
11724
11732
SelectionDAG &DAG = DCI.DAG;
11725
11733
SDLoc DL(N);
11734
+ EVT SignVT = SignOp.getValueType();
11726
11735
11727
11736
// f64 fcopysign is really an f32 copysign on the high bits, so replace the
11728
11737
// lower half with a copy.
11729
11738
// fcopysign f64:x, _:y -> x.lo32, (fcopysign (f32 x.hi32), _:y)
11730
- if (MagnitudeOp.getValueType() == MVT::f64) {
11731
- SDValue MagAsVector =
11732
- DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, MagnitudeOp);
11733
- SDValue MagLo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32,
11734
- MagAsVector, DAG.getConstant(0, DL, MVT::i32));
11735
- SDValue MagHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32,
11736
- MagAsVector, DAG.getConstant(1, DL, MVT::i32));
11739
+ EVT MagVT = MagnitudeOp.getValueType();
11740
+ if (MagVT.getScalarType() == MVT::f64) {
11741
+ unsigned NumElts = MagVT.isVector() ? MagVT.getVectorNumElements() : 1;
11742
+
11743
+ EVT F32VT = MagVT.isVector()
11744
+ ? EVT::getVectorVT(*DAG.getContext(), MVT::f32, 2 * NumElts)
11745
+ : MVT::v2f32;
11746
+
11747
+ SDValue MagAsVector = DAG.getNode(ISD::BITCAST, DL, F32VT, MagnitudeOp);
11748
+
11749
+ SmallVector<SDValue, 8> NewElts;
11750
+ for (unsigned I = 0; I != NumElts; ++I) {
11751
+ SDValue MagLo =
11752
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, MagAsVector,
11753
+ DAG.getConstant(2 * I, DL, MVT::i32));
11754
+ SDValue MagHi =
11755
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, MagAsVector,
11756
+ DAG.getConstant(2 * I + 1, DL, MVT::i32));
11737
11757
11738
- SDValue HiOp = DAG.getNode(ISD::FCOPYSIGN, DL, MVT::f32, MagHi, SignOp);
11758
+ SDValue SignOpElt =
11759
+ MagVT.isVector()
11760
+ ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SignVT.getScalarType(),
11761
+ SignOp, DAG.getConstant(I, DL, MVT::i32))
11762
+ : SignOp;
11763
+
11764
+ SDValue HiOp =
11765
+ DAG.getNode(ISD::FCOPYSIGN, DL, MVT::f32, MagHi, SignOpElt);
11766
+
11767
+ SDValue Vector =
11768
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32, MagLo, HiOp);
11769
+
11770
+ SDValue NewElt = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Vector);
11771
+ NewElts.push_back(NewElt);
11772
+ }
11739
11773
11740
- SDValue Vector =
11741
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32, MagLo, HiOp) ;
11774
+ if (NewElts.size() == 1)
11775
+ return NewElts[0] ;
11742
11776
11743
- return DAG.getNode(ISD::BITCAST , DL, MVT::f64, Vector );
11777
+ return DAG.getNode(ISD::BUILD_VECTOR , DL, MagVT, NewElts );
11744
11778
}
11745
11779
11746
- if (SignOp.getValueType() != MVT::f64)
11780
+ if (SignVT != MVT::f64)
11747
11781
return SDValue();
11748
11782
11749
11783
// Reduce width of sign operand, we only need the highest bit.
0 commit comments