Skip to content

Commit e9a34e4

Browse files
authored
[RISCV] Support vectorizing FMINIMUMNUM and FMAXIMUMNUM (#135727)
The RISC-V V extension supports vfmax and vfmin, which follow the IEEE 754-2019 maximumNumber/minimumNumber semantics, so we can use them directly.
1 parent d8e8175 commit e9a34e4

File tree

8 files changed

+2964
-208
lines changed

8 files changed

+2964
-208
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -985,6 +985,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
985985
static const unsigned ZvfhminZvfbfminPromoteOps[] = {
986986
ISD::FMINNUM,
987987
ISD::FMAXNUM,
988+
ISD::FMINIMUMNUM,
989+
ISD::FMAXIMUMNUM,
988990
ISD::FADD,
989991
ISD::FSUB,
990992
ISD::FMUL,
@@ -1053,7 +1055,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
10531055
// Expand various condition codes (explained above).
10541056
setCondCodeAction(VFPCCToExpand, VT, Expand);
10551057

1056-
setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
1058+
setOperationAction(
1059+
{ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM}, VT,
1060+
Legal);
10571061
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
10581062

10591063
setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
@@ -1471,7 +1475,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
14711475
setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
14721476
ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
14731477
ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
1474-
ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM},
1478+
ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM, ISD::IS_FPCLASS,
1479+
ISD::FMAXIMUM, ISD::FMINIMUM},
14751480
VT, Custom);
14761481

14771482
setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
@@ -6941,9 +6946,11 @@ static unsigned getRISCVVLOp(SDValue Op) {
69416946
case ISD::VP_FP_TO_UINT:
69426947
return RISCVISD::VFCVT_RTZ_XU_F_VL;
69436948
case ISD::FMINNUM:
6949+
case ISD::FMINIMUMNUM:
69446950
case ISD::VP_FMINNUM:
69456951
return RISCVISD::VFMIN_VL;
69466952
case ISD::FMAXNUM:
6953+
case ISD::FMAXIMUMNUM:
69476954
case ISD::VP_FMAXNUM:
69486955
return RISCVISD::VFMAX_VL;
69496956
case ISD::LRINT:
@@ -7979,6 +7986,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
79797986
case ISD::FMA:
79807987
case ISD::FMINNUM:
79817988
case ISD::FMAXNUM:
7989+
case ISD::FMINIMUMNUM:
7990+
case ISD::FMAXIMUMNUM:
79827991
if (isPromotedOpNeedingSplit(Op, Subtarget))
79837992
return SplitVectorOp(Op, DAG);
79847993
[[fallthrough]];

llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1360,6 +1360,8 @@ foreach vti = AllFloatVectors in {
13601360
// 13.11. Vector Floating-Point MIN/MAX Instructions
13611361
defm : VPatBinaryFPSDNode_VV_VF<fminnum, "PseudoVFMIN", isSEWAware=1>;
13621362
defm : VPatBinaryFPSDNode_VV_VF<fmaxnum, "PseudoVFMAX", isSEWAware=1>;
1363+
defm : VPatBinaryFPSDNode_VV_VF<fminimumnum, "PseudoVFMIN", isSEWAware=1>;
1364+
defm : VPatBinaryFPSDNode_VV_VF<fmaximumnum, "PseudoVFMAX", isSEWAware=1>;
13631365

13641366
// 13.13. Vector Floating-Point Compare Instructions
13651367
defm : VPatFPSetCCSDNode_VV_VF_FV<SETEQ, "PseudoVMFEQ", "PseudoVMFEQ">;
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc --mtriple=riscv64-linux-gnu --mattr=+v,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
3+
; RUN: llc --mtriple=riscv64-linux-gnu --mattr=+v,+zvfhmin,+zfh < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
4+
5+
define <2 x double> @max_v2f64(<2 x double> %a, <2 x double> %b) {
6+
; CHECK-LABEL: max_v2f64:
7+
; CHECK: # %bb.0: # %entry
8+
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
9+
; CHECK-NEXT: vfmax.vv v8, v8, v9
10+
; CHECK-NEXT: ret
11+
entry:
12+
%c = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> %a, <2 x double> %b)
13+
ret <2 x double> %c
14+
}
15+
16+
define <3 x double> @max_v3f64(<3 x double> %a, <3 x double> %b) {
17+
; CHECK-LABEL: max_v3f64:
18+
; CHECK: # %bb.0: # %entry
19+
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
20+
; CHECK-NEXT: vfmax.vv v8, v8, v10
21+
; CHECK-NEXT: ret
22+
entry:
23+
%c = call <3 x double> @llvm.maximumnum.v3f64(<3 x double> %a, <3 x double> %b)
24+
ret <3 x double> %c
25+
}
26+
27+
define <4 x double> @max_v4f64(<4 x double> %a, <4 x double> %b) {
28+
; CHECK-LABEL: max_v4f64:
29+
; CHECK: # %bb.0: # %entry
30+
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
31+
; CHECK-NEXT: vfmax.vv v8, v8, v10
32+
; CHECK-NEXT: ret
33+
entry:
34+
%c = call <4 x double> @llvm.maximumnum.v4f64(<4 x double> %a, <4 x double> %b)
35+
ret <4 x double> %c
36+
}
37+
38+
define <2 x float> @max_v2f32(<2 x float> %a, <2 x float> %b) {
39+
; CHECK-LABEL: max_v2f32:
40+
; CHECK: # %bb.0: # %entry
41+
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
42+
; CHECK-NEXT: vfmax.vv v8, v8, v9
43+
; CHECK-NEXT: ret
44+
entry:
45+
%c = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> %a, <2 x float> %b)
46+
ret <2 x float> %c
47+
}
48+
49+
define <3 x float> @max_v3f32(<3 x float> %a, <3 x float> %b) {
50+
; CHECK-LABEL: max_v3f32:
51+
; CHECK: # %bb.0: # %entry
52+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
53+
; CHECK-NEXT: vfmax.vv v8, v8, v9
54+
; CHECK-NEXT: ret
55+
entry:
56+
%c = call <3 x float> @llvm.maximumnum.v3f32(<3 x float> %a, <3 x float> %b)
57+
ret <3 x float> %c
58+
}
59+
60+
define <4 x float> @max_v4f32(<4 x float> %a, <4 x float> %b) {
61+
; CHECK-LABEL: max_v4f32:
62+
; CHECK: # %bb.0: # %entry
63+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
64+
; CHECK-NEXT: vfmax.vv v8, v8, v9
65+
; CHECK-NEXT: ret
66+
entry:
67+
%c = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> %a, <4 x float> %b)
68+
ret <4 x float> %c
69+
}
70+
71+
define <5 x float> @max_v5f32(<5 x float> %a, <5 x float> %b) {
72+
; CHECK-LABEL: max_v5f32:
73+
; CHECK: # %bb.0: # %entry
74+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
75+
; CHECK-NEXT: vfmax.vv v8, v8, v10
76+
; CHECK-NEXT: ret
77+
entry:
78+
%c = call <5 x float> @llvm.maximumnum.v5f32(<5 x float> %a, <5 x float> %b)
79+
ret <5 x float> %c
80+
}
81+
82+
define <8 x float> @max_v8f32(<8 x float> %a, <8 x float> %b) {
83+
; CHECK-LABEL: max_v8f32:
84+
; CHECK: # %bb.0: # %entry
85+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
86+
; CHECK-NEXT: vfmax.vv v8, v8, v10
87+
; CHECK-NEXT: ret
88+
entry:
89+
%c = call <8 x float> @llvm.maximumnum.v8f32(<8 x float> %a, <8 x float> %b)
90+
ret <8 x float> %c
91+
}
92+
93+
define <2 x half> @max_v2f16(<2 x half> %a, <2 x half> %b) {
94+
; ZVFH-LABEL: max_v2f16:
95+
; ZVFH: # %bb.0: # %entry
96+
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
97+
; ZVFH-NEXT: vfmax.vv v8, v8, v9
98+
; ZVFH-NEXT: ret
99+
;
100+
; ZVFHMIN-LABEL: max_v2f16:
101+
; ZVFHMIN: # %bb.0: # %entry
102+
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
103+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
104+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
105+
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
106+
; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10
107+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
108+
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
109+
; ZVFHMIN-NEXT: ret
110+
entry:
111+
%c = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> %a, <2 x half> %b)
112+
ret <2 x half> %c
113+
}
114+
115+
define <4 x half> @max_v4f16(<4 x half> %a, <4 x half> %b) {
116+
; ZVFH-LABEL: max_v4f16:
117+
; ZVFH: # %bb.0: # %entry
118+
; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
119+
; ZVFH-NEXT: vfmax.vv v8, v8, v9
120+
; ZVFH-NEXT: ret
121+
;
122+
; ZVFHMIN-LABEL: max_v4f16:
123+
; ZVFHMIN: # %bb.0: # %entry
124+
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
125+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
126+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
127+
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
128+
; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10
129+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
130+
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
131+
; ZVFHMIN-NEXT: ret
132+
entry:
133+
%c = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> %a, <4 x half> %b)
134+
ret <4 x half> %c
135+
}
136+
137+
define <8 x half> @max_v8f16(<8 x half> %a, <8 x half> %b) {
138+
; ZVFH-LABEL: max_v8f16:
139+
; ZVFH: # %bb.0: # %entry
140+
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
141+
; ZVFH-NEXT: vfmax.vv v8, v8, v9
142+
; ZVFH-NEXT: ret
143+
;
144+
; ZVFHMIN-LABEL: max_v8f16:
145+
; ZVFHMIN: # %bb.0: # %entry
146+
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
147+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
148+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
149+
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
150+
; ZVFHMIN-NEXT: vfmax.vv v10, v12, v10
151+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
152+
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
153+
; ZVFHMIN-NEXT: ret
154+
entry:
155+
%c = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> %a, <8 x half> %b)
156+
ret <8 x half> %c
157+
}
158+
159+
define <9 x half> @max_v9f16(<9 x half> %a, <9 x half> %b) {
160+
; ZVFH-LABEL: max_v9f16:
161+
; ZVFH: # %bb.0: # %entry
162+
; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
163+
; ZVFH-NEXT: vfmax.vv v8, v8, v10
164+
; ZVFH-NEXT: ret
165+
;
166+
; ZVFHMIN-LABEL: max_v9f16:
167+
; ZVFHMIN: # %bb.0: # %entry
168+
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
169+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
170+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
171+
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
172+
; ZVFHMIN-NEXT: vfmax.vv v12, v16, v12
173+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
174+
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
175+
; ZVFHMIN-NEXT: ret
176+
entry:
177+
%c = call <9 x half> @llvm.maximumnum.v9f16(<9 x half> %a, <9 x half> %b)
178+
ret <9 x half> %c
179+
}
180+
181+
define <16 x half> @max_v16f16(<16 x half> %a, <16 x half> %b) {
182+
; ZVFH-LABEL: max_v16f16:
183+
; ZVFH: # %bb.0: # %entry
184+
; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
185+
; ZVFH-NEXT: vfmax.vv v8, v8, v10
186+
; ZVFH-NEXT: ret
187+
;
188+
; ZVFHMIN-LABEL: max_v16f16:
189+
; ZVFHMIN: # %bb.0: # %entry
190+
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
191+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
192+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
193+
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
194+
; ZVFHMIN-NEXT: vfmax.vv v12, v16, v12
195+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
196+
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
197+
; ZVFHMIN-NEXT: ret
198+
entry:
199+
%c = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> %a, <16 x half> %b)
200+
ret <16 x half> %c
201+
}

0 commit comments

Comments
 (0)