Skip to content

Commit 0303fd2

Browse files
authored
[PowerPC] hoist xxspltib out of loop body (#127121)
Fixes #127119. Remove `hasSideEffects` from `xxspltib`, since the ISA specifies no special restriction that prevents it from being reordered, moved, CSE'd, or hoisted by LICM. Removing this restriction will allow `xxspltib` to be hoisted out of loop bodies.
1 parent ba9bd22 commit 0303fd2

File tree

3 files changed

+47
-49
lines changed

3 files changed

+47
-49
lines changed

llvm/lib/Target/PowerPC/PPCInstrVSX.td

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1659,8 +1659,6 @@ let Predicates = [HasVSX, HasP9Vector] in {
16591659
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
16601660

16611661
// Vector Splat Immediate Byte
1662-
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
1663-
let hasSideEffects = 1 in
16641662
def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8),
16651663
"xxspltib $XT, $IMM8", IIC_VecPerm, []>;
16661664

llvm/test/CodeGen/PowerPC/licm-xxsplti.ll

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ for.body:
8282
; AIX32-NEXT: li 6, 0
8383
; AIX32-NEXT: beq 0, L..BB0_4
8484
; AIX32-NEXT: # %bb.2: # %for.body.preheader.new
85+
; AIX32-NEXT: xxspltib 0, 6
8586
; AIX32-NEXT: addi 12, 4, -8
8687
; AIX32-NEXT: addi 9, 3, -8
8788
; AIX32-NEXT: rlwinm 7, 5, 0, 1, 30
@@ -91,20 +92,19 @@ for.body:
9192
; AIX32-NEXT: .align 4
9293
; AIX32-NEXT: L..BB0_3: # %for.body
9394
; AIX32-NEXT: # =>This Inner Loop Header: Depth=1
94-
; AIX32-NEXT: lxvwsx 0, 12, 10
95-
; AIX32-NEXT: xxspltib 1, 6
96-
; AIX32-NEXT: lxvwsx 2, 12, 11
95+
; AIX32-NEXT: lxvwsx 1, 12, 10
9796
; AIX32-NEXT: addic 6, 6, 2
98-
; AIX32-NEXT: addi 12, 12, 8
9997
; AIX32-NEXT: addze 8, 8
10098
; AIX32-NEXT: xor 0, 6, 7
10199
; AIX32-NEXT: or. 0, 0, 8
102-
; AIX32-NEXT: xxland 0, 0, 1
103-
; AIX32-NEXT: xxland 1, 2, 1
104-
; AIX32-NEXT: xscvspdpn 0, 0
105-
; AIX32-NEXT: stfsu 0, 8(9)
106-
; AIX32-NEXT: xscvspdpn 0, 1
107-
; AIX32-NEXT: stfs 0, 4(9)
100+
; AIX32-NEXT: xxland 1, 1, 0
101+
; AIX32-NEXT: xscvspdpn 1, 1
102+
; AIX32-NEXT: stfsu 1, 8(9)
103+
; AIX32-NEXT: lxvwsx 1, 12, 11
104+
; AIX32-NEXT: addi 12, 12, 8
105+
; AIX32-NEXT: xxland 1, 1, 0
106+
; AIX32-NEXT: xscvspdpn 1, 1
107+
; AIX32-NEXT: stfs 1, 4(9)
108108
; AIX32-NEXT: bne 0, L..BB0_3
109109

110110
; AIX64: ._Z3fooPfS_Pi:
@@ -118,10 +118,11 @@ for.body:
118118
; AIX64-NEXT: beq 0, L..BB0_4
119119
; AIX64-NEXT: # %bb.2: # %for.body.preheader.new
120120
; AIX64-NEXT: rlwinm 6, 5, 0, 1, 30
121-
; AIX64-NEXT: addi 10, 4, -8
121+
; AIX64-NEXT: xxspltib 0, 6
122+
; AIX64-NEXT: addi 9, 4, -8
122123
; AIX64-NEXT: addi 7, 3, -8
123124
; AIX64-NEXT: li 8, 8
124-
; AIX64-NEXT: li 9, 12
125+
; AIX64-NEXT: li 10, 12
125126
; AIX64-NEXT: li 11, 4
126127
; AIX64-NEXT: addi 6, 6, -2
127128
; AIX64-NEXT: rldicl 6, 6, 63, 1
@@ -131,17 +132,16 @@ for.body:
131132
; AIX64-NEXT: .align 4
132133
; AIX64-NEXT: L..BB0_3: # %for.body
133134
; AIX64-NEXT: # =>This Inner Loop Header: Depth=1
134-
; AIX64-NEXT: lxvwsx 0, 10, 8
135-
; AIX64-NEXT: xxspltib 1, 6
135+
; AIX64-NEXT: lxvwsx 1, 9, 8
136136
; AIX64-NEXT: addi 6, 6, 2
137-
; AIX64-NEXT: xxland 0, 0, 1
138-
; AIX64-NEXT: xscvspdpn 0, 0
139-
; AIX64-NEXT: stfsu 0, 8(7)
140-
; AIX64-NEXT: lxvwsx 0, 10, 9
141-
; AIX64-NEXT: addi 10, 10, 8
142-
; AIX64-NEXT: xxland 0, 0, 1
143-
; AIX64-NEXT: xxsldwi 0, 0, 0, 3
144-
; AIX64-NEXT: stfiwx 0, 7, 11
137+
; AIX64-NEXT: xxland 1, 1, 0
138+
; AIX64-NEXT: xscvspdpn 1, 1
139+
; AIX64-NEXT: stfsu 1, 8(7)
140+
; AIX64-NEXT: lxvwsx 1, 9, 10
141+
; AIX64-NEXT: addi 9, 9, 8
142+
; AIX64-NEXT: xxland 1, 1, 0
143+
; AIX64-NEXT: xxsldwi 1, 1, 1, 3
144+
; AIX64-NEXT: stfiwx 1, 7, 11
145145
; AIX64-NEXT: bdnz L..BB0_3
146146

147147
; LINUX64LE: _Z3fooPfS_Pi: # @_Z3fooPfS_Pi
@@ -157,6 +157,7 @@ for.body:
157157
; LINUX64LE-NEXT: beq 0, .LBB0_4
158158
; LINUX64LE-NEXT: # %bb.2: # %for.body.preheader.new
159159
; LINUX64LE-NEXT: rlwinm 6, 5, 0, 1, 30
160+
; LINUX64LE-NEXT: xxspltib 0, 6
160161
; LINUX64LE-NEXT: addi 8, 4, -8
161162
; LINUX64LE-NEXT: addi 7, 3, -8
162163
; LINUX64LE-NEXT: li 9, 8
@@ -170,15 +171,14 @@ for.body:
170171
; LINUX64LE-NEXT: .p2align 4
171172
; LINUX64LE-NEXT: .LBB0_3: # %for.body
172173
; LINUX64LE-NEXT: # =>This Inner Loop Header: Depth=1
173-
; LINUX64LE-NEXT: lxvwsx 0, 8, 9
174-
; LINUX64LE-NEXT: xxspltib 1, 6
174+
; LINUX64LE-NEXT: lxvwsx 1, 8, 9
175175
; LINUX64LE-NEXT: addi 6, 6, 2
176-
; LINUX64LE-NEXT: xxland 0, 0, 1
177-
; LINUX64LE-NEXT: xxsldwi 0, 0, 0, 3
178-
; LINUX64LE-NEXT: xscvspdpn 0, 0
179-
; LINUX64LE-NEXT: stfsu 0, 8(7)
180-
; LINUX64LE-NEXT: lxvwsx 0, 8, 10
176+
; LINUX64LE-NEXT: xxland 1, 1, 0
177+
; LINUX64LE-NEXT: xxsldwi 1, 1, 1, 3
178+
; LINUX64LE-NEXT: xscvspdpn 1, 1
179+
; LINUX64LE-NEXT: stfsu 1, 8(7)
180+
; LINUX64LE-NEXT: lxvwsx 1, 8, 10
181181
; LINUX64LE-NEXT: addi 8, 8, 8
182-
; LINUX64LE-NEXT: xxland 0, 0, 1
183-
; LINUX64LE-NEXT: stxvrwx 0, 7, 11
182+
; LINUX64LE-NEXT: xxland 1, 1, 0
183+
; LINUX64LE-NEXT: stxvrwx 1, 7, 11
184184
; LINUX64LE-NEXT: bdnz .LBB0_3

llvm/test/CodeGen/PowerPC/memset-tail.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -380,17 +380,17 @@ define dso_local void @memsetTailV1B2(ptr nocapture noundef writeonly %p) local_
380380
;
381381
; P9-BE-LABEL: memsetTailV1B2:
382382
; P9-BE: # %bb.0: # %entry
383+
; P9-BE-NEXT: xxspltib 0, 15
383384
; P9-BE-NEXT: li 4, 3855
384385
; P9-BE-NEXT: sth 4, 16(3)
385-
; P9-BE-NEXT: xxspltib 0, 15
386386
; P9-BE-NEXT: stxv 0, 0(3)
387387
; P9-BE-NEXT: blr
388388
;
389389
; P10-BE-LABEL: memsetTailV1B2:
390390
; P10-BE: # %bb.0: # %entry
391+
; P10-BE-NEXT: xxspltib 0, 15
391392
; P10-BE-NEXT: li 4, 3855
392393
; P10-BE-NEXT: sth 4, 16(3)
393-
; P10-BE-NEXT: xxspltib 0, 15
394394
; P10-BE-NEXT: stxv 0, 0(3)
395395
; P10-BE-NEXT: blr
396396
;
@@ -404,17 +404,17 @@ define dso_local void @memsetTailV1B2(ptr nocapture noundef writeonly %p) local_
404404
;
405405
; P9-LE-LABEL: memsetTailV1B2:
406406
; P9-LE: # %bb.0: # %entry
407+
; P9-LE-NEXT: xxspltib 0, 15
407408
; P9-LE-NEXT: li 4, 3855
408409
; P9-LE-NEXT: sth 4, 16(3)
409-
; P9-LE-NEXT: xxspltib 0, 15
410410
; P9-LE-NEXT: stxv 0, 0(3)
411411
; P9-LE-NEXT: blr
412412
;
413413
; P10-LE-LABEL: memsetTailV1B2:
414414
; P10-LE: # %bb.0: # %entry
415+
; P10-LE-NEXT: xxspltib 0, 15
415416
; P10-LE-NEXT: li 4, 3855
416417
; P10-LE-NEXT: sth 4, 16(3)
417-
; P10-LE-NEXT: xxspltib 0, 15
418418
; P10-LE-NEXT: stxv 0, 0(3)
419419
; P10-LE-NEXT: blr
420420
entry:
@@ -433,17 +433,17 @@ define dso_local void @memsetTailV1B1(ptr nocapture noundef writeonly %p) local_
433433
;
434434
; P9-BE-LABEL: memsetTailV1B1:
435435
; P9-BE: # %bb.0: # %entry
436+
; P9-BE-NEXT: xxspltib 0, 15
436437
; P9-BE-NEXT: li 4, 15
437438
; P9-BE-NEXT: stb 4, 16(3)
438-
; P9-BE-NEXT: xxspltib 0, 15
439439
; P9-BE-NEXT: stxv 0, 0(3)
440440
; P9-BE-NEXT: blr
441441
;
442442
; P10-BE-LABEL: memsetTailV1B1:
443443
; P10-BE: # %bb.0: # %entry
444+
; P10-BE-NEXT: xxspltib 0, 15
444445
; P10-BE-NEXT: li 4, 15
445446
; P10-BE-NEXT: stb 4, 16(3)
446-
; P10-BE-NEXT: xxspltib 0, 15
447447
; P10-BE-NEXT: stxv 0, 0(3)
448448
; P10-BE-NEXT: blr
449449
;
@@ -457,17 +457,17 @@ define dso_local void @memsetTailV1B1(ptr nocapture noundef writeonly %p) local_
457457
;
458458
; P9-LE-LABEL: memsetTailV1B1:
459459
; P9-LE: # %bb.0: # %entry
460+
; P9-LE-NEXT: xxspltib 0, 15
460461
; P9-LE-NEXT: li 4, 15
461462
; P9-LE-NEXT: stb 4, 16(3)
462-
; P9-LE-NEXT: xxspltib 0, 15
463463
; P9-LE-NEXT: stxv 0, 0(3)
464464
; P9-LE-NEXT: blr
465465
;
466466
; P10-LE-LABEL: memsetTailV1B1:
467467
; P10-LE: # %bb.0: # %entry
468+
; P10-LE-NEXT: xxspltib 0, 15
468469
; P10-LE-NEXT: li 4, 15
469470
; P10-LE-NEXT: stb 4, 16(3)
470-
; P10-LE-NEXT: xxspltib 0, 15
471471
; P10-LE-NEXT: stxv 0, 0(3)
472472
; P10-LE-NEXT: blr
473473
entry:
@@ -861,17 +861,17 @@ define dso_local void @memset2TailV1B2(ptr nocapture noundef writeonly %p) local
861861
;
862862
; P9-BE-LABEL: memset2TailV1B2:
863863
; P9-BE: # %bb.0: # %entry
864+
; P9-BE-NEXT: xxspltib 0, 165
864865
; P9-BE-NEXT: li 4, -23131
865866
; P9-BE-NEXT: sth 4, 16(3)
866-
; P9-BE-NEXT: xxspltib 0, 165
867867
; P9-BE-NEXT: stxv 0, 0(3)
868868
; P9-BE-NEXT: blr
869869
;
870870
; P10-BE-LABEL: memset2TailV1B2:
871871
; P10-BE: # %bb.0: # %entry
872+
; P10-BE-NEXT: xxspltib 0, 165
872873
; P10-BE-NEXT: li 4, -23131
873874
; P10-BE-NEXT: sth 4, 16(3)
874-
; P10-BE-NEXT: xxspltib 0, 165
875875
; P10-BE-NEXT: stxv 0, 0(3)
876876
; P10-BE-NEXT: blr
877877
;
@@ -887,17 +887,17 @@ define dso_local void @memset2TailV1B2(ptr nocapture noundef writeonly %p) local
887887
;
888888
; P9-LE-LABEL: memset2TailV1B2:
889889
; P9-LE: # %bb.0: # %entry
890+
; P9-LE-NEXT: xxspltib 0, 165
890891
; P9-LE-NEXT: li 4, -23131
891892
; P9-LE-NEXT: sth 4, 16(3)
892-
; P9-LE-NEXT: xxspltib 0, 165
893893
; P9-LE-NEXT: stxv 0, 0(3)
894894
; P9-LE-NEXT: blr
895895
;
896896
; P10-LE-LABEL: memset2TailV1B2:
897897
; P10-LE: # %bb.0: # %entry
898+
; P10-LE-NEXT: xxspltib 0, 165
898899
; P10-LE-NEXT: li 4, -23131
899900
; P10-LE-NEXT: sth 4, 16(3)
900-
; P10-LE-NEXT: xxspltib 0, 165
901901
; P10-LE-NEXT: stxv 0, 0(3)
902902
; P10-LE-NEXT: blr
903903
entry:
@@ -917,17 +917,17 @@ define dso_local void @memset2TailV1B1(ptr nocapture noundef writeonly %p) local
917917
;
918918
; P9-BE-LABEL: memset2TailV1B1:
919919
; P9-BE: # %bb.0: # %entry
920+
; P9-BE-NEXT: xxspltib 0, 165
920921
; P9-BE-NEXT: li 4, -91
921922
; P9-BE-NEXT: stb 4, 16(3)
922-
; P9-BE-NEXT: xxspltib 0, 165
923923
; P9-BE-NEXT: stxv 0, 0(3)
924924
; P9-BE-NEXT: blr
925925
;
926926
; P10-BE-LABEL: memset2TailV1B1:
927927
; P10-BE: # %bb.0: # %entry
928+
; P10-BE-NEXT: xxspltib 0, 165
928929
; P10-BE-NEXT: li 4, -91
929930
; P10-BE-NEXT: stb 4, 16(3)
930-
; P10-BE-NEXT: xxspltib 0, 165
931931
; P10-BE-NEXT: stxv 0, 0(3)
932932
; P10-BE-NEXT: blr
933933
;
@@ -943,17 +943,17 @@ define dso_local void @memset2TailV1B1(ptr nocapture noundef writeonly %p) local
943943
;
944944
; P9-LE-LABEL: memset2TailV1B1:
945945
; P9-LE: # %bb.0: # %entry
946+
; P9-LE-NEXT: xxspltib 0, 165
946947
; P9-LE-NEXT: li 4, -91
947948
; P9-LE-NEXT: stb 4, 16(3)
948-
; P9-LE-NEXT: xxspltib 0, 165
949949
; P9-LE-NEXT: stxv 0, 0(3)
950950
; P9-LE-NEXT: blr
951951
;
952952
; P10-LE-LABEL: memset2TailV1B1:
953953
; P10-LE: # %bb.0: # %entry
954+
; P10-LE-NEXT: xxspltib 0, 165
954955
; P10-LE-NEXT: li 4, -91
955956
; P10-LE-NEXT: stb 4, 16(3)
956-
; P10-LE-NEXT: xxspltib 0, 165
957957
; P10-LE-NEXT: stxv 0, 0(3)
958958
; P10-LE-NEXT: blr
959959
entry:

0 commit comments

Comments
 (0)