-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[CodeGen] Prevent register coalescer rematerialization based on target #148430
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/tomershafir/spr/main.codegen-prevent-register-coalescer-rematerialization-based-on-target
Are you sure you want to change the base?
Conversation
Created using spr 1.3.6
@llvm/pr-subscribers-llvm-regalloc Author: Tomer Shafir (tomershafir) Changes: This change makes the register coalescer prevent rematerialization of a trivial def for a move instruction, if the target guides against it, based on the new target hook `shouldReMaterializeTrivialRegDef`. Full diff: https://github.com/llvm/llvm-project/pull/148430.diff 6 Files Affected:
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 2d7987a2e1988..03624c98f71f0 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -69,6 +69,9 @@ STATISTIC(numCrossRCs, "Number of cross class joins performed");
STATISTIC(numCommutes, "Number of instruction commuting performed");
STATISTIC(numExtends, "Number of copies extended");
STATISTIC(NumReMats, "Number of instructions re-materialized");
+STATISTIC(NumReMatsPrevented,
+ "Number of instruction rematerialization prevented by "
+ "`shouldReMaterializeTrivialRegDef` hook");
STATISTIC(NumInflated, "Number of register classes inflated");
STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved");
@@ -1400,6 +1403,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
if (!Edit.canRematerializeAt(RM, ValNo, CopyIdx))
return false;
+ if (!TII->shouldReMaterializeTrivialRegDef(CopyMI, DstReg, SrcReg)) {
+ LLVM_DEBUG(dbgs() << "Remat prevented: " << CopyIdx << "\t" << *CopyMI);
+ ++NumReMatsPrevented;
+ return false;
+ }
+
DebugLoc DL = CopyMI->getDebugLoc();
MachineBasicBlock *MBB = CopyMI->getParent();
MachineBasicBlock::iterator MII =
diff --git a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
index 1b22514a59d60..890367a761281 100644
--- a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
@@ -64,18 +64,18 @@ define i32 @main() nounwind ssp {
; CHECK: ; %bb.0:
; CHECK-NEXT: sub sp, sp, #96
; CHECK-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill
-; CHECK-NEXT: mov w9, #1 ; =0x1
-; CHECK-NEXT: mov w8, #2 ; =0x2
-; CHECK-NEXT: stp w8, w9, [sp, #72]
-; CHECK-NEXT: mov w9, #3 ; =0x3
-; CHECK-NEXT: mov w8, #4 ; =0x4
-; CHECK-NEXT: stp w8, w9, [sp, #64]
-; CHECK-NEXT: mov w9, #5 ; =0x5
-; CHECK-NEXT: mov w8, #6 ; =0x6
-; CHECK-NEXT: stp w8, w9, [sp, #56]
-; CHECK-NEXT: mov w9, #7 ; =0x7
-; CHECK-NEXT: mov w8, #8 ; =0x8
-; CHECK-NEXT: stp w8, w9, [sp, #48]
+; CHECK-NEXT: mov w8, #1 ; =0x1
+; CHECK-NEXT: mov w1, #2 ; =0x2
+; CHECK-NEXT: stp w1, w8, [sp, #72]
+; CHECK-NEXT: mov w2, #3 ; =0x3
+; CHECK-NEXT: mov w3, #4 ; =0x4
+; CHECK-NEXT: stp w3, w2, [sp, #64]
+; CHECK-NEXT: mov w4, #5 ; =0x5
+; CHECK-NEXT: mov w5, #6 ; =0x6
+; CHECK-NEXT: stp w5, w4, [sp, #56]
+; CHECK-NEXT: mov w6, #7 ; =0x7
+; CHECK-NEXT: mov w7, #8 ; =0x8
+; CHECK-NEXT: stp w7, w6, [sp, #48]
; CHECK-NEXT: mov w8, #9 ; =0x9
; CHECK-NEXT: mov w9, #10 ; =0xa
; CHECK-NEXT: stp w9, w8, [sp, #40]
@@ -86,13 +86,6 @@ define i32 @main() nounwind ssp {
; CHECK-NEXT: str x9, [sp, #8]
; CHECK-NEXT: str w8, [sp]
; CHECK-NEXT: add x0, sp, #76
-; CHECK-NEXT: mov w1, #2 ; =0x2
-; CHECK-NEXT: mov w2, #3 ; =0x3
-; CHECK-NEXT: mov w3, #4 ; =0x4
-; CHECK-NEXT: mov w4, #5 ; =0x5
-; CHECK-NEXT: mov w5, #6 ; =0x6
-; CHECK-NEXT: mov w6, #7 ; =0x7
-; CHECK-NEXT: mov w7, #8 ; =0x8
; CHECK-NEXT: bl _fn9
; CHECK-NEXT: mov w0, #0 ; =0x0
; CHECK-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/arm64-reg-coalesce-remat-zero-cycle-regmov-fpr.mir b/llvm/test/CodeGen/AArch64/arm64-reg-coalesce-remat-zero-cycle-regmov-fpr.mir
new file mode 100644
index 0000000000000..303e25edb2b18
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-reg-coalesce-remat-zero-cycle-regmov-fpr.mir
@@ -0,0 +1,174 @@
+# RUN: llc -o - -mtriple=arm64-linux-gnu -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=NOTCPU-LINUX
+# RUN: llc -o - -mtriple=arm64-apple-macosx -mcpu=generic -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=NOTCPU-APPLE
+# RUN: llc -o - -mtriple=arm64-apple-macosx -mcpu=apple-m1 -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=CPU
+# RUN: llc -o - -mtriple=arm64-apple-macosx -mcpu=apple-m1 -mattr=-zcm-fpr64 -mattr=-zcm-fpr128 -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=NOTATTR
+# RUN: llc -o - -mtriple=arm64-apple-macosx -mattr=+zcm-fpr64 -mattr=+zcm-fpr128 -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=ATTR
+
+--- |
+ define void @remat_FPR128() {
+ ret void
+ }
+ declare void @foo_v4i32(<4 x float>, <4 x float>)
+
+ define void @remat_FPR64() {
+ ret void
+ }
+ declare void @foo_double(double, double)
+
+ define void @remat_FPR32() {
+ ret void
+ }
+ declare void @foo_float(float, float)
+
+ define void @remat_FPR16() {
+ ret void
+ }
+ declare void @foo_half(half, half)
+...
+---
+name: remat_FPR128
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: remat_FPR128
+
+ ; NOTCPU-LINUX: %0:fpr128 = MOVIv2d_ns 64
+ ; NOTCPU-LINUX-NEXT: %1:fpr128 = MOVIv2d_ns 64
+ ; NOTCPU-LINUX: BL @foo_v4i32
+
+ ; NOTCPU-APPLE: %0:fpr128 = MOVIv2d_ns 64
+ ; NOTCPU-APPLE-NEXT: %1:fpr128 = MOVIv2d_ns 64
+ ; NOTCPU-APPLE: BL @foo_v4i32
+
+ ; CPU: %0:fpr128 = MOVIv2d_ns 64
+ ; CPU-NEXT: %1:fpr128 = COPY %0
+ ; CPU: BL @foo_v4i32
+
+ ; NOTATTR: %0:fpr128 = MOVIv2d_ns 64
+ ; NOTATTR-NEXT: %1:fpr128 = MOVIv2d_ns 64
+ ; NOTATTR: BL @foo_v4i32
+
+ ; ATTR: %0:fpr128 = MOVIv2d_ns 64
+ ; ATTR-NEXT: %1:fpr128 = COPY %0
+ ; ATTR: BL @foo_v4i32
+
+ %0:fpr128 = MOVIv2d_ns 64
+ %1:fpr128 = COPY %0
+
+ ; Creates a live range interference to prevent coalescing and force
+ ; trying to rematerialize the previous COPY.
+ %1 = ADDv4i32 %1, %1
+
+ BL @foo_v4i32, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit %0, implicit %1
+ RET_ReallyLR
+
+---
+name: remat_FPR64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: remat_FPR64
+
+ ; NOTCPU-LINUX: %0:fpr64 = FMOVDi 64
+ ; NOTCPU-LINUX-NEXT: %1:fpr64 = FMOVDi 64
+ ; NOTCPU-LINUX: BL @foo_double
+
+ ; NOTCPU-APPLE: %0:fpr64 = FMOVDi 64
+ ; NOTCPU-APPLE-NEXT: %1:fpr64 = FMOVDi 64
+ ; NOTCPU-APPLE: BL @foo_double
+
+ ; CPU: %0:fpr64 = FMOVDi 64
+ ; CPU-NEXT: %1:fpr64 = COPY %0
+ ; CPU: BL @foo_double
+
+ ; NOTATTR: %0:fpr64 = FMOVDi 64
+ ; NOTATTR-NEXT: %1:fpr64 = FMOVDi 64
+ ; NOTATTR: BL @foo_double
+
+ ; ATTR: %0:fpr64 = FMOVDi 64
+ ; ATTR-NEXT: %1:fpr64 = COPY %0
+ ; ATTR: BL @foo_double
+
+ %0:fpr64 = FMOVDi 64
+ %1:fpr64 = COPY %0
+
+ ; Creates a live range interference to prevent coalescing and force
+ ; trying to rematerialize the previous COPY.
+ %1 = FADDDrr %1, %1, implicit $fpcr
+
+ BL @foo_double, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit %0, implicit %1
+ RET_ReallyLR
+
+---
+name: remat_FPR32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: remat_FPR32
+
+ ; NOTCPU-LINUX: %0:fpr32 = FMOVSi 64
+ ; NOTCPU-LINUX-NEXT: %1:fpr32 = FMOVSi 64
+ ; NOTCPU-LINUX: BL @foo_float
+
+ ; NOTCPU-APPLE: %0:fpr32 = FMOVSi 64
+ ; NOTCPU-APPLE-NEXT: %1:fpr32 = FMOVSi 64
+ ; NOTCPU-APPLE: BL @foo_float
+
+ ; CPU: %0:fpr32 = FMOVSi 64
+ ; CPU-NEXT: %1:fpr32 = COPY %0
+ ; CPU: BL @foo_float
+
+ ; NOTATTR: %0:fpr32 = FMOVSi 64
+ ; NOTATTR-NEXT: %1:fpr32 = FMOVSi 64
+ ; NOTATTR: BL @foo_float
+
+ ; ATTR: %0:fpr32 = FMOVSi 64
+ ; ATTR-NEXT: %1:fpr32 = COPY %0
+ ; ATTR: BL @foo_float
+
+ %0:fpr32 = FMOVSi 64
+ %1:fpr32 = COPY %0
+
+ ; Creates a live range interference to prevent coalescing and force
+ ; trying to rematerialize the previous COPY.
+ %1 = FADDSrr %1, %1, implicit $fpcr
+
+ BL @foo_float, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit %0, implicit %1
+ RET_ReallyLR
+
+---
+name: remat_FPR16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: remat_FPR16
+
+ ; NOTCPU-LINUX: %0:fpr16 = FMOVHi 64
+ ; NOTCPU-LINUX-NEXT: %1:fpr16 = FMOVHi 64
+ ; NOTCPU-LINUX: BL @foo_half
+
+ ; NOTCPU-APPLE: %0:fpr16 = FMOVHi 64
+ ; NOTCPU-APPLE-NEXT: %1:fpr16 = FMOVHi 64
+ ; NOTCPU-APPLE: BL @foo_half
+
+ ; CPU: %0:fpr16 = FMOVHi 64
+ ; CPU-NEXT: %1:fpr16 = COPY %0
+ ; CPU: BL @foo_half
+
+ ; NOTATTR: %0:fpr16 = FMOVHi 64
+ ; NOTATTR-NEXT: %1:fpr16 = FMOVHi 64
+ ; NOTATTR: BL @foo_half
+
+ ; ATTR: %0:fpr16 = FMOVHi 64
+ ; ATTR-NEXT: %1:fpr16 = COPY %0
+ ; ATTR: BL @foo_half
+
+ %0:fpr16 = FMOVHi 64
+ %1:fpr16 = COPY %0
+
+ ; Creates a live range interference to prevent coalescing and force
+ ; trying to rematerialize the previous COPY.
+ %1 = FADDHrr %1, %1, implicit $fpcr
+
+ BL @foo_half, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit %0, implicit %1
+ RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/arm64-reg-coalesce-remat-zero-cycle-regmov-gpr.mir b/llvm/test/CodeGen/AArch64/arm64-reg-coalesce-remat-zero-cycle-regmov-gpr.mir
new file mode 100644
index 0000000000000..6247572b2cf2c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-reg-coalesce-remat-zero-cycle-regmov-gpr.mir
@@ -0,0 +1,90 @@
+# RUN: llc -o - -mtriple=arm64-linux-gnu -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=NOTCPU-LINUX
+# RUN: llc -o - -mtriple=arm64-apple-macosx -mcpu=generic -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=NOTCPU-APPLE
+# RUN: llc -o - -mtriple=arm64-apple-macosx -mcpu=apple-m1 -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=CPU
+# RUN: llc -o - -mtriple=arm64-apple-macosx -mcpu=apple-m1 -mattr=-zcm-gpr64 -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=NOTATTR
+# RUN: llc -o - -mtriple=arm64-apple-macosx -mattr=+zcm-gpr64 -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=ATTR
+
+--- |
+ define void @remat_GPR32() {
+ ret void
+ }
+ declare void @foo_i32(i32, i32)
+
+ define void @remat_GPR64() {
+ ret void
+ }
+ declare void @foo_i64(i64, i64)
+...
+---
+name: remat_GPR32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: remat_GPR32
+
+ ; NOTCPU-LINUX: %0:gpr32 = MOVi32imm 32
+ ; NOTCPU-LINUX-NEXT: %1:gpr32common = MOVi32imm 32
+ ; NOTCPU-LINUX: BL @foo_i32
+
+ ; NOTCPU-APPLE: %0:gpr32 = MOVi32imm 32
+ ; NOTCPU-APPLE-NEXT: %1:gpr32common = MOVi32imm 32
+ ; NOTCPU-APPLE: BL @foo_i32
+
+ ; CPU: %0:gpr32 = MOVi32imm 32
+ ; CPU-NEXT: %1:gpr32sp = COPY %0
+ ; CPU: BL @foo_i32
+
+ ; NOTATTR: %0:gpr32 = MOVi32imm 32
+ ; NOTATTR-NEXT: %1:gpr32common = MOVi32imm 32
+ ; NOTATTR: BL @foo_i32
+
+ ; ATTR: %0:gpr32 = MOVi32imm 32
+ ; ATTR-NEXT: %1:gpr32sp = COPY %0
+ ; ATTR: BL @foo_i32
+
+ %0:gpr32 = MOVi32imm 32
+ %1:gpr32sp = COPY %0
+
+ ; Creates a live range interference to prevent coalescing and force
+ ; trying to rematerialize the previous COPY.
+ %1 = ADDWri %1, 1, 0
+
+ BL @foo_i32, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit %0, implicit %1
+ RET_ReallyLR
+
+---
+name: remat_GPR64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: remat_GPR64
+
+ ; NOTCPU-LINUX: %0:gpr64 = MOVi64imm 64
+ ; NOTCPU-LINUX-NEXT: %1:gpr64common = MOVi64imm 64
+ ; NOTCPU-LINUX: BL @foo_i64
+
+ ; NOTCPU-APPLE: %0:gpr64 = MOVi64imm 64
+ ; NOTCPU-APPLE-NEXT: %1:gpr64common = MOVi64imm 64
+ ; NOTCPU-APPLE: BL @foo_i64
+
+ ; CPU: %0:gpr64 = MOVi64imm 64
+ ; CPU-NEXT: %1:gpr64sp = COPY %0
+ ; CPU: BL @foo_i64
+
+ ; NOTATTR: %0:gpr64 = MOVi64imm 64
+ ; NOTATTR-NEXT: %1:gpr64common = MOVi64imm 64
+ ; NOTATTR: BL @foo_i64
+
+ ; ATTR: %0:gpr64 = MOVi64imm 64
+ ; ATTR-NEXT: %1:gpr64sp = COPY %0
+ ; ATTR: BL @foo_i64
+
+ %0:gpr64 = MOVi64imm 64
+ %1:gpr64sp = COPY %0
+
+ ; Creates a live range interference to prevent coalescing and force
+ ; trying to rematerialize the previous COPY.
+ %1 = ADDXri %1, 1, 0
+
+ BL @foo_i64, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit %0, implicit %1
+ RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll b/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll
index b225d9a1acaf5..3edec9c9d8fc6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll
@@ -4,6 +4,7 @@ define <8 x i1> @test1() {
; CHECK-LABEL: test1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.16b v0, #0
+; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
%Shuff = shufflevector <8 x i1> <i1 0, i1 1, i1 2, i1 3, i1 4, i1 5, i1 6,
@@ -58,9 +59,14 @@ bb:
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
define <16 x i1> @test4(ptr %ptr, i32 %v) {
-; CHECK-LABEL: _test4:
-; CHECK: adrp x[[REG3:[0-9]+]], lCPI3_0@PAGE
-; CHECK: ldr q[[REG2:[0-9]+]], [x[[REG3]], lCPI3_0@PAGEOFF]
+; CHECK-LABEL: test4:
+; CHECK: ; %bb.0: ; %bb
+; CHECK-NEXT: Lloh0:
+; CHECK-NEXT: adrp x8, lCPI3_0@PAGE
+; CHECK-NEXT: Lloh1:
+; CHECK-NEXT: ldr q0, [x8, lCPI3_0@PAGEOFF]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1
bb:
%Shuff = shufflevector <16 x i1> zeroinitializer,
<16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1,
diff --git a/llvm/test/CodeGen/AArch64/clear-dead-implicit-def-impdef.ll b/llvm/test/CodeGen/AArch64/clear-dead-implicit-def-impdef.ll
index bc26eca6f27ef..b5c5124b664a2 100644
--- a/llvm/test/CodeGen/AArch64/clear-dead-implicit-def-impdef.ll
+++ b/llvm/test/CodeGen/AArch64/clear-dead-implicit-def-impdef.ll
@@ -13,16 +13,17 @@ define void @_ZN38SanitizerCommonInterceptors_Scanf_Test8TestBodyEv(ptr %.str.40
; CHECK-NEXT: stp x22, x21, [sp, #80] ; 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #96] ; 16-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #112] ; 16-byte Folded Spill
-; CHECK-NEXT: mov x24, x6
-; CHECK-NEXT: mov x19, x5
-; CHECK-NEXT: mov x20, x4
-; CHECK-NEXT: mov x21, x3
-; CHECK-NEXT: mov x22, x2
-; CHECK-NEXT: mov x23, x1
-; CHECK-NEXT: mov x25, x0
+; CHECK-NEXT: mov x25, x6
+; CHECK-NEXT: str x5, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NEXT: mov x21, x4
+; CHECK-NEXT: mov x22, x3
+; CHECK-NEXT: mov x23, x2
+; CHECK-NEXT: mov x24, x1
+; CHECK-NEXT: mov x26, x0
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: mov w19, #1 ; =0x1
; CHECK-NEXT: mov x0, #0 ; =0x0
-; CHECK-NEXT: mov w1, #1 ; =0x1
+; CHECK-NEXT: mov x1, x19
; CHECK-NEXT: bl __ZL9testScanfPKcjz
; CHECK-NEXT: mov w28, #4 ; =0x4
; CHECK-NEXT: stp x28, x28, [sp, #8]
@@ -34,59 +35,57 @@ define void @_ZN38SanitizerCommonInterceptors_Scanf_Test8TestBodyEv(ptr %.str.40
; CHECK-NEXT: mov x0, #0 ; =0x0
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL9testScanfPKcjz
-; CHECK-NEXT: mov w27, #8 ; =0x8
-; CHECK-NEXT: str x27, [sp]
+; CHECK-NEXT: mov w20, #8 ; =0x8
+; CHECK-NEXT: str x20, [sp]
; CHECK-NEXT: mov x0, #0 ; =0x0
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL9testScanfPKcjz
-; CHECK-NEXT: mov w26, #1 ; =0x1
-; CHECK-NEXT: stp xzr, x26, [sp]
+; CHECK-NEXT: stp xzr, x19, [sp]
; CHECK-NEXT: mov x0, #0 ; =0x0
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL9testScanfPKcjz
-; CHECK-NEXT: str x26, [sp]
+; CHECK-NEXT: str x19, [sp]
; CHECK-NEXT: mov x0, #0 ; =0x0
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL9testScanfPKcjz
; CHECK-NEXT: str x28, [sp]
; CHECK-NEXT: Lloh0:
-; CHECK-NEXT: adrp x26, _.str@GOTPAGE
+; CHECK-NEXT: adrp x27, _.str@GOTPAGE
; CHECK-NEXT: Lloh1:
-; CHECK-NEXT: ldr x26, [x26, _.str@GOTPAGEOFF]
-; CHECK-NEXT: mov x0, x26
+; CHECK-NEXT: ldr x27, [x27, _.str@GOTPAGEOFF]
+; CHECK-NEXT: mov x0, x27
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL9testScanfPKcjz
-; CHECK-NEXT: str wzr, [x24]
-; CHECK-NEXT: str x27, [sp]
-; CHECK-NEXT: mov x0, x25
+; CHECK-NEXT: str wzr, [x25]
+; CHECK-NEXT: str x20, [sp]
+; CHECK-NEXT: mov x0, x26
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
; CHECK-NEXT: str x28, [sp]
-; CHECK-NEXT: mov x0, x23
+; CHECK-NEXT: mov x0, x24
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
; CHECK-NEXT: str x28, [sp]
-; CHECK-NEXT: mov x0, x22
+; CHECK-NEXT: mov x0, x23
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
; CHECK-NEXT: str x28, [sp]
-; CHECK-NEXT: mov x0, x21
+; CHECK-NEXT: mov x0, x22
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
; CHECK-NEXT: str x28, [sp]
-; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: mov x0, x21
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
; CHECK-NEXT: str xzr, [sp]
-; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: ldr x0, [sp, #24] ; 8-byte Folded Reload
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
; CHECK-NEXT: str xzr, [sp]
-; CHECK-NEXT: mov x0, x26
+; CHECK-NEXT: mov x0, x27
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
-; CHECK-NEXT: mov w8, #1 ; =0x1
-; CHECK-NEXT: stp x8, xzr, [sp, #8]
+; CHECK-NEXT: stp x19, xzr, [sp, #8]
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x0, #0 ; =0x0
; CHECK-NEXT: mov w1, #0 ; =0x0
|
@llvm/pr-subscribers-backend-aarch64 Author: Tomer Shafir (tomershafir) Changes: This change makes the register coalescer prevent rematerialization of a trivial def for a move instruction, if the target guides against it, based on the new target hook `shouldReMaterializeTrivialRegDef`. Full diff: https://github.com/llvm/llvm-project/pull/148430.diff 6 Files Affected:
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 2d7987a2e1988..03624c98f71f0 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -69,6 +69,9 @@ STATISTIC(numCrossRCs, "Number of cross class joins performed");
STATISTIC(numCommutes, "Number of instruction commuting performed");
STATISTIC(numExtends, "Number of copies extended");
STATISTIC(NumReMats, "Number of instructions re-materialized");
+STATISTIC(NumReMatsPrevented,
+ "Number of instruction rematerialization prevented by "
+ "`shouldReMaterializeTrivialRegDef` hook");
STATISTIC(NumInflated, "Number of register classes inflated");
STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved");
@@ -1400,6 +1403,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
if (!Edit.canRematerializeAt(RM, ValNo, CopyIdx))
return false;
+ if (!TII->shouldReMaterializeTrivialRegDef(CopyMI, DstReg, SrcReg)) {
+ LLVM_DEBUG(dbgs() << "Remat prevented: " << CopyIdx << "\t" << *CopyMI);
+ ++NumReMatsPrevented;
+ return false;
+ }
+
DebugLoc DL = CopyMI->getDebugLoc();
MachineBasicBlock *MBB = CopyMI->getParent();
MachineBasicBlock::iterator MII =
diff --git a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
index 1b22514a59d60..890367a761281 100644
--- a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
@@ -64,18 +64,18 @@ define i32 @main() nounwind ssp {
; CHECK: ; %bb.0:
; CHECK-NEXT: sub sp, sp, #96
; CHECK-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill
-; CHECK-NEXT: mov w9, #1 ; =0x1
-; CHECK-NEXT: mov w8, #2 ; =0x2
-; CHECK-NEXT: stp w8, w9, [sp, #72]
-; CHECK-NEXT: mov w9, #3 ; =0x3
-; CHECK-NEXT: mov w8, #4 ; =0x4
-; CHECK-NEXT: stp w8, w9, [sp, #64]
-; CHECK-NEXT: mov w9, #5 ; =0x5
-; CHECK-NEXT: mov w8, #6 ; =0x6
-; CHECK-NEXT: stp w8, w9, [sp, #56]
-; CHECK-NEXT: mov w9, #7 ; =0x7
-; CHECK-NEXT: mov w8, #8 ; =0x8
-; CHECK-NEXT: stp w8, w9, [sp, #48]
+; CHECK-NEXT: mov w8, #1 ; =0x1
+; CHECK-NEXT: mov w1, #2 ; =0x2
+; CHECK-NEXT: stp w1, w8, [sp, #72]
+; CHECK-NEXT: mov w2, #3 ; =0x3
+; CHECK-NEXT: mov w3, #4 ; =0x4
+; CHECK-NEXT: stp w3, w2, [sp, #64]
+; CHECK-NEXT: mov w4, #5 ; =0x5
+; CHECK-NEXT: mov w5, #6 ; =0x6
+; CHECK-NEXT: stp w5, w4, [sp, #56]
+; CHECK-NEXT: mov w6, #7 ; =0x7
+; CHECK-NEXT: mov w7, #8 ; =0x8
+; CHECK-NEXT: stp w7, w6, [sp, #48]
; CHECK-NEXT: mov w8, #9 ; =0x9
; CHECK-NEXT: mov w9, #10 ; =0xa
; CHECK-NEXT: stp w9, w8, [sp, #40]
@@ -86,13 +86,6 @@ define i32 @main() nounwind ssp {
; CHECK-NEXT: str x9, [sp, #8]
; CHECK-NEXT: str w8, [sp]
; CHECK-NEXT: add x0, sp, #76
-; CHECK-NEXT: mov w1, #2 ; =0x2
-; CHECK-NEXT: mov w2, #3 ; =0x3
-; CHECK-NEXT: mov w3, #4 ; =0x4
-; CHECK-NEXT: mov w4, #5 ; =0x5
-; CHECK-NEXT: mov w5, #6 ; =0x6
-; CHECK-NEXT: mov w6, #7 ; =0x7
-; CHECK-NEXT: mov w7, #8 ; =0x8
; CHECK-NEXT: bl _fn9
; CHECK-NEXT: mov w0, #0 ; =0x0
; CHECK-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/arm64-reg-coalesce-remat-zero-cycle-regmov-fpr.mir b/llvm/test/CodeGen/AArch64/arm64-reg-coalesce-remat-zero-cycle-regmov-fpr.mir
new file mode 100644
index 0000000000000..303e25edb2b18
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-reg-coalesce-remat-zero-cycle-regmov-fpr.mir
@@ -0,0 +1,174 @@
+# RUN: llc -o - -mtriple=arm64-linux-gnu -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=NOTCPU-LINUX
+# RUN: llc -o - -mtriple=arm64-apple-macosx -mcpu=generic -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=NOTCPU-APPLE
+# RUN: llc -o - -mtriple=arm64-apple-macosx -mcpu=apple-m1 -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=CPU
+# RUN: llc -o - -mtriple=arm64-apple-macosx -mcpu=apple-m1 -mattr=-zcm-fpr64 -mattr=-zcm-fpr128 -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=NOTATTR
+# RUN: llc -o - -mtriple=arm64-apple-macosx -mattr=+zcm-fpr64 -mattr=+zcm-fpr128 -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=ATTR
+
+--- |
+ define void @remat_FPR128() {
+ ret void
+ }
+ declare void @foo_v4i32(<4 x float>, <4 x float>)
+
+ define void @remat_FPR64() {
+ ret void
+ }
+ declare void @foo_double(double, double)
+
+ define void @remat_FPR32() {
+ ret void
+ }
+ declare void @foo_float(float, float)
+
+ define void @remat_FPR16() {
+ ret void
+ }
+ declare void @foo_half(half, half)
+...
+---
+name: remat_FPR128
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: remat_FPR128
+
+ ; NOTCPU-LINUX: %0:fpr128 = MOVIv2d_ns 64
+ ; NOTCPU-LINUX-NEXT: %1:fpr128 = MOVIv2d_ns 64
+ ; NOTCPU-LINUX: BL @foo_v4i32
+
+ ; NOTCPU-APPLE: %0:fpr128 = MOVIv2d_ns 64
+ ; NOTCPU-APPLE-NEXT: %1:fpr128 = MOVIv2d_ns 64
+ ; NOTCPU-APPLE: BL @foo_v4i32
+
+ ; CPU: %0:fpr128 = MOVIv2d_ns 64
+ ; CPU-NEXT: %1:fpr128 = COPY %0
+ ; CPU: BL @foo_v4i32
+
+ ; NOTATTR: %0:fpr128 = MOVIv2d_ns 64
+ ; NOTATTR-NEXT: %1:fpr128 = MOVIv2d_ns 64
+ ; NOTATTR: BL @foo_v4i32
+
+ ; ATTR: %0:fpr128 = MOVIv2d_ns 64
+ ; ATTR-NEXT: %1:fpr128 = COPY %0
+ ; ATTR: BL @foo_v4i32
+
+ %0:fpr128 = MOVIv2d_ns 64
+ %1:fpr128 = COPY %0
+
+ ; Creates a live range interference to prevent coalescing and force
+ ; trying to rematerialize the previous COPY.
+ %1 = ADDv4i32 %1, %1
+
+ BL @foo_v4i32, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit %0, implicit %1
+ RET_ReallyLR
+
+---
+name: remat_FPR64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: remat_FPR64
+
+ ; NOTCPU-LINUX: %0:fpr64 = FMOVDi 64
+ ; NOTCPU-LINUX-NEXT: %1:fpr64 = FMOVDi 64
+ ; NOTCPU-LINUX: BL @foo_double
+
+ ; NOTCPU-APPLE: %0:fpr64 = FMOVDi 64
+ ; NOTCPU-APPLE-NEXT: %1:fpr64 = FMOVDi 64
+ ; NOTCPU-APPLE: BL @foo_double
+
+ ; CPU: %0:fpr64 = FMOVDi 64
+ ; CPU-NEXT: %1:fpr64 = COPY %0
+ ; CPU: BL @foo_double
+
+ ; NOTATTR: %0:fpr64 = FMOVDi 64
+ ; NOTATTR-NEXT: %1:fpr64 = FMOVDi 64
+ ; NOTATTR: BL @foo_double
+
+ ; ATTR: %0:fpr64 = FMOVDi 64
+ ; ATTR-NEXT: %1:fpr64 = COPY %0
+ ; ATTR: BL @foo_double
+
+ %0:fpr64 = FMOVDi 64
+ %1:fpr64 = COPY %0
+
+ ; Creates a live range interference to prevent coalescing and force
+ ; trying to rematerialize the previous COPY.
+ %1 = FADDDrr %1, %1, implicit $fpcr
+
+ BL @foo_double, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit %0, implicit %1
+ RET_ReallyLR
+
+---
+name: remat_FPR32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: remat_FPR32
+
+ ; NOTCPU-LINUX: %0:fpr32 = FMOVSi 64
+ ; NOTCPU-LINUX-NEXT: %1:fpr32 = FMOVSi 64
+ ; NOTCPU-LINUX: BL @foo_float
+
+ ; NOTCPU-APPLE: %0:fpr32 = FMOVSi 64
+ ; NOTCPU-APPLE-NEXT: %1:fpr32 = FMOVSi 64
+ ; NOTCPU-APPLE: BL @foo_float
+
+ ; CPU: %0:fpr32 = FMOVSi 64
+ ; CPU-NEXT: %1:fpr32 = COPY %0
+ ; CPU: BL @foo_float
+
+ ; NOTATTR: %0:fpr32 = FMOVSi 64
+ ; NOTATTR-NEXT: %1:fpr32 = FMOVSi 64
+ ; NOTATTR: BL @foo_float
+
+ ; ATTR: %0:fpr32 = FMOVSi 64
+ ; ATTR-NEXT: %1:fpr32 = COPY %0
+ ; ATTR: BL @foo_float
+
+ %0:fpr32 = FMOVSi 64
+ %1:fpr32 = COPY %0
+
+ ; Creates a live range interference to prevent coalescing and force
+ ; trying to rematerialize the previous COPY.
+ %1 = FADDSrr %1, %1, implicit $fpcr
+
+ BL @foo_float, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit %0, implicit %1
+ RET_ReallyLR
+
+---
+name: remat_FPR16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: remat_FPR16
+
+ ; NOTCPU-LINUX: %0:fpr16 = FMOVHi 64
+ ; NOTCPU-LINUX-NEXT: %1:fpr16 = FMOVHi 64
+ ; NOTCPU-LINUX: BL @foo_half
+
+ ; NOTCPU-APPLE: %0:fpr16 = FMOVHi 64
+ ; NOTCPU-APPLE-NEXT: %1:fpr16 = FMOVHi 64
+ ; NOTCPU-APPLE: BL @foo_half
+
+ ; CPU: %0:fpr16 = FMOVHi 64
+ ; CPU-NEXT: %1:fpr16 = COPY %0
+ ; CPU: BL @foo_half
+
+ ; NOTATTR: %0:fpr16 = FMOVHi 64
+ ; NOTATTR-NEXT: %1:fpr16 = FMOVHi 64
+ ; NOTATTR: BL @foo_half
+
+ ; ATTR: %0:fpr16 = FMOVHi 64
+ ; ATTR-NEXT: %1:fpr16 = COPY %0
+ ; ATTR: BL @foo_half
+
+ %0:fpr16 = FMOVHi 64
+ %1:fpr16 = COPY %0
+
+ ; Creates a live range interference to prevent coalescing and force
+ ; trying to rematerialize the previous COPY.
+ %1 = FADDHrr %1, %1, implicit $fpcr
+
+ BL @foo_half, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit %0, implicit %1
+ RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/arm64-reg-coalesce-remat-zero-cycle-regmov-gpr.mir b/llvm/test/CodeGen/AArch64/arm64-reg-coalesce-remat-zero-cycle-regmov-gpr.mir
new file mode 100644
index 0000000000000..6247572b2cf2c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-reg-coalesce-remat-zero-cycle-regmov-gpr.mir
@@ -0,0 +1,90 @@
+# RUN: llc -o - -mtriple=arm64-linux-gnu -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=NOTCPU-LINUX
+# RUN: llc -o - -mtriple=arm64-apple-macosx -mcpu=generic -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=NOTCPU-APPLE
+# RUN: llc -o - -mtriple=arm64-apple-macosx -mcpu=apple-m1 -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=CPU
+# RUN: llc -o - -mtriple=arm64-apple-macosx -mcpu=apple-m1 -mattr=-zcm-gpr64 -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=NOTATTR
+# RUN: llc -o - -mtriple=arm64-apple-macosx -mattr=+zcm-gpr64 -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=ATTR
+
+--- |
+ define void @remat_GPR32() {
+ ret void
+ }
+ declare void @foo_i32(i32, i32)
+
+ define void @remat_GPR64() {
+ ret void
+ }
+ declare void @foo_i64(i64, i64)
+...
+---
+name: remat_GPR32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: remat_GPR32
+
+ ; NOTCPU-LINUX: %0:gpr32 = MOVi32imm 32
+ ; NOTCPU-LINUX-NEXT: %1:gpr32common = MOVi32imm 32
+ ; NOTCPU-LINUX: BL @foo_i32
+
+ ; NOTCPU-APPLE: %0:gpr32 = MOVi32imm 32
+ ; NOTCPU-APPLE-NEXT: %1:gpr32common = MOVi32imm 32
+ ; NOTCPU-APPLE: BL @foo_i32
+
+ ; CPU: %0:gpr32 = MOVi32imm 32
+ ; CPU-NEXT: %1:gpr32sp = COPY %0
+ ; CPU: BL @foo_i32
+
+ ; NOTATTR: %0:gpr32 = MOVi32imm 32
+ ; NOTATTR-NEXT: %1:gpr32common = MOVi32imm 32
+ ; NOTATTR: BL @foo_i32
+
+ ; ATTR: %0:gpr32 = MOVi32imm 32
+ ; ATTR-NEXT: %1:gpr32sp = COPY %0
+ ; ATTR: BL @foo_i32
+
+ %0:gpr32 = MOVi32imm 32
+ %1:gpr32sp = COPY %0
+
+ ; Creates a live range interference to prevent coalescing and force
+ ; trying to rematerialize the previous COPY.
+ %1 = ADDWri %1, 1, 0
+
+ BL @foo_i32, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit %0, implicit %1
+ RET_ReallyLR
+
+---
+name: remat_GPR64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: remat_GPR64
+
+ ; NOTCPU-LINUX: %0:gpr64 = MOVi64imm 64
+ ; NOTCPU-LINUX-NEXT: %1:gpr64common = MOVi64imm 64
+ ; NOTCPU-LINUX: BL @foo_i64
+
+ ; NOTCPU-APPLE: %0:gpr64 = MOVi64imm 64
+ ; NOTCPU-APPLE-NEXT: %1:gpr64common = MOVi64imm 64
+ ; NOTCPU-APPLE: BL @foo_i64
+
+ ; CPU: %0:gpr64 = MOVi64imm 64
+ ; CPU-NEXT: %1:gpr64sp = COPY %0
+ ; CPU: BL @foo_i64
+
+ ; NOTATTR: %0:gpr64 = MOVi64imm 64
+ ; NOTATTR-NEXT: %1:gpr64common = MOVi64imm 64
+ ; NOTATTR: BL @foo_i64
+
+ ; ATTR: %0:gpr64 = MOVi64imm 64
+ ; ATTR-NEXT: %1:gpr64sp = COPY %0
+ ; ATTR: BL @foo_i64
+
+ %0:gpr64 = MOVi64imm 64
+ %1:gpr64sp = COPY %0
+
+ ; Creates a live range interference to prevent coalescing and force
+ ; trying to rematerialize the previous COPY.
+ %1 = ADDXri %1, 1, 0
+
+ BL @foo_i64, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit %0, implicit %1
+ RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll b/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll
index b225d9a1acaf5..3edec9c9d8fc6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll
@@ -4,6 +4,7 @@ define <8 x i1> @test1() {
; CHECK-LABEL: test1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.16b v0, #0
+; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
%Shuff = shufflevector <8 x i1> <i1 0, i1 1, i1 2, i1 3, i1 4, i1 5, i1 6,
@@ -58,9 +59,14 @@ bb:
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
define <16 x i1> @test4(ptr %ptr, i32 %v) {
-; CHECK-LABEL: _test4:
-; CHECK: adrp x[[REG3:[0-9]+]], lCPI3_0@PAGE
-; CHECK: ldr q[[REG2:[0-9]+]], [x[[REG3]], lCPI3_0@PAGEOFF]
+; CHECK-LABEL: test4:
+; CHECK: ; %bb.0: ; %bb
+; CHECK-NEXT: Lloh0:
+; CHECK-NEXT: adrp x8, lCPI3_0@PAGE
+; CHECK-NEXT: Lloh1:
+; CHECK-NEXT: ldr q0, [x8, lCPI3_0@PAGEOFF]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1
bb:
%Shuff = shufflevector <16 x i1> zeroinitializer,
<16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1,
diff --git a/llvm/test/CodeGen/AArch64/clear-dead-implicit-def-impdef.ll b/llvm/test/CodeGen/AArch64/clear-dead-implicit-def-impdef.ll
index bc26eca6f27ef..b5c5124b664a2 100644
--- a/llvm/test/CodeGen/AArch64/clear-dead-implicit-def-impdef.ll
+++ b/llvm/test/CodeGen/AArch64/clear-dead-implicit-def-impdef.ll
@@ -13,16 +13,17 @@ define void @_ZN38SanitizerCommonInterceptors_Scanf_Test8TestBodyEv(ptr %.str.40
; CHECK-NEXT: stp x22, x21, [sp, #80] ; 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #96] ; 16-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #112] ; 16-byte Folded Spill
-; CHECK-NEXT: mov x24, x6
-; CHECK-NEXT: mov x19, x5
-; CHECK-NEXT: mov x20, x4
-; CHECK-NEXT: mov x21, x3
-; CHECK-NEXT: mov x22, x2
-; CHECK-NEXT: mov x23, x1
-; CHECK-NEXT: mov x25, x0
+; CHECK-NEXT: mov x25, x6
+; CHECK-NEXT: str x5, [sp, #24] ; 8-byte Folded Spill
+; CHECK-NEXT: mov x21, x4
+; CHECK-NEXT: mov x22, x3
+; CHECK-NEXT: mov x23, x2
+; CHECK-NEXT: mov x24, x1
+; CHECK-NEXT: mov x26, x0
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: mov w19, #1 ; =0x1
; CHECK-NEXT: mov x0, #0 ; =0x0
-; CHECK-NEXT: mov w1, #1 ; =0x1
+; CHECK-NEXT: mov x1, x19
; CHECK-NEXT: bl __ZL9testScanfPKcjz
; CHECK-NEXT: mov w28, #4 ; =0x4
; CHECK-NEXT: stp x28, x28, [sp, #8]
@@ -34,59 +35,57 @@ define void @_ZN38SanitizerCommonInterceptors_Scanf_Test8TestBodyEv(ptr %.str.40
; CHECK-NEXT: mov x0, #0 ; =0x0
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL9testScanfPKcjz
-; CHECK-NEXT: mov w27, #8 ; =0x8
-; CHECK-NEXT: str x27, [sp]
+; CHECK-NEXT: mov w20, #8 ; =0x8
+; CHECK-NEXT: str x20, [sp]
; CHECK-NEXT: mov x0, #0 ; =0x0
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL9testScanfPKcjz
-; CHECK-NEXT: mov w26, #1 ; =0x1
-; CHECK-NEXT: stp xzr, x26, [sp]
+; CHECK-NEXT: stp xzr, x19, [sp]
; CHECK-NEXT: mov x0, #0 ; =0x0
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL9testScanfPKcjz
-; CHECK-NEXT: str x26, [sp]
+; CHECK-NEXT: str x19, [sp]
; CHECK-NEXT: mov x0, #0 ; =0x0
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL9testScanfPKcjz
; CHECK-NEXT: str x28, [sp]
; CHECK-NEXT: Lloh0:
-; CHECK-NEXT: adrp x26, _.str@GOTPAGE
+; CHECK-NEXT: adrp x27, _.str@GOTPAGE
; CHECK-NEXT: Lloh1:
-; CHECK-NEXT: ldr x26, [x26, _.str@GOTPAGEOFF]
-; CHECK-NEXT: mov x0, x26
+; CHECK-NEXT: ldr x27, [x27, _.str@GOTPAGEOFF]
+; CHECK-NEXT: mov x0, x27
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL9testScanfPKcjz
-; CHECK-NEXT: str wzr, [x24]
-; CHECK-NEXT: str x27, [sp]
-; CHECK-NEXT: mov x0, x25
+; CHECK-NEXT: str wzr, [x25]
+; CHECK-NEXT: str x20, [sp]
+; CHECK-NEXT: mov x0, x26
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
; CHECK-NEXT: str x28, [sp]
-; CHECK-NEXT: mov x0, x23
+; CHECK-NEXT: mov x0, x24
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
; CHECK-NEXT: str x28, [sp]
-; CHECK-NEXT: mov x0, x22
+; CHECK-NEXT: mov x0, x23
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
; CHECK-NEXT: str x28, [sp]
-; CHECK-NEXT: mov x0, x21
+; CHECK-NEXT: mov x0, x22
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
; CHECK-NEXT: str x28, [sp]
-; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: mov x0, x21
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
; CHECK-NEXT: str xzr, [sp]
-; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: ldr x0, [sp, #24] ; 8-byte Folded Reload
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
; CHECK-NEXT: str xzr, [sp]
-; CHECK-NEXT: mov x0, x26
+; CHECK-NEXT: mov x0, x27
; CHECK-NEXT: mov w1, #0 ; =0x0
; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
-; CHECK-NEXT: mov w8, #1 ; =0x1
-; CHECK-NEXT: stp x8, xzr, [sp, #8]
+; CHECK-NEXT: stp x19, xzr, [sp, #8]
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x0, #0 ; =0x0
; CHECK-NEXT: mov w1, #0 ; =0x0
|
This is part of a patch series: |
# RUN: llc -o - -mtriple=arm64-apple-macosx -mattr=+zcm-fpr64 -mattr=+zcm-fpr128 -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=ATTR | ||
|
||
--- | | ||
define void @remat_FPR128() { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't need the IR section
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why not? need to provide a function def, otherwise it fails with: function 'remat_FPR128' isn't defined in the provided LLVM IR
@@ -1400,6 +1403,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, | |||
if (!Edit.canRematerializeAt(RM, ValNo, CopyIdx)) | |||
return false; | |||
|
|||
if (!TII->shouldReMaterializeTrivialRegDef(CopyMI, DstReg, SrcReg)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing an implementation of this? Also this API seems lacking in parameters. At minimum should probably be passing in LiveIntervals, the function
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The API and its impl appear in the prev patch of the series: #148429
I designed the API lazily based on use, but will add function and LiveIntervals in the other patch
Created using spr 1.3.6
✅ With the latest revision this PR passed the C/C++ code formatter. |
Created using spr 1.3.6
This change makes the register coalescer prevent rematerialization of a trivial def for a move instruction, if the target guides against it, based on the new target hook
shouldReMaterializeTrivialRegDef
. The filter is appended to the existing logic. The patch includes isolated MIR tests for all register classes supported, and fixes existing tests.