From e2e26b486d6ae336927e99898eddc01e08c37e44 Mon Sep 17 00:00:00 2001
From: Chad Rosier
Date: Tue, 9 Jun 2015 20:59:41 +0000
Subject: [PATCH] [AArch64] Remove an overly conservative check when
 generating store pairs.

Store instructions do not modify register values and therefore it's safe
to form a store pair even if the source register has been read in between
the two store instructions.

Previously, the read of w1 (see below) prevented the formation of a stp.

        str  w0, [x2]
        ldr  w8, [x2, #8]
        add  w0, w8, w1
        str  w1, [x2, #4]
        ret

We now generate the following code.

        stp  w0, w1, [x2]
        ldr  w8, [x2, #8]
        add  w0, w8, w1
        ret

All correctness tests with -Ofast on A57 with Spec200x and EEMBC pass.
Performance results for SPEC2K were within noise.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@239432 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../AArch64/AArch64LoadStoreOptimizer.cpp |  5 +++--
 test/CodeGen/AArch64/arm64-stp.ll         | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 186e71a3307..82f77a77ab5 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -623,7 +623,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
       // and first alias with the second, we can combine the second into the
       // first.
       if (!ModifiedRegs[MI->getOperand(0).getReg()] &&
-          !UsedRegs[MI->getOperand(0).getReg()] &&
+          !(MI->mayLoad() && UsedRegs[MI->getOperand(0).getReg()]) &&
           !mayAlias(MI, MemInsns, TII)) {
         MergeForward = false;
         return MBBI;
@@ -634,7 +634,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
       // first and the second alias with the first, we can combine the first
       // into the second.
       if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] &&
-          !UsedRegs[FirstMI->getOperand(0).getReg()] &&
+          !(FirstMI->mayLoad() &&
+            UsedRegs[FirstMI->getOperand(0).getReg()]) &&
           !mayAlias(FirstMI, MemInsns, TII)) {
         MergeForward = true;
         return MBBI;
diff --git a/test/CodeGen/AArch64/arm64-stp.ll b/test/CodeGen/AArch64/arm64-stp.ll
index 4d76396471a..72561aac6e8 100644
--- a/test/CodeGen/AArch64/arm64-stp.ll
+++ b/test/CodeGen/AArch64/arm64-stp.ll
@@ -99,3 +99,35 @@ entry:
   store <4 x i32> %p20, <4 x i32>* %p21, align 4
   ret void
 }
+
+; Read of %b to compute %tmp2 shouldn't prevent formation of stp
+; CHECK-LABEL: stp_int_rar_hazard
+; CHECK: stp w0, w1, [x2]
+; CHECK: ldr [[REG:w[0-9]+]], [x2, #8]
+; CHECK: add w0, [[REG]], w1
+; CHECK: ret
+define i32 @stp_int_rar_hazard(i32 %a, i32 %b, i32* nocapture %p) nounwind {
+  store i32 %a, i32* %p, align 4
+  %ld.ptr = getelementptr inbounds i32, i32* %p, i64 2
+  %tmp = load i32, i32* %ld.ptr, align 4
+  %tmp2 = add i32 %tmp, %b
+  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
+  store i32 %b, i32* %add.ptr, align 4
+  ret i32 %tmp2
+}
+
+; Read of %b to compute %tmp2 shouldn't prevent formation of stp
+; CHECK-LABEL: stp_int_rar_hazard_after
+; CHECK: ldr [[REG:w[0-9]+]], [x3, #4]
+; CHECK: add w0, [[REG]], w2
+; CHECK: stp w1, w2, [x3]
+; CHECK: ret
+define i32 @stp_int_rar_hazard_after(i32 %w0, i32 %a, i32 %b, i32* nocapture %p) nounwind {
+  store i32 %a, i32* %p, align 4
+  %ld.ptr = getelementptr inbounds i32, i32* %p, i64 1
+  %tmp = load i32, i32* %ld.ptr, align 4
+  %tmp2 = add i32 %tmp, %b
+  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
+  store i32 %b, i32* %add.ptr, align 4
+  ret i32 %tmp2
+}
-- 
2.34.1
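
Illustrative sketch (not part of the patch): the change above only treats an intervening
read of the transfer register as a pairing hazard when the candidate instruction is a
load. The standalone C++ below models that relaxed check; the names PairCandidate and
canMergeCandidate are hypothetical, and the plain booleans stand in for what the real
pass derives from MachineInstr, the ModifiedRegs/UsedRegs bit vectors, and mayAlias.

// All names below are hypothetical; this is a simplified model, not LLVM code.
#include <cstdio>

struct PairCandidate {
  bool IsLoad;             // ldr-style candidate (true) or str-style (false)
  bool RegModifiedBetween; // transfer register written between the two insns
  bool RegUsedBetween;     // transfer register read between the two insns
  bool AliasesMemBetween;  // candidate overlaps an intervening memory access
};

// Relaxed legality check: an intervening read of the transfer register only
// blocks pairing for loads, because moving a load changes when its destination
// register is (re)defined; a store never changes the register it stores.
static bool canMergeCandidate(const PairCandidate &C) {
  return !C.RegModifiedBetween &&
         !(C.IsLoad && C.RegUsedBetween) &&
         !C.AliasesMemBetween;
}

int main() {
  // Mirrors stp_int_rar_hazard: the second str's source register (w1) is read
  // by the intervening add, yet the stp may still be formed.
  PairCandidate SecondStr{/*IsLoad=*/false, /*RegModifiedBetween=*/false,
                          /*RegUsedBetween=*/true, /*AliasesMemBetween=*/false};
  // A load candidate with the same intervening read must still be rejected.
  PairCandidate LoadCand{/*IsLoad=*/true, /*RegModifiedBetween=*/false,
                         /*RegUsedBetween=*/true, /*AliasesMemBetween=*/false};
  std::printf("store candidate paired: %d\n", canMergeCandidate(SecondStr)); // 1
  std::printf("load candidate paired:  %d\n", canMergeCandidate(LoadCand));  // 0
  return 0;
}

A store candidate whose source register is read in between still pairs (the
stp_int_rar_hazard case), while a load candidate with the same intervening read is
still rejected, since moving the load would change the value that read observes.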