From: Moritz Roth Date: Thu, 21 Aug 2014 17:11:03 +0000 (+0000) Subject: Thumb1 load/store optimizer: Improve code to materialize new base register. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=a6afad8b3364463f0ca8fd047e1bfb26ddb4dcc4;p=oota-llvm.git Thumb1 load/store optimizer: Improve code to materialize new base register. There are two add-immediate instructions in Thumb1: tADDi8 and tADDi3. Only the latter supports using different source and destination registers, so whenever we materialize a new base register (at a certain offset) we'd do so by moving the base register value to the new register and then adding in place. This patch changes the code to use a single tADDi3 if the offset is small enough to fit in 3 bits. Differential Revision: http://reviews.llvm.org/D5006 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216193 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index dc8cafa2e32..9dc74a185ae 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -360,13 +360,15 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, int BaseOpc = isThumb2 ? ARM::t2ADDri : + (isThumb1 && Offset < 8) ? ARM::tADDi3 : isThumb1 ? ARM::tADDi8 : ARM::ADDri; if (Offset < 0) { + Offset = - Offset; BaseOpc = isThumb2 ? ARM::t2SUBri : + (isThumb1 && Offset < 8) ? ARM::tSUBi3 : isThumb1 ? ARM::tSUBi8 : ARM::SUBri; - Offset = - Offset; } if (!TL->isLegalAddImmediate(Offset)) @@ -374,22 +376,28 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return false; // Probably not worth it then. if (isThumb1) { - if (Base != NewBase) { + // Thumb1: depending on immediate size, use either + // ADD NewBase, Base, #imm3 + // or + // MOV NewBase, Base + // ADD NewBase, #imm8. + if (Base != NewBase && Offset >= 8) { // Need to insert a MOV to the new base first. - // FIXME: If the immediate fits in 3 bits, use ADD instead. BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) .addReg(Base, getKillRegState(BaseKill)) .addImm(Pred).addReg(PredReg); + // Set up BaseKill and Base correctly to insert the ADDS/SUBS below. + Base = NewBase; + BaseKill = false; } AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)) - .addReg(NewBase, getKillRegState(true)).addImm(Offset) + .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) .addImm(Pred).addReg(PredReg); } else { BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) .addImm(Pred).addReg(PredReg).addReg(0); } - Base = NewBase; BaseKill = true; // New base is always killed straight away. } diff --git a/test/CodeGen/Thumb/ldm-stm-base-materialization.ll b/test/CodeGen/Thumb/ldm-stm-base-materialization.ll new file mode 100644 index 00000000000..6382c25b60f --- /dev/null +++ b/test/CodeGen/Thumb/ldm-stm-base-materialization.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=thumbv6m-eabi -verify-machineinstrs -o - | FileCheck %s +target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv6m-none--eabi" + +@a = external global i32* +@b = external global i32* + +; Function Attrs: nounwind +define void @foo() #0 { +entry: +; CHECK-LABEL: foo: +; CHECK: ldr r[[SB:[0-9]]], .LCPI +; CHECK: ldr r[[LB:[0-9]]], .LCPI +; CHECK: adds r[[NLB:[0-9]]], r[[LB]], #4 +; CHECK-NEXT: ldm r[[NLB]], +; CHECK: adds r[[NSB:[0-9]]], r[[SB]], #4 +; CHECK-NEXT: stm r[[NSB]] + %0 = load i32** @a, align 4 + %arrayidx = getelementptr inbounds i32* %0, i32 1 + %1 = bitcast i32* %arrayidx to i8* + %2 = load i32** @b, align 4 + %arrayidx1 = getelementptr inbounds i32* %2, i32 1 + %3 = bitcast i32* %arrayidx1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 24, i32 4, i1 false) + ret void +} + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1 diff --git a/test/CodeGen/Thumb/thumb-ldm.ll b/test/CodeGen/Thumb/thumb-ldm.ll index dd98e6f300e..7e9560eec8d 100644 --- a/test/CodeGen/Thumb/thumb-ldm.ll +++ b/test/CodeGen/Thumb/thumb-ldm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv6m-eabi -o - | FileCheck %s +; RUN: llc < %s -mtriple=thumbv6m-eabi -verify-machineinstrs -o - | FileCheck %s @X = external global [0 x i32] ; <[0 x i32]*> [#uses=5] diff --git a/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll b/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll index e1e38739f1d..88a060a5778 100644 --- a/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll +++ b/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | FileCheck %s @d = external global [64 x i32] @s = external global [64 x i32]