From 4cc1407b84dc6dbbc0d62b1d1b8db7c0ddec86cc Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Sun, 21 Apr 2013 11:57:07 +0000 Subject: [PATCH] ARM: Use ldrd/strd to spill 64-bit pairs when available. This allows common sp-offsets to be part of the instruction and is probably faster on modern CPUs too. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179977 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 55 ++++++++++++------ lib/Target/ARM/ARMBaseInstrInfo.h | 4 ++ lib/Target/ARM/Thumb2InstrInfo.cpp | 84 +++++++++++++++++++++------- test/CodeGen/ARM/gpr-paired-spill.ll | 40 ++++++++----- 4 files changed, 133 insertions(+), 50 deletions(-) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 7a8077e3f9e..0d1417dd17c 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -747,10 +747,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Mov->addRegisterKilled(SrcReg, TRI); } -static const -MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, - unsigned Reg, unsigned SubIdx, unsigned State, - const TargetRegisterInfo *TRI) { +const MachineInstrBuilder & +ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, + unsigned SubIdx, unsigned State, + const TargetRegisterInfo *TRI) const { if (!SubIdx) return MIB.addReg(Reg, State); @@ -795,12 +795,22 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA)) - .addFrameIndex(FI)) - .addMemOperand(MMO); - MIB = AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); - AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + if (Subtarget.hasV5TEOps()) { + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD)); + AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); + AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO); + + AddDefaultPred(MIB); + } else { + // Fallback to STM instruction, which has existed since the dawn of + // time. + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA)) + .addFrameIndex(FI).addMemOperand(MMO)); + AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); + AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + } } else llvm_unreachable("Unknown reg class!"); break; @@ -948,7 +958,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); - ARMFunctionInfo *AFI = MF.getInfo(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); MachineMemOperand *MMO = @@ -975,12 +984,24 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { - unsigned LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA : ARM::LDMIA; - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(LdmOpc)) - .addFrameIndex(FI).addMemOperand(MMO)); - MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); - MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + MachineInstrBuilder MIB; + + if (Subtarget.hasV5TEOps()) { + MIB = BuildMI(MBB, I, DL, get(ARM::LDRD)); + AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); + AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO); + + AddDefaultPred(MIB); + } else { + // Fallback to LDM instruction, which has existed since the dawn of + // time. + MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA)) + .addFrameIndex(FI).addMemOperand(MMO)); + MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + } + if (TargetRegisterInfo::isPhysicalRegister(DestReg)) MIB.addReg(DestReg, RegState::ImplicitDefine); } else diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 7c107bb4195..2ef659c23bd 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -141,6 +141,10 @@ public: MachineInstr *commuteInstruction(MachineInstr*, bool=false) const; + const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg, + unsigned SubIdx, unsigned State, + const TargetRegisterInfo *TRI) const; + virtual bool produceSameValue(const MachineInstr *MI0, const MachineInstr *MI1, const MachineRegisterInfo *MRI) const; diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 67e8ec7c5ff..a1b48c226aa 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" @@ -126,25 +127,41 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), + MachineMemOperand::MOStore, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass || RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass || RC == &ARM::GPRnopcRegClass) { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOStore, - MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); return; } + if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { + // Thumb2 STRD expects its dest-registers to be in rGPR. Not a problem for + // gsub_0, but needs an extra constraint for gsub_1 (which could be sp + // otherwise). + MachineRegisterInfo *MRI = &MF.getRegInfo(); + MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass); + + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8)); + AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); + AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO); + AddDefaultPred(MIB); + return; + } + ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI); } @@ -153,24 +170,42 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), + MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass || RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass || RC == &ARM::GPRnopcRegClass) { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); return; } + if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { + // Thumb2 LDRD expects its dest-registers to be in rGPR. Not a problem for + // gsub_0, but needs an extra constraint for gsub_1 (which could be sp + // otherwise). + MachineRegisterInfo *MRI = &MF.getRegInfo(); + MRI->constrainRegClass(DestReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass); + + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8)); + AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); + AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO); + AddDefaultPred(MIB); + + if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + MIB.addReg(DestReg, RegState::ImplicitDefine); + return; + } + ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI); } @@ -514,6 +549,15 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, Offset = -Offset; isSub = true; } + } else if (AddrMode == ARMII::AddrModeT2_i8s4) { + Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4; + NumBits = 8; + // MCInst operand has already scaled value. + Scale = 1; + if (Offset < 0) { + isSub = true; + Offset = -Offset; + } } else { llvm_unreachable("Unsupported addressing mode!"); } diff --git a/test/CodeGen/ARM/gpr-paired-spill.ll b/test/CodeGen/ARM/gpr-paired-spill.ll index 400d69d0d41..86088326b5b 100644 --- a/test/CodeGen/ARM/gpr-paired-spill.ll +++ b/test/CodeGen/ARM/gpr-paired-spill.ll @@ -1,29 +1,43 @@ -; RUN: llc -mtriple=armv7-none-linux-gnueabi < %s | FileCheck %s +; RUN: llc -mtriple=armv7-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITH-LDRD +; RUN: llc -mtriple=armv4-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITHOUT-LDRD +; RUN: llc -mtriple=thumbv7-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITH-LDRD +; RUN: llc -mtriple=thumbv7-none-linux-gnueabi -debug -o /dev/null < %s 2>&1 | FileCheck %s --check-prefix=INSTRS-ARE-THUMB define void @foo(i64* %addr) { - %val1 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) - %val2 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) - %val3 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) - %val4 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) - %val5 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) - %val6 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) - %val7 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + %val1 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + %val2 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + %val3 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + %val4 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + %val5 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + %val6 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + %val7 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) ; Key point is that enough 64-bit paired GPR values are live that ; one of them has to be spilled. This used to cause an abort because ; an LDMIA was created with both a FrameIndex and an offset, which ; is not allowed. +; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8] +; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp] + +; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8] +; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp] + ; We also want to ensure the register scavenger is working (i.e. an ; offset from sp can be generated), so we need two spills. -; CHECK: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}} -; CHECK: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}} -; CHECK: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}} +; CHECK-WITHOUT-LDRD: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}} +; CHECK-WITHOUT-LDRD: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}} +; CHECK-WITHOUT-LDRD: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}} ; In principle LLVM may have to recalculate the offset. At the moment ; it reuses the original though. -; CHECK: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}} -; CHECK: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}} +; CHECK-WITHOUT-LDRD: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}} +; CHECK-WITHOUT-LDRD: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}} + + ; Make sure we are actually creating the Thumb versions of the spill + ; instructions. +; INSTRS-ARE-THUMB: t2STRDi8 +; INSTRS-ARE-THUMB: t2LDRDi8 store volatile i64 %val1, i64* %addr store volatile i64 %val2, i64* %addr -- 2.34.1