From f95215f551949d5e5adfbf4753aa833b9009b77a Mon Sep 17 00:00:00 2001 From: Anton Korobeynikov Date: Mon, 2 Nov 2009 00:10:38 +0000 Subject: [PATCH] Use NEON reg-reg moves, where profitable. This reduces "domain-cross" stalls, when we used to mix vfp and neon code (the former were used for reg-reg moves) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@85764 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARM.td | 8 +++-- lib/Target/ARM/ARMBaseInstrInfo.cpp | 54 ++++++++++++++++++++++++----- lib/Target/ARM/ARMBaseInstrInfo.h | 12 ++++++- lib/Target/ARM/ARMInstrFormats.td | 45 +++++++++++++++--------- lib/Target/ARM/ARMInstrInfo.cpp | 2 +- lib/Target/ARM/ARMInstrInfo.h | 1 - lib/Target/ARM/Thumb1InstrInfo.cpp | 3 +- lib/Target/ARM/Thumb2InstrInfo.cpp | 3 +- 8 files changed, 97 insertions(+), 31 deletions(-) diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 8851fbbf248..cb9bd6a0948 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -125,12 +125,16 @@ def ARMInstrInfo : InstrInfo { "SizeFlag", "IndexModeBits", "Form", - "isUnaryDataProc"]; + "isUnaryDataProc", + "canXformTo16Bit", + "Dom"]; let TSFlagsShifts = [0, 4, 7, 9, - 15]; + 15, + 16, + 17]; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 6b031da1438..4cb2407aeb1 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -16,15 +16,18 @@ #include "ARMAddressingModes.h" #include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" +#include "ARMRegisterInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" 
#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -32,8 +35,9 @@ static cl::opt EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, cl::desc("Enable ARM 2-addr to 3-addr conv")); -ARMBaseInstrInfo::ARMBaseInstrInfo() - : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)) { +ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) + : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)), + Subtarget(STI) { } MachineInstr * @@ -504,7 +508,7 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI, case ARM::FCPYS: case ARM::FCPYD: case ARM::VMOVD: - case ARM::VMOVQ: { + case ARM::VMOVQ: { SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); return true; @@ -647,11 +651,45 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, } else if (DestRC == ARM::SPRRegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg) .addReg(SrcReg)); - } else if ((DestRC == ARM::DPRRegisterClass) || - (DestRC == ARM::DPR_VFP2RegisterClass) || - (DestRC == ARM::DPR_8RegisterClass)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg) - .addReg(SrcReg)); + } else if (DestRC == ARM::DPR_VFP2RegisterClass || + DestRC == ARM::DPR_8RegisterClass || + SrcRC == ARM::DPR_VFP2RegisterClass || + SrcRC == ARM::DPR_8RegisterClass) { + // Always use neon reg-reg move if source or dest is NEON-only regclass. + BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg).addReg(SrcReg); + } else if (DestRC == ARM::DPRRegisterClass) { + const ARMBaseRegisterInfo* TRI = &getRegisterInfo(); + + // Find the Machine Instruction which defines SrcReg. + MachineBasicBlock::iterator J = (I == MBB.begin() ? 
I : prior(I)); + while (J != MBB.begin()) { + if (J->modifiesRegister(SrcReg, TRI)) + break; + --J; + } + + unsigned Domain; + if (J->modifiesRegister(SrcReg, TRI)) { + Domain = J->getDesc().TSFlags & ARMII::DomainMask; + // Instructions in general domain are subreg accesses. + // Map them to NEON reg-reg moves. + if (Domain == ARMII::DomainGeneral) + Domain = ARMII::DomainNEON; + } else { + // We reached the beginning of the BB and found no instruction defining + // the reg. This means that register should be live-in for this BB. + // It's always better to use NEON reg-reg moves. + Domain = ARMII::DomainNEON; + } + + if ((Domain & ARMII::DomainNEON) && getSubtarget().hasNEON()) { + BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg); + } else { + assert((Domain & ARMII::DomainVFP || + !getSubtarget().hasNEON()) && "Invalid domain!"); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg) + .addReg(SrcReg)); + } } else if (DestRC == ARM::QPRRegisterClass || DestRC == ARM::QPR_VFP2RegisterClass) { BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index acb7fdd9f1d..2ba377474e9 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -130,6 +130,14 @@ namespace ARMII { // a 16-bit Thumb instruction if certain conditions are met. Xform16Bit = 1 << 16, + //===------------------------------------------------------------------===// + // Code domain. + DomainShift = 17, + DomainMask = 3 << DomainShift, + DomainGeneral = 0 << DomainShift, + DomainVFP = 1 << DomainShift, + DomainNEON = 2 << DomainShift, + //===------------------------------------------------------------------===// // Field shifts - such shifts are used to set field while generating // machine instructions. 
@@ -157,9 +165,10 @@ namespace ARMII { } class ARMBaseInstrInfo : public TargetInstrInfoImpl { + const ARMSubtarget& Subtarget; protected: // Can be only subclassed. - explicit ARMBaseInstrInfo(); + explicit ARMBaseInstrInfo(const ARMSubtarget &STI); public: // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. @@ -173,6 +182,7 @@ public: LiveVariables *LV) const; virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0; + const ARMSubtarget &getSubtarget() const { return Subtarget; } // Branch analysis. virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 1c351e379e5..1a42d0e0d9c 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -108,6 +108,15 @@ def IndexModeNone : IndexMode<0>; def IndexModePre : IndexMode<1>; def IndexModePost : IndexMode<2>; +// Instruction execution domain. +class Domain val> { + bits<2> Value = val; +} +def GenericDomain : Domain<0>; +def VFPDomain : Domain<1>; // Instructions in VFP domain only +def NeonDomain : Domain<2>; // Instructions in Neon domain only +def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains + //===----------------------------------------------------------------------===// // ARM special operands. @@ -136,7 +145,7 @@ def s_cc_out : OptionalDefOperand { // class InstARM + Format f, Domain d, string cstr, InstrItinClass itin> : Instruction { field bits<32> Inst; @@ -155,6 +164,9 @@ class InstARM Form = F.Value; + Domain D = d; + bits<2> Dom = D.Value; + // // Attributes specific to ARM instructions... 
// @@ -167,7 +179,8 @@ class InstARM pattern> - : InstARM { + : InstARM { let OutOperandList = oops; let InOperandList = iops; let AsmString = asm; @@ -179,7 +192,7 @@ class I pattern> - : InstARM { + : InstARM { let OutOperandList = oops; let InOperandList = !con(iops, (ops pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); @@ -194,7 +207,7 @@ class sI pattern> - : InstARM { + : InstARM { let OutOperandList = oops; let InOperandList = !con(iops, (ops pred:$p, cc_out:$s)); let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm)); @@ -206,7 +219,7 @@ class sI pattern> - : InstARM { + : InstARM { let OutOperandList = oops; let InOperandList = iops; let AsmString = asm; @@ -807,7 +820,7 @@ class ARMV6Pat : Pat { class ThumbI pattern> - : InstARM { + : InstARM { let OutOperandList = oops; let InOperandList = iops; let AsmString = asm; @@ -833,7 +846,7 @@ class TJTI patter // Thumb1 only class Thumb1I pattern> - : InstARM { + : InstARM { let OutOperandList = oops; let InOperandList = iops; let AsmString = asm; @@ -861,7 +874,7 @@ class T1It pattern> - : InstARM { + : InstARM { let OutOperandList = !con(oops, (ops s_cc_out:$s)); let InOperandList = !con(iops, (ops pred:$p)); let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); @@ -883,7 +896,7 @@ class T1sIt pattern> - : InstARM { + : InstARM { let OutOperandList = oops; let InOperandList = !con(iops, (ops pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); @@ -918,7 +931,7 @@ class T1pIs pattern> - : InstARM { + : InstARM { let OutOperandList = oops; let InOperandList = !con(iops, (ops pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); @@ -934,7 +947,7 @@ class Thumb2I pattern> - : InstARM { + : InstARM { let OutOperandList = oops; let InOperandList = !con(iops, (ops pred:$p, cc_out:$s)); let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); @@ -946,7 +959,7 @@ class Thumb2sI pattern> - : InstARM { + : InstARM { let OutOperandList = oops; let 
InOperandList = iops; let AsmString = asm; @@ -993,7 +1006,7 @@ class T2Ix2 pattern> - : InstARM { + : InstARM { let OutOperandList = oops; let InOperandList = !con(iops, (ops pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); @@ -1026,7 +1039,7 @@ class T2Pat : Pat { class VFPI pattern> - : InstARM { + : InstARM { let OutOperandList = oops; let InOperandList = !con(iops, (ops pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); @@ -1038,7 +1051,7 @@ class VFPI pattern> - : InstARM { + : InstARM { let OutOperandList = oops; let InOperandList = iops; let AsmString = asm; @@ -1199,7 +1212,7 @@ class AVConv5I opcod1, bits<4> opcod2, dag oops, dag iops, class NeonI pattern> - : InstARM { + : InstARM { let OutOperandList = oops; let InOperandList = iops; let AsmString = asm; diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index c24c690f253..86bbe2a4c61 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -25,7 +25,7 @@ using namespace llvm; ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) - : RI(*this, STI), Subtarget(STI) { + : ARMBaseInstrInfo(STI), RI(*this, STI) { } unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index c616949e379..5d1678d6854 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -25,7 +25,6 @@ namespace llvm { class ARMInstrInfo : public ARMBaseInstrInfo { ARMRegisterInfo RI; - const ARMSubtarget &Subtarget; public: explicit ARMInstrInfo(const ARMSubtarget &STI); diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 6943036e457..b6dd56c7abf 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -24,7 +24,8 @@ using namespace llvm; -Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) : RI(*this, STI) { +Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget 
&STI) + : ARMBaseInstrInfo(STI), RI(*this, STI) { } unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const { diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 3ec147f2662..21fff51cb75 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -25,7 +25,8 @@ using namespace llvm; -Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) : RI(*this, STI) { +Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) + : ARMBaseInstrInfo(STI), RI(*this, STI) { } unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const { -- 2.34.1