#include "llvm/IR/Function.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-
#include <algorithm>
-#define GET_INSTRINFO_CTOR
-#include "AArch64GenInstrInfo.inc"
-
using namespace llvm;
+#define GET_INSTRINFO_CTOR_DTOR
+#include "AArch64GenInstrInfo.inc"
+
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
: AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
- RI(*this, STI), Subtarget(STI) {}
+ Subtarget(STI) {}
void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg)
.addImm(A64SysReg::NZCV);
} else if (AArch64::GPR64RegClass.contains(DestReg)) {
- assert(AArch64::GPR64RegClass.contains(SrcReg));
- Opc = AArch64::ORRxxx_lsl;
- ZeroReg = AArch64::XZR;
+ if(AArch64::GPR64RegClass.contains(SrcReg)){
+ Opc = AArch64::ORRxxx_lsl;
+ ZeroReg = AArch64::XZR;
+ } else{
+ assert(AArch64::FPR64RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVxd), DestReg)
+ .addReg(SrcReg);
+ return;
+ }
} else if (AArch64::GPR32RegClass.contains(DestReg)) {
- assert(AArch64::GPR32RegClass.contains(SrcReg));
- Opc = AArch64::ORRwww_lsl;
- ZeroReg = AArch64::WZR;
+ if(AArch64::GPR32RegClass.contains(SrcReg)){
+ Opc = AArch64::ORRwww_lsl;
+ ZeroReg = AArch64::WZR;
+ } else{
+ assert(AArch64::FPR32RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVws), DestReg)
+ .addReg(SrcReg);
+ return;
+ }
} else if (AArch64::FPR32RegClass.contains(DestReg)) {
- assert(AArch64::FPR32RegClass.contains(SrcReg));
- BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
- .addReg(SrcReg);
- return;
+ if(AArch64::FPR32RegClass.contains(SrcReg)){
+ BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
+ .addReg(SrcReg);
+ return;
+ }
+ else {
+ assert(AArch64::GPR32RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVsw), DestReg)
+ .addReg(SrcReg);
+ return;
+ }
} else if (AArch64::FPR64RegClass.contains(DestReg)) {
- assert(AArch64::FPR64RegClass.contains(SrcReg));
- BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
- .addReg(SrcReg);
- return;
+ if(AArch64::FPR64RegClass.contains(SrcReg)){
+ BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
+ .addReg(SrcReg);
+ return;
+ }
+ else {
+ assert(AArch64::GPR64RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVdx), DestReg)
+ .addReg(SrcReg);
+ return;
+ }
} else if (AArch64::FPR128RegClass.contains(DestReg)) {
assert(AArch64::FPR128RegClass.contains(SrcReg));
- // FIXME: there's no good way to do this, at least without NEON:
- // + There's no single move instruction for q-registers
- // + We can't create a spill slot and use normal STR/LDR because stack
- // allocation has already happened
- // + We can't go via X-registers with FMOV because register allocation has
- // already happened.
- // This may not be efficient, but at least it works.
- BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
- .addReg(SrcReg)
- .addReg(AArch64::XSP)
- .addImm(0x1ff & -16);
-
- BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
- .addReg(AArch64::XSP, RegState::Define)
- .addReg(AArch64::XSP)
- .addImm(16);
+ // If NEON is enable, we use ORR to implement this copy.
+ // If NEON isn't available, emit STR and LDR to handle this.
+ if(getSubTarget().hasNEON()) {
+ BuildMI(MBB, I, DL, get(AArch64::ORRvvv_16B), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg);
+ return;
+ } else {
+ BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
+ .addReg(SrcReg)
+ .addReg(AArch64::XSP)
+ .addImm(0x1ff & -16);
+
+ BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
+ .addReg(AArch64::XSP, RegState::Define)
+ .addReg(AArch64::XSP)
+ .addImm(16);
+ return;
+ }
+ } else if (AArch64::FPR8RegClass.contains(DestReg, SrcReg)) {
+ // The copy of two FPR8 registers is implemented by the copy of two FPR32
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_8,
+ &AArch64::FPR32RegClass);
+ unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_8,
+ &AArch64::FPR32RegClass);
+ BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst)
+ .addReg(Src);
+ return;
+ } else if (AArch64::FPR16RegClass.contains(DestReg, SrcReg)) {
+ // The copy of two FPR16 registers is implemented by the copy of two FPR32
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_16,
+ &AArch64::FPR32RegClass);
+ unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_16,
+ &AArch64::FPR32RegClass);
+ BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst)
+ .addReg(Src);
return;
} else {
- llvm_unreachable("Unknown register class in copyPhysReg");
+ CopyPhysRegTuple(MBB, I, DL, DestReg, SrcReg);
+ return;
}
// E.g. ORR xDst, xzr, xSrc, lsl #0
.addImm(0);
}
-MachineInstr *
-AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
- uint64_t Offset, const MDNode *MDPtr,
- DebugLoc DL) const {
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
- .addFrameIndex(FrameIx).addImm(0)
- .addImm(Offset)
- .addMetadata(MDPtr);
- return &*MIB;
+void AArch64InstrInfo::CopyPhysRegTuple(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ DebugLoc DL, unsigned DestReg,
+ unsigned SrcReg) const {
+ unsigned SubRegs;
+ bool IsQRegs;
+ if (AArch64::DPairRegClass.contains(DestReg, SrcReg)) {
+ SubRegs = 2;
+ IsQRegs = false;
+ } else if (AArch64::DTripleRegClass.contains(DestReg, SrcReg)) {
+ SubRegs = 3;
+ IsQRegs = false;
+ } else if (AArch64::DQuadRegClass.contains(DestReg, SrcReg)) {
+ SubRegs = 4;
+ IsQRegs = false;
+ } else if (AArch64::QPairRegClass.contains(DestReg, SrcReg)) {
+ SubRegs = 2;
+ IsQRegs = true;
+ } else if (AArch64::QTripleRegClass.contains(DestReg, SrcReg)) {
+ SubRegs = 3;
+ IsQRegs = true;
+ } else if (AArch64::QQuadRegClass.contains(DestReg, SrcReg)) {
+ SubRegs = 4;
+ IsQRegs = true;
+ } else
+ llvm_unreachable("Unknown register class");
+
+ unsigned BeginIdx = IsQRegs ? AArch64::qsub_0 : AArch64::dsub_0;
+ int Spacing = 1;
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ // Copy register tuples backward when the first Dest reg overlaps
+ // with SrcReg.
+ if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
+ BeginIdx = BeginIdx + (SubRegs - 1);
+ Spacing = -1;
+ }
+
+ unsigned Opc = IsQRegs ? AArch64::ORRvvv_16B : AArch64::ORRvvv_8B;
+ for (unsigned i = 0; i != SubRegs; ++i) {
+ unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
+ unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
+ assert(Dst && Src && "Bad sub-register");
+ BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
+ .addReg(Src)
+ .addReg(Src);
+ }
+ return;
}
/// Does the Opcode represent a conditional branch that we can remove and re-add
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const {
- if (FBB == 0 && Cond.empty()) {
+ if (!FBB && Cond.empty()) {
BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB);
return 1;
- } else if (FBB == 0) {
+ } else if (!FBB) {
MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
for (int i = 1, e = Cond.size(); i != e; ++i)
MIB.addOperand(Cond[i]);
default:
llvm_unreachable("Unknown size for regclass");
}
- } else {
- assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
- RC->hasType(MVT::f128))
- && "Expected integer or floating type for store");
+ } else if (AArch64::FPR8RegClass.hasSubClassEq(RC)) {
+ StoreOp = AArch64::LSFP8_STR;
+ } else if (AArch64::FPR16RegClass.hasSubClassEq(RC)) {
+ StoreOp = AArch64::LSFP16_STR;
+ } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
+ RC->hasType(MVT::f128)) {
switch (RC->getSize()) {
case 4: StoreOp = AArch64::LSFP32_STR; break;
case 8: StoreOp = AArch64::LSFP64_STR; break;
default:
llvm_unreachable("Unknown size for regclass");
}
+ } else { // For a super register class has more than one sub registers
+ if (AArch64::DPairRegClass.hasSubClassEq(RC))
+ StoreOp = AArch64::ST1x2_8B;
+ else if (AArch64::DTripleRegClass.hasSubClassEq(RC))
+ StoreOp = AArch64::ST1x3_8B;
+ else if (AArch64::DQuadRegClass.hasSubClassEq(RC))
+ StoreOp = AArch64::ST1x4_8B;
+ else if (AArch64::QPairRegClass.hasSubClassEq(RC))
+ StoreOp = AArch64::ST1x2_16B;
+ else if (AArch64::QTripleRegClass.hasSubClassEq(RC))
+ StoreOp = AArch64::ST1x3_16B;
+ else if (AArch64::QQuadRegClass.hasSubClassEq(RC))
+ StoreOp = AArch64::ST1x4_16B;
+ else
+ llvm_unreachable("Unknown reg class");
+
+ MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
+ // Vector store has different operands from other store instructions.
+ NewMI.addFrameIndex(FrameIdx)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO);
+ return;
}
MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
default:
llvm_unreachable("Unknown size for regclass");
}
- } else {
- assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64)
- || RC->hasType(MVT::f128))
- && "Expected integer or floating type for store");
+ } else if (AArch64::FPR8RegClass.hasSubClassEq(RC)) {
+ LoadOp = AArch64::LSFP8_LDR;
+ } else if (AArch64::FPR16RegClass.hasSubClassEq(RC)) {
+ LoadOp = AArch64::LSFP16_LDR;
+ } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
+ RC->hasType(MVT::f128)) {
switch (RC->getSize()) {
case 4: LoadOp = AArch64::LSFP32_LDR; break;
case 8: LoadOp = AArch64::LSFP64_LDR; break;
default:
llvm_unreachable("Unknown size for regclass");
}
+ } else { // For a super register class has more than one sub registers
+ if (AArch64::DPairRegClass.hasSubClassEq(RC))
+ LoadOp = AArch64::LD1x2_8B;
+ else if (AArch64::DTripleRegClass.hasSubClassEq(RC))
+ LoadOp = AArch64::LD1x3_8B;
+ else if (AArch64::DQuadRegClass.hasSubClassEq(RC))
+ LoadOp = AArch64::LD1x4_8B;
+ else if (AArch64::QPairRegClass.hasSubClassEq(RC))
+ LoadOp = AArch64::LD1x2_16B;
+ else if (AArch64::QTripleRegClass.hasSubClassEq(RC))
+ LoadOp = AArch64::LD1x3_16B;
+ else if (AArch64::QQuadRegClass.hasSubClassEq(RC))
+ LoadOp = AArch64::LD1x4_16B;
+ else
+ llvm_unreachable("Unknown reg class");
+
+ MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
+ // Vector load has different operands from other load instructions.
+ NewMI.addFrameIndex(FrameIdx)
+ .addMemOperand(MMO);
+ return;
}
MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
int &AccessScale, int &MinOffset,
int &MaxOffset) const {
switch (MI.getOpcode()) {
- default: llvm_unreachable("Unkown load/store kind");
+ default:
+ llvm_unreachable("Unknown load/store kind");
case TargetOpcode::DBG_VALUE:
AccessScale = 1;
MinOffset = INT_MIN;
MinOffset = -0x40 * AccessScale;
MaxOffset = 0x3f * AccessScale;
return;
+ case AArch64::LD1x2_8B: case AArch64::ST1x2_8B:
+ AccessScale = 16;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LD1x3_8B: case AArch64::ST1x3_8B:
+ AccessScale = 24;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LD1x4_8B: case AArch64::ST1x4_8B:
+ case AArch64::LD1x2_16B: case AArch64::ST1x2_16B:
+ AccessScale = 32;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LD1x3_16B: case AArch64::ST1x3_16B:
+ AccessScale = 48;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LD1x4_16B: case AArch64::ST1x4_16B:
+ AccessScale = 64;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
}
}
if (MI.getOpcode() == AArch64::INLINEASM)
return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI);
- if (MI.isLabel())
- return 0;
-
switch (MI.getOpcode()) {
case TargetOpcode::BUNDLE:
return getInstBundleLength(MI);
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
- case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::CFI_INSTRUCTION:
case TargetOpcode::EH_LABEL:
+ case TargetOpcode::GC_LABEL:
case TargetOpcode::DBG_VALUE:
- return 0;
- case AArch64::CONSTPOOL_ENTRY:
- return MI.getOperand(2).getImm();
case AArch64::TLSDESCCALL:
return 0;
default:
int64_t NumBytes, MachineInstr::MIFlag MIFlags) {
if (NumBytes == 0 && DstReg == SrcReg)
return;
- else if (abs(NumBytes) & ~0xffffff) {
+ else if (abs64(NumBytes) & ~0xffffff) {
// Generically, we have to materialize the offset into a temporary register
// and subtract it. There are a couple of ways this could be done, for now
- // we'll go for a literal-pool load.
- MachineFunction &MF = *MBB.getParent();
- MachineConstantPool *MCP = MF.getConstantPool();
- const Constant *C
- = ConstantInt::get(Type::getInt64Ty(MF.getFunction()->getContext()),
- abs(NumBytes));
- unsigned CPI = MCP->getConstantPoolIndex(C, 8);
-
- // LDR xTMP, .LITPOOL
- BuildMI(MBB, MBBI, dl, TII.get(AArch64::LDRx_lit), ScratchReg)
- .addConstantPoolIndex(CPI)
- .setMIFlag(MIFlags);
+ // we'll use a movz/movk or movn/movk sequence.
+ uint64_t Bits = static_cast<uint64_t>(abs64(NumBytes));
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg)
+ .addImm(0xffff & Bits).addImm(0)
+ .setMIFlags(MIFlags);
+
+ Bits >>= 16;
+ if (Bits & 0xffff) {
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(0xffff & Bits).addImm(1)
+ .setMIFlags(MIFlags);
+ }
+
+ Bits >>= 16;
+ if (Bits & 0xffff) {
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(0xffff & Bits).addImm(2)
+ .setMIFlags(MIFlags);
+ }
+
+ Bits >>= 16;
+ if (Bits & 0xffff) {
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(0xffff & Bits).addImm(3)
+ .setMIFlags(MIFlags);
+ }
// ADD DST, SRC, xTMP (, lsl #0)
unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx;
} else {
LowOp = AArch64::SUBxxi_lsl0_s;
HighOp = AArch64::SUBxxi_lsl12_s;
- NumBytes = abs(NumBytes);
+ NumBytes = abs64(NumBytes);
}
// If we're here, at the very least a move needs to be produced, which just
static char ID;
LDTLSCleanup() : MachineFunctionPass(ID) {}
- virtual bool runOnMachineFunction(MachineFunction &MF) {
+ bool runOnMachineFunction(MachineFunction &MF) override {
AArch64MachineFunctionInfo* MFI
= MF.getInfo<AArch64MachineFunctionInfo>();
if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
return Copy;
}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "Local Dynamic TLS Access Clean-up";
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);