From 28f08c93e75d291695ea89b9004145103292e85b Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Mon, 5 Mar 2012 19:33:30 +0000 Subject: [PATCH] ARM refactor away a bunch of VLD/VST pseudo instructions. With the new composite physical registers to represent arbitrary pairs of DPR registers, we don't need the pseudo-registers anymore. Get rid of a bunch of them that use DPR register pairs and just use the real instructions directly instead. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@152045 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/MC/MCRegisterInfo.h | 2 +- lib/Target/ARM/ARMBaseInstrInfo.cpp | 50 ++++----- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 89 --------------- lib/Target/ARM/ARMFrameLowering.cpp | 8 +- lib/Target/ARM/ARMISelDAGToDAG.cpp | 82 +++++++------- lib/Target/ARM/ARMInstrNEON.td | 104 +++++------------- lib/Target/ARM/ARMRegisterInfo.td | 6 +- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 24 ++++ .../ARM/Disassembler/ARMDisassembler.cpp | 95 +++++++++++++++- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 9 ++ lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 1 + lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 17 +++ utils/TableGen/EDEmitter.cpp | 1 + 13 files changed, 241 insertions(+), 247 deletions(-) diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h index 2e5968de543..762558d8651 100644 --- a/include/llvm/MC/MCRegisterInfo.h +++ b/include/llvm/MC/MCRegisterInfo.h @@ -252,7 +252,7 @@ public: /// Reg so its sub-register of index SubIdx is Reg. unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx, const MCRegisterClass *RC) const { - for (const unsigned *SRs = getSuperRegisters(Reg); unsigned SR = *SRs;++SRs) + for (const uint16_t *SRs = getSuperRegisters(Reg); unsigned SR = *SRs;++SRs) if (Reg == getSubReg(SR, SubIdx) && RC->contains(SR)) return SR; return 0; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 5409621aff2..75b796e9e26 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -760,7 +760,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (ARM::QPRRegClass.hasSubClassEq(RC)) { // Use aligned spills if the stack can be realigned. if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo)) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64)) .addFrameIndex(FI).addImm(16) .addReg(SrcReg, getKillRegState(isKill)) .addMemOperand(MMO)); @@ -845,7 +845,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return MI->getOperand(0).getReg(); } break; - case ARM::VST1q64Pseudo: + case ARM::VST1q64: if (MI->getOperand(0).isFI() && MI->getOperand(2).getSubReg() == 0) { FrameIndex = MI->getOperand(0).getIndex(); @@ -909,7 +909,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, case 16: if (ARM::QPRRegClass.hasSubClassEq(RC)) { if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) .addFrameIndex(FI).addImm(16) .addMemOperand(MMO)); } else { @@ -989,7 +989,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, return MI->getOperand(0).getReg(); } break; - case ARM::VLD1q64Pseudo: + case ARM::VLD1q64: if (MI->getOperand(1).isFI() && MI->getOperand(0).getSubReg() == 0) { FrameIndex = MI->getOperand(1).getIndex(); @@ -2694,33 +2694,33 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, if (DefAlign < 8 && Subtarget.isCortexA9()) switch (DefMCID.getOpcode()) { default: break; - case ARM::VLD1q8Pseudo: - case ARM::VLD1q16Pseudo: - case ARM::VLD1q32Pseudo: - case ARM::VLD1q64Pseudo: - case ARM::VLD1q8PseudoWB_register: - case ARM::VLD1q16PseudoWB_register: - case ARM::VLD1q32PseudoWB_register: - case ARM::VLD1q64PseudoWB_register: - case ARM::VLD1q8PseudoWB_fixed: - case ARM::VLD1q16PseudoWB_fixed: - case ARM::VLD1q32PseudoWB_fixed: - case ARM::VLD1q64PseudoWB_fixed: - case ARM::VLD2d8Pseudo: - case ARM::VLD2d16Pseudo: - case ARM::VLD2d32Pseudo: + case ARM::VLD1q8: + case ARM::VLD1q16: + case ARM::VLD1q32: + case ARM::VLD1q64: + case ARM::VLD1q8wb_register: + case ARM::VLD1q16wb_register: + case ARM::VLD1q32wb_register: + case ARM::VLD1q64wb_register: + case ARM::VLD1q8wb_fixed: + case ARM::VLD1q16wb_fixed: + case ARM::VLD1q32wb_fixed: + case ARM::VLD1q64wb_fixed: + case ARM::VLD2d8: + case ARM::VLD2d16: + case ARM::VLD2d32: case ARM::VLD2q8Pseudo: case ARM::VLD2q16Pseudo: case ARM::VLD2q32Pseudo: - case ARM::VLD2d8PseudoWB_fixed: - case ARM::VLD2d16PseudoWB_fixed: - case ARM::VLD2d32PseudoWB_fixed: + case ARM::VLD2d8wb_fixed: + case ARM::VLD2d16wb_fixed: + case ARM::VLD2d32wb_fixed: case ARM::VLD2q8PseudoWB_fixed: case ARM::VLD2q16PseudoWB_fixed: case ARM::VLD2q32PseudoWB_fixed: - case ARM::VLD2d8PseudoWB_register: - case ARM::VLD2d16PseudoWB_register: - case ARM::VLD2d32PseudoWB_register: + case ARM::VLD2d8wb_register: + case ARM::VLD2d16wb_register: + case ARM::VLD2d32wb_register: case ARM::VLD2q8PseudoWB_register: case ARM::VLD2q16PseudoWB_register: case ARM::VLD2q32PseudoWB_register: diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 7fbf7d5bc77..c4ab99d4542 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -148,18 +148,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false}, { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false}, -{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, false, SingleSpc, 2, 4 ,false}, -{ ARM::VLD1q16PseudoWB_fixed, ARM::VLD1q16wb_fixed,true,false,false,SingleSpc, 2, 4 ,false}, -{ ARM::VLD1q16PseudoWB_register, ARM::VLD1q16wb_register, true, true, true, SingleSpc, 2, 4 ,false}, -{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, false, SingleSpc, 2, 2 ,false}, -{ ARM::VLD1q32PseudoWB_fixed, ARM::VLD1q32wb_fixed,true,false, false,SingleSpc, 2, 2 ,false}, -{ ARM::VLD1q32PseudoWB_register, ARM::VLD1q32wb_register, true, true, true, SingleSpc, 2, 2 ,false}, -{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, false, SingleSpc, 2, 1 ,false}, -{ ARM::VLD1q64PseudoWB_fixed, ARM::VLD1q64wb_fixed,true,false, false,SingleSpc, 2, 2 ,false}, -{ ARM::VLD1q64PseudoWB_register, ARM::VLD1q64wb_register, true, true, true, SingleSpc, 2, 1 ,false}, -{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, false, SingleSpc, 2, 8 ,false}, -{ ARM::VLD1q8PseudoWB_fixed, ARM::VLD1q8wb_fixed,true,false, false, SingleSpc, 2, 8 ,false}, -{ ARM::VLD1q8PseudoWB_register, ARM::VLD1q8wb_register,true,true, true,SingleSpc,2,8,false}, { ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, false, SingleSpc, 2, 4,false}, { ARM::VLD2DUPd16PseudoWB_fixed, ARM::VLD2DUPd16wb_fixed, true, true, false, SingleSpc, 2, 4,false}, @@ -182,16 +170,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2 ,true}, { ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true}, -{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, false, SingleSpc, 2, 4 ,false}, -{ ARM::VLD2d16PseudoWB_fixed, ARM::VLD2d16wb_fixed, true, true, false, SingleSpc, 2, 4 ,false}, -{ ARM::VLD2d16PseudoWB_register, ARM::VLD2d16wb_register, true, true, true, SingleSpc, 2, 4 ,false}, -{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, false, SingleSpc, 2, 2 ,false}, -{ ARM::VLD2d32PseudoWB_fixed, ARM::VLD2d32wb_fixed, true, true, false, SingleSpc, 2, 2 ,false}, -{ ARM::VLD2d32PseudoWB_register, ARM::VLD2d32wb_register, true, true, true, SingleSpc, 2, 2 ,false}, -{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, false, SingleSpc, 2, 8 ,false}, -{ ARM::VLD2d8PseudoWB_fixed, ARM::VLD2d8wb_fixed, true, true, false, SingleSpc, 2, 8 ,false}, -{ ARM::VLD2d8PseudoWB_register, ARM::VLD2d8wb_register, true, true, true, SingleSpc, 2, 8 ,false}, - { ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false}, { ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false}, { ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false}, @@ -286,19 +264,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false}, { ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false}, -{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, false, SingleSpc, 2, 4 ,false}, -{ ARM::VST1q16PseudoWB_fixed, ARM::VST1q16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false}, -{ ARM::VST1q16PseudoWB_register, ARM::VST1q16wb_register, false, true, true, SingleSpc, 2, 4 ,false}, -{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, false, SingleSpc, 2, 2 ,false}, -{ ARM::VST1q32PseudoWB_fixed, ARM::VST1q32wb_fixed, false, true, false, SingleSpc, 2, 2 ,false}, -{ ARM::VST1q32PseudoWB_register, ARM::VST1q32wb_register, false, true, true, SingleSpc, 2, 2 ,false}, -{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, false, SingleSpc, 2, 1 ,false}, -{ ARM::VST1q64PseudoWB_fixed, ARM::VST1q64wb_fixed, false, true, false, SingleSpc, 2, 1 ,false}, -{ ARM::VST1q64PseudoWB_register, ARM::VST1q64wb_register, false, true, true, SingleSpc, 2, 1 ,false}, -{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, false, SingleSpc, 2, 8 ,false}, -{ ARM::VST1q8PseudoWB_fixed, ARM::VST1q8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false}, -{ ARM::VST1q8PseudoWB_register, ARM::VST1q8wb_register, false, true, true, SingleSpc, 2, 8 ,false}, - { ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true}, { ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true}, { ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2 ,true}, @@ -310,16 +275,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true}, { ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true}, -{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,false}, -{ ARM::VST2d16PseudoWB_fixed, ARM::VST2d16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false}, -{ ARM::VST2d16PseudoWB_register, ARM::VST2d16wb_register, false, true, true, SingleSpc, 2, 4 ,false}, -{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,false}, -{ ARM::VST2d32PseudoWB_fixed, ARM::VST2d32wb_fixed, false, true, false, SingleSpc, 2, 2 ,false}, -{ ARM::VST2d32PseudoWB_register, ARM::VST2d32wb_register, false, true, true, SingleSpc, 2, 2 ,false}, -{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,false}, -{ ARM::VST2d8PseudoWB_fixed, ARM::VST2d8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false}, -{ ARM::VST2d8PseudoWB_register, ARM::VST2d8wb_register, false, true, true, SingleSpc, 2, 8 ,false}, - { ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false}, { ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false}, { ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false}, @@ -1094,33 +1049,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, return true; } - case ARM::VLD1q8Pseudo: - case ARM::VLD1q16Pseudo: - case ARM::VLD1q32Pseudo: - case ARM::VLD1q64Pseudo: - case ARM::VLD1q8PseudoWB_register: - case ARM::VLD1q16PseudoWB_register: - case ARM::VLD1q32PseudoWB_register: - case ARM::VLD1q64PseudoWB_register: - case ARM::VLD1q8PseudoWB_fixed: - case ARM::VLD1q16PseudoWB_fixed: - case ARM::VLD1q32PseudoWB_fixed: - case ARM::VLD1q64PseudoWB_fixed: - case ARM::VLD2d8Pseudo: - case ARM::VLD2d16Pseudo: - case ARM::VLD2d32Pseudo: case ARM::VLD2q8Pseudo: case ARM::VLD2q16Pseudo: case ARM::VLD2q32Pseudo: - case ARM::VLD2d8PseudoWB_fixed: - case ARM::VLD2d16PseudoWB_fixed: - case ARM::VLD2d32PseudoWB_fixed: case ARM::VLD2q8PseudoWB_fixed: case ARM::VLD2q16PseudoWB_fixed: case ARM::VLD2q32PseudoWB_fixed: - case ARM::VLD2d8PseudoWB_register: - case ARM::VLD2d16PseudoWB_register: - case ARM::VLD2d32PseudoWB_register: case ARM::VLD2q8PseudoWB_register: case ARM::VLD2q16PseudoWB_register: case ARM::VLD2q32PseudoWB_register: @@ -1189,33 +1123,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandVLD(MBBI); return true; - case ARM::VST1q8Pseudo: - case ARM::VST1q16Pseudo: - case ARM::VST1q32Pseudo: - case ARM::VST1q64Pseudo: - case ARM::VST1q8PseudoWB_fixed: - case ARM::VST1q16PseudoWB_fixed: - case ARM::VST1q32PseudoWB_fixed: - case ARM::VST1q64PseudoWB_fixed: - case ARM::VST1q8PseudoWB_register: - case ARM::VST1q16PseudoWB_register: - case ARM::VST1q32PseudoWB_register: - case ARM::VST1q64PseudoWB_register: - case ARM::VST2d8Pseudo: - case ARM::VST2d16Pseudo: - case ARM::VST2d32Pseudo: case ARM::VST2q8Pseudo: case ARM::VST2q16Pseudo: case ARM::VST2q32Pseudo: - case ARM::VST2d8PseudoWB_fixed: - case ARM::VST2d16PseudoWB_fixed: - case ARM::VST2d32PseudoWB_fixed: case ARM::VST2q8PseudoWB_fixed: case ARM::VST2q16PseudoWB_fixed: case ARM::VST2q32PseudoWB_fixed: - case ARM::VST2d8PseudoWB_register: - case ARM::VST2d16PseudoWB_register: - case ARM::VST2d32PseudoWB_register: case ARM::VST2q8PseudoWB_register: case ARM::VST2q16PseudoWB_register: case ARM::VST2q32PseudoWB_register: @@ -1333,10 +1246,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandLaneOp(MBBI); return true; - case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false); return true; case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true; case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; - case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true); return true; case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true; } diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 6351f4371a1..0fd60254dfa 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -830,8 +830,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, ARM::QPRRegisterClass); MBB.addLiveIn(SupReg); AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64)) - .addReg(ARM::R4).addImm(16).addReg(NextReg) - .addReg(SupReg, RegState::ImplicitKill)); + .addReg(ARM::R4).addImm(16).addReg(SupReg)); NextReg += 2; NumAlignedDPRCS2Regs -= 2; } @@ -944,9 +943,8 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, if (NumAlignedDPRCS2Regs >= 2) { unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, ARM::QPRRegisterClass); - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), NextReg) - .addReg(ARM::R4).addImm(16) - .addReg(SupReg, RegState::ImplicitDefine)); + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg) + .addReg(ARM::R4).addImm(16)); NextReg += 2; NumAlignedDPRCS2Regs -= 2; } diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index d3cce5899b8..c99db98fbcc 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1563,10 +1563,6 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; - case ARM::VLD1q8PseudoWB_fixed: return ARM::VLD1q8PseudoWB_register; - case ARM::VLD1q16PseudoWB_fixed: return ARM::VLD1q16PseudoWB_register; - case ARM::VLD1q32PseudoWB_fixed: return ARM::VLD1q32PseudoWB_register; - case ARM::VLD1q64PseudoWB_fixed: return ARM::VLD1q64PseudoWB_register; case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; @@ -1576,23 +1572,19 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register; case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register; case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register; - case ARM::VST1q8PseudoWB_fixed: return ARM::VST1q8PseudoWB_register; - case ARM::VST1q16PseudoWB_fixed: return ARM::VST1q16PseudoWB_register; - case ARM::VST1q32PseudoWB_fixed: return ARM::VST1q32PseudoWB_register; - case ARM::VST1q64PseudoWB_fixed: return ARM::VST1q64PseudoWB_register; case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register; case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; - case ARM::VLD2d8PseudoWB_fixed: return ARM::VLD2d8PseudoWB_register; - case ARM::VLD2d16PseudoWB_fixed: return ARM::VLD2d16PseudoWB_register; - case ARM::VLD2d32PseudoWB_fixed: return ARM::VLD2d32PseudoWB_register; + case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register; + case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register; + case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register; case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; - case ARM::VST2d8PseudoWB_fixed: return ARM::VST2d8PseudoWB_register; - case ARM::VST2d16PseudoWB_fixed: return ARM::VST2d16PseudoWB_register; - case ARM::VST2d32PseudoWB_fixed: return ARM::VST2d32PseudoWB_register; + case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register; + case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register; + case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register; case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; @@ -1673,7 +1665,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, Opc = getVLDSTRegisterUpdateOpcode(Opc); // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64PseudoWB_fixed) || + if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64wb_fixed) || !isa(Inc.getNode())) Ops.push_back(isa(Inc.getNode()) ? Reg0 : Inc); } @@ -1823,7 +1815,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Opc = getVLDSTRegisterUpdateOpcode(Opc); // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs > 2 && Opc != ARM::VST1q64PseudoWB_fixed) || + if ((NumVecs > 2 && Opc != ARM::VST1q64wb_fixed) || !isa(Inc.getNode())) Ops.push_back(isa(Inc.getNode()) ? Reg0 : Inc); } @@ -2938,18 +2930,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::VLD1_UPD: { unsigned DOpcodes[] = { ARM::VLD1d8wb_fixed, ARM::VLD1d16wb_fixed, ARM::VLD1d32wb_fixed, ARM::VLD1d64wb_fixed }; - unsigned QOpcodes[] = { ARM::VLD1q8PseudoWB_fixed, - ARM::VLD1q16PseudoWB_fixed, - ARM::VLD1q32PseudoWB_fixed, - ARM::VLD1q64PseudoWB_fixed }; + unsigned QOpcodes[] = { ARM::VLD1q8wb_fixed, + ARM::VLD1q16wb_fixed, + ARM::VLD1q32wb_fixed, + ARM::VLD1q64wb_fixed }; return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0); } case ARMISD::VLD2_UPD: { - unsigned DOpcodes[] = { ARM::VLD2d8PseudoWB_fixed, - ARM::VLD2d16PseudoWB_fixed, - ARM::VLD2d32PseudoWB_fixed, - ARM::VLD1q64PseudoWB_fixed}; + unsigned DOpcodes[] = { ARM::VLD2d8wb_fixed, + ARM::VLD2d16wb_fixed, + ARM::VLD2d32wb_fixed, + ARM::VLD1q64wb_fixed}; unsigned QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q32PseudoWB_fixed }; @@ -2958,7 +2950,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::VLD3_UPD: { unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD, - ARM::VLD3d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed}; + ARM::VLD3d32Pseudo_UPD, ARM::VLD1q64wb_fixed}; unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, ARM::VLD3q16Pseudo_UPD, ARM::VLD3q32Pseudo_UPD }; @@ -2970,7 +2962,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::VLD4_UPD: { unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, - ARM::VLD4d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed}; + ARM::VLD4d32Pseudo_UPD, ARM::VLD1q64wb_fixed}; unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, ARM::VLD4q16Pseudo_UPD, ARM::VLD4q32Pseudo_UPD }; @@ -3007,18 +2999,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::VST1_UPD: { unsigned DOpcodes[] = { ARM::VST1d8wb_fixed, ARM::VST1d16wb_fixed, ARM::VST1d32wb_fixed, ARM::VST1d64wb_fixed }; - unsigned QOpcodes[] = { ARM::VST1q8PseudoWB_fixed, - ARM::VST1q16PseudoWB_fixed, - ARM::VST1q32PseudoWB_fixed, - ARM::VST1q64PseudoWB_fixed }; + unsigned QOpcodes[] = { ARM::VST1q8wb_fixed, + ARM::VST1q16wb_fixed, + ARM::VST1q32wb_fixed, + ARM::VST1q64wb_fixed }; return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0); } case ARMISD::VST2_UPD: { - unsigned DOpcodes[] = { ARM::VST2d8PseudoWB_fixed, - ARM::VST2d16PseudoWB_fixed, - ARM::VST2d32PseudoWB_fixed, - ARM::VST1q64PseudoWB_fixed}; + unsigned DOpcodes[] = { ARM::VST2d8wb_fixed, + ARM::VST2d16wb_fixed, + ARM::VST2d32wb_fixed, + ARM::VST1q64wb_fixed}; unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, ARM::VST2q16PseudoWB_fixed, ARM::VST2q32PseudoWB_fixed }; @@ -3188,14 +3180,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vld1: { unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, ARM::VLD1d32, ARM::VLD1d64 }; - unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo, - ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo }; + unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, + ARM::VLD1q32, ARM::VLD1q64}; return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vld2: { - unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo, - ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo }; + unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, + ARM::VLD2d32, ARM::VLD1q64 }; unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, ARM::VLD2q32Pseudo }; return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0); @@ -3249,14 +3241,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vst1: { unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, ARM::VST1d32, ARM::VST1d64 }; - unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo, - ARM::VST1q32Pseudo, ARM::VST1q64Pseudo }; + unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, + ARM::VST1q32, ARM::VST1q64 }; return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vst2: { - unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo, - ARM::VST2d32Pseudo, ARM::VST1q64Pseudo }; + unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, + ARM::VST2d32, ARM::VST1q64 }; unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, ARM::VST2q32Pseudo }; return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0); @@ -3317,14 +3309,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { break; case Intrinsic::arm_neon_vtbl2: - return SelectVTBL(N, false, 2, ARM::VTBL2Pseudo); + return SelectVTBL(N, false, 2, ARM::VTBL2); case Intrinsic::arm_neon_vtbl3: return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo); case Intrinsic::arm_neon_vtbl4: return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo); case Intrinsic::arm_neon_vtbx2: - return SelectVTBL(N, true, 2, ARM::VTBX2Pseudo); + return SelectVTBL(N, true, 2, ARM::VTBX2); case Intrinsic::arm_neon_vtbx3: return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo); case Intrinsic::arm_neon_vtbx4: @@ -3358,7 +3350,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { Ops.push_back(N->getOperand(2)); Ops.push_back(getAL(CurDAG)); // Predicate Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register - return CurDAG->getMachineNode(ARM::VTBL2Pseudo, dl, VT, + return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops.data(), Ops.size()); } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 17fe80851c0..b0ab9941f96 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -97,6 +97,15 @@ def VecListTwoDAsmOperand : AsmOperandClass { def VecListTwoD : RegisterOperand { let ParserMatchClass = VecListTwoDAsmOperand; } +// FIXME: Replace all VecListTwoD with VecListDPair +def VecListDPairAsmOperand : AsmOperandClass { + let Name = "VecListDPair"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListDPair : RegisterOperand { + let ParserMatchClass = VecListDPairAsmOperand; +} // Register list of three sequential D registers. def VecListThreeDAsmOperand : AsmOperandClass { let Name = "VecListThreeD"; @@ -593,7 +602,7 @@ class VLD1D op7_4, string Dt> let DecoderMethod = "DecodeVLDInstruction"; } class VLD1Q op7_4, string Dt> - : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd), + : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), (ins addrmode6:$Rn), IIC_VLD1x2, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; @@ -611,11 +620,6 @@ def VLD1q16 : VLD1Q<{0,1,?,?}, "16">; def VLD1q32 : VLD1Q<{1,0,?,?}, "32">; def VLD1q64 : VLD1Q<{1,1,?,?}, "64">; -def VLD1q8Pseudo : VLDQPseudo; -def VLD1q16Pseudo : VLDQPseudo; -def VLD1q32Pseudo : VLDQPseudo; -def VLD1q64Pseudo : VLDQPseudo; - // ...with address register writeback: multiclass VLD1DWB op7_4, string Dt> { def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), @@ -637,7 +641,7 @@ multiclass VLD1DWB op7_4, string Dt> { } } multiclass VLD1QWB op7_4, string Dt> { - def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), + def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins addrmode6:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { @@ -646,7 +650,7 @@ multiclass VLD1QWB op7_4, string Dt> { let DecoderMethod = "DecodeVLDInstruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } - def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), + def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -665,15 +669,6 @@ defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">; defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">; defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">; -def VLD1q8PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD1q16PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD1q32PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD1q64PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD1q8PseudoWB_register : VLDQWBregisterPseudo; -def VLD1q16PseudoWB_register : VLDQWBregisterPseudo; -def VLD1q32PseudoWB_register : VLDQWBregisterPseudo; -def VLD1q64PseudoWB_register : VLDQWBregisterPseudo; - // ...with 3 registers class VLD1D3 op7_4, string Dt> : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), @@ -767,18 +762,14 @@ class VLD2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, let DecoderMethod = "DecodeVLDInstruction"; } -def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2>; -def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2>; -def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2>; +def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>; +def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>; +def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>; def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>; def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>; def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>; -def VLD2d8Pseudo : VLDQPseudo; -def VLD2d16Pseudo : VLDQPseudo; -def VLD2d32Pseudo : VLDQPseudo; - def VLD2q8Pseudo : VLDQQPseudo; def VLD2q16Pseudo : VLDQQPseudo; def VLD2q32Pseudo : VLDQQPseudo; @@ -805,21 +796,14 @@ multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, } } -defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2u>; -defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2u>; -defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2u>; +defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>; +defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>; +defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>; defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>; defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>; defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>; -def VLD2d8PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD2d16PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD2d32PseudoWB_fixed : VLDQWBfixedPseudo; -def VLD2d8PseudoWB_register : VLDQWBregisterPseudo; -def VLD2d16PseudoWB_register : VLDQWBregisterPseudo; -def VLD2d32PseudoWB_register : VLDQWBregisterPseudo; - def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo; def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo; def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo; @@ -1597,7 +1581,7 @@ class VST1D op7_4, string Dt> let DecoderMethod = "DecodeVSTInstruction"; } class VST1Q op7_4, string Dt> - : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListTwoD:$Vd), + : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; @@ -1614,11 +1598,6 @@ def VST1q16 : VST1Q<{0,1,?,?}, "16">; def VST1q32 : VST1Q<{1,0,?,?}, "32">; def VST1q64 : VST1Q<{1,1,?,?}, "64">; -def VST1q8Pseudo : VSTQPseudo; -def VST1q16Pseudo : VSTQPseudo; -def VST1q32Pseudo : VSTQPseudo; -def VST1q64Pseudo : VSTQPseudo; - // ...with address register writeback: multiclass VST1DWB op7_4, string Dt> { def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), @@ -1642,7 +1621,7 @@ multiclass VST1DWB op7_4, string Dt> { } multiclass VST1QWB op7_4, string Dt> { def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VecListTwoD:$Vd), IIC_VLD1x2u, + (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. @@ -1651,7 +1630,7 @@ multiclass VST1QWB op7_4, string Dt> { let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VecListTwoD:$Vd), + (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1671,15 +1650,6 @@ defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">; defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">; defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">; -def VST1q8PseudoWB_fixed : VSTQWBfixedPseudo; -def VST1q16PseudoWB_fixed : VSTQWBfixedPseudo; -def VST1q32PseudoWB_fixed : VSTQWBfixedPseudo; -def VST1q64PseudoWB_fixed : VSTQWBfixedPseudo; -def VST1q8PseudoWB_register : VSTQWBregisterPseudo; -def VST1q16PseudoWB_register : VSTQWBregisterPseudo; -def VST1q32PseudoWB_register : VSTQWBregisterPseudo; -def VST1q64PseudoWB_register : VSTQWBregisterPseudo; - // ...with 3 registers class VST1D3 op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), @@ -1779,18 +1749,14 @@ class VST2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, let DecoderMethod = "DecodeVSTInstruction"; } -def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VST2>; -def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VST2>; -def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VST2>; +def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>; +def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>; +def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>; def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>; def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>; def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>; -def VST2d8Pseudo : VSTQPseudo; -def VST2d16Pseudo : VSTQPseudo; -def VST2d32Pseudo : VSTQPseudo; - def VST2q8Pseudo : VSTQQPseudo; def VST2q16Pseudo : VSTQQPseudo; def VST2q32Pseudo : VSTQQPseudo; @@ -1837,21 +1803,14 @@ multiclass VST2QWB op7_4, string Dt> { } } -defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>; -defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>; -defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>; +defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>; +defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>; +defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>; defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">; defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">; defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">; -def VST2d8PseudoWB_fixed : VSTQWBfixedPseudo; -def VST2d16PseudoWB_fixed : VSTQWBfixedPseudo; -def VST2d32PseudoWB_fixed : VSTQWBfixedPseudo; -def VST2d8PseudoWB_register : VSTQWBregisterPseudo; -def VST2d16PseudoWB_register : VSTQWBregisterPseudo; -def VST2d32PseudoWB_register : VSTQWBregisterPseudo; - def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo; def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo; def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo; @@ -5444,7 +5403,7 @@ def VTBL1 let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd), - (ins VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2, + (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2, "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; def VTBL3 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd), @@ -5457,8 +5416,6 @@ def VTBL4 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; } // hasExtraSrcRegAllocReq = 1 -def VTBL2Pseudo - : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>; def VTBL3Pseudo : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>; def VTBL4Pseudo @@ -5474,7 +5431,7 @@ def VTBX1 let hasExtraSrcRegAllocReq = 1 in { def VTBX2 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd), - (ins DPR:$orig, VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2, + (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2, "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; def VTBX3 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd), @@ -5489,9 +5446,6 @@ def VTBX4 "$orig = $Vd", []>; } // hasExtraSrcRegAllocReq = 1 -def VTBX2Pseudo - : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src), - IIC_VTBX2, "$orig = $dst", []>; def VTBX3Pseudo : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src), IIC_VTBX3, "$orig = $dst", []>; diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 16998b2d7b4..b16a12c08d7 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -304,7 +304,11 @@ def TuplesOE2D : RegisterTuples<[dsub_0, dsub_1], // Register class representing a pair of consecutive D registers. // Use the Q registers for the even-odd pairs. -def DPair : RegisterClass<"ARM", [v2i64], 128, (interleave QPR, TuplesOE2D)>; +def DPair : RegisterClass<"ARM", [v2i64], 128, (interleave QPR, TuplesOE2D)> { + // Allocate starting at non-VFP2 registers D16-D31 first. + let AltOrders = [(rotl DPair, 16)]; + let AltOrderSelect = [{ return 1; }]; +} // Pseudo-registers representing 3 consecutive D registers. def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2], diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 2283a5a7c2a..845f0a32996 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -44,6 +44,7 @@ enum VectorLaneTy { NoLanes, AllLanes, IndexedLane }; class ARMAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; + const MCRegisterInfo *MRI; // Map of register aliases registers via the .req directive. StringMap RegisterReqs; @@ -236,6 +237,9 @@ public: : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { MCAsmParserExtension::Initialize(_Parser); + // Cache the MCRegisterInfo. + MRI = &getContext().getRegisterInfo(); + // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); @@ -1086,6 +1090,12 @@ public: return VectorList.Count == 2; } + bool isVecListDPair() const { + if (!isSingleSpacedVectorList()) return false; + return (ARMMCRegisterClasses[ARM::DPairRegClassID] + .contains(VectorList.RegNum)); + } + bool isVecListThreeD() const { if (!isSingleSpacedVectorList()) return false; return VectorList.Count == 3; @@ -2969,6 +2979,12 @@ parseVectorList(SmallVectorImpl &Operands) { switch (LaneKind) { case NoLanes: E = Parser.getTok().getLoc(); + // VLD1 wants a DPair register. + // FIXME: Make the rest of the two-reg instructions want the same + // thing. + Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0, + &ARMMCRegisterClasses[ARM::DPairRegClassID]); + Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E)); break; case AllLanes: @@ -3138,6 +3154,14 @@ parseVectorList(SmallVectorImpl &Operands) { switch (LaneKind) { case NoLanes: + if (Count == 2 && Spacing == 1) + // VLD1 wants a DPair register. + // FIXME: Make the rest of the two-reg instructions want the same + // thing. + FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, + &ARMMCRegisterClasses[ARM::DPairRegClassID]); + + Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count, (Spacing == 2), S, E)); break; diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index fc6b9a11f84..15e1c71eb58 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -126,6 +126,8 @@ static DecodeStatus DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, const void *Decoder); static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); @@ -987,6 +989,25 @@ static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } +static const unsigned DPairDecoderTable[] = { + ARM::Q0, ARM::D1_D2, ARM::Q1, ARM::D3_D4, ARM::Q2, ARM::D5_D6, + ARM::Q3, ARM::D7_D8, ARM::Q4, ARM::D9_D10, ARM::Q5, ARM::D11_D12, + ARM::Q6, ARM::D13_D14, ARM::Q7, ARM::D15_D16, ARM::Q8, ARM::D17_D18, + ARM::Q9, ARM::D19_D20, ARM::Q10, ARM::D21_D22, ARM::Q11, ARM::D23_D24, + ARM::Q12, ARM::D25_D26, ARM::Q13, ARM::D27_D28, ARM::Q14, ARM::D29_D30, + ARM::Q15 +}; + +static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 30) + return MCDisassembler::Fail; + + unsigned Register = DPairDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (Val == 0xF) return MCDisassembler::Fail; @@ -1953,8 +1974,35 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned Rm = fieldFromInstruction32(Insn, 0, 4); // First output register - if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) - return MCDisassembler::Fail; + switch (Inst.getOpcode()) { + case ARM::VLD1q16: + case ARM::VLD1q32: + case ARM::VLD1q64: + case ARM::VLD1q8: + case ARM::VLD1q16wb_fixed: + case ARM::VLD1q16wb_register: + case ARM::VLD1q32wb_fixed: + case ARM::VLD1q32wb_register: + case ARM::VLD1q64wb_fixed: + case ARM::VLD1q64wb_register: + case ARM::VLD1q8wb_fixed: + case ARM::VLD1q8wb_register: + case ARM::VLD2d16: + case ARM::VLD2d32: + case ARM::VLD2d8: + case ARM::VLD2d16wb_fixed: + case ARM::VLD2d16wb_register: + case ARM::VLD2d32wb_fixed: + case ARM::VLD2d32wb_register: + case ARM::VLD2d8wb_fixed: + case ARM::VLD2d8wb_register: + if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + } // Second output register switch (Inst.getOpcode()) { @@ -2285,8 +2333,35 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, // First input register - if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) - return MCDisassembler::Fail; + switch (Inst.getOpcode()) { + case ARM::VST1q16: + case ARM::VST1q32: + case ARM::VST1q64: + case ARM::VST1q8: + case ARM::VST1q16wb_fixed: + case ARM::VST1q16wb_register: + case ARM::VST1q32wb_fixed: + case ARM::VST1q32wb_register: + case ARM::VST1q64wb_fixed: + case ARM::VST1q64wb_register: + case ARM::VST1q8wb_fixed: + case ARM::VST1q8wb_register: + case ARM::VST2d16: + case ARM::VST2d32: + case ARM::VST2d8: + case ARM::VST2d16wb_fixed: + case ARM::VST2d16wb_register: + case ARM::VST2d32wb_fixed: + case ARM::VST2d32wb_register: + case ARM::VST2d8wb_fixed: + case ARM::VST2d8wb_register: + if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + } // Second input register switch (Inst.getOpcode()) { @@ -2652,8 +2727,16 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; // Writeback } - if (!Check(S, DecodeDPRRegisterClass(Inst, Rn, Address, Decoder))) - return MCDisassembler::Fail; + switch (Inst.getOpcode()) { + case ARM::VTBL2: + case ARM::VTBX2: + if (!Check(S, DecodeDPairRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + if (!Check(S, DecodeDPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index cfb5b92d0c0..06c1634bc0f 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -1033,6 +1034,14 @@ void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum, << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "}"; } +void ARMInstPrinter::printVectorListDPair(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Reg = MI->getOperand(OpNum).getReg(); + unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); + unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1); + O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}"; +} + void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 3c8b6cf3051..5084f61c4e0 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -134,6 +134,7 @@ public: void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListDPair(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum, diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index 9169a69af24..06eb4e5078f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -185,6 +185,23 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) { case S29: case D29: return 29; case S30: case D30: return 30; case S31: case D31: return 31; + + // Composite registers use the regnum of the first register in the list. + case D1_D2: return 1; + case D3_D5: return 3; + case D5_D7: return 5; + case D7_D9: return 7; + case D9_D10: return 9; + case D11_D12: return 11; + case D13_D14: return 13; + case D15_D16: return 15; + case D17_D18: return 17; + case D19_D20: return 19; + case D21_D22: return 21; + case D23_D24: return 23; + case D25_D26: return 25; + case D27_D28: return 27; + case D29_D30: return 29; } } diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp index e0fd566d8db..5c330b0a93c 100644 --- a/utils/TableGen/EDEmitter.cpp +++ b/utils/TableGen/EDEmitter.cpp @@ -575,6 +575,7 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type, REG("QQQQPR"); REG("VecListOneD"); REG("VecListTwoD"); + REG("VecListDPair"); REG("VecListThreeD"); REG("VecListFourD"); REG("VecListTwoQ"); -- 2.34.1