From b990a2f249196ad3e0cc451d40a45fc2f9278eaf Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Fri, 14 May 2010 23:21:14 +0000 Subject: [PATCH] Teach two-address pass to do some coalescing while eliminating REG_SEQUENCE instructions. e.g. %reg1026 = VLDMQ %reg1025, 260, pred:14, pred:%reg0 %reg1027 = EXTRACT_SUBREG %reg1026, 6 %reg1028 = EXTRACT_SUBREG %reg1026, 5 ... %reg1029 = REG_SEQUENCE %reg1028, 5, %reg1027, 6, %reg1028, 7, %reg1027, 8, %reg1028, 9, %reg1027, 10, %reg1030, 11, %reg1032, 12 After REG_SEQUENCE is eliminated, we are left with: %reg1026 = VLDMQ %reg1025, 260, pred:14, pred:%reg0 %reg1029:6 = EXTRACT_SUBREG %reg1026, 6 %reg1029:5 = EXTRACT_SUBREG %reg1026, 5 The regular coalescer will not be able to coalesce reg1026 and reg1029 because it doesn't know how to combine sub-register indices 5 and 6. Now 2-address pass will consult the target whether sub-registers 5 and 6 of reg1026 can be combined into a larger sub-register (or combined to be reg1026 itself as is the case here). If it is possible, it will be able to replace references of reg1026 with reg1029 + the larger sub-register index. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@103835 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetRegisterInfo.h | 12 +++ lib/CodeGen/TwoAddressInstructionPass.cpp | 49 +++++++++ lib/Target/ARM/ARMBaseRegisterInfo.cpp | 117 ++++++++++++++++++++++ lib/Target/ARM/ARMBaseRegisterInfo.h | 9 ++ lib/Target/ARM/ARMRegisterInfo.h | 3 +- lib/Target/ARM/ARMRegisterInfo.td | 11 ++ lib/Target/ARM/NEONPreAllocPass.cpp | 6 +- 7 files changed, 204 insertions(+), 3 deletions(-) diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index 29b862aa02f..aeb669d6730 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -28,6 +28,7 @@ class BitVector; class MachineFunction; class MachineMove; class RegScavenger; +template class SmallVectorImpl; /// TargetRegisterDesc - This record contains all of the information known about /// a particular register. The AliasSet field (if not null) contains a pointer @@ -479,6 +480,17 @@ public: return 0; } + /// canCombinedSubRegIndex - Given a register class and a list of sub-register + /// indices, return true if it's possible to combine the sub-register indices + /// into one that corresponds to a larger sub-register. Return the new sub- + /// register index by reference. Note the new index by be zero if the given + /// sub-registers combined to form the whole register. + virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC, + SmallVectorImpl &SubIndices, + unsigned &NewSubIdx) const { + return 0; + } + /// getMatchingSuperRegClass - Return a subclass of the specified register /// class A so that each register in it has a sub-register of the /// specified sub-register index which is in the specified register class B. 
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 9f00311fb44..80bb1a9e9d0 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1166,6 +1166,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { llvm_unreachable(0); } + SmallVector RealSrcs; SmallSet Seen; for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { unsigned SrcReg = MI->getOperand(i).getReg(); @@ -1176,6 +1177,16 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { } MachineInstr *DefMI = MRI->getVRegDef(SrcReg); + if (DefMI->isImplicitDef()) { + DefMI->eraseFromParent(); + continue; + } + + // Remember EXTRACT_SUBREG sources. These might be candidate for + // coalescing. + if (DefMI->isExtractSubreg()) + RealSrcs.push_back(DefMI->getOperand(1).getReg()); + if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent()) { // REG_SEQUENCE cannot have duplicated operands, add a copy. // Also add an copy if the source if live-in the block. We don't want @@ -1216,6 +1227,44 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { DEBUG(dbgs() << "Eliminated: " << *MI); MI->eraseFromParent(); + + // Try coalescing some EXTRACT_SUBREG instructions. + Seen.clear(); + for (unsigned i = 0, e = RealSrcs.size(); i != e; ++i) { + unsigned SrcReg = RealSrcs[i]; + if (!Seen.insert(SrcReg)) + continue; + + // If there are no other uses than extract_subreg which feed into + // the reg_sequence, then we might be able to coalesce them. 
+ bool CanCoalesce = true; + SmallVector SubIndices; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (!UseMI->isExtractSubreg() || + UseMI->getOperand(0).getReg() != DstReg) { + CanCoalesce = false; + break; + } + SubIndices.push_back(UseMI->getOperand(2).getImm()); + } + + if (!CanCoalesce) + continue; + + // %reg1026 = VLDMQ %reg1025, 260, pred:14, pred:%reg0 + // %reg1029:6 = EXTRACT_SUBREG %reg1026, 6 + // %reg1029:5 = EXTRACT_SUBREG %reg1026, 5 + // Since D subregs 5, 6 can combine to a Q register, we can coalesce + // reg1026 to reg1029. + std::sort(SubIndices.begin(), SubIndices.end()); + unsigned NewSubIdx = 0; + if (TRI->canCombinedSubRegIndex(MRI->getRegClass(SrcReg), SubIndices, + NewSubIdx)) + UpdateRegSequenceSrcs(SrcReg, DstReg, NewSubIdx, MRI); + } } RegSequences.clear(); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 9b7dc309c52..9dcdce05e4f 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -351,6 +351,123 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return 0; } +bool +ARMBaseRegisterInfo::canCombinedSubRegIndex(const TargetRegisterClass *RC, + SmallVectorImpl &SubIndices, + unsigned &NewSubIdx) const { + + unsigned Size = RC->getSize() * 8; + if (Size < 6) + return 0; + + NewSubIdx = 0; // Whole register. + unsigned NumRegs = SubIndices.size(); + if (NumRegs == 8) { + // 8 D registers -> 1 QQQQ register. 
+ return (Size == 512 && + SubIndices[0] == ARM::DSUBREG_0 && + SubIndices[1] == ARM::DSUBREG_1 && + SubIndices[2] == ARM::DSUBREG_2 && + SubIndices[3] == ARM::DSUBREG_3 && + SubIndices[4] == ARM::DSUBREG_4 && + SubIndices[5] == ARM::DSUBREG_5 && + SubIndices[6] == ARM::DSUBREG_6 && + SubIndices[7] == ARM::DSUBREG_7); + } else if (NumRegs == 4) { + if (SubIndices[0] == ARM::QSUBREG_0) { + // 4 Q registers -> 1 QQQQ register. + return (Size == 512 && + SubIndices[1] == ARM::QSUBREG_1 && + SubIndices[2] == ARM::QSUBREG_2 && + SubIndices[3] == ARM::QSUBREG_3); + } else if (SubIndices[0] == ARM::DSUBREG_0) { + // 4 D registers -> 1 QQ register. + if (Size >= 256 && + SubIndices[1] == ARM::DSUBREG_1 && + SubIndices[2] == ARM::DSUBREG_2 && + SubIndices[3] == ARM::DSUBREG_3) { + if (Size == 512) + NewSubIdx = ARM::QQSUBREG_0; + return true; + } + } else if (SubIndices[0] == ARM::DSUBREG_4) { + // 4 D registers -> 1 QQ register (2nd). + if (Size == 512 && + SubIndices[1] == ARM::DSUBREG_5 && + SubIndices[2] == ARM::DSUBREG_6 && + SubIndices[3] == ARM::DSUBREG_7) { + NewSubIdx = ARM::QQSUBREG_1; + return true; + } + } else if (SubIndices[0] == ARM::SSUBREG_0) { + // 4 S registers -> 1 Q register. + if (Size >= 128 && + SubIndices[1] == ARM::SSUBREG_1 && + SubIndices[2] == ARM::SSUBREG_2 && + SubIndices[3] == ARM::SSUBREG_3) { + if (Size >= 256) + NewSubIdx = ARM::QSUBREG_0; + return true; + } + } + } else if (NumRegs == 2) { + if (SubIndices[0] == ARM::QSUBREG_0) { + // 2 Q registers -> 1 QQ register. + if (Size >= 256 && SubIndices[1] == ARM::QSUBREG_1) { + if (Size == 512) + NewSubIdx = ARM::QQSUBREG_0; + return true; + } + } else if (SubIndices[0] == ARM::QSUBREG_2) { + // 2 Q registers -> 1 QQ register (2nd). + if (Size == 512 && SubIndices[1] == ARM::QSUBREG_3) { + NewSubIdx = ARM::QQSUBREG_1; + return true; + } + } else if (SubIndices[0] == ARM::DSUBREG_0) { + // 2 D registers -> 1 Q register. 
+ if (Size >= 128 && SubIndices[1] == ARM::DSUBREG_1) { + if (Size >= 256) + NewSubIdx = ARM::QSUBREG_0; + return true; + } + } else if (SubIndices[0] == ARM::DSUBREG_2) { + // 2 D registers -> 1 Q register (2nd). + if (Size >= 256 && SubIndices[1] == ARM::DSUBREG_3) { + NewSubIdx = ARM::QSUBREG_1; + return true; + } + } else if (SubIndices[0] == ARM::DSUBREG_4) { + // 2 D registers -> 1 Q register (3rd). + if (Size == 512 && SubIndices[1] == ARM::DSUBREG_5) { + NewSubIdx = ARM::QSUBREG_2; + return true; + } + } else if (SubIndices[0] == ARM::DSUBREG_6) { + // 2 D registers -> 1 Q register (3rd). + if (Size == 512 && SubIndices[1] == ARM::DSUBREG_7) { + NewSubIdx = ARM::QSUBREG_3; + return true; + } + } else if (SubIndices[0] == ARM::SSUBREG_0) { + // 2 S registers -> 1 D register. + if (SubIndices[1] == ARM::SSUBREG_1) { + if (Size >= 128) + NewSubIdx = ARM::DSUBREG_0; + return true; + } + } else if (SubIndices[0] == ARM::SSUBREG_2) { + // 2 S registers -> 1 D register (2nd). + if (Size >= 128 && SubIndices[1] == ARM::SSUBREG_3) { + NewSubIdx = ARM::DSUBREG_1; + return true; + } + } + } + return false; +} + + const TargetRegisterClass * ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const { return ARM::GPRRegisterClass; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 456c39237d4..2c9c82d0318 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -81,6 +81,15 @@ public: getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass *B, unsigned Idx) const; + /// canCombinedSubRegIndex - Given a register class and a list of sub-register + /// indices, return true if it's possible to combine the sub-register indices + /// into one that corresponds to a larger sub-register. Return the new sub- + /// register index by reference. Note the new index by be zero if the given + /// sub-registers combined to form the whole register. 
+ virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC, + SmallVectorImpl &SubIndices, + unsigned &NewSubIdx) const; + const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; std::pair diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index efc0cbb0651..62514c5c929 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -31,7 +31,8 @@ namespace ARM { SSUBREG_0 = 1, SSUBREG_1 = 2, SSUBREG_2 = 3, SSUBREG_3 = 4, DSUBREG_0 = 5, DSUBREG_1 = 6, DSUBREG_2 = 7, DSUBREG_3 = 8, DSUBREG_4 = 9, DSUBREG_5 = 10, DSUBREG_6 = 11, DSUBREG_7 = 12, - QSUBREG_0 = 13, QSUBREG_1 = 14, QSUBREG_2 = 15, QSUBREG_3 = 16 + QSUBREG_0 = 13, QSUBREG_1 = 14, QSUBREG_2 = 15, QSUBREG_3 = 16, + QQSUBREG_0= 17, QQSUBREG_1= 18 }; } diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index ae2b95bef60..80325ae54e7 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -465,6 +465,10 @@ def arm_qsubreg_1 : PatLeaf<(i32 14)>; def arm_qsubreg_2 : PatLeaf<(i32 15)>; def arm_qsubreg_3 : PatLeaf<(i32 16)>; +def arm_qqsubreg_0 : PatLeaf<(i32 17)>; +def arm_qqsubreg_1 : PatLeaf<(i32 18)>; + + // S sub-registers of D registers. def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15], @@ -552,3 +556,10 @@ def : SubRegSet<15, [QQQQ0, QQQQ1, QQQQ2, QQQQ3], [Q2, Q6, Q10, Q14]>; def : SubRegSet<16, [QQQQ0, QQQQ1, QQQQ2, QQQQ3], [Q3, Q7, Q11, Q15]>; + +// QQ sub-registers of QQQQQQQQ registers. 
+def : SubRegSet<17, [QQQQ0, QQQQ1, QQQQ2, QQQQ3], + [QQ0, QQ2, QQ4, QQ6]>; +def : SubRegSet<18, [QQQQ0, QQQQ1, QQQQ2, QQQQ3], + [QQ1, QQ3, QQ5, QQ7]>; + diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index 017e6f74439..77375e52d1a 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -414,7 +414,9 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, return false; LastSrcReg = VirtReg; const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - if (RC != ARM::QPRRegisterClass && RC != ARM::QQPRRegisterClass) + if (RC != ARM::QPRRegisterClass && + RC != ARM::QQPRRegisterClass && + RC != ARM::QQQQPRRegisterClass) return false; unsigned SubIdx = DefMI->getOperand(2).getImm(); if (LastSubIdx) { @@ -432,7 +434,7 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, // FIXME: Update the uses of EXTRACT_SUBREG from REG_SEQUENCE is // currently required for correctness. e.g. - // %reg1041; = REG_SEQUENCE %reg1040, 5, %reg1035, 6 + // %reg1041; = REG_SEQUENCE %reg1040, 5, %reg1035, 6 // %reg1042 = EXTRACT_SUBREG %reg1041, 6 // %reg1043 = EXTRACT_SUBREG %reg1041, 5 // VST1q16 %reg1025, 0, %reg1043, %reg1042, -- 2.34.1