1 //===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
9 //===----------------------------------------------------------------------===//
13 #include "AMDGPUSubtarget.h"
14 #include "SIInstrInfo.h"
15 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
16 #include "llvm/CodeGen/MachineDominators.h"
17 #include "llvm/CodeGen/MachineFunctionPass.h"
18 #include "llvm/CodeGen/MachineInstrBuilder.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/IR/LLVMContext.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Target/TargetMachine.h"
25 #define DEBUG_TYPE "si-fold-operands"
// Machine-function pass that folds the source operands of simple mov/copy
// instructions (immediates or virtual registers) directly into their uses.
// NOTE(review): this view is a sampled excerpt of the file; braces and
// access specifiers between the numbered lines are not visible here.
30 class SIFoldOperands : public MachineFunctionPass {
// Registers the pass with the global PassRegistry on construction.
35 SIFoldOperands() : MachineFunctionPass(ID) {
36 initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
// Entry point: performs operand folding across all blocks of MF.
39 bool runOnMachineFunction(MachineFunction &MF) override;
41 const char *getPassName() const override {
42 return "SI Fold Operands";
// Declares the MachineDominatorTree requirement (matches the
// INITIALIZE_PASS_DEPENDENCY registration below).
45 void getAnalysisUsage(AnalysisUsage &AU) const override {
46 AU.addRequired<MachineDominatorTree>();
48 MachineFunctionPass::getAnalysisUsage(AU);
// A pending fold: replace operand UseOpNo of UseMI with either a register
// operand (OpToFold != nullptr) or an immediate (ImmToFold).
52 struct FoldCandidate {
55 MachineOperand *OpToFold;
// Register-operand fold: OpToFold is set; ImmToFold is unused (0).
58 FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
59 UseMI(MI), UseOpNo(OpNo), OpToFold(FoldOp), ImmToFold(0) { }
// Immediate fold: OpToFold is null; ImmToFold holds the constant bits.
61 FoldCandidate(MachineInstr *MI, unsigned OpNo, uint64_t Imm) :
62 UseMI(MI), UseOpNo(OpNo), OpToFold(nullptr), ImmToFold(Imm) { }
69 } // End anonymous namespace.
// Standard LLVM pass registration boilerplate; the MachineDominatorTree
// dependency mirrors the addRequired<> call in getAnalysisUsage.
71 INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
72 "SI Fold Operands", false, false)
73 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
74 INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
75 "SI Fold Operands", false, false)
// The address of ID (not its value) uniquely identifies the pass.
77 char SIFoldOperands::ID = 0;
// Exported handle so other code can refer to this pass by ID.
79 char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
// Factory used by the AMDGPU target to add this pass to the pipeline.
81 FunctionPass *llvm::createSIFoldOperandsPass() {
82 return new SIFoldOperands();
// Whitelist of opcodes whose single source operand may be folded into
// uses: plain scalar/vector moves only. Presumably the (not visible here)
// switch returns true for these cases and false otherwise — the switch
// header and return statements fall outside this sampled excerpt.
85 static bool isSafeToFold(unsigned Opcode) {
87 case AMDGPU::V_MOV_B32_e32:
88 case AMDGPU::V_MOV_B32_e64:
89 case AMDGPU::V_MOV_B64_PSEUDO:
90 case AMDGPU::S_MOV_B32:
91 case AMDGPU::S_MOV_B64:
// Applies a single FoldCandidate in place: rewrites operand UseOpNo of
// Fold.UseMI to the folded immediate or register. (Branch structure is
// partially elided in this sampled excerpt; returns appear to signal
// whether the rewrite happened — TODO confirm against the full file.)
99 static bool updateOperand(FoldCandidate &Fold,
100 const TargetRegisterInfo &TRI) {
101 MachineInstr *MI = Fold.UseMI;
102 MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
// Immediate fold: turn the register operand into an immediate in place.
106 Old.ChangeToImmediate(Fold.ImmToFold);
// Register fold: substitute the new virtual register (with subreg
// composition handled by substVirtReg). Virtual-to-virtual only.
110 MachineOperand *New = Fold.OpToFold;
111 if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
112 TargetRegisterInfo::isVirtualRegister(New->getReg())) {
113 Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
117 // FIXME: Handle physical registers.
// Pass body: for each safe mov-like instruction, try to fold its source
// operand (immediate or virtual register) into every use of its
// destination, then apply the collected folds.
// NOTE(review): this excerpt is sampled — continue statements, closing
// braces, and some declarations between the numbered lines are elided.
122 bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
123 MachineRegisterInfo &MRI = MF.getRegInfo();
124 const SIInstrInfo *TII =
125 static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
126 const SIRegisterInfo &TRI = TII->getRegisterInfo();
128 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
131 MachineBasicBlock &MBB = *BI;
// Next caches the following iterator so the current instruction may be
// rewritten/erased without invalidating the walk.
132 MachineBasicBlock::iterator I, Next;
133 for (I = MBB.begin(); I != MBB.end(); I = Next) {
135 MachineInstr &MI = *I;
// Only simple mov opcodes are fold candidates (see isSafeToFold).
137 if (!isSafeToFold(MI.getOpcode()))
// Operand 1 is the mov's source; operand 0 is its destination (its uses
// are walked below).
140 MachineOperand &OpToFold = MI.getOperand(1);
142 // FIXME: Fold operands with subregs.
// Skip physical-register and subregister sources.
143 if (OpToFold.isReg() &&
144 (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
145 OpToFold.getSubReg()))
// Collect all legal folds first and apply them afterwards, so the use
// list is not mutated while it is being iterated.
148 std::vector<FoldCandidate> FoldList;
149 for (MachineRegisterInfo::use_iterator
150 Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
153 MachineInstr *UseMI = Use->getParent();
154 const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());
156 // FIXME: Fold operands with subregs.
157 if (UseOp.isReg() && UseOp.getSubReg() && OpToFold.isReg()) {
// The source is an immediate if it is an integer or FP constant.
161 bool FoldingImm = OpToFold.isImm() || OpToFold.isFPImm();
165 const TargetRegisterClass *UseRC = MRI.getRegClass(UseOp.getReg());
// Canonicalize the constant into a 64-bit APInt (FP via raw bits).
167 if (OpToFold.isFPImm()) {
168 Imm = OpToFold.getFPImm()->getValueAPF().bitcastToAPInt();
170 Imm = APInt(64, OpToFold.getImm());
173 // Split 64-bit constants into 32-bits for folding.
174 if (UseOp.getSubReg()) {
// Only 8-byte (64-bit) register classes are handled for subreg uses.
175 if (UseRC->getSize() != 8)
178 if (UseOp.getSubReg() == AMDGPU::sub0) {
179 Imm = Imm.getLoBits(32);
181 assert(UseOp.getSubReg() == AMDGPU::sub1);
182 Imm = Imm.getHiBits(32);
186 // In order to fold immediates into copies, we need to change the
// COPY into a real mov of the destination's register class first.
188 if (UseMI->getOpcode() == AMDGPU::COPY) {
189 unsigned MovOp = TII->getMovOpcode(
190 MRI.getRegClass(UseMI->getOperand(0).getReg()));
// No suitable mov opcode for this register class — give up.
191 if (MovOp == AMDGPU::COPY)
194 UseMI->setDesc(TII->get(MovOp));
198 const MCInstrDesc &UseDesc = UseMI->getDesc();
200 // Don't fold into target independent nodes. Target independent opcodes
201 // don't have defined register classes.
202 if (UseDesc.isVariadic() ||
203 UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
// Build a throwaway immediate operand purely to query legality.
208 const MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
209 if (TII->isOperandLegal(UseMI, Use.getOperandNo(), &ImmOp)) {
210 FoldList.push_back(FoldCandidate(UseMI, Use.getOperandNo(),
211 Imm.getSExtValue()));
216 // Normal substitution with registers
217 if (TII->isOperandLegal(UseMI, Use.getOperandNo(), &OpToFold)) {
218 FoldList.push_back(FoldCandidate(UseMI, Use.getOperandNo(), &OpToFold));
222 // FIXME: We could commute the instruction to create more opportunities
223 // for folding. This will only be useful if we have 32-bit instructions.
225 // FIXME: We could try to change the instruction from 64-bit to 32-bit
226 // to enable more folding opportunities. The shrink operands pass
227 // already does this.
// Second phase: apply the folds collected above.
230 for (FoldCandidate &Fold : FoldList) {
231 if (updateOperand(Fold, TRI)) {
// A register fold can extend the source's live range past the prior
// kill point, so clear the kill flag on the folded operand.
234 assert(Fold.OpToFold && Fold.OpToFold->isReg());
235 Fold.OpToFold->setIsKill(false);
237 DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
238 Fold.UseOpNo << " of " << *Fold.UseMI << '\n');