1 //===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #define DEBUG_TYPE "t2-reduce-size"
12 #include "ARMBaseRegisterInfo.h"
13 #include "ARMBaseInstrInfo.h"
14 #include "Thumb2InstrInfo.h"
15 #include "llvm/CodeGen/MachineInstr.h"
16 #include "llvm/CodeGen/MachineInstrBuilder.h"
17 #include "llvm/CodeGen/MachineFunctionPass.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Compiler.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/Statistic.h"
// Pass statistics, one counter per kind of 32-bit -> 16-bit rewrite.
25 STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
26 STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
27 STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones");
// Hidden option "t2-reduce-limit", default -1. ReduceMBB compares it against
// NumNarrows + Num2Addrs; the value -1 bypasses that comparison.
29 static cl::opt<int> ReduceLimit("t2-reduce-limit", cl::init(-1), cl::Hidden);
32 /// ReduceTable - A static table with information on mapping from wide
// Fields of one table row. The "1" members describe the plain narrow form
// (NarrowOpc1); the "2" members describe the two-address form (NarrowOpc2).
// NOTE(review): the struct header and some members (e.g. PredCC2, which
// VerifyPredAndCC reads) are not visible in this listing.
35 unsigned WideOpc; // Wide opcode
36 unsigned NarrowOpc1; // Narrow opcode to transform to
37 unsigned NarrowOpc2; // Narrow opcode when it's two-address
38 uint8_t Imm1Limit; // Limit of immediate field (bits)
39 uint8_t Imm2Limit; // Limit of immediate field when it's two-address
40 unsigned LowRegs1 : 1; // Only possible if low-registers are used
41 unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
42 unsigned PredCC1 : 1; // 0 - If predicated, cc is off and vice versa (see VerifyPredAndCC).
45 unsigned Special : 1; // Needs to be dealt with specially
// One row per wide (32-bit) Thumb2 opcode. The pass constructor indexes the
// rows by WideOpc and asserts that no wide opcode appears twice.
// A zero in the Narrow1 / Narrow2 column means that form is not attempted
// (ReduceMBB tests NarrowOpc1 / NarrowOpc2 before calling the reducers).
// The trailing S column is ReduceEntry::Special; the rows that set it here
// are the load / store opcodes.
// NOTE(review): the "P/C" column holds two values, presumably PredCC1 and
// PredCC2 in that order — confirm against the full ReduceEntry declaration.
48 static const ReduceEntry ReduceTable[] = {
49 // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, S
50 { ARM::t2ADCrr, ARM::tADC, 0, 0, 0, 1, 0, 0,0, 0 },
51 // FIXME: t2ADDS variants.
52 { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0 },
53 { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0 },
54 { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 0 },
55 { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 0 },
56 { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 0 },
57 { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 0 },
58 { ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 1,0, 0 },
59 { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 1,0, 0 },
60 { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 1,0, 0 },
61 { ARM::t2CMPzri,ARM::tCMPzi8, 0, 8, 0, 1, 0, 1,0, 0 },
62 { ARM::t2CMPzrr,ARM::tCMPzhir,0, 0, 0, 0, 0, 1,0, 0 },
63 { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 0 },
64 { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 0 },
65 { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 0 },
66 { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 0 },
67 { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 0 },
68 { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 },
69 // FIXME: Do we need the 16-bit 'S' variant?
71 { ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0 },
72 { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 0 },
73 { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0 },
74 { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 0 },
75 { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0 },
76 { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0 },
77 { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0 },
78 { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 0 },
79 // FIXME: T2RSBri immediate must be zero. Also need entry for T2RSBS
80 //{ ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0 },
81 { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0 },
82 { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0 },
83 { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0 },
84 { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0 },
85 { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 1,0, 0 },
86 { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0 },
87 { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0 },
89 // FIXME: Clean this up after splitting each Thumb load / store opcode
90 // into multiple ones.
91 { ARM::t2LDRi12,ARM::tLDR, 0, 5, 0, 1, 0, 0,0, 1 },
92 { ARM::t2LDRs, ARM::tLDR, 0, 0, 0, 1, 0, 0,0, 1 },
93 { ARM::t2LDRBi12,ARM::tLDRB, 0, 5, 0, 1, 0, 0,0, 1 },
94 { ARM::t2LDRBs, ARM::tLDRB, 0, 0, 0, 1, 0, 0,0, 1 },
95 { ARM::t2LDRHi12,ARM::tLDRH, 0, 5, 0, 1, 0, 0,0, 1 },
96 { ARM::t2LDRHs, ARM::tLDRH, 0, 0, 0, 1, 0, 0,0, 1 },
97 { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 1 },
98 { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 1 },
99 { ARM::t2STRi12,ARM::tSTR, 0, 5, 0, 1, 0, 0,0, 1 },
100 { ARM::t2STRs, ARM::tSTR, 0, 0, 0, 1, 0, 0,0, 1 },
101 { ARM::t2STRBi12,ARM::tSTRB, 0, 5, 0, 1, 0, 0,0, 1 },
102 { ARM::t2STRBs, ARM::tSTRB, 0, 0, 0, 1, 0, 0,0, 1 },
103 { ARM::t2STRHi12,ARM::tSTRH, 0, 5, 0, 1, 0, 0,0, 1 },
104 { ARM::t2STRHs, ARM::tSTRH, 0, 0, 0, 1, 0, 0,0, 1 }
// Machine-function pass that rewrites 32-bit Thumb2 instructions into the
// 16-bit opcodes listed in ReduceTable when the per-entry constraints hold.
107 class VISIBILITY_HIDDEN Thumb2SizeReduce : public MachineFunctionPass {
// Target instruction info; assigned at the top of runOnMachineFunction and
// used to look up the descriptors of the narrow opcodes.
112 const TargetInstrInfo *TII;
// Pass entry point: runs ReduceMBB over every basic block of MF.
114 virtual bool runOnMachineFunction(MachineFunction &MF);
116 virtual const char *getPassName() const {
117 return "Thumb2 instruction size reduction pass";
121 /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
122 DenseMap<unsigned, unsigned> ReduceOpcodeMap;
// ReduceLoadStore - Try to replace a wide load / store with the narrow
// opcode Entry.NarrowOpc1.
124 bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
125 const ReduceEntry &Entry);
// ReduceSpecial - Handle entries flagged Special; loads and stores are
// forwarded to ReduceLoadStore.
127 bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
128 const ReduceEntry &Entry, bool LiveCPSR);
130 /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
132 bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
133 const ReduceEntry &Entry,
136 /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
137 /// non-two-address instruction.
138 bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
139 const ReduceEntry &Entry,
142 /// ReduceMBB - Reduce width of instructions in the specified basic block.
143 bool ReduceMBB(MachineBasicBlock &MBB);
// Pass identification object; its address is handed to the
// MachineFunctionPass base class by the constructor.
145 char Thumb2SizeReduce::ID = 0;
// Constructor: builds ReduceOpcodeMap so each wide opcode maps to the index
// of its row in ReduceTable.
148 Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) {
149 for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
150 unsigned FromOpc = ReduceTable[i].WideOpc;
// insert() reports .second == false when the key already exists, i.e. the
// table lists the same wide opcode twice.
151 if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
152 assert(false && "Duplicated entries?");
// VerifyPredAndCC - Check that the predicate / CPSR-setting behaviour of MI
// is compatible with the narrow opcode chosen for Entry. is2Addr selects
// which field is consulted: PredCC2 for the two-address form, PredCC1
// otherwise. Pred is MI's condition code and LiveCPSR says whether CPSR is
// live at this point.
// NOTE(review): HasCC and CCDead are taken by non-const reference, so they
// are presumably updated here; the lines that write them and the return
// statements are not visible in this listing.
156 static bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
157 bool is2Addr, ARMCC::CondCodes Pred,
158 bool LiveCPSR, bool &HasCC, bool &CCDead) {
// A PredCC value of 0 ties CPSR-setting to predication, as the comments in
// the two branches below spell out.
159 if ((is2Addr && Entry.PredCC2 == 0) ||
160 (!is2Addr && Entry.PredCC1 == 0)) {
161 if (Pred == ARMCC::AL) {
162 // Not predicated, must set CPSR.
164 // Original instruction was not setting CPSR, but CPSR is not
165 // currently live anyway. It's ok to set it. The CPSR def is
175 // Predicated, must not set CPSR.
180 // 16-bit instruction does not set CPSR.
// ReduceLoadStore - Try to replace the wide load / store MI with the narrow
// opcode Entry.NarrowOpc1, inserting the new instruction before MI in MBB.
// NOTE(review): the switch cases that set Scale, HasImmOffset and HasShift,
// the guards around the offset handling, and the return statements are not
// visible in this listing.
189 Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
190 const ReduceEntry &Entry) {
192 bool HasImmOffset = false;
193 bool HasShift = false;
194 switch (Entry.WideOpc) {
196 llvm_unreachable("Unexpected Thumb2 load / store opcode!");
223 unsigned OffsetReg = 0;
224 bool OffsetKill = false;
// Register-offset form: operand 2 is the offset register, operand 3 the
// shift amount.
226 OffsetReg = MI->getOperand(2).getReg();
227 OffsetKill = MI->getOperand(2).isKill();
228 if (MI->getOperand(3).getImm())
229 // Thumb1 addressing mode doesn't support shift.
233 unsigned OffsetImm = 0;
// Immediate-offset form: operand 2 is the byte offset. It must be a multiple
// of Scale (the mask test assumes Scale is a power of two) and must not
// exceed the largest value an Imm1Limit-bit field can hold once scaled.
235 OffsetImm = MI->getOperand(2).getImm();
236 unsigned MaxOffset = ((1 << Entry.Imm1Limit) - 1) * Scale;
237 if ((OffsetImm & (Scale-1)) || OffsetImm > MaxOffset)
238 // Make sure the immediate field fits.
242 // Add the 16-bit load / store instruction.
243 // FIXME: Thumb1 addressing mode encodes both immediate and register offset.
244 DebugLoc dl = MI->getDebugLoc();
245 MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Entry.NarrowOpc1))
246 .addOperand(MI->getOperand(0))
247 .addOperand(MI->getOperand(1));
248 if (Entry.NarrowOpc1 != ARM::tLDRSB && Entry.NarrowOpc1 != ARM::tLDRSH) {
249 // tLDRSB and tLDRSH do not have an immediate offset field. On the other
250 // hand, they must have an offset register.
251 // FIXME: Remove this special case.
// The narrow encoding stores the offset in units of Scale.
252 MIB.addImm(OffsetImm/Scale);
255 assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
257 MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
259 // Transfer the rest of operands.
// The shifted-register form has one more address operand to skip.
260 unsigned OpNum = HasShift ? 4 : 3;
261 for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
262 MIB.addOperand(MI->getOperand(OpNum));
264 DOUT << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB;
// VerifyLowRegs - Walk the operands described by TID and test each register
// with isARMLowRegister. Register 0 and CPSR are singled out by the check
// below.
// NOTE(review): the non-register filter, the statements guarded by the two
// ifs and the return statements are not visible here; presumably the result
// is false as soon as a non-low register is found.
271 static bool VerifyLowRegs(MachineInstr *MI, const TargetInstrDesc &TID) {
272 for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
273 const MachineOperand &MO = MI->getOperand(i);
276 unsigned Reg = MO.getReg();
277 if (Reg == 0 || Reg == ARM::CPSR)
279 if (!isARMLowRegister(Reg))
// ReduceSpecial - Handle table entries flagged Special. The low-register
// requirement (LowRegs1) is checked first, then loads and stores are handed
// to ReduceLoadStore.
// NOTE(review): what happens when the low-register check fails, and for
// instructions that neither load nor store, is not visible in this listing.
286 Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
287 const ReduceEntry &Entry,
289 const TargetInstrDesc &TID = MI->getDesc();
290 if (Entry.LowRegs1 && !VerifyLowRegs(MI, TID))
293 if (TID.mayLoad() || TID.mayStore())
294 return ReduceLoadStore(MBB, MI, Entry);
// ReduceTo2Addr - Try to replace MI with the 16-bit two-address opcode
// Entry.NarrowOpc2, inserting the new instruction before MI in MBB.
// NOTE(review): several lines are missing from this listing (the use of
// Reg1, the rejection returns, the HasCC / CCDead declarations, the guard
// around AddDefaultT1CC and the final return); the comments below cover only
// what is visible.
299 Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
300 const ReduceEntry &Entry,
302 const TargetInstrDesc &TID = MI->getDesc();
303 unsigned Reg0 = MI->getOperand(0).getReg();
304 unsigned Reg1 = MI->getOperand(1).getReg();
307 if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
// A non-zero Imm2Limit means operand 2 is an immediate that must fit in that
// many bits; otherwise operand 2 is read as a register.
309 if (Entry.Imm2Limit) {
310 unsigned Imm = MI->getOperand(2).getImm();
311 unsigned Limit = (1 << Entry.Imm2Limit) - 1;
315 unsigned Reg2 = MI->getOperand(2).getReg();
316 if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
320 // Check if it's possible / necessary to transfer the predicate.
321 const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc2);
322 unsigned PredReg = 0;
323 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
324 bool SkipPred = false;
325 if (Pred != ARMCC::AL) {
326 if (!NewTID.isPredicable())
327 // Can't transfer predicate, fail.
// SkipPred: the narrow opcode has no predicate operands, so MI's predicate
// operands are dropped when operands are copied below.
330 SkipPred = !NewTID.isPredicable();
// For opcodes with an optional def, the last operand described by TID tells
// whether MI sets CPSR and whether that def is dead.
335 if (TID.hasOptionalDef()) {
336 unsigned NumOps = TID.getNumOperands();
337 HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
338 if (HasCC && MI->getOperand(NumOps-1).isDead())
341 if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
344 // Add the 16-bit instruction.
345 DebugLoc dl = MI->getDebugLoc();
346 MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Entry.NarrowOpc2));
347 MIB.addOperand(MI->getOperand(0));
349 AddDefaultT1CC(MIB, CCDead);
351 // Transfer the rest of operands.
352 unsigned NumOps = TID.getNumOperands();
353 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
354 if (i < NumOps && TID.OpInfo[i].isOptionalDef())
// NOTE(review): unlike the check above, this OpInfo[i] access is not guarded
// by i < NumOps — confirm it is valid for operands past the descriptor count.
356 if (SkipPred && TID.OpInfo[i].isPredicate())
358 MIB.addOperand(MI->getOperand(i));
361 DOUT << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB;
// ReduceToNarrow - Try to replace MI with the 16-bit non-two-address opcode
// Entry.NarrowOpc1, inserting the new instruction before MI in MBB.
// NOTE(review): several lines are missing from this listing (the guard
// around the Limit assignment, the rejection returns, the HasCC / CCDead
// declarations, the guard around AddDefaultT1CC and the final return); the
// comments below cover only what is visible.
369 Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
370 const ReduceEntry &Entry,
// Limit starts at ~0U; when reassigned it becomes the largest value that
// fits in Imm1Limit bits.
372 unsigned Limit = ~0U;
374 Limit = (1 << Entry.Imm1Limit) - 1;
// Check every operand described by TID: registers against LowRegs1,
// immediates against Limit.
376 const TargetInstrDesc &TID = MI->getDesc();
377 for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
378 if (TID.OpInfo[i].isPredicate())
380 const MachineOperand &MO = MI->getOperand(i);
382 unsigned Reg = MO.getReg();
383 if (!Reg || Reg == ARM::CPSR)
385 if (Entry.LowRegs1 && !isARMLowRegister(Reg))
387 } else if (MO.isImm()) {
388 if (MO.getImm() > Limit)
393 // Check if it's possible / necessary to transfer the predicate.
394 const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc1);
395 unsigned PredReg = 0;
396 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
397 bool SkipPred = false;
398 if (Pred != ARMCC::AL) {
399 if (!NewTID.isPredicable())
400 // Can't transfer predicate, fail.
// SkipPred: the narrow opcode has no predicate operands, so MI's predicate
// operands are dropped when operands are copied below.
403 SkipPred = !NewTID.isPredicable();
// For opcodes with an optional def, the last operand described by TID tells
// whether MI sets CPSR and whether that def is dead.
408 if (TID.hasOptionalDef()) {
409 unsigned NumOps = TID.getNumOperands();
410 HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
411 if (HasCC && MI->getOperand(NumOps-1).isDead())
414 if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
417 // Add the 16-bit instruction.
418 DebugLoc dl = MI->getDebugLoc();
419 MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Entry.NarrowOpc1));
420 MIB.addOperand(MI->getOperand(0));
422 AddDefaultT1CC(MIB, CCDead);
424 // Transfer the rest of operands.
425 unsigned NumOps = TID.getNumOperands();
426 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
427 if (i < NumOps && TID.OpInfo[i].isOptionalDef())
// NOTE(review): unlike the check above, this OpInfo[i] access is not guarded
// by i < NumOps — confirm it is valid for operands past the descriptor count.
429 if (SkipPred && TID.OpInfo[i].isPredicate())
431 MIB.addOperand(MI->getOperand(i));
435 DOUT << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB;
// UpdateCPSRLiveness - Scan MI's operands for CPSR and return the CPSR
// liveness to carry past MI, given the liveness LiveCPSR before it.
// Non-register, undef and non-CPSR operands are filtered out by the two
// checks below.
// NOTE(review): the declaration and updates of HasDef, and the handling of
// CPSR uses / kills around the assert, are not visible in this listing.
442 static bool UpdateCPSRLiveness(MachineInstr &MI, bool LiveCPSR) {
444 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
445 const MachineOperand &MO = MI.getOperand(i);
446 if (!MO.isReg() || MO.isUndef())
448 if (MO.getReg() != ARM::CPSR)
456 assert(LiveCPSR && "CPSR liveness tracking is wrong!");
// CPSR is live afterwards if MI defined it or it was still live.
463 return HasDef || LiveCPSR;
// ReduceMBB - Visit each instruction of MBB, look its opcode up in
// ReduceOpcodeMap and try the applicable reducers in order: the special
// handler, then the two-address form, then the plain narrow form. CPSR
// liveness is tracked along the way and passed to the reducers.
// NOTE(review): the statements that advance NextMII, erase the replaced
// instruction, set Modified and return are not visible in this listing.
466 bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
467 bool Modified = false;
469 bool LiveCPSR = false;
470 // Yes, CPSR could be livein.
471 for (MachineBasicBlock::const_livein_iterator I = MBB.livein_begin(),
472 E = MBB.livein_end(); I != E; ++I) {
473 if (*I == ARM::CPSR) {
479 MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
480 MachineBasicBlock::iterator NextMII;
481 for (; MII != E; MII = NextMII) {
484 MachineInstr *MI = &*MII;
485 unsigned Opcode = MI->getOpcode();
486 DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
487 if (OPI != ReduceOpcodeMap.end()) {
488 const ReduceEntry &Entry = ReduceTable[OPI->second];
489 // Ignore "special" cases for now.
491 if (ReduceSpecial(MBB, MI, Entry, LiveCPSR)) {
// The new instruction was inserted just before the original, i.e. it is the
// one preceding NextMII.
493 MachineBasicBlock::iterator I = prior(NextMII);
499 // Try to transform to a 16-bit two-address instruction.
500 if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) {
502 MachineBasicBlock::iterator I = prior(NextMII);
507 // Try to transform to a 16-bit non-two-address instruction.
508 if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR))
513 LiveCPSR = UpdateCPSRLiveness(*MI, LiveCPSR);
// Debugging aid: the -t2-reduce-limit option caps the number of reductions
// counted so far (-1 disables the check).
515 if (ReduceLimit != -1 && ((int)(NumNarrows + Num2Addrs) > ReduceLimit))
// runOnMachineFunction - Pass entry point. Caches the target's instruction
// info in TII, then runs ReduceMBB on every basic block of MF and ORs the
// per-block results into Modified.
// NOTE(review): the return statement is not visible here; presumably it
// returns Modified.
522 bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
523 const TargetMachine &TM = MF.getTarget();
524 TII = TM.getInstrInfo();
526 bool Modified = false;
527 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
528 Modified |= ReduceMBB(*I);
532 /// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
// Factory function: allocates a new Thumb2SizeReduce and returns it through
// the FunctionPass base-class pointer.
534 FunctionPass *llvm::createThumb2SizeReductionPass() {
535 return new Thumb2SizeReduce();