lib/Target/ARM/Thumb2SizeReduction.cpp

   1 //===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #define DEBUG_TYPE "t2-reduce-size"
  11 #include "ARM.h"
  12 #include "ARMBaseRegisterInfo.h"
  13 #include "ARMBaseInstrInfo.h"
  14 #include "Thumb2InstrInfo.h"
  15 #include "llvm/CodeGen/MachineInstr.h"
  16 #include "llvm/CodeGen/MachineInstrBuilder.h"
  17 #include "llvm/CodeGen/MachineFunctionPass.h"
  18 #include "llvm/Support/CommandLine.h"
  19 #include "llvm/Support/Compiler.h"
  20 #include "llvm/Support/Debug.h"
  21 #include "llvm/ADT/DenseMap.h"
  22 #include "llvm/ADT/Statistic.h"
  23 using namespace llvm;
  24
  25 STATISTIC(NumNarrows,  "Number of 32-bit instrs reduced to 16-bit ones");
  26 STATISTIC(Num2Addrs,   "Number of 32-bit instrs reduced to 2addr 16-bit ones");
  27
  28 static cl::opt<int> ReduceLimit("t2-reduce-limit", cl::init(-1), cl::Hidden);
  29
  30 namespace {
  31   /// ReduceTable - A static table with information on mapping from wide
  32   /// opcodes to narrow
  33   struct ReduceEntry {
  34     unsigned WideOpc;      // Wide opcode
  35     unsigned NarrowOpc1;   // Narrow opcode to transform to
  36     unsigned NarrowOpc2;   // Narrow opcode when it's two-address
  37     uint8_t  Imm1Limit;    // Limit of immediate field (bits)
  38     uint8_t  Imm2Limit;    // Limit of immediate field when it's two-address
  39     unsigned LowRegs1 : 1; // Only possible if low-registers are used
  40     unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
  41     unsigned PredCC1  : 1; // 0 - If predicated, cc is on and vice versa.
  42                            // 1 - No cc field.
  43     unsigned PredCC2  : 1;
  44     unsigned Special  : 1; // Needs to be dealt with specially
  45   };
  46
  47   static const ReduceEntry ReduceTable[] = {
  48     // Wide,        Narrow1,      Narrow2,     imm1,imm2,  lo1, lo2, P/C, S
  49     { ARM::t2ADCrr, ARM::tADC,    0,             0,   0,    1,   0,  0,0, 0 },
  50     // FIXME: t2ADDS variants.
  51     { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  0,0, 0 },
  52     { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,    1,   0,  0,1, 0 },
  53     { ARM::t2ANDrr, 0,            ARM::tAND,     0,   0,    0,   1,  0,0, 0 },
  54     { ARM::t2ASRri, ARM::tASRri,  0,             5,   0,    1,   0,  0,0, 0 },
  55     { ARM::t2ASRrr, 0,            ARM::tASRrr,   0,   0,    0,   1,  0,0, 0 },
  56     { ARM::t2BICrr, 0,            ARM::tBIC,     0,   0,    0,   1,  0,0, 0 },
  57     { ARM::t2CMNrr, ARM::tCMN,    0,             0,   0,    1,   0,  1,0, 0 },
  58     { ARM::t2CMPri, ARM::tCMPi8,  0,             8,   0,    1,   0,  1,0, 0 },
  59     { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,    0,   0,  1,0, 0 },
  60     { ARM::t2CMPzri,ARM::tCMPzi8, 0,             8,   0,    1,   0,  1,0, 0 },
  61     { ARM::t2CMPzrr,ARM::tCMPzhir,0,             0,   0,    0,   0,  1,0, 0 },
  62     { ARM::t2EORrr, 0,            ARM::tEOR,     0,   0,    0,   1,  0,0, 0 },
  63     { ARM::t2LSLri, ARM::tLSLri,  0,             5,   0,    1,   0,  0,0, 0 },
  64     { ARM::t2LSLrr, 0,            ARM::tLSLrr,   0,   0,    0,   1,  0,0, 0 },
  65     { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,    1,   0,  0,0, 0 },
  66     { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,    0,   1,  0,0, 0 },
  67     { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0 },
  68     // FIXME: Do we need the 16-bit 'S' variant?
  69     // FIXME: t2MOVcc
  70     { ARM::t2MOVr,ARM::tMOVgpr2gpr,0,            0,   0,    0,   0,  1,0, 0 },
  71     { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,    0,   1,  0,0, 0 },
  72     { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,    1,   0,  0,0, 0 },
  73     { ARM::t2ORRrr, 0,            ARM::tORR,     0,   0,    0,   1,  0,0, 0 },
  74     { ARM::t2REV,   ARM::tREV,    0,             0,   0,    1,   0,  1,0, 0 },
  75     { ARM::t2REV16, ARM::tREV16,  0,             0,   0,    1,   0,  1,0, 0 },
  76     { ARM::t2REVSH, ARM::tREVSH,  0,             0,   0,    1,   0,  1,0, 0 },
  77     { ARM::t2RORrr, 0,            ARM::tROR,     0,   0,    0,   1,  0,0, 0 },
  78     // FIXME: T2RSBri immediate must be zero. Also need entry for T2RSBS
  79     //{ ARM::t2RSBri, ARM::tRSB,    0,             0,   0,    1,   0,  0,0, 0 },
  80     { ARM::t2SUBri, ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  0,0, 0 },
  81     { ARM::t2SUBrr, ARM::tSUBrr,  0,             0,   0,    1,   0,  0,0, 0 },
  82     { ARM::t2SXTBr, ARM::tSXTB,   0,             0,   0,    1,   0,  1,0, 0 },
  83     { ARM::t2SXTHr, ARM::tSXTH,   0,             0,   0,    1,   0,  1,0, 0 },
  84     { ARM::t2TSTrr, ARM::tTST,    0,             0,   0,    1,   0,  1,0, 0 },
  85     { ARM::t2UXTBr, ARM::tUXTB,   0,             0,   0,    1,   0,  1,0, 0 },
  86     { ARM::t2UXTHr, ARM::tUXTH,   0,             0,   0,    1,   0,  1,0, 0 }
  87   };
  88
  89   class VISIBILITY_HIDDEN Thumb2SizeReduce : public MachineFunctionPass {
  90   public:
  91     static char ID;
  92     Thumb2SizeReduce();
  93
  94     const TargetInstrInfo *TII;
  95
  96     virtual bool runOnMachineFunction(MachineFunction &MF);
  97
  98     virtual const char *getPassName() const {
  99       return "Thumb2 instruction size reduction pass";
 100     }
 101
 102   private:
 103     /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
 104     DenseMap<unsigned, unsigned> ReduceOpcodeMap;
 105
 106     /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
 107     /// instruction.
 108     bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
 109                        const ReduceEntry &Entry,
 110                        bool LiveCPSR);
 111
 112     /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
 113     /// non-two-address instruction.
 114     bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
 115                         const ReduceEntry &Entry,
 116                         bool LiveCPSR);
 117
 118     /// ReduceMBB - Reduce width of instructions in the specified basic block.
 119     bool ReduceMBB(MachineBasicBlock &MBB);
 120   };
 121   char Thumb2SizeReduce::ID = 0;
 122 }
 123
 124 Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) {
 125   for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
 126     unsigned FromOpc = ReduceTable[i].WideOpc;
 127     if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
 128       assert(false && "Duplicated entries?");
 129   }
 130 }
 131
 132 static bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
 133                             bool is2Addr, ARMCC::CondCodes Pred,
 134                             bool LiveCPSR, bool &HasCC, bool &CCDead) {
 135   if ((is2Addr  && Entry.PredCC2 == 0) ||
 136       (!is2Addr && Entry.PredCC1 == 0)) {
 137     if (Pred == ARMCC::AL) {
 138       // Not predicated, must set CPSR.
 139       if (!HasCC) {
 140         // Original instruction was not setting CPSR, but CPSR is not
 141         // currently live anyway. It's ok to set it. The CPSR def is
 142         // dead though.
 143         if (!LiveCPSR) {
 144           HasCC = true;
 145           CCDead = true;
 146           return true;
 147         }
 148         return false;
 149       }
 150     } else {
 151       // Predicated, must not set CPSR.
 152       if (HasCC)
 153         return false;
 154     }
 155   } else {
 156     // 16-bit instruction does not set CPSR.
 157     if (HasCC)
 158       return false;
 159   }
 160
 161   return true;
 162 }
 163
 164 bool
 165 Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
 166                                 const ReduceEntry &Entry,
 167                                 bool LiveCPSR) {
 168   const TargetInstrDesc &TID = MI->getDesc();
 169   unsigned Reg0 = MI->getOperand(0).getReg();
 170   unsigned Reg1 = MI->getOperand(1).getReg();
 171   if (Reg0 != Reg1)
 172     return false;
 173   if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
 174     return false;
 175   if (Entry.Imm2Limit) {
 176     unsigned Imm = MI->getOperand(2).getImm();
 177     unsigned Limit = (1 << Entry.Imm2Limit) - 1;
 178     if (Imm > Limit)
 179       return false;
 180   } else {
 181     unsigned Reg2 = MI->getOperand(2).getReg();
 182     if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
 183       return false;
 184   }
 185
 186   // Check if it's possible / necessary to transfer the predicate.
 187   const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc2);
 188   unsigned PredReg = 0;
 189   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
 190   bool SkipPred = false;
 191   if (Pred != ARMCC::AL) {
 192     if (!NewTID.isPredicable())
 193       // Can't transfer predicate, fail.
 194       return false;
 195   } else {
 196     SkipPred = !NewTID.isPredicable();
 197   }
 198
 199   bool HasCC = false;
 200   bool CCDead = false;
 201   if (TID.hasOptionalDef()) {
 202     unsigned NumOps = TID.getNumOperands();
 203     HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
 204     if (HasCC && MI->getOperand(NumOps-1).isDead())
 205       CCDead = true;
 206   }
 207   if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
 208     return false;
 209
 210   // Add the 16-bit instruction.
 211   DebugLoc dl = MI->getDebugLoc();
 212   MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Entry.NarrowOpc2));
 213   MIB.addOperand(MI->getOperand(0));
 214   if (HasCC)
 215     AddDefaultT1CC(MIB, CCDead);
 216
 217   // Transfer the rest of operands.
 218   unsigned NumOps = TID.getNumOperands();
 219   for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
 220     if (i < NumOps && TID.OpInfo[i].isOptionalDef())
 221       continue;
 222     if (SkipPred && TID.OpInfo[i].isPredicate())
 223       continue;
 224     MIB.addOperand(MI->getOperand(i));
 225   }
 226
 227   DOUT << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB;
 228
 229   MBB.erase(MI);
 230   ++Num2Addrs;
 231   return true;
 232 }
 233
 234 bool
 235 Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
 236                                  const ReduceEntry &Entry,
 237                                  bool LiveCPSR) {
 238   unsigned Limit = ~0U;
 239   if (Entry.Imm1Limit)
 240     Limit = (1 << Entry.Imm1Limit) - 1;
 241
 242   const TargetInstrDesc &TID = MI->getDesc();
 243   for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
 244     if (TID.OpInfo[i].isPredicate())
 245       continue;
 246     const MachineOperand &MO = MI->getOperand(i);
 247     if (MO.isReg()) {
 248       unsigned Reg = MO.getReg();
 249       if (!Reg || Reg == ARM::CPSR)
 250         continue;
 251       if (Entry.LowRegs1 && !isARMLowRegister(Reg))
 252         return false;
 253     } else if (MO.isImm()) {
 254       if (MO.getImm() > Limit)
 255         return false;
 256     }
 257   }
 258
 259   // Check if it's possible / necessary to transfer the predicate.
 260   const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc1);
 261   unsigned PredReg = 0;
 262   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
 263   bool SkipPred = false;
 264   if (Pred != ARMCC::AL) {
 265     if (!NewTID.isPredicable())
 266       // Can't transfer predicate, fail.
 267       return false;
 268   } else {
 269     SkipPred = !NewTID.isPredicable();
 270   }
 271
 272   bool HasCC = false;
 273   bool CCDead = false;
 274   if (TID.hasOptionalDef()) {
 275     unsigned NumOps = TID.getNumOperands();
 276     HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
 277     if (HasCC && MI->getOperand(NumOps-1).isDead())
 278       CCDead = true;
 279   }
 280   if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
 281     return false;
 282
 283   // Add the 16-bit instruction.
 284   DebugLoc dl = MI->getDebugLoc();
 285   MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Entry.NarrowOpc1));
 286   MIB.addOperand(MI->getOperand(0));
 287   if (HasCC)
 288     AddDefaultT1CC(MIB, CCDead);
 289
 290   // Transfer the rest of operands.
 291   unsigned NumOps = TID.getNumOperands();
 292   for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
 293     if (i < NumOps && TID.OpInfo[i].isOptionalDef())
 294       continue;
 295     if (SkipPred && TID.OpInfo[i].isPredicate())
 296       continue;
 297     MIB.addOperand(MI->getOperand(i));
 298   }
 299
 300
 301   DOUT << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB;
 302
 303   MBB.erase(MI);
 304   ++NumNarrows;
 305   return true;
 306 }
 307
 308 static bool UpdateCPSRLiveness(MachineInstr &MI, bool LiveCPSR) {
 309   bool HasDef = false;
 310   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
 311     const MachineOperand &MO = MI.getOperand(i);
 312     if (!MO.isReg() || MO.isUndef())
 313       continue;
 314     if (MO.getReg() != ARM::CPSR)
 315       continue;
 316     if (MO.isDef()) {
 317       if (!MO.isDead())
 318         HasDef = true;
 319       continue;
 320     }
 321
 322     assert(LiveCPSR && "CPSR liveness tracking is wrong!");
 323     if (MO.isKill()) {
 324       LiveCPSR = false;
 325       break;
 326     }
 327   }
 328
 329   return HasDef || LiveCPSR;
 330 }
 331
 332 bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
 333   bool Modified = false;
 334
 335   bool LiveCPSR = false;
 336   // Yes, CPSR could be livein.
 337   for (MachineBasicBlock::const_livein_iterator I = MBB.livein_begin(),
 338          E = MBB.livein_end(); I != E; ++I) {
 339     if (*I == ARM::CPSR) {
 340       LiveCPSR = true;
 341       break;
 342     }
 343   }
 344
 345   MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
 346   MachineBasicBlock::iterator NextMII;
 347   for (; MII != E; MII = NextMII) {
 348     NextMII = next(MII);
 349
 350     MachineInstr *MI = &*MII;
 351     unsigned Opcode = MI->getOpcode();
 352     DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
 353     if (OPI != ReduceOpcodeMap.end()) {
 354       const ReduceEntry &Entry = ReduceTable[OPI->second];
 355       // Ignore "special" cases for now.
 356       if (Entry.Special)
 357         goto ProcessNext;
 358
 359       // Try to transform to a 16-bit two-address instruction.
 360       if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) {
 361         Modified = true;
 362         MachineBasicBlock::iterator I = prior(NextMII);
 363         MI = &*I;
 364         goto ProcessNext;
 365       }
 366
 367       // Try to transform ro a 16-bit non-two-address instruction.
 368       if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR))
 369         Modified = true;
 370     }
 371
 372   ProcessNext:
 373     LiveCPSR = UpdateCPSRLiveness(*MI, LiveCPSR);
 374
 375     if (ReduceLimit != -1 && ((int)(NumNarrows + Num2Addrs) > ReduceLimit))
 376       break;
 377   }
 378
 379   return Modified;
 380 }
 381
 382 bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
 383   const TargetMachine &TM = MF.getTarget();
 384   TII = TM.getInstrInfo();
 385
 386   bool Modified = false;
 387   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
 388     Modified |= ReduceMBB(*I);
 389   return Modified;
 390 }
 391
 392 /// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
 393 /// reduction pass.
 394 FunctionPass *llvm::createThumb2SizeReductionPass() {
 395   return new Thumb2SizeReduce();
 396 }