lib/Target/ARM/ARMFastISel.cpp

   1 //===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the ARM-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // ARMGenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "ARM.h"
  17 #include "ARMBaseInstrInfo.h"
  18 #include "ARMCallingConv.h"
  19 #include "ARMRegisterInfo.h"
  20 #include "ARMTargetMachine.h"
  21 #include "ARMSubtarget.h"
  22 #include "llvm/CallingConv.h"
  23 #include "llvm/DerivedTypes.h"
  24 #include "llvm/GlobalVariable.h"
  25 #include "llvm/Instructions.h"
  26 #include "llvm/IntrinsicInst.h"
  27 #include "llvm/Module.h"
  28 #include "llvm/CodeGen/Analysis.h"
  29 #include "llvm/CodeGen/FastISel.h"
  30 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  31 #include "llvm/CodeGen/MachineInstrBuilder.h"
  32 #include "llvm/CodeGen/MachineModuleInfo.h"
  33 #include "llvm/CodeGen/MachineConstantPool.h"
  34 #include "llvm/CodeGen/MachineFrameInfo.h"
  35 #include "llvm/CodeGen/MachineRegisterInfo.h"
  36 #include "llvm/Support/CallSite.h"
  37 #include "llvm/Support/CommandLine.h"
  38 #include "llvm/Support/ErrorHandling.h"
  39 #include "llvm/Support/GetElementPtrTypeIterator.h"
  40 #include "llvm/Target/TargetData.h"
  41 #include "llvm/Target/TargetInstrInfo.h"
  42 #include "llvm/Target/TargetLowering.h"
  43 #include "llvm/Target/TargetMachine.h"
  44 #include "llvm/Target/TargetOptions.h"
  45 using namespace llvm;
  46
  47 static cl::opt<bool>
  48 EnableARMFastISel("arm-fast-isel",
  49                   cl::desc("Turn on experimental ARM fast-isel support"),
  50                   cl::init(false), cl::Hidden);
  51
  52 namespace {
  53
  54 class ARMFastISel : public FastISel {
  55
  56   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  57   /// make the right decision when generating code for different targets.
  58   const ARMSubtarget *Subtarget;
  59   const TargetMachine &TM;
  60   const TargetInstrInfo &TII;
  61   const TargetLowering &TLI;
  62   const ARMFunctionInfo *AFI;
  63
  64   // Convenience variables to avoid some queries.
  65   bool isThumb;
  66   LLVMContext *Context;
  67
  68   public:
  69     explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
  70     : FastISel(funcInfo),
  71       TM(funcInfo.MF->getTarget()),
  72       TII(*TM.getInstrInfo()),
  73       TLI(*TM.getTargetLowering()) {
  74       Subtarget = &TM.getSubtarget<ARMSubtarget>();
  75       AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
  76       isThumb = AFI->isThumbFunction();
  77       Context = &funcInfo.Fn->getContext();
  78     }
  79
  80     // Code from FastISel.cpp.
  81     virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
  82                                    const TargetRegisterClass *RC);
  83     virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
  84                                     const TargetRegisterClass *RC,
  85                                     unsigned Op0, bool Op0IsKill);
  86     virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
  87                                      const TargetRegisterClass *RC,
  88                                      unsigned Op0, bool Op0IsKill,
  89                                      unsigned Op1, bool Op1IsKill);
  90     virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
  91                                      const TargetRegisterClass *RC,
  92                                      unsigned Op0, bool Op0IsKill,
  93                                      uint64_t Imm);
  94     virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
  95                                      const TargetRegisterClass *RC,
  96                                      unsigned Op0, bool Op0IsKill,
  97                                      const ConstantFP *FPImm);
  98     virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
  99                                     const TargetRegisterClass *RC,
 100                                     uint64_t Imm);
 101     virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
 102                                       const TargetRegisterClass *RC,
 103                                       unsigned Op0, bool Op0IsKill,
 104                                       unsigned Op1, bool Op1IsKill,
 105                                       uint64_t Imm);
 106     virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
 107                                                 unsigned Op0, bool Op0IsKill,
 108                                                 uint32_t Idx);
 109
 110     // Backend specific FastISel code.
 111     virtual bool TargetSelectInstruction(const Instruction *I);
 112     virtual unsigned TargetMaterializeConstant(const Constant *C);
 113     virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
 114
 115   #include "ARMGenFastISel.inc"
 116
 117     // Instruction selection routines.
 118   private:
 119     virtual bool SelectLoad(const Instruction *I);
 120     virtual bool SelectStore(const Instruction *I);
 121     virtual bool SelectBranch(const Instruction *I);
 122     virtual bool SelectCmp(const Instruction *I);
 123     virtual bool SelectFPExt(const Instruction *I);
 124     virtual bool SelectFPTrunc(const Instruction *I);
 125     virtual bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
 126     virtual bool SelectSIToFP(const Instruction *I);
 127     virtual bool SelectFPToSI(const Instruction *I);
 128     virtual bool SelectSDiv(const Instruction *I);
 129     virtual bool SelectCall(const Instruction *I);
 130
 131     // Utility routines.
 132   private:
 133     bool isTypeLegal(const Type *Ty, EVT &VT);
 134     bool isLoadTypeLegal(const Type *Ty, EVT &VT);
 135     bool ARMEmitLoad(EVT VT, unsigned &ResultReg, unsigned Reg, int Offset);
 136     bool ARMEmitStore(EVT VT, unsigned SrcReg, unsigned Reg, int Offset);
 137     bool ARMLoadAlloca(const Instruction *I, EVT VT);
 138     bool ARMStoreAlloca(const Instruction *I, unsigned SrcReg, EVT VT);
 139     bool ARMComputeRegOffset(const Value *Obj, unsigned &Reg, int &Offset);
 140     unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
 141     unsigned ARMMaterializeInt(const Constant *C, EVT VT);
 142     unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
 143     unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
 144
 145     // Call handling routines.
 146   private:
 147     CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
 148     bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
 149                          SmallVectorImpl<unsigned> &ArgRegs,
 150                          SmallVectorImpl<EVT> &ArgVTs,
 151                          SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
 152                          SmallVectorImpl<unsigned> &RegArgs,
 153                          CallingConv::ID CC,
 154                          unsigned &NumBytes);
 155     bool FinishCall(EVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
 156                     const Instruction *I, CallingConv::ID CC,
 157                     unsigned &NumBytes);
 158     bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
 159
 160     // OptionalDef handling routines.
 161   private:
 162     bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
 163     const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
 164 };
 165
 166 } // end anonymous namespace
 167
 168 #include "ARMGenCallingConv.inc"
 169
 170 // DefinesOptionalPredicate - This is different from DefinesPredicate in that
 171 // we don't care about implicit defs here, just places we'll need to add a
 172 // default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
 173 bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
 174   const TargetInstrDesc &TID = MI->getDesc();
 175   if (!TID.hasOptionalDef())
 176     return false;
 177
 178   // Look to see if our OptionalDef is defining CPSR or CCR.
 179   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
 180     const MachineOperand &MO = MI->getOperand(i);
 181     if (!MO.isReg() || !MO.isDef()) continue;
 182     if (MO.getReg() == ARM::CPSR)
 183       *CPSR = true;
 184   }
 185   return true;
 186 }
 187
 188 // If the machine is predicable go ahead and add the predicate operands, if
 189 // it needs default CC operands add those.
 190 const MachineInstrBuilder &
 191 ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
 192   MachineInstr *MI = &*MIB;
 193
 194   // Do we use a predicate?
 195   if (TII.isPredicable(MI))
 196     AddDefaultPred(MIB);
 197
 198   // Do we optionally set a predicate?  Preds is size > 0 iff the predicate
 199   // defines CPSR. All other OptionalDefines in ARM are the CCR register.
 200   bool CPSR = false;
 201   if (DefinesOptionalPredicate(MI, &CPSR)) {
 202     if (CPSR)
 203       AddDefaultT1CC(MIB);
 204     else
 205       AddDefaultCC(MIB);
 206   }
 207   return MIB;
 208 }
 209
 210 unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
 211                                     const TargetRegisterClass* RC) {
 212   unsigned ResultReg = createResultReg(RC);
 213   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 214
 215   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
 216   return ResultReg;
 217 }
 218
 219 unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
 220                                      const TargetRegisterClass *RC,
 221                                      unsigned Op0, bool Op0IsKill) {
 222   unsigned ResultReg = createResultReg(RC);
 223   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 224
 225   if (II.getNumDefs() >= 1)
 226     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
 227                    .addReg(Op0, Op0IsKill * RegState::Kill));
 228   else {
 229     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
 230                    .addReg(Op0, Op0IsKill * RegState::Kill));
 231     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 232                    TII.get(TargetOpcode::COPY), ResultReg)
 233                    .addReg(II.ImplicitDefs[0]));
 234   }
 235   return ResultReg;
 236 }
 237
 238 unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
 239                                       const TargetRegisterClass *RC,
 240                                       unsigned Op0, bool Op0IsKill,
 241                                       unsigned Op1, bool Op1IsKill) {
 242   unsigned ResultReg = createResultReg(RC);
 243   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 244
 245   if (II.getNumDefs() >= 1)
 246     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
 247                    .addReg(Op0, Op0IsKill * RegState::Kill)
 248                    .addReg(Op1, Op1IsKill * RegState::Kill));
 249   else {
 250     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
 251                    .addReg(Op0, Op0IsKill * RegState::Kill)
 252                    .addReg(Op1, Op1IsKill * RegState::Kill));
 253     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 254                            TII.get(TargetOpcode::COPY), ResultReg)
 255                    .addReg(II.ImplicitDefs[0]));
 256   }
 257   return ResultReg;
 258 }
 259
 260 unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
 261                                       const TargetRegisterClass *RC,
 262                                       unsigned Op0, bool Op0IsKill,
 263                                       uint64_t Imm) {
 264   unsigned ResultReg = createResultReg(RC);
 265   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 266
 267   if (II.getNumDefs() >= 1)
 268     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
 269                    .addReg(Op0, Op0IsKill * RegState::Kill)
 270                    .addImm(Imm));
 271   else {
 272     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
 273                    .addReg(Op0, Op0IsKill * RegState::Kill)
 274                    .addImm(Imm));
 275     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 276                            TII.get(TargetOpcode::COPY), ResultReg)
 277                    .addReg(II.ImplicitDefs[0]));
 278   }
 279   return ResultReg;
 280 }
 281
 282 unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
 283                                       const TargetRegisterClass *RC,
 284                                       unsigned Op0, bool Op0IsKill,
 285                                       const ConstantFP *FPImm) {
 286   unsigned ResultReg = createResultReg(RC);
 287   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 288
 289   if (II.getNumDefs() >= 1)
 290     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
 291                    .addReg(Op0, Op0IsKill * RegState::Kill)
 292                    .addFPImm(FPImm));
 293   else {
 294     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
 295                    .addReg(Op0, Op0IsKill * RegState::Kill)
 296                    .addFPImm(FPImm));
 297     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 298                            TII.get(TargetOpcode::COPY), ResultReg)
 299                    .addReg(II.ImplicitDefs[0]));
 300   }
 301   return ResultReg;
 302 }
 303
 304 unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
 305                                        const TargetRegisterClass *RC,
 306                                        unsigned Op0, bool Op0IsKill,
 307                                        unsigned Op1, bool Op1IsKill,
 308                                        uint64_t Imm) {
 309   unsigned ResultReg = createResultReg(RC);
 310   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 311
 312   if (II.getNumDefs() >= 1)
 313     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
 314                    .addReg(Op0, Op0IsKill * RegState::Kill)
 315                    .addReg(Op1, Op1IsKill * RegState::Kill)
 316                    .addImm(Imm));
 317   else {
 318     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
 319                    .addReg(Op0, Op0IsKill * RegState::Kill)
 320                    .addReg(Op1, Op1IsKill * RegState::Kill)
 321                    .addImm(Imm));
 322     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 323                            TII.get(TargetOpcode::COPY), ResultReg)
 324                    .addReg(II.ImplicitDefs[0]));
 325   }
 326   return ResultReg;
 327 }
 328
 329 unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
 330                                      const TargetRegisterClass *RC,
 331                                      uint64_t Imm) {
 332   unsigned ResultReg = createResultReg(RC);
 333   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 334
 335   if (II.getNumDefs() >= 1)
 336     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
 337                    .addImm(Imm));
 338   else {
 339     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
 340                    .addImm(Imm));
 341     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 342                            TII.get(TargetOpcode::COPY), ResultReg)
 343                    .addReg(II.ImplicitDefs[0]));
 344   }
 345   return ResultReg;
 346 }
 347
 348 unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
 349                                                  unsigned Op0, bool Op0IsKill,
 350                                                  uint32_t Idx) {
 351   unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
 352   assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
 353          "Cannot yet extract from physregs");
 354   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
 355                          DL, TII.get(TargetOpcode::COPY), ResultReg)
 356                  .addReg(Op0, getKillRegState(Op0IsKill), Idx));
 357   return ResultReg;
 358 }
 359
 360 // TODO: Don't worry about 64-bit now, but when this is fixed remove the
 361 // checks from the various callers.
 362 unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
 363   if (VT.getSimpleVT().SimpleTy == MVT::f64) return 0;
 364
 365   unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
 366   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 367                           TII.get(ARM::VMOVRS), MoveReg)
 368                   .addReg(SrcReg));
 369   return MoveReg;
 370 }
 371
 372 unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
 373   if (VT.getSimpleVT().SimpleTy == MVT::i64) return 0;
 374
 375   unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
 376   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 377                           TII.get(ARM::VMOVSR), MoveReg)
 378                   .addReg(SrcReg));
 379   return MoveReg;
 380 }
 381
 382 // For double width floating point we need to materialize two constants
 383 // (the high and the low) into integer registers then use a move to get
 384 // the combined constant into an FP reg.
 385 unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
 386   const APFloat Val = CFP->getValueAPF();
 387   bool is64bit = VT.getSimpleVT().SimpleTy == MVT::f64;
 388
 389   // This checks to see if we can use VFP3 instructions to materialize
 390   // a constant, otherwise we have to go through the constant pool.
 391   if (TLI.isFPImmLegal(Val, VT)) {
 392     unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS;
 393     unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
 394     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
 395                             DestReg)
 396                     .addFPImm(CFP));
 397     return DestReg;
 398   }
 399
 400   // Require VFP2 for loading fp constants.
 401   if (!Subtarget->hasVFP2()) return false;
 402
 403   // MachineConstantPool wants an explicit alignment.
 404   unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
 405   if (Align == 0) {
 406     // TODO: Figure out if this is correct.
 407     Align = TD.getTypeAllocSize(CFP->getType());
 408   }
 409   unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 410   unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
 411   unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
 412
 413   // The extra reg is for addrmode5.
 414   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
 415                           DestReg)
 416                   .addConstantPoolIndex(Idx)
 417                   .addReg(0));
 418   return DestReg;
 419 }
 420
 421 unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
 422
 423   // For now 32-bit only.
 424   if (VT.getSimpleVT().SimpleTy != MVT::i32) return false;
 425
 426   // MachineConstantPool wants an explicit alignment.
 427   unsigned Align = TD.getPrefTypeAlignment(C->getType());
 428   if (Align == 0) {
 429     // TODO: Figure out if this is correct.
 430     Align = TD.getTypeAllocSize(C->getType());
 431   }
 432   unsigned Idx = MCP.getConstantPoolIndex(C, Align);
 433   unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
 434
 435   if (isThumb)
 436     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 437                             TII.get(ARM::t2LDRpci), DestReg)
 438                     .addConstantPoolIndex(Idx));
 439   else
 440     // The extra reg and immediate are for addrmode2.
 441     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 442                             TII.get(ARM::LDRcp), DestReg)
 443                     .addConstantPoolIndex(Idx)
 444                     .addReg(0).addImm(0));
 445
 446   return DestReg;
 447 }
 448
 449 unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
 450   EVT VT = TLI.getValueType(C->getType(), true);
 451
 452   // Only handle simple types.
 453   if (!VT.isSimple()) return 0;
 454
 455   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 456     return ARMMaterializeFP(CFP, VT);
 457   return ARMMaterializeInt(C, VT);
 458 }
 459
 460 unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
 461   // Don't handle dynamic allocas.
 462   if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
 463
 464   EVT VT;
 465   if (!isTypeLegal(AI->getType(), VT)) return false;
 466
 467   DenseMap<const AllocaInst*, int>::iterator SI =
 468     FuncInfo.StaticAllocaMap.find(AI);
 469
 470   // This will get lowered later into the correct offsets and registers
 471   // via rewriteXFrameIndex.
 472   if (SI != FuncInfo.StaticAllocaMap.end()) {
 473     TargetRegisterClass* RC = TLI.getRegClassFor(VT);
 474     unsigned ResultReg = createResultReg(RC);
 475     unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
 476     AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
 477                             TII.get(Opc), ResultReg)
 478                             .addFrameIndex(SI->second)
 479                             .addImm(0));
 480     return ResultReg;
 481   }
 482
 483   return 0;
 484 }
 485
 486 bool ARMFastISel::isTypeLegal(const Type *Ty, EVT &VT) {
 487   VT = TLI.getValueType(Ty, true);
 488
 489   // Only handle simple types.
 490   if (VT == MVT::Other || !VT.isSimple()) return false;
 491
 492   // Handle all legal types, i.e. a register that will directly hold this
 493   // value.
 494   return TLI.isTypeLegal(VT);
 495 }
 496
 497 bool ARMFastISel::isLoadTypeLegal(const Type *Ty, EVT &VT) {
 498   if (isTypeLegal(Ty, VT)) return true;
 499
 500   // If this is a type than can be sign or zero-extended to a basic operation
 501   // go ahead and accept it now.
 502   if (VT == MVT::i8 || VT == MVT::i16)
 503     return true;
 504
 505   return false;
 506 }
 507
 508 // Computes the Reg+Offset to get to an object.
 509 bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Reg,
 510                                       int &Offset) {
 511   // Some boilerplate from the X86 FastISel.
 512   const User *U = NULL;
 513   unsigned Opcode = Instruction::UserOp1;
 514   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
 515     // Don't walk into other basic blocks; it's possible we haven't
 516     // visited them yet, so the instructions may not yet be assigned
 517     // virtual registers.
 518     if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB)
 519       return false;
 520     Opcode = I->getOpcode();
 521     U = I;
 522   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
 523     Opcode = C->getOpcode();
 524     U = C;
 525   }
 526
 527   if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
 528     if (Ty->getAddressSpace() > 255)
 529       // Fast instruction selection doesn't support the special
 530       // address spaces.
 531       return false;
 532
 533   switch (Opcode) {
 534     default:
 535     break;
 536     case Instruction::Alloca: {
 537       assert(false && "Alloca should have been handled earlier!");
 538       return false;
 539     }
 540   }
 541
 542   // FIXME: Handle global variables.
 543   if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
 544     (void)GV;
 545     return false;
 546   }
 547
 548   // Try to get this in a register if nothing else has worked.
 549   Reg = getRegForValue(Obj);
 550   if (Reg == 0) return false;
 551
 552   // Since the offset may be too large for the load instruction
 553   // get the reg+offset into a register.
 554   // TODO: Verify the additions work, otherwise we'll need to add the
 555   // offset instead of 0 to the instructions and do all sorts of operand
 556   // munging.
 557   // TODO: Optimize this somewhat.
 558   if (Offset != 0) {
 559     ARMCC::CondCodes Pred = ARMCC::AL;
 560     unsigned PredReg = 0;
 561
 562     if (!isThumb)
 563       emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 564                               Reg, Reg, Offset, Pred, PredReg,
 565                               static_cast<const ARMBaseInstrInfo&>(TII));
 566     else {
 567       assert(AFI->isThumb2Function());
 568       emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 569                              Reg, Reg, Offset, Pred, PredReg,
 570                              static_cast<const ARMBaseInstrInfo&>(TII));
 571     }
 572   }
 573   return true;
 574 }
 575
 576 bool ARMFastISel::ARMLoadAlloca(const Instruction *I, EVT VT) {
 577   Value *Op0 = I->getOperand(0);
 578
 579   // Verify it's an alloca.
 580   if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op0)) {
 581     DenseMap<const AllocaInst*, int>::iterator SI =
 582       FuncInfo.StaticAllocaMap.find(AI);
 583
 584     if (SI != FuncInfo.StaticAllocaMap.end()) {
 585       TargetRegisterClass* RC = TLI.getRegClassFor(VT);
 586       unsigned ResultReg = createResultReg(RC);
 587       TII.loadRegFromStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
 588                                ResultReg, SI->second, RC,
 589                                TM.getRegisterInfo());
 590       UpdateValueMap(I, ResultReg);
 591       return true;
 592     }
 593   }
 594   return false;
 595 }
 596
 597 bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg,
 598                               unsigned Reg, int Offset) {
 599
 600   assert(VT.isSimple() && "Non-simple types are invalid here!");
 601   unsigned Opc;
 602   bool isFloat = false;
 603   switch (VT.getSimpleVT().SimpleTy) {
 604     default:
 605       // This is mostly going to be Neon/vector support.
 606       return false;
 607     case MVT::i16:
 608       Opc = isThumb ? ARM::tLDRH : ARM::LDRH;
 609       VT = MVT::i32;
 610       break;
 611     case MVT::i8:
 612       Opc = isThumb ? ARM::tLDRB : ARM::LDRB;
 613       VT = MVT::i32;
 614       break;
 615     case MVT::i32:
 616       Opc = isThumb ? ARM::tLDR : ARM::LDR;
 617       break;
 618     case MVT::f32:
 619       Opc = ARM::VLDRS;
 620       isFloat = true;
 621       break;
 622     case MVT::f64:
 623       Opc = ARM::VLDRD;
 624       isFloat = true;
 625       break;
 626   }
 627
 628   ResultReg = createResultReg(TLI.getRegClassFor(VT));
 629
 630   // TODO: Fix the Addressing modes so that these can share some code.
 631   // Since this is a Thumb1 load this will work in Thumb1 or 2 mode.
 632   // The thumb addressing mode has operands swapped from the arm addressing
 633   // mode, the floating point one only has two operands.
 634   if (isFloat)
 635     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 636                             TII.get(Opc), ResultReg)
 637                     .addReg(Reg).addImm(Offset));
 638   else if (isThumb)
 639     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 640                             TII.get(Opc), ResultReg)
 641                     .addReg(Reg).addImm(Offset).addReg(0));
 642   else
 643     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 644                             TII.get(Opc), ResultReg)
 645                     .addReg(Reg).addReg(0).addImm(Offset));
 646   return true;
 647 }
 648
 649 bool ARMFastISel::SelectLoad(const Instruction *I) {
 650   // Verify we have a legal type before going any further.
 651   EVT VT;
 652   if (!isLoadTypeLegal(I->getType(), VT))
 653     return false;
 654
 655   // If we're an alloca we know we have a frame index and can emit the load
 656   // directly in short order.
 657   if (ARMLoadAlloca(I, VT))
 658     return true;
 659
 660   // Our register and offset with innocuous defaults.
 661   unsigned Reg = 0;
 662   int Offset = 0;
 663
 664   // See if we can handle this as Reg + Offset
 665   if (!ARMComputeRegOffset(I->getOperand(0), Reg, Offset))
 666     return false;
 667
 668   unsigned ResultReg;
 669   if (!ARMEmitLoad(VT, ResultReg, Reg, Offset /* 0 */)) return false;
 670
 671   UpdateValueMap(I, ResultReg);
 672   return true;
 673 }
 674
 675 bool ARMFastISel::ARMStoreAlloca(const Instruction *I, unsigned SrcReg, EVT VT){
 676   Value *Op1 = I->getOperand(1);
 677
 678   // Verify it's an alloca.
 679   if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
 680     DenseMap<const AllocaInst*, int>::iterator SI =
 681       FuncInfo.StaticAllocaMap.find(AI);
 682
 683     if (SI != FuncInfo.StaticAllocaMap.end()) {
 684       TargetRegisterClass* RC = TLI.getRegClassFor(VT);
 685       assert(SrcReg != 0 && "Nothing to store!");
 686       TII.storeRegToStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
 687                               SrcReg, true /*isKill*/, SI->second, RC,
 688                               TM.getRegisterInfo());
 689       return true;
 690     }
 691   }
 692   return false;
 693 }
 694
 695 bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg,
 696                                unsigned DstReg, int Offset) {
 697   unsigned StrOpc;
 698   bool isFloat = false;
 699   switch (VT.getSimpleVT().SimpleTy) {
 700     default: return false;
 701     case MVT::i1:
 702     case MVT::i8: StrOpc = isThumb ? ARM::tSTRB : ARM::STRB; break;
 703     case MVT::i16: StrOpc = isThumb ? ARM::tSTRH : ARM::STRH; break;
 704     case MVT::i32: StrOpc = isThumb ? ARM::tSTR : ARM::STR; break;
 705     case MVT::f32:
 706       if (!Subtarget->hasVFP2()) return false;
 707       StrOpc = ARM::VSTRS;
 708       isFloat = true;
 709       break;
 710     case MVT::f64:
 711       if (!Subtarget->hasVFP2()) return false;
 712       StrOpc = ARM::VSTRD;
 713       isFloat = true;
 714       break;
 715   }
 716
 717   // The thumb addressing mode has operands swapped from the arm addressing
 718   // mode, the floating point one only has two operands.
 719   if (isFloat)
 720     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 721                             TII.get(StrOpc), SrcReg)
 722                     .addReg(DstReg).addImm(Offset));
 723   else if (isThumb)
 724     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 725                             TII.get(StrOpc), SrcReg)
 726                     .addReg(DstReg).addImm(Offset).addReg(0));
 727
 728   else
 729     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 730                             TII.get(StrOpc), SrcReg)
 731                     .addReg(DstReg).addReg(0).addImm(Offset));
 732
 733   return true;
 734 }
 735
 736 bool ARMFastISel::SelectStore(const Instruction *I) {
 737   Value *Op0 = I->getOperand(0);
 738   unsigned SrcReg = 0;
 739
 740   // Yay type legalization
 741   EVT VT;
 742   if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
 743     return false;
 744
 745   // Get the value to be stored into a register.
 746   SrcReg = getRegForValue(Op0);
 747   if (SrcReg == 0)
 748     return false;
 749
 750   // If we're an alloca we know we have a frame index and can emit the store
 751   // quickly.
 752   if (ARMStoreAlloca(I, SrcReg, VT))
 753     return true;
 754
 755   // Our register and offset with innocuous defaults.
 756   unsigned Reg = 0;
 757   int Offset = 0;
 758
 759   // See if we can handle this as Reg + Offset
 760   if (!ARMComputeRegOffset(I->getOperand(1), Reg, Offset))
 761     return false;
 762
 763   if (!ARMEmitStore(VT, SrcReg, Reg, Offset /* 0 */)) return false;
 764
 765   return true;
 766 }
 767
 768 static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
 769   switch (Pred) {
 770     // Needs two compares...
 771     case CmpInst::FCMP_ONE:
 772     case CmpInst::FCMP_UEQ:
 773     default:
 774       assert(false && "Unhandled CmpInst::Predicate!");
 775       return ARMCC::AL;
 776     case CmpInst::ICMP_EQ:
 777     case CmpInst::FCMP_OEQ:
 778       return ARMCC::EQ;
 779     case CmpInst::ICMP_SGT:
 780     case CmpInst::FCMP_OGT:
 781       return ARMCC::GT;
 782     case CmpInst::ICMP_SGE:
 783     case CmpInst::FCMP_OGE:
 784       return ARMCC::GE;
 785     case CmpInst::ICMP_UGT:
 786     case CmpInst::FCMP_UGT:
 787       return ARMCC::HI;
 788     case CmpInst::FCMP_OLT:
 789       return ARMCC::MI;
 790     case CmpInst::ICMP_ULE:
 791     case CmpInst::FCMP_OLE:
 792       return ARMCC::LS;
 793     case CmpInst::FCMP_ORD:
 794       return ARMCC::VC;
 795     case CmpInst::FCMP_UNO:
 796       return ARMCC::VS;
 797     case CmpInst::FCMP_UGE:
 798       return ARMCC::PL;
 799     case CmpInst::ICMP_SLT:
 800     case CmpInst::FCMP_ULT:
 801       return ARMCC::LT;
 802     case CmpInst::ICMP_SLE:
 803     case CmpInst::FCMP_ULE:
 804       return ARMCC::LE;
 805     case CmpInst::FCMP_UNE:
 806     case CmpInst::ICMP_NE:
 807       return ARMCC::NE;
 808     case CmpInst::ICMP_UGE:
 809       return ARMCC::HS;
 810     case CmpInst::ICMP_ULT:
 811       return ARMCC::LO;
 812   }
 813 }
 814
 815 bool ARMFastISel::SelectBranch(const Instruction *I) {
 816   const BranchInst *BI = cast<BranchInst>(I);
 817   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
 818   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
 819
 820   // Simple branch support.
 821   // TODO: Try to avoid the re-computation in some places.
 822   unsigned CondReg = getRegForValue(BI->getCondition());
 823   if (CondReg == 0) return false;
 824
 825   // Re-set the flags just in case.
 826   unsigned CmpOpc = isThumb ? ARM::t2CMPri : ARM::CMPri;
 827   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
 828                   .addReg(CondReg).addImm(1));
 829
 830   unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
 831   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
 832                   .addMBB(TBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
 833   FastEmitBranch(FBB, DL);
 834   FuncInfo.MBB->addSuccessor(TBB);
 835   return true;
 836 }
 837
 838 bool ARMFastISel::SelectCmp(const Instruction *I) {
 839   const CmpInst *CI = cast<CmpInst>(I);
 840
 841   EVT VT;
 842   const Type *Ty = CI->getOperand(0)->getType();
 843   if (!isTypeLegal(Ty, VT))
 844     return false;
 845
 846   bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
 847   if (isFloat && !Subtarget->hasVFP2())
 848     return false;
 849
 850   unsigned CmpOpc;
 851   unsigned CondReg;
 852   switch (VT.getSimpleVT().SimpleTy) {
 853     default: return false;
 854     // TODO: Verify compares.
 855     case MVT::f32:
 856       CmpOpc = ARM::VCMPES;
 857       CondReg = ARM::FPSCR;
 858       break;
 859     case MVT::f64:
 860       CmpOpc = ARM::VCMPED;
 861       CondReg = ARM::FPSCR;
 862       break;
 863     case MVT::i32:
 864       CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
 865       CondReg = ARM::CPSR;
 866       break;
 867   }
 868
 869   // Get the compare predicate.
 870   ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
 871
 872   // We may not handle every CC for now.
 873   if (ARMPred == ARMCC::AL) return false;
 874
 875   unsigned Arg1 = getRegForValue(CI->getOperand(0));
 876   if (Arg1 == 0) return false;
 877
 878   unsigned Arg2 = getRegForValue(CI->getOperand(1));
 879   if (Arg2 == 0) return false;
 880
 881   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
 882                   .addReg(Arg1).addReg(Arg2));
 883
 884   // For floating point we need to move the result to a comparison register
 885   // that we can then use for branches.
 886   if (isFloat)
 887     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 888                             TII.get(ARM::FMSTAT)));
 889
 890   // Now set a register based on the comparison. Explicitly set the predicates
 891   // here.
 892   unsigned MovCCOpc = isThumb ? ARM::tMOVCCi : ARM::MOVCCi;
 893   unsigned DestReg = createResultReg(ARM::GPRRegisterClass);
 894   Constant *Zero
 895     = ConstantInt::get(Type::getInt32Ty(*Context), 0);
 896   unsigned ZeroReg = TargetMaterializeConstant(Zero);
 897   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
 898           .addReg(ZeroReg).addImm(1)
 899           .addImm(ARMPred).addReg(CondReg);
 900
 901   UpdateValueMap(I, DestReg);
 902   return true;
 903 }
 904
 905 bool ARMFastISel::SelectFPExt(const Instruction *I) {
 906   // Make sure we have VFP and that we're extending float to double.
 907   if (!Subtarget->hasVFP2()) return false;
 908
 909   Value *V = I->getOperand(0);
 910   if (!I->getType()->isDoubleTy() ||
 911       !V->getType()->isFloatTy()) return false;
 912
 913   unsigned Op = getRegForValue(V);
 914   if (Op == 0) return false;
 915
 916   unsigned Result = createResultReg(ARM::DPRRegisterClass);
 917   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 918                           TII.get(ARM::VCVTDS), Result)
 919                   .addReg(Op));
 920   UpdateValueMap(I, Result);
 921   return true;
 922 }
 923
 924 bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
 925   // Make sure we have VFP and that we're truncating double to float.
 926   if (!Subtarget->hasVFP2()) return false;
 927
 928   Value *V = I->getOperand(0);
 929   if (!I->getType()->isFloatTy() ||
 930       !V->getType()->isDoubleTy()) return false;
 931
 932   unsigned Op = getRegForValue(V);
 933   if (Op == 0) return false;
 934
 935   unsigned Result = createResultReg(ARM::SPRRegisterClass);
 936   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 937                           TII.get(ARM::VCVTSD), Result)
 938                   .addReg(Op));
 939   UpdateValueMap(I, Result);
 940   return true;
 941 }
 942
 943 bool ARMFastISel::SelectSIToFP(const Instruction *I) {
 944   // Make sure we have VFP.
 945   if (!Subtarget->hasVFP2()) return false;
 946
 947   EVT DstVT;
 948   const Type *Ty = I->getType();
 949   if (!isTypeLegal(Ty, DstVT))
 950     return false;
 951
 952   unsigned Op = getRegForValue(I->getOperand(0));
 953   if (Op == 0) return false;
 954
 955   // The conversion routine works on fp-reg to fp-reg and the operand above
 956   // was an integer, move it to the fp registers if possible.
 957   unsigned FP = ARMMoveToFPReg(DstVT, Op);
 958   if (FP == 0) return false;
 959
 960   unsigned Opc;
 961   if (Ty->isFloatTy()) Opc = ARM::VSITOS;
 962   else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
 963   else return 0;
 964
 965   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
 966   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
 967                           ResultReg)
 968                   .addReg(FP));
 969   UpdateValueMap(I, ResultReg);
 970   return true;
 971 }
 972
 973 bool ARMFastISel::SelectFPToSI(const Instruction *I) {
 974   // Make sure we have VFP.
 975   if (!Subtarget->hasVFP2()) return false;
 976
 977   EVT DstVT;
 978   const Type *RetTy = I->getType();
 979   if (!isTypeLegal(RetTy, DstVT))
 980     return false;
 981
 982   unsigned Op = getRegForValue(I->getOperand(0));
 983   if (Op == 0) return false;
 984
 985   unsigned Opc;
 986   const Type *OpTy = I->getOperand(0)->getType();
 987   if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
 988   else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
 989   else return 0;
 990   EVT OpVT = TLI.getValueType(OpTy, true);
 991
 992   unsigned ResultReg = createResultReg(TLI.getRegClassFor(OpVT));
 993   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
 994                           ResultReg)
 995                   .addReg(Op));
 996
 997   // This result needs to be in an integer register, but the conversion only
 998   // takes place in fp-regs.
 999   unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
1000   if (IntReg == 0) return false;
1001
1002   UpdateValueMap(I, IntReg);
1003   return true;
1004 }
1005
1006 bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
1007   EVT VT  = TLI.getValueType(I->getType(), true);
1008
1009   // We can get here in the case when we want to use NEON for our fp
1010   // operations, but can't figure out how to. Just use the vfp instructions
1011   // if we have them.
1012   // FIXME: It'd be nice to use NEON instructions.
1013   const Type *Ty = I->getType();
1014   bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
1015   if (isFloat && !Subtarget->hasVFP2())
1016     return false;
1017
1018   unsigned Op1 = getRegForValue(I->getOperand(0));
1019   if (Op1 == 0) return false;
1020
1021   unsigned Op2 = getRegForValue(I->getOperand(1));
1022   if (Op2 == 0) return false;
1023
1024   unsigned Opc;
1025   bool is64bit = VT.getSimpleVT().SimpleTy == MVT::f64 ||
1026                  VT.getSimpleVT().SimpleTy == MVT::i64;
1027   switch (ISDOpcode) {
1028     default: return false;
1029     case ISD::FADD:
1030       Opc = is64bit ? ARM::VADDD : ARM::VADDS;
1031       break;
1032     case ISD::FSUB:
1033       Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
1034       break;
1035     case ISD::FMUL:
1036       Opc = is64bit ? ARM::VMULD : ARM::VMULS;
1037       break;
1038   }
1039   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
1040   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1041                           TII.get(Opc), ResultReg)
1042                   .addReg(Op1).addReg(Op2));
1043   UpdateValueMap(I, ResultReg);
1044   return true;
1045 }
1046
1047 // Call Handling Code
1048
1049 // This is largely taken directly from CCAssignFnForNode - we don't support
1050 // varargs in FastISel so that part has been removed.
1051 // TODO: We may not support all of this.
1052 CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
1053   switch (CC) {
1054   default:
1055     llvm_unreachable("Unsupported calling convention");
1056   case CallingConv::C:
1057   case CallingConv::Fast:
1058     // Use target triple & subtarget features to do actual dispatch.
1059     if (Subtarget->isAAPCS_ABI()) {
1060       if (Subtarget->hasVFP2() &&
1061           FloatABIType == FloatABI::Hard)
1062         return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
1063       else
1064         return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
1065     } else
1066         return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
1067   case CallingConv::ARM_AAPCS_VFP:
1068     return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
1069   case CallingConv::ARM_AAPCS:
1070     return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
1071   case CallingConv::ARM_APCS:
1072     return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
1073   }
1074 }
1075
1076 bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
1077                                   SmallVectorImpl<unsigned> &ArgRegs,
1078                                   SmallVectorImpl<EVT> &ArgVTs,
1079                                   SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1080                                   SmallVectorImpl<unsigned> &RegArgs,
1081                                   CallingConv::ID CC,
1082                                   unsigned &NumBytes) {
1083   SmallVector<CCValAssign, 16> ArgLocs;
1084   CCState CCInfo(CC, false, TM, ArgLocs, *Context);
1085   CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
1086
1087   // Get a count of how many bytes are to be pushed on the stack.
1088   NumBytes = CCInfo.getNextStackOffset();
1089
1090   // Issue CALLSEQ_START
1091   unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
1092   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown))
1093           .addImm(NumBytes);
1094
1095   // Process the args.
1096   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1097     CCValAssign &VA = ArgLocs[i];
1098     unsigned Arg = ArgRegs[VA.getValNo()];
1099     EVT ArgVT = ArgVTs[VA.getValNo()];
1100
1101     // Handle arg promotion, etc.
1102     switch (VA.getLocInfo()) {
1103       case CCValAssign::Full: break;
1104       default:
1105       assert(false && "Handle arg promotion.");
1106       return false;
1107     }
1108
1109     // Now copy/store arg to correct locations.
1110     if (VA.isRegLoc()) {
1111       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1112               VA.getLocReg())
1113       .addReg(Arg);
1114       RegArgs.push_back(VA.getLocReg());
1115     } else {
1116       // Need to store
1117       return false;
1118     }
1119   }
1120
1121   return true;
1122 }
1123
1124 bool ARMFastISel::FinishCall(EVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
1125                              const Instruction *I, CallingConv::ID CC,
1126                              unsigned &NumBytes) {
1127   // Issue CALLSEQ_END
1128   unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
1129   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
1130           .addImm(NumBytes).addImm(0);
1131
1132   // Now the return value.
1133   if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
1134     SmallVector<CCValAssign, 16> RVLocs;
1135     CCState CCInfo(CC, false, TM, RVLocs, *Context);
1136     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));
1137
1138     // Copy all of the result registers out of their specified physreg.
1139     assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
1140     EVT CopyVT = RVLocs[0].getValVT();
1141     TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
1142
1143     unsigned ResultReg = createResultReg(DstRC);
1144     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1145             ResultReg).addReg(RVLocs[0].getLocReg());
1146     UsedRegs.push_back(RVLocs[0].getLocReg());
1147
1148     // Finally update the result.
1149     UpdateValueMap(I, ResultReg);
1150   }
1151
1152   return true;
1153 }
1154
1155 // A quick function that will emit a call for a named libcall in F with the
1156 // vector of passed arguments for the Instruction in I. We can assume that we
1157 // can emit a call for any libcall we can produce. This is an abridged version
1158 // of the full call infrastructure since we won't need to worry about things
1159 // like computed function pointers or strange arguments at call sites.
1160 // TODO: Try to unify this and the normal call bits for ARM, then try to unify
1161 // with X86.
1162 bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
1163   CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
1164
1165   // Handle *simple* calls for now.
1166   const Type *RetTy = I->getType();
1167   EVT RetVT;
1168   if (RetTy->isVoidTy())
1169     RetVT = MVT::isVoid;
1170   else if (!isTypeLegal(RetTy, RetVT))
1171     return false;
1172
1173   // For now we're using BLX etc on the assumption that we have v5t ops.
1174   if (!Subtarget->hasV5TOps()) return false;
1175
1176   // Set up the argument vectors.
1177   SmallVector<Value*, 8> Args;
1178   SmallVector<unsigned, 8> ArgRegs;
1179   SmallVector<EVT, 8> ArgVTs;
1180   SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
1181   Args.reserve(I->getNumOperands());
1182   ArgRegs.reserve(I->getNumOperands());
1183   ArgVTs.reserve(I->getNumOperands());
1184   ArgFlags.reserve(I->getNumOperands());
1185   for (unsigned i = 0; i < I->getNumOperands(); ++i) {
1186     Value *Op = I->getOperand(i);
1187     unsigned Arg = getRegForValue(Op);
1188     if (Arg == 0) return false;
1189
1190     const Type *ArgTy = Op->getType();
1191     EVT ArgVT;
1192     if (!isTypeLegal(ArgTy, ArgVT)) return false;
1193
1194     ISD::ArgFlagsTy Flags;
1195     unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
1196     Flags.setOrigAlign(OriginalAlignment);
1197
1198     Args.push_back(Op);
1199     ArgRegs.push_back(Arg);
1200     ArgVTs.push_back(ArgVT);
1201     ArgFlags.push_back(Flags);
1202   }
1203
1204   // Handle the arguments now that we've gotten them.
1205   SmallVector<unsigned, 4> RegArgs;
1206   unsigned NumBytes;
1207   if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
1208     return false;
1209
1210   // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
1211   // TODO: Turn this into the table of arm call ops.
1212   MachineInstrBuilder MIB;
1213   unsigned CallOpc;
1214   if(isThumb)
1215     CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
1216   else
1217     CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
1218   MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
1219         .addExternalSymbol(TLI.getLibcallName(Call));
1220
1221   // Add implicit physical register uses to the call.
1222   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
1223     MIB.addReg(RegArgs[i]);
1224
1225   // Finish off the call including any return values.
1226   SmallVector<unsigned, 4> UsedRegs;
1227   if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
1228
1229   // Set all unused physreg defs as dead.
1230   static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
1231
1232   return true;
1233 }
1234
1235 bool ARMFastISel::SelectSDiv(const Instruction *I) {
1236   EVT VT;
1237   const Type *Ty = I->getType();
1238   if (!isTypeLegal(Ty, VT))
1239     return false;
1240
1241   // If we have integer div support we should have selected this automagically.
1242   // In case we have a real miss go ahead and return false and we'll pick
1243   // it up later.
1244   if (Subtarget->hasDivide()) return false;
1245
1246   // Otherwise emit a libcall.
1247   RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1248   if (VT == MVT::i16)
1249     LC = RTLIB::SDIV_I16;
1250   else if (VT == MVT::i32)
1251     LC = RTLIB::SDIV_I32;
1252   else if (VT == MVT::i64)
1253     LC = RTLIB::SDIV_I64;
1254   else if (VT == MVT::i128)
1255     LC = RTLIB::SDIV_I128;
1256   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
1257
1258   return ARMEmitLibcall(I, LC);
1259 }
1260
1261 bool ARMFastISel::SelectCall(const Instruction *I) {
1262   const CallInst *CI = cast<CallInst>(I);
1263   const Value *Callee = CI->getCalledValue();
1264
1265   // Can't handle inline asm or worry about intrinsics yet.
1266   if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;
1267
1268   // Only handle global variable Callees
1269   const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
1270   if (!GV) return false;
1271
1272   // Check the calling convention.
1273   ImmutableCallSite CS(CI);
1274   CallingConv::ID CC = CS.getCallingConv();
1275   // TODO: Avoid some calling conventions?
1276   if (CC != CallingConv::C) {
1277     errs() << "Can't handle calling convention: " << CC << "\n";
1278     return false;
1279   }
1280
1281   // Let SDISel handle vararg functions.
1282   const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
1283   const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
1284   if (FTy->isVarArg())
1285     return false;
1286
1287   // Handle *simple* calls for now.
1288   const Type *RetTy = I->getType();
1289   EVT RetVT;
1290   if (RetTy->isVoidTy())
1291     RetVT = MVT::isVoid;
1292   else if (!isTypeLegal(RetTy, RetVT))
1293     return false;
1294
1295   // For now we're using BLX etc on the assumption that we have v5t ops.
1296   // TODO: Maybe?
1297   if (!Subtarget->hasV5TOps()) return false;
1298
1299   // Set up the argument vectors.
1300   SmallVector<Value*, 8> Args;
1301   SmallVector<unsigned, 8> ArgRegs;
1302   SmallVector<EVT, 8> ArgVTs;
1303   SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
1304   Args.reserve(CS.arg_size());
1305   ArgRegs.reserve(CS.arg_size());
1306   ArgVTs.reserve(CS.arg_size());
1307   ArgFlags.reserve(CS.arg_size());
1308   for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
1309        i != e; ++i) {
1310     unsigned Arg = getRegForValue(*i);
1311
1312     if (Arg == 0)
1313       return false;
1314     ISD::ArgFlagsTy Flags;
1315     unsigned AttrInd = i - CS.arg_begin() + 1;
1316     if (CS.paramHasAttr(AttrInd, Attribute::SExt))
1317       Flags.setSExt();
1318     if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
1319       Flags.setZExt();
1320
1321          // FIXME: Only handle *easy* calls for now.
1322     if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
1323         CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
1324         CS.paramHasAttr(AttrInd, Attribute::Nest) ||
1325         CS.paramHasAttr(AttrInd, Attribute::ByVal))
1326       return false;
1327
1328     const Type *ArgTy = (*i)->getType();
1329     EVT ArgVT;
1330     if (!isTypeLegal(ArgTy, ArgVT))
1331       return false;
1332     unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
1333     Flags.setOrigAlign(OriginalAlignment);
1334
1335     Args.push_back(*i);
1336     ArgRegs.push_back(Arg);
1337     ArgVTs.push_back(ArgVT);
1338     ArgFlags.push_back(Flags);
1339   }
1340
1341   // Handle the arguments now that we've gotten them.
1342   SmallVector<unsigned, 4> RegArgs;
1343   unsigned NumBytes;
1344   if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
1345     return false;
1346
1347   // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
1348   // TODO: Turn this into the table of arm call ops.
1349   MachineInstrBuilder MIB;
1350   unsigned CallOpc;
1351   if(isThumb)
1352     CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
1353   else
1354     CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
1355   MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
1356               .addGlobalAddress(GV, 0, 0);
1357
1358   // Add implicit physical register uses to the call.
1359   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
1360     MIB.addReg(RegArgs[i]);
1361
1362   // Finish off the call including any return values.
1363   SmallVector<unsigned, 4> UsedRegs;
1364   if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
1365
1366   // Set all unused physreg defs as dead.
1367   static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
1368
1369   return true;
1370
1371 }
1372
1373 // TODO: SoftFP support.
1374 bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
1375   // No Thumb-1 for now.
1376   if (isThumb && !AFI->isThumb2Function()) return false;
1377
1378   switch (I->getOpcode()) {
1379     case Instruction::Load:
1380       return SelectLoad(I);
1381     case Instruction::Store:
1382       return SelectStore(I);
1383     case Instruction::Br:
1384       return SelectBranch(I);
1385     case Instruction::ICmp:
1386     case Instruction::FCmp:
1387       return SelectCmp(I);
1388     case Instruction::FPExt:
1389       return SelectFPExt(I);
1390     case Instruction::FPTrunc:
1391       return SelectFPTrunc(I);
1392     case Instruction::SIToFP:
1393       return SelectSIToFP(I);
1394     case Instruction::FPToSI:
1395       return SelectFPToSI(I);
1396     case Instruction::FAdd:
1397       return SelectBinaryOp(I, ISD::FADD);
1398     case Instruction::FSub:
1399       return SelectBinaryOp(I, ISD::FSUB);
1400     case Instruction::FMul:
1401       return SelectBinaryOp(I, ISD::FMUL);
1402     case Instruction::SDiv:
1403       return SelectSDiv(I);
1404     case Instruction::Call:
1405       return SelectCall(I);
1406     default: break;
1407   }
1408   return false;
1409 }
1410
1411 namespace llvm {
1412   llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
1413     if (EnableARMFastISel) return new ARMFastISel(funcInfo);
1414     return 0;
1415   }
1416 }