//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the ARM-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// ARMGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMCallingConv.h"
#include "ARMRegisterInfo.h"
#include "ARMTargetMachine.h"
#include "ARMSubtarget.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
static cl::opt<bool>
EnableARMFastISel("arm-fast-isel",
                  cl::desc("Turn on experimental ARM fast-isel support"),
                  cl::init(false), cl::Hidden);
namespace {

class ARMFastISel : public FastISel {

  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;
  const TargetMachine &TM;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  const ARMFunctionInfo *AFI;

  // Convenience variables to avoid some queries.
  bool isThumb;
  LLVMContext *Context;

  public:
    explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
    : FastISel(funcInfo),
      TM(funcInfo.MF->getTarget()),
      TII(*TM.getInstrInfo()),
      TLI(*TM.getTargetLowering()) {
      Subtarget = &TM.getSubtarget<ARMSubtarget>();
      AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
      isThumb = AFI->isThumbFunction();
      Context = &funcInfo.Fn->getContext();
    }
    // Code from FastISel.cpp.
    virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
                                   const TargetRegisterClass *RC);
    virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
                                    unsigned Op0, bool Op0IsKill);
    virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     unsigned Op1, bool Op1IsKill);
    virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     uint64_t Imm);
    virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     const ConstantFP *FPImm);
    virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
                                    uint64_t Imm);
    virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill,
                                      uint64_t Imm);
    virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
                                                unsigned Op0, bool Op0IsKill,
                                                uint32_t Idx);

    // Backend specific FastISel code.
    virtual bool TargetSelectInstruction(const Instruction *I);
    virtual unsigned TargetMaterializeConstant(const Constant *C);
    virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);

  #include "ARMGenFastISel.inc"
    // Instruction selection routines.
  private:
    virtual bool SelectLoad(const Instruction *I);
    virtual bool SelectStore(const Instruction *I);
    virtual bool SelectBranch(const Instruction *I);
    virtual bool SelectCmp(const Instruction *I);
    virtual bool SelectFPExt(const Instruction *I);
    virtual bool SelectFPTrunc(const Instruction *I);
    virtual bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
    virtual bool SelectSIToFP(const Instruction *I);
    virtual bool SelectFPToSI(const Instruction *I);
    virtual bool SelectSDiv(const Instruction *I);
    virtual bool SelectCall(const Instruction *I);
    // Utility routines.
  private:
    bool isTypeLegal(const Type *Ty, EVT &VT);
    bool isLoadTypeLegal(const Type *Ty, EVT &VT);
    bool ARMEmitLoad(EVT VT, unsigned &ResultReg, unsigned Reg, int Offset);
    bool ARMEmitStore(EVT VT, unsigned SrcReg, unsigned Reg, int Offset);
    bool ARMLoadAlloca(const Instruction *I, EVT VT);
    bool ARMStoreAlloca(const Instruction *I, unsigned SrcReg, EVT VT);
    bool ARMComputeRegOffset(const Value *Obj, unsigned &Reg, int &Offset);
    unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
    unsigned ARMMaterializeInt(const Constant *C, EVT VT);
    unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
    unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
    // Call handling routines.
  private:
    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                         SmallVectorImpl<unsigned> &ArgRegs,
                         SmallVectorImpl<EVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<unsigned> &RegArgs,
                         CallingConv::ID CC,
                         unsigned &NumBytes);
    bool FinishCall(EVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                    const Instruction *I, CallingConv::ID CC,
                    unsigned &NumBytes);
    bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);

    // OptionalDef handling routines.
  private:
    bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
    const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
};

} // end anonymous namespace

#include "ARMGenCallingConv.inc"
// DefinesOptionalPredicate - This is different from DefinesPredicate in that
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
  const TargetInstrDesc &TID = MI->getDesc();
  if (!TID.hasOptionalDef())
    return false;

  // Look to see if our OptionalDef is defining CPSR or CCR.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || !MO.isDef()) continue;
    if (MO.getReg() == ARM::CPSR)
      *CPSR = true;
  }
  return true;
}
// If the machine is predicable go ahead and add the predicate operands, if
// it needs default CC operands add those.
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  MachineInstr *MI = &*MIB;

  // Do we use a predicate?
  if (TII.isPredicable(MI))
    AddDefaultPred(MIB);

  // Do we optionally set a predicate? Preds is size > 0 iff the predicate
  // defines CPSR. All other OptionalDefines in ARM are the CCR register.
  bool CPSR = false;
  if (DefinesOptionalPredicate(MI, &CPSR)) {
    if (CPSR)
      AddDefaultT1CC(MIB);
    else
      AddDefaultCC(MIB);
  }
  return MIB;
}
unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
                                    const TargetRegisterClass* RC) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
  return ResultReg;
}
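// Each FastEmitInst_* variant below follows the same pattern: if the target
// instruction has an explicit def, emit it straight into ResultReg; if the
// result is only available as an implicit def, emit the instruction and then
// COPY the implicit def into ResultReg.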
unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      const ConstantFP *FPImm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addFPImm(FPImm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addFPImm(FPImm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
                                                 unsigned Op0, bool Op0IsKill,
                                                 uint32_t Idx) {
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
  assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
         "Cannot yet extract from physregs");
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                          DL, TII.get(TargetOpcode::COPY), ResultReg)
                  .addReg(Op0, getKillRegState(Op0IsKill), Idx));
  return ResultReg;
}
// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
  if (VT.getSimpleVT().SimpleTy == MVT::f64) return 0;

  // VMOVSR moves a core register into a single-precision VFP register
  // (VMOVRS goes the other way, so it was the wrong opcode here).
  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VMOVSR), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}
unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
  if (VT.getSimpleVT().SimpleTy == MVT::i64) return 0;

  // VMOVRS moves a single-precision VFP register into a core register.
  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VMOVRS), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}
// For double width floating point we need to materialize two constants
// (the high and the low) into integer registers then use a move to get
// the combined constant into an FP reg.
unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
  const APFloat Val = CFP->getValueAPF();
  bool is64bit = VT.getSimpleVT().SimpleTy == MVT::f64;

  // This checks to see if we can use VFP3 instructions to materialize
  // a constant, otherwise we have to go through the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS;
    unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                            DestReg)
                    .addFPImm(CFP));
    return DestReg;
  }

  // Require VFP2 for loading fp constants.
  if (!Subtarget->hasVFP2()) return 0;

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(CFP->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;

  // The extra reg is for addrmode5.
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          DestReg)
                  .addConstantPoolIndex(Idx)
                  .addReg(0));
  return DestReg;
}
unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
  // For now 32-bit only.
  if (VT.getSimpleVT().SimpleTy != MVT::i32) return 0;

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(C->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(C->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(C, Align);
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));

  if (isThumb)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::t2LDRpci), DestReg)
                    .addConstantPoolIndex(Idx));
  else
    // The extra reg and immediate are for addrmode2.
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::LDRcp), DestReg)
                    .addConstantPoolIndex(Idx)
                    .addReg(0).addImm(0));

  return DestReg;
}
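// Dispatch constant materialization: FP constants go through ARMMaterializeFP,
// everything else is treated as a 32-bit integer constant-pool load.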
unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
  EVT VT = TLI.getValueType(C->getType(), true);

  // Only handle simple types.
  if (!VT.isSimple()) return 0;

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return ARMMaterializeFP(CFP, VT);
  return ARMMaterializeInt(C, VT);
}
unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;

  EVT VT;
  if (!isTypeLegal(AI->getType(), VT)) return 0;

  DenseMap<const AllocaInst*, int>::iterator SI =
    FuncInfo.StaticAllocaMap.find(AI);

  // This will get lowered later into the correct offsets and registers
  // via rewriteXFrameIndex.
  if (SI != FuncInfo.StaticAllocaMap.end()) {
    TargetRegisterClass* RC = TLI.getRegClassFor(VT);
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                    .addFrameIndex(SI->second)
                    .addImm(0));
    return ResultReg;
  }

  return 0;
}
bool ARMFastISel::isTypeLegal(const Type *Ty, EVT &VT) {
  VT = TLI.getValueType(Ty, true);

  // Only handle simple types.
  if (VT == MVT::Other || !VT.isSimple()) return false;

  // Handle all legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}
bool ARMFastISel::isLoadTypeLegal(const Type *Ty, EVT &VT) {
  if (isTypeLegal(Ty, VT)) return true;

  // If this is a type that can be sign or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}
// Computes the Reg+Offset to get to an object.
bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Reg,
                                      int &Offset) {
  // Some boilerplate from the X86 FastISel.
  const User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks; it's possible we haven't
    // visited them yet, so the instructions may not yet be assigned
    // virtual registers.
    if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB)
      return false;
    Opcode = I->getOpcode();
    U = I;
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }
  (void)U;

  if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
    default: break;
    case Instruction::Alloca: {
      assert(false && "Alloca should have been handled earlier!");
      return false;
    }
  }

  // FIXME: Handle global variables.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
    (void)GV;
    return false;
  }

  // Try to get this in a register if nothing else has worked.
  Reg = getRegForValue(Obj);
  if (Reg == 0) return false;

  // Since the offset may be too large for the load instruction
  // get the reg+offset into a register.
  // TODO: Verify the additions work, otherwise we'll need to add the
  // offset instead of 0 to the instructions and do all sorts of operand
  // munging.
  // TODO: Optimize this somewhat.
  ARMCC::CondCodes Pred = ARMCC::AL;
  unsigned PredReg = 0;

  if (!isThumb)
    emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            Reg, Reg, Offset, Pred, PredReg,
                            static_cast<const ARMBaseInstrInfo&>(TII));
  else {
    assert(AFI->isThumb2Function());
    emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           Reg, Reg, Offset, Pred, PredReg,
                           static_cast<const ARMBaseInstrInfo&>(TII));
  }
  return true;
}
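// Try to emit the load directly off a static alloca's frame index, letting
// loadRegFromStackSlot pick the right load for the register class.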
bool ARMFastISel::ARMLoadAlloca(const Instruction *I, EVT VT) {
  Value *Op0 = I->getOperand(0);

  // Verify it's an alloca.
  if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op0)) {
    DenseMap<const AllocaInst*, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);

    if (SI != FuncInfo.StaticAllocaMap.end()) {
      TargetRegisterClass* RC = TLI.getRegClassFor(VT);
      unsigned ResultReg = createResultReg(RC);
      TII.loadRegFromStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
                               ResultReg, SI->second, RC,
                               TM.getRegisterInfo());
      UpdateValueMap(I, ResultReg);
      return true;
    }
  }
  return false;
}
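// Emit a load of VT from Reg+Offset into ResultReg, choosing the opcode and
// operand order by type and by whether we're in ARM or Thumb mode.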
bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg,
                              unsigned Reg, int Offset) {

  assert(VT.isSimple() && "Non-simple types are invalid here!");
  unsigned Opc;
  bool isFloat = false;
  switch (VT.getSimpleVT().SimpleTy) {
    default:
      // This is mostly going to be Neon/vector support.
      return false;
    case MVT::i16:
      Opc = isThumb ? ARM::tLDRH : ARM::LDRH;
      break;
    case MVT::i8:
      Opc = isThumb ? ARM::tLDRB : ARM::LDRB;
      break;
    case MVT::i32:
      Opc = isThumb ? ARM::tLDR : ARM::LDR;
      break;
    case MVT::f32:
      Opc = ARM::VLDRS;
      isFloat = true;
      break;
    case MVT::f64:
      Opc = ARM::VLDRD;
      isFloat = true;
      break;
  }

  ResultReg = createResultReg(TLI.getRegClassFor(VT));

  // TODO: Fix the Addressing modes so that these can share some code.
  // Since this is a Thumb1 load this will work in Thumb1 or 2 mode.
  // The thumb addressing mode has operands swapped from the arm addressing
  // mode, the floating point one only has two operands.
  if (isFloat)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                    .addReg(Reg).addImm(Offset));
  else if (isThumb)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                    .addReg(Reg).addImm(Offset).addReg(0));
  else
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                    .addReg(Reg).addReg(0).addImm(Offset));
  return true;
}
bool ARMFastISel::SelectLoad(const Instruction *I) {
  // Verify we have a legal type before going any further.
  EVT VT;
  if (!isLoadTypeLegal(I->getType(), VT))
    return false;

  // If we're an alloca we know we have a frame index and can emit the load
  // directly in short order.
  if (ARMLoadAlloca(I, VT))
    return true;

  // Our register and offset with innocuous defaults.
  unsigned Reg = 0;
  int Offset = 0;

  // See if we can handle this as Reg + Offset
  if (!ARMComputeRegOffset(I->getOperand(0), Reg, Offset))
    return false;

  unsigned ResultReg;
  if (!ARMEmitLoad(VT, ResultReg, Reg, Offset /* 0 */)) return false;

  UpdateValueMap(I, ResultReg);
  return true;
}
bool ARMFastISel::ARMStoreAlloca(const Instruction *I, unsigned SrcReg, EVT VT){
  Value *Op1 = I->getOperand(1);

  // Verify it's an alloca.
  if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
    DenseMap<const AllocaInst*, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);

    if (SI != FuncInfo.StaticAllocaMap.end()) {
      TargetRegisterClass* RC = TLI.getRegClassFor(VT);
      assert(SrcReg != 0 && "Nothing to store!");
      TII.storeRegToStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
                              SrcReg, true /*isKill*/, SI->second, RC,
                              TM.getRegisterInfo());
      return true;
    }
  }
  return false;
}
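// Emit a store of SrcReg to DstReg+Offset, mirroring ARMEmitLoad's opcode and
// addressing-mode selection.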
bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg,
                               unsigned DstReg, int Offset) {
  unsigned StrOpc;
  bool isFloat = false;
  switch (VT.getSimpleVT().SimpleTy) {
    default: return false;
    case MVT::i1:
    case MVT::i8: StrOpc = isThumb ? ARM::tSTRB : ARM::STRB; break;
    case MVT::i16: StrOpc = isThumb ? ARM::tSTRH : ARM::STRH; break;
    case MVT::i32: StrOpc = isThumb ? ARM::tSTR : ARM::STR; break;
    case MVT::f32:
      if (!Subtarget->hasVFP2()) return false;
      StrOpc = ARM::VSTRS;
      isFloat = true;
      break;
    case MVT::f64:
      if (!Subtarget->hasVFP2()) return false;
      StrOpc = ARM::VSTRD;
      isFloat = true;
      break;
  }

  // The thumb addressing mode has operands swapped from the arm addressing
  // mode, the floating point one only has two operands.
  if (isFloat)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(StrOpc), SrcReg)
                    .addReg(DstReg).addImm(Offset));
  else if (isThumb)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(StrOpc), SrcReg)
                    .addReg(DstReg).addImm(Offset).addReg(0));
  else
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(StrOpc), SrcReg)
                    .addReg(DstReg).addReg(0).addImm(Offset));

  return true;
}
bool ARMFastISel::SelectStore(const Instruction *I) {
  Value *Op0 = I->getOperand(0);
  unsigned SrcReg = 0;

  // Yay type legalization
  EVT VT;
  if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Get the value to be stored into a register.
  SrcReg = getRegForValue(Op0);
  if (SrcReg == 0)
    return false;

  // If we're an alloca we know we have a frame index and can emit the store
  // quickly.
  if (ARMStoreAlloca(I, SrcReg, VT))
    return true;

  // Our register and offset with innocuous defaults.
  unsigned Reg = 0;
  int Offset = 0;

  // See if we can handle this as Reg + Offset
  if (!ARMComputeRegOffset(I->getOperand(1), Reg, Offset))
    return false;

  if (!ARMEmitStore(VT, SrcReg, Reg, Offset /* 0 */)) return false;

  return true;
}
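// Map an IR comparison predicate onto the ARM condition code we will select
// on. Predicates that would need two compares report ARMCC::AL so callers
// can bail out.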
static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
  switch (Pred) {
    // Needs two compares...
    case CmpInst::FCMP_ONE:
    case CmpInst::FCMP_UEQ:
    default:
      assert(false && "Unhandled CmpInst::Predicate!");
      return ARMCC::AL;
    case CmpInst::ICMP_EQ:
    case CmpInst::FCMP_OEQ:
      return ARMCC::EQ;
    case CmpInst::ICMP_SGT:
    case CmpInst::FCMP_OGT:
      return ARMCC::GT;
    case CmpInst::ICMP_SGE:
    case CmpInst::FCMP_OGE:
      return ARMCC::GE;
    case CmpInst::ICMP_UGT:
    case CmpInst::FCMP_UGT:
      return ARMCC::HI;
    case CmpInst::FCMP_OLT:
      return ARMCC::MI;
    case CmpInst::ICMP_ULE:
    case CmpInst::FCMP_OLE:
      return ARMCC::LS;
    case CmpInst::FCMP_ORD:
      return ARMCC::VC;
    case CmpInst::FCMP_UNO:
      return ARMCC::VS;
    case CmpInst::FCMP_UGE:
      return ARMCC::PL;
    case CmpInst::ICMP_SLT:
    case CmpInst::FCMP_ULT:
      return ARMCC::LT;
    case CmpInst::ICMP_SLE:
    case CmpInst::FCMP_ULE:
      return ARMCC::LE;
    case CmpInst::FCMP_UNE:
    case CmpInst::ICMP_NE:
      return ARMCC::NE;
    case CmpInst::ICMP_UGE:
      return ARMCC::HS;
    case CmpInst::ICMP_ULT:
      return ARMCC::LO;
  }
}
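// Branches are lowered naively for now: materialize the i1 condition,
// compare it against 1 to set the flags, and emit a conditional branch on EQ;
// the fall-through edge goes through FastEmitBranch.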
bool ARMFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Simple branch support.
  // TODO: Try to avoid the re-computation in some places.
  unsigned CondReg = getRegForValue(BI->getCondition());
  if (CondReg == 0) return false;

  // Re-set the flags just in case.
  unsigned CmpOpc = isThumb ? ARM::t2CMPri : ARM::CMPri;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
                  .addReg(CondReg).addImm(1));

  unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
    .addMBB(TBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
  FastEmitBranch(FBB, DL);
  FuncInfo.MBB->addSuccessor(TBB);
  return true;
}
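// Lower icmp/fcmp: emit the compare, then turn the resulting condition flags
// into a 0/1 value with a predicated move so the result can live in a GPR.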
bool ARMFastISel::SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  EVT VT;
  const Type *Ty = CI->getOperand(0)->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
  if (isFloat && !Subtarget->hasVFP2())
    return false;

  unsigned CmpOpc;
  unsigned CondReg;
  switch (VT.getSimpleVT().SimpleTy) {
    default: return false;
    // TODO: Verify compares.
    case MVT::f32:
      CmpOpc = ARM::VCMPES;
      CondReg = ARM::FPSCR;
      break;
    case MVT::f64:
      CmpOpc = ARM::VCMPED;
      CondReg = ARM::FPSCR;
      break;
    case MVT::i32:
      CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
      CondReg = ARM::CPSR;
      break;
  }

  // Get the compare predicate.
  ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());

  // We may not handle every CC for now.
  if (ARMPred == ARMCC::AL) return false;

  unsigned Arg1 = getRegForValue(CI->getOperand(0));
  if (Arg1 == 0) return false;

  unsigned Arg2 = getRegForValue(CI->getOperand(1));
  if (Arg2 == 0) return false;

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
                  .addReg(Arg1).addReg(Arg2));

  // For floating point we need to move the result to a comparison register
  // that we can then use for branches.
  if (isFloat)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::FMSTAT)));

  // Now set a register based on the comparison. Explicitly set the predicates
  // here.
  unsigned MovCCOpc = isThumb ? ARM::tMOVCCi : ARM::MOVCCi;
  unsigned DestReg = createResultReg(ARM::GPRRegisterClass);
  Constant *Zero
    = ConstantInt::get(Type::getInt32Ty(*Context), 0);
  unsigned ZeroReg = TargetMaterializeConstant(Zero);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
          .addReg(ZeroReg).addImm(1)
          .addImm(ARMPred).addReg(CondReg);

  UpdateValueMap(I, DestReg);
  return true;
}
bool ARMFastISel::SelectFPExt(const Instruction *I) {
  // Make sure we have VFP and that we're extending float to double.
  if (!Subtarget->hasVFP2()) return false;

  Value *V = I->getOperand(0);
  if (!I->getType()->isDoubleTy() ||
      !V->getType()->isFloatTy()) return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0) return false;

  unsigned Result = createResultReg(ARM::DPRRegisterClass);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VCVTDS), Result)
                  .addReg(Op));
  UpdateValueMap(I, Result);
  return true;
}
bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
  // Make sure we have VFP and that we're truncating double to float.
  if (!Subtarget->hasVFP2()) return false;

  Value *V = I->getOperand(0);
  if (!I->getType()->isFloatTy() ||
      !V->getType()->isDoubleTy()) return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0) return false;

  unsigned Result = createResultReg(ARM::SPRRegisterClass);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VCVTSD), Result)
                  .addReg(Op));
  UpdateValueMap(I, Result);
  return true;
}
bool ARMFastISel::SelectSIToFP(const Instruction *I) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2()) return false;

  EVT DstVT;
  const Type *Ty = I->getType();
  if (!isTypeLegal(Ty, DstVT))
    return false;

  unsigned Op = getRegForValue(I->getOperand(0));
  if (Op == 0) return false;

  // The conversion routine works on fp-reg to fp-reg and the operand above
  // was an integer, move it to the fp registers if possible.
  unsigned FP = ARMMoveToFPReg(DstVT, Op);
  if (FP == 0) return false;

  unsigned Opc;
  if (Ty->isFloatTy()) Opc = ARM::VSITOS;
  else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
  else return false;

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          ResultReg)
                  .addReg(FP));
  UpdateValueMap(I, ResultReg);
  return true;
}
bool ARMFastISel::SelectFPToSI(const Instruction *I) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2()) return false;

  EVT DstVT;
  const Type *RetTy = I->getType();
  if (!isTypeLegal(RetTy, DstVT))
    return false;

  unsigned Op = getRegForValue(I->getOperand(0));
  if (Op == 0) return false;

  unsigned Opc;
  const Type *OpTy = I->getOperand(0)->getType();
  if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
  else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
  else return false;
  EVT OpVT = TLI.getValueType(OpTy, true);

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(OpVT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          ResultReg)
                  .addReg(Op));

  // This result needs to be in an integer register, but the conversion only
  // takes place in fp-regs.
  unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
  if (IntReg == 0) return false;

  UpdateValueMap(I, IntReg);
  return true;
}
bool ARMFastISel::SelectSDiv(const Instruction *I) {
  EVT VT;
  const Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  // If we have integer div support we should have selected this automagically.
  // In case we have a real miss go ahead and return false and we'll pick
  // it up later.
  if (Subtarget->hasDivide()) return false;

  // Otherwise emit a libcall.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (VT == MVT::i16)
    LC = RTLIB::SDIV_I16;
  else if (VT == MVT::i32)
    LC = RTLIB::SDIV_I32;
  else if (VT == MVT::i64)
    LC = RTLIB::SDIV_I64;
  else if (VT == MVT::i128)
    LC = RTLIB::SDIV_I128;
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");

  return ARMEmitLibcall(I, LC);
}
bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
  EVT VT = TLI.getValueType(I->getType(), true);

  // We can get here in the case when we want to use NEON for our fp
  // operations, but can't figure out how to. Just use the vfp instructions
  // if we have them.
  // FIXME: It'd be nice to use NEON instructions.
  const Type *Ty = I->getType();
  bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
  if (isFloat && !Subtarget->hasVFP2())
    return false;

  unsigned Op1 = getRegForValue(I->getOperand(0));
  if (Op1 == 0) return false;

  unsigned Op2 = getRegForValue(I->getOperand(1));
  if (Op2 == 0) return false;

  unsigned Opc;
  bool is64bit = VT.getSimpleVT().SimpleTy == MVT::f64 ||
                 VT.getSimpleVT().SimpleTy == MVT::i64;
  switch (ISDOpcode) {
    default: return false;
    case ISD::FADD:
      Opc = is64bit ? ARM::VADDD : ARM::VADDS;
      break;
    case ISD::FSUB:
      Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
      break;
    case ISD::FMUL:
      Opc = is64bit ? ARM::VMULD : ARM::VMULS;
      break;
  }
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(Opc), ResultReg)
                  .addReg(Op1).addReg(Op2));
  UpdateValueMap(I, ResultReg);
  return true;
}
// Call Handling Code

// This is largely taken directly from CCAssignFnForNode - we don't support
// varargs in FastISel so that part has been removed.
// TODO: We may not support all of this.
CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    // Use target triple & subtarget features to do actual dispatch.
    if (Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2() &&
          FloatABIType == FloatABI::Hard)
        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
    } else
      return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  }
}
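// Walk the outgoing arguments: run them through the calling convention,
// emit CALLSEQ_START for the stack adjustment, and copy register-assigned
// arguments into their physregs. Stack-passed arguments are rejected for now.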
bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                                  SmallVectorImpl<unsigned> &ArgRegs,
                                  SmallVectorImpl<EVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<unsigned> &RegArgs,
                                  CallingConv::ID CC,
                                  unsigned &NumBytes) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, TM, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown))
    .addImm(NumBytes);

  // Process the args.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    unsigned Arg = ArgRegs[VA.getValNo()];
    EVT ArgVT = ArgVTs[VA.getValNo()];

    // Handle arg promotion, etc.
    switch (VA.getLocInfo()) {
      case CCValAssign::Full: break;
      default:
        assert(false && "Handle arg promotion.");
        return false;
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              VA.getLocReg()).addReg(Arg);
      RegArgs.push_back(VA.getLocReg());
    } else {
      // We don't handle stack arguments yet.
      return false;
    }
  }

  return true;
}
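// Undo the call frame adjustment and copy the call's results out of their
// assigned physical registers, stitching a two-register f64 return back
// together with VMOVDRR.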
bool ARMFastISel::FinishCall(EVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                             const Instruction *I, CallingConv::ID CC,
                             unsigned &NumBytes) {
  // Issue CALLSEQ_END
  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
    .addImm(NumBytes).addImm(0);

  // Now the return value.
  if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, TM, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));

    // Copy all of the result registers out of their specified physreg.
    if (RVLocs.size() == 2 && RetVT.getSimpleVT().SimpleTy == MVT::f64) {
      // For this move we copy into two registers and then move into the
      // double fp reg we want.
      // TODO: Are the copies necessary?
      TargetRegisterClass *CopyRC = TLI.getRegClassFor(MVT::i32);
      unsigned Copy1 = createResultReg(CopyRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              Copy1).addReg(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[0].getLocReg());

      unsigned Copy2 = createResultReg(CopyRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              Copy2).addReg(RVLocs[1].getLocReg());
      UsedRegs.push_back(RVLocs[1].getLocReg());

      EVT DestVT = RVLocs[0].getValVT();
      TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
      unsigned ResultReg = createResultReg(DstRC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(ARM::VMOVDRR), ResultReg)
                      .addReg(Copy1).addReg(Copy2));

      // Finally update the result.
      UpdateValueMap(I, ResultReg);
    } else {
      assert(RVLocs.size() == 1 && "Can't handle non-double multi-reg retvals!");
      EVT CopyVT = RVLocs[0].getValVT();
      TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);

      unsigned ResultReg = createResultReg(DstRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              ResultReg).addReg(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[0].getLocReg());

      // Finally update the result.
      UpdateValueMap(I, ResultReg);
    }
  }

  return true;
}
// A quick function that will emit a call for a named libcall in F with the
// vector of passed arguments for the Instruction in I. We can assume that we
// can emit a call for any libcall we can produce. This is an abridged version
// of the full call infrastructure since we won't need to worry about things
// like computed function pointers or strange arguments at call sites.
// TODO: Try to unify this and the normal call bits for ARM, then try to unify
// with X86.
bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
  CallingConv::ID CC = TLI.getLibcallCallingConv(Call);

  // Handle *simple* calls for now.
  const Type *RetTy = I->getType();
  EVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // For now we're using BLX etc on the assumption that we have v5t ops.
  if (!Subtarget->hasV5TOps()) return false;

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<EVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(I->getNumOperands());
  ArgRegs.reserve(I->getNumOperands());
  ArgVTs.reserve(I->getNumOperands());
  ArgFlags.reserve(I->getNumOperands());
  for (unsigned i = 0; i < I->getNumOperands(); ++i) {
    Value *Op = I->getOperand(i);
    unsigned Arg = getRegForValue(Op);
    if (Arg == 0) return false;

    const Type *ArgTy = Op->getType();
    EVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT)) return false;

    ISD::ArgFlagsTy Flags;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(Op);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
    return false;

  // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
  // TODO: Turn this into the table of arm call ops.
  MachineInstrBuilder MIB;
  unsigned CallOpc;
  if (isThumb)
    CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
  else
    CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
        .addExternalSymbol(TLI.getLibcallName(Call));

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Finish off the call including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}
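// Select a direct call to a global function. Inline asm, intrinsics, varargs,
// and any argument attributes we can't honor yet all punt back to SelectionDAG.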
bool ARMFastISel::SelectCall(const Instruction *I) {
  const CallInst *CI = cast<CallInst>(I);
  const Value *Callee = CI->getCalledValue();

  // Can't handle inline asm or worry about intrinsics yet.
  if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;

  // Only handle global variable Callees
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  if (!GV) return false;

  // Check the calling convention.
  ImmutableCallSite CS(CI);
  CallingConv::ID CC = CS.getCallingConv();
  // TODO: Avoid some calling conventions?
  if (CC != CallingConv::C) {
    errs() << "Can't handle calling convention: " << CC << "\n";
    return false;
  }

  // Let SDISel handle vararg functions.
  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  if (FTy->isVarArg())
    return false;

  // Handle *simple* calls for now.
  const Type *RetTy = I->getType();
  EVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // For now we're using BLX etc on the assumption that we have v5t ops.
  if (!Subtarget->hasV5TOps()) return false;

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<EVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(CS.arg_size());
  ArgRegs.reserve(CS.arg_size());
  ArgVTs.reserve(CS.arg_size());
  ArgFlags.reserve(CS.arg_size());
  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    unsigned Arg = getRegForValue(*i);
    if (Arg == 0)
      return false;

    ISD::ArgFlagsTy Flags;
    unsigned AttrInd = i - CS.arg_begin() + 1;
    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
      Flags.setSExt();
    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
        CS.paramHasAttr(AttrInd, Attribute::ByVal))
      return false;

    const Type *ArgTy = (*i)->getType();
    EVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT))
      return false;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(*i);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
    return false;

  // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
  // TODO: Turn this into the table of arm call ops.
  MachineInstrBuilder MIB;
  unsigned CallOpc;
  if (isThumb)
    CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
  else
    CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
        .addGlobalAddress(GV, 0, 0);

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Finish off the call including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}
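// Entry point for the FastISel pass: try to select I with one of the routines
// above, returning false to fall back to SelectionDAG for anything unhandled.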
// TODO: SoftFP support.
bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
  // No Thumb-1 for now.
  if (isThumb && !AFI->isThumb2Function()) return false;

  switch (I->getOpcode()) {
    case Instruction::Load:
      return SelectLoad(I);
    case Instruction::Store:
      return SelectStore(I);
    case Instruction::Br:
      return SelectBranch(I);
    case Instruction::ICmp:
    case Instruction::FCmp:
      return SelectCmp(I);
    case Instruction::FPExt:
      return SelectFPExt(I);
    case Instruction::FPTrunc:
      return SelectFPTrunc(I);
    case Instruction::SIToFP:
      return SelectSIToFP(I);
    case Instruction::FPToSI:
      return SelectFPToSI(I);
    case Instruction::FAdd:
      return SelectBinaryOp(I, ISD::FADD);
    case Instruction::FSub:
      return SelectBinaryOp(I, ISD::FSUB);
    case Instruction::FMul:
      return SelectBinaryOp(I, ISD::FMUL);
    case Instruction::SDiv:
      return SelectSDiv(I);
    case Instruction::Call:
      return SelectCall(I);
    default: break;
  }
  return false;
}
namespace llvm {
  llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
    if (EnableARMFastISel) return new ARMFastISel(funcInfo);
    return 0;
  }
}