lib/Target/ARM/ARMFastISel.cpp

   1 //===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the ARM-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // ARMGenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "ARM.h"
  17 #include "ARMAddressingModes.h"
  18 #include "ARMBaseInstrInfo.h"
  19 #include "ARMCallingConv.h"
  20 #include "ARMRegisterInfo.h"
  21 #include "ARMTargetMachine.h"
  22 #include "ARMSubtarget.h"
  23 #include "ARMConstantPoolValue.h"
  24 #include "llvm/CallingConv.h"
  25 #include "llvm/DerivedTypes.h"
  26 #include "llvm/GlobalVariable.h"
  27 #include "llvm/Instructions.h"
  28 #include "llvm/IntrinsicInst.h"
  29 #include "llvm/Module.h"
  30 #include "llvm/Operator.h"
  31 #include "llvm/CodeGen/Analysis.h"
  32 #include "llvm/CodeGen/FastISel.h"
  33 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  34 #include "llvm/CodeGen/MachineInstrBuilder.h"
  35 #include "llvm/CodeGen/MachineModuleInfo.h"
  36 #include "llvm/CodeGen/MachineConstantPool.h"
  37 #include "llvm/CodeGen/MachineFrameInfo.h"
  38 #include "llvm/CodeGen/MachineMemOperand.h"
  39 #include "llvm/CodeGen/MachineRegisterInfo.h"
  40 #include "llvm/CodeGen/PseudoSourceValue.h"
  41 #include "llvm/Support/CallSite.h"
  42 #include "llvm/Support/CommandLine.h"
  43 #include "llvm/Support/ErrorHandling.h"
  44 #include "llvm/Support/GetElementPtrTypeIterator.h"
  45 #include "llvm/Target/TargetData.h"
  46 #include "llvm/Target/TargetInstrInfo.h"
  47 #include "llvm/Target/TargetLowering.h"
  48 #include "llvm/Target/TargetMachine.h"
  49 #include "llvm/Target/TargetOptions.h"
  50 using namespace llvm;
  51
  52 static cl::opt<bool>
  53 DisableARMFastISel("disable-arm-fast-isel",
  54                     cl::desc("Turn off experimental ARM fast-isel support"),
  55                     cl::init(false), cl::Hidden);
  56
  57 extern cl::opt<bool> EnableARMLongCalls;
  58
  59 namespace {
  60
  // Address - A memory address expressed as a base plus a constant offset.
  // The base is stored in a union and tagged by BaseType.
  struct Address {
    // Tag for the Base union below; going by the names, RegBase pairs with
    // Base.Reg and FrameIndexBase with Base.FI.
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    // The base itself: a register number or a frame index.
    union {
      unsigned Reg;
      int FI;
    } Base;

    // Constant displacement added to the base.
    int Offset;

    // A default address is register-based, with register 0 and offset 0.
    Address() : BaseType(RegBase), Offset(0) { Base.Reg = 0; }
  };
  81
  82 class ARMFastISel : public FastISel {
  83
  84   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  85   /// make the right decision when generating code for different targets.
  86   const ARMSubtarget *Subtarget;
  87   const TargetMachine &TM;
  88   const TargetInstrInfo &TII;
  89   const TargetLowering &TLI;
  90   ARMFunctionInfo *AFI;
  91
  92   // Convenience variables to avoid some queries.
  93   bool isThumb;
  94   LLVMContext *Context;
  95
  96   public:
  97     explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
  98     : FastISel(funcInfo),
  99       TM(funcInfo.MF->getTarget()),
 100       TII(*TM.getInstrInfo()),
 101       TLI(*TM.getTargetLowering()) {
 102       Subtarget = &TM.getSubtarget<ARMSubtarget>();
 103       AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
 104       isThumb = AFI->isThumbFunction();
 105       Context = &funcInfo.Fn->getContext();
 106     }
 107
 108     // Code from FastISel.cpp.
 109     virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
 110                                    const TargetRegisterClass *RC);
 111     virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
 112                                     const TargetRegisterClass *RC,
 113                                     unsigned Op0, bool Op0IsKill);
 114     virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
 115                                      const TargetRegisterClass *RC,
 116                                      unsigned Op0, bool Op0IsKill,
 117                                      unsigned Op1, bool Op1IsKill);
 118     virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
 119                                       const TargetRegisterClass *RC,
 120                                       unsigned Op0, bool Op0IsKill,
 121                                       unsigned Op1, bool Op1IsKill,
 122                                       unsigned Op2, bool Op2IsKill);
 123     virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
 124                                      const TargetRegisterClass *RC,
 125                                      unsigned Op0, bool Op0IsKill,
 126                                      uint64_t Imm);
 127     virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
 128                                      const TargetRegisterClass *RC,
 129                                      unsigned Op0, bool Op0IsKill,
 130                                      const ConstantFP *FPImm);
 131     virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
 132                                       const TargetRegisterClass *RC,
 133                                       unsigned Op0, bool Op0IsKill,
 134                                       unsigned Op1, bool Op1IsKill,
 135                                       uint64_t Imm);
 136     virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
 137                                     const TargetRegisterClass *RC,
 138                                     uint64_t Imm);
 139     virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
 140                                      const TargetRegisterClass *RC,
 141                                      uint64_t Imm1, uint64_t Imm2);
 142
 143     virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
 144                                                 unsigned Op0, bool Op0IsKill,
 145                                                 uint32_t Idx);
 146
 147     // Backend specific FastISel code.
 148     virtual bool TargetSelectInstruction(const Instruction *I);
 149     virtual unsigned TargetMaterializeConstant(const Constant *C);
 150     virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
 151
 152   #include "ARMGenFastISel.inc"
 153
 154     // Instruction selection routines.
 155   private:
 156     bool SelectLoad(const Instruction *I);
 157     bool SelectStore(const Instruction *I);
 158     bool SelectBranch(const Instruction *I);
 159     bool SelectCmp(const Instruction *I);
 160     bool SelectFPExt(const Instruction *I);
 161     bool SelectFPTrunc(const Instruction *I);
 162     bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
 163     bool SelectSIToFP(const Instruction *I);
 164     bool SelectFPToSI(const Instruction *I);
 165     bool SelectSDiv(const Instruction *I);
 166     bool SelectSRem(const Instruction *I);
 167     bool SelectCall(const Instruction *I);
 168     bool SelectSelect(const Instruction *I);
 169     bool SelectRet(const Instruction *I);
 170
 171     // Utility routines.
 172   private:
 173     bool isTypeLegal(const Type *Ty, MVT &VT);
 174     bool isLoadTypeLegal(const Type *Ty, MVT &VT);
 175     bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr);
 176     bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
 177     bool ARMComputeAddress(const Value *Obj, Address &Addr);
 178     void ARMSimplifyAddress(Address &Addr, EVT VT);
 179     unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
 180     unsigned ARMMaterializeInt(const Constant *C, EVT VT);
 181     unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
 182     unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
 183     unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
 184     unsigned ARMSelectCallOp(const GlobalValue *GV);
 185
 186     // Call handling routines.
 187   private:
 188     bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
 189                         unsigned &ResultReg);
 190     CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
 191     bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
 192                          SmallVectorImpl<unsigned> &ArgRegs,
 193                          SmallVectorImpl<MVT> &ArgVTs,
 194                          SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
 195                          SmallVectorImpl<unsigned> &RegArgs,
 196                          CallingConv::ID CC,
 197                          unsigned &NumBytes);
 198     bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
 199                     const Instruction *I, CallingConv::ID CC,
 200                     unsigned &NumBytes);
 201     bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
 202
 203     // OptionalDef handling routines.
 204   private:
 205     bool isARMNEONPred(const MachineInstr *MI);
 206     bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
 207     const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
 208     void AddLoadStoreOperands(EVT VT, Address &Addr,
 209                               const MachineInstrBuilder &MIB);
 210 };
 211
 212 } // end anonymous namespace
 213
 214 #include "ARMGenCallingConv.inc"
 215
 216 // DefinesOptionalPredicate - This is different from DefinesPredicate in that
 217 // we don't care about implicit defs here, just places we'll need to add a
 218 // default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
 219 bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
 220   const TargetInstrDesc &TID = MI->getDesc();
 221   if (!TID.hasOptionalDef())
 222     return false;
 223
 224   // Look to see if our OptionalDef is defining CPSR or CCR.
 225   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
 226     const MachineOperand &MO = MI->getOperand(i);
 227     if (!MO.isReg() || !MO.isDef()) continue;
 228     if (MO.getReg() == ARM::CPSR)
 229       *CPSR = true;
 230   }
 231   return true;
 232 }
 233
 234 bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
 235   const TargetInstrDesc &TID = MI->getDesc();
 236
 237   // If we're a thumb2 or not NEON function we were handled via isPredicable.
 238   if ((TID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
 239        AFI->isThumb2Function())
 240     return false;
 241
 242   for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i)
 243     if (TID.OpInfo[i].isPredicate())
 244       return true;
 245
 246   return false;
 247 }
 248
 249 // If the machine is predicable go ahead and add the predicate operands, if
 250 // it needs default CC operands add those.
 251 // TODO: If we want to support thumb1 then we'll need to deal with optional
 252 // CPSR defs that need to be added before the remaining operands. See s_cc_out
 253 // for descriptions why.
 254 const MachineInstrBuilder &
 255 ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
 256   MachineInstr *MI = &*MIB;
 257
 258   // Do we use a predicate? or...
 259   // Are we NEON in ARM mode and have a predicate operand? If so, I know
 260   // we're not predicable but add it anyways.
 261   if (TII.isPredicable(MI) || isARMNEONPred(MI))
 262     AddDefaultPred(MIB);
 263
 264   // Do we optionally set a predicate?  Preds is size > 0 iff the predicate
 265   // defines CPSR. All other OptionalDefines in ARM are the CCR register.
 266   bool CPSR = false;
 267   if (DefinesOptionalPredicate(MI, &CPSR)) {
 268     if (CPSR)
 269       AddDefaultT1CC(MIB);
 270     else
 271       AddDefaultCC(MIB);
 272   }
 273   return MIB;
 274 }
 275
 276 unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
 277                                     const TargetRegisterClass* RC) {
 278   unsigned ResultReg = createResultReg(RC);
 279   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 280
 281   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
 282   return ResultReg;
 283 }
 284
 285 unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
 286                                      const TargetRegisterClass *RC,
 287                                      unsigned Op0, bool Op0IsKill) {
 288   unsigned ResultReg = createResultReg(RC);
 289   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 290
 291   if (II.getNumDefs() >= 1)
 292     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
 293                    .addReg(Op0, Op0IsKill * RegState::Kill));
 294   else {
 295     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
 296                    .addReg(Op0, Op0IsKill * RegState::Kill));
 297     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 298                    TII.get(TargetOpcode::COPY), ResultReg)
 299                    .addReg(II.ImplicitDefs[0]));
 300   }
 301   return ResultReg;
 302 }
 303
 304 unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
 305                                       const TargetRegisterClass *RC,
 306                                       unsigned Op0, bool Op0IsKill,
 307                                       unsigned Op1, bool Op1IsKill) {
 308   unsigned ResultReg = createResultReg(RC);
 309   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 310
 311   if (II.getNumDefs() >= 1)
 312     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
 313                    .addReg(Op0, Op0IsKill * RegState::Kill)
 314                    .addReg(Op1, Op1IsKill * RegState::Kill));
 315   else {
 316     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
 317                    .addReg(Op0, Op0IsKill * RegState::Kill)
 318                    .addReg(Op1, Op1IsKill * RegState::Kill));
 319     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 320                            TII.get(TargetOpcode::COPY), ResultReg)
 321                    .addReg(II.ImplicitDefs[0]));
 322   }
 323   return ResultReg;
 324 }
 325
 326 unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
 327                                        const TargetRegisterClass *RC,
 328                                        unsigned Op0, bool Op0IsKill,
 329                                        unsigned Op1, bool Op1IsKill,
 330                                        unsigned Op2, bool Op2IsKill) {
 331   unsigned ResultReg = createResultReg(RC);
 332   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 333
 334   if (II.getNumDefs() >= 1)
 335     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
 336                    .addReg(Op0, Op0IsKill * RegState::Kill)
 337                    .addReg(Op1, Op1IsKill * RegState::Kill)
 338                    .addReg(Op2, Op2IsKill * RegState::Kill));
 339   else {
 340     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
 341                    .addReg(Op0, Op0IsKill * RegState::Kill)
 342                    .addReg(Op1, Op1IsKill * RegState::Kill)
 343                    .addReg(Op2, Op2IsKill * RegState::Kill));
 344     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 345                            TII.get(TargetOpcode::COPY), ResultReg)
 346                    .addReg(II.ImplicitDefs[0]));
 347   }
 348   return ResultReg;
 349 }
 350
 351 unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
 352                                       const TargetRegisterClass *RC,
 353                                       unsigned Op0, bool Op0IsKill,
 354                                       uint64_t Imm) {
 355   unsigned ResultReg = createResultReg(RC);
 356   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 357
 358   if (II.getNumDefs() >= 1)
 359     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
 360                    .addReg(Op0, Op0IsKill * RegState::Kill)
 361                    .addImm(Imm));
 362   else {
 363     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
 364                    .addReg(Op0, Op0IsKill * RegState::Kill)
 365                    .addImm(Imm));
 366     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 367                            TII.get(TargetOpcode::COPY), ResultReg)
 368                    .addReg(II.ImplicitDefs[0]));
 369   }
 370   return ResultReg;
 371 }
 372
 373 unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
 374                                       const TargetRegisterClass *RC,
 375                                       unsigned Op0, bool Op0IsKill,
 376                                       const ConstantFP *FPImm) {
 377   unsigned ResultReg = createResultReg(RC);
 378   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 379
 380   if (II.getNumDefs() >= 1)
 381     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
 382                    .addReg(Op0, Op0IsKill * RegState::Kill)
 383                    .addFPImm(FPImm));
 384   else {
 385     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
 386                    .addReg(Op0, Op0IsKill * RegState::Kill)
 387                    .addFPImm(FPImm));
 388     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 389                            TII.get(TargetOpcode::COPY), ResultReg)
 390                    .addReg(II.ImplicitDefs[0]));
 391   }
 392   return ResultReg;
 393 }
 394
 395 unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
 396                                        const TargetRegisterClass *RC,
 397                                        unsigned Op0, bool Op0IsKill,
 398                                        unsigned Op1, bool Op1IsKill,
 399                                        uint64_t Imm) {
 400   unsigned ResultReg = createResultReg(RC);
 401   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 402
 403   if (II.getNumDefs() >= 1)
 404     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
 405                    .addReg(Op0, Op0IsKill * RegState::Kill)
 406                    .addReg(Op1, Op1IsKill * RegState::Kill)
 407                    .addImm(Imm));
 408   else {
 409     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
 410                    .addReg(Op0, Op0IsKill * RegState::Kill)
 411                    .addReg(Op1, Op1IsKill * RegState::Kill)
 412                    .addImm(Imm));
 413     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 414                            TII.get(TargetOpcode::COPY), ResultReg)
 415                    .addReg(II.ImplicitDefs[0]));
 416   }
 417   return ResultReg;
 418 }
 419
 420 unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
 421                                      const TargetRegisterClass *RC,
 422                                      uint64_t Imm) {
 423   unsigned ResultReg = createResultReg(RC);
 424   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 425
 426   if (II.getNumDefs() >= 1)
 427     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
 428                    .addImm(Imm));
 429   else {
 430     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
 431                    .addImm(Imm));
 432     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 433                            TII.get(TargetOpcode::COPY), ResultReg)
 434                    .addReg(II.ImplicitDefs[0]));
 435   }
 436   return ResultReg;
 437 }
 438
 439 unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
 440                                       const TargetRegisterClass *RC,
 441                                       uint64_t Imm1, uint64_t Imm2) {
 442   unsigned ResultReg = createResultReg(RC);
 443   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
 444
 445   if (II.getNumDefs() >= 1)
 446     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
 447                     .addImm(Imm1).addImm(Imm2));
 448   else {
 449     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
 450                     .addImm(Imm1).addImm(Imm2));
 451     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 452                             TII.get(TargetOpcode::COPY),
 453                             ResultReg)
 454                     .addReg(II.ImplicitDefs[0]));
 455   }
 456   return ResultReg;
 457 }
 458
 459 unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
 460                                                  unsigned Op0, bool Op0IsKill,
 461                                                  uint32_t Idx) {
 462   unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
 463   assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
 464          "Cannot yet extract from physregs");
 465   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
 466                          DL, TII.get(TargetOpcode::COPY), ResultReg)
 467                  .addReg(Op0, getKillRegState(Op0IsKill), Idx));
 468   return ResultReg;
 469 }
 470
 471 // TODO: Don't worry about 64-bit now, but when this is fixed remove the
 472 // checks from the various callers.
 473 unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
 474   if (VT == MVT::f64) return 0;
 475
 476   unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
 477   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 478                           TII.get(ARM::VMOVRS), MoveReg)
 479                   .addReg(SrcReg));
 480   return MoveReg;
 481 }
 482
 483 unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
 484   if (VT == MVT::i64) return 0;
 485
 486   unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
 487   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 488                           TII.get(ARM::VMOVSR), MoveReg)
 489                   .addReg(SrcReg));
 490   return MoveReg;
 491 }
 492
 493 // For double width floating point we need to materialize two constants
 494 // (the high and the low) into integer registers then use a move to get
 495 // the combined constant into an FP reg.
 496 unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
 497   const APFloat Val = CFP->getValueAPF();
 498   bool is64bit = VT == MVT::f64;
 499
 500   // This checks to see if we can use VFP3 instructions to materialize
 501   // a constant, otherwise we have to go through the constant pool.
 502   if (TLI.isFPImmLegal(Val, VT)) {
 503     unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS;
 504     unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
 505     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
 506                             DestReg)
 507                     .addFPImm(CFP));
 508     return DestReg;
 509   }
 510
 511   // Require VFP2 for loading fp constants.
 512   if (!Subtarget->hasVFP2()) return false;
 513
 514   // MachineConstantPool wants an explicit alignment.
 515   unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
 516   if (Align == 0) {
 517     // TODO: Figure out if this is correct.
 518     Align = TD.getTypeAllocSize(CFP->getType());
 519   }
 520   unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 521   unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
 522   unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
 523
 524   // The extra reg is for addrmode5.
 525   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
 526                           DestReg)
 527                   .addConstantPoolIndex(Idx)
 528                   .addReg(0));
 529   return DestReg;
 530 }
 531
 532 unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
 533
 534   // For now 32-bit only.
 535   if (VT != MVT::i32) return false;
 536
 537   unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
 538
 539   // If we can do this in a single instruction without a constant pool entry
 540   // do so now.
 541   const ConstantInt *CI = cast<ConstantInt>(C);
 542   if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) {
 543     unsigned Opc = isThumb ? ARM::t2MOVi16 : ARM::MOVi16;
 544     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 545                             TII.get(Opc), DestReg)
 546                     .addImm(CI->getSExtValue()));
 547     return DestReg;
 548   }
 549
 550   // MachineConstantPool wants an explicit alignment.
 551   unsigned Align = TD.getPrefTypeAlignment(C->getType());
 552   if (Align == 0) {
 553     // TODO: Figure out if this is correct.
 554     Align = TD.getTypeAllocSize(C->getType());
 555   }
 556   unsigned Idx = MCP.getConstantPoolIndex(C, Align);
 557
 558   if (isThumb)
 559     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 560                             TII.get(ARM::t2LDRpci), DestReg)
 561                     .addConstantPoolIndex(Idx));
 562   else
 563     // The extra immediate is for addrmode2.
 564     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 565                             TII.get(ARM::LDRcp), DestReg)
 566                     .addConstantPoolIndex(Idx)
 567                     .addImm(0));
 568
 569   return DestReg;
 570 }
 571
 572 unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
 573   // For now 32-bit only.
 574   if (VT != MVT::i32) return 0;
 575
 576   Reloc::Model RelocM = TM.getRelocationModel();
 577
 578   // TODO: No external globals for now.
 579   if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) return 0;
 580
 581   // TODO: Need more magic for ARM PIC.
 582   if (!isThumb && (RelocM == Reloc::PIC_)) return 0;
 583
 584   // MachineConstantPool wants an explicit alignment.
 585   unsigned Align = TD.getPrefTypeAlignment(GV->getType());
 586   if (Align == 0) {
 587     // TODO: Figure out if this is correct.
 588     Align = TD.getTypeAllocSize(GV->getType());
 589   }
 590
 591   // Grab index.
 592   unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
 593   unsigned Id = AFI->createPICLabelUId();
 594   ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, Id,
 595                                                        ARMCP::CPValue, PCAdj);
 596   unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
 597
 598   // Load value.
 599   MachineInstrBuilder MIB;
 600   unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
 601   if (isThumb) {
 602     unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
 603     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
 604           .addConstantPoolIndex(Idx);
 605     if (RelocM == Reloc::PIC_)
 606       MIB.addImm(Id);
 607   } else {
 608     // The extra immediate is for addrmode2.
 609     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
 610                   DestReg)
 611           .addConstantPoolIndex(Idx)
 612           .addImm(0);
 613   }
 614   AddOptionalDefs(MIB);
 615   return DestReg;
 616 }
 617
 618 unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
 619   EVT VT = TLI.getValueType(C->getType(), true);
 620
 621   // Only handle simple types.
 622   if (!VT.isSimple()) return 0;
 623
 624   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 625     return ARMMaterializeFP(CFP, VT);
 626   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
 627     return ARMMaterializeGV(GV, VT);
 628   else if (isa<ConstantInt>(C))
 629     return ARMMaterializeInt(C, VT);
 630
 631   return 0;
 632 }
 633
 634 unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
 635   // Don't handle dynamic allocas.
 636   if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
 637
 638   MVT VT;
 639   if (!isLoadTypeLegal(AI->getType(), VT)) return false;
 640
 641   DenseMap<const AllocaInst*, int>::iterator SI =
 642     FuncInfo.StaticAllocaMap.find(AI);
 643
 644   // This will get lowered later into the correct offsets and registers
 645   // via rewriteXFrameIndex.
 646   if (SI != FuncInfo.StaticAllocaMap.end()) {
 647     TargetRegisterClass* RC = TLI.getRegClassFor(VT);
 648     unsigned ResultReg = createResultReg(RC);
 649     unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
 650     AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
 651                             TII.get(Opc), ResultReg)
 652                             .addFrameIndex(SI->second)
 653                             .addImm(0));
 654     return ResultReg;
 655   }
 656
 657   return 0;
 658 }
 659
 660 bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) {
 661   EVT evt = TLI.getValueType(Ty, true);
 662
 663   // Only handle simple types.
 664   if (evt == MVT::Other || !evt.isSimple()) return false;
 665   VT = evt.getSimpleVT();
 666
 667   // Handle all legal types, i.e. a register that will directly hold this
 668   // value.
 669   return TLI.isTypeLegal(VT);
 670 }
 671
 672 bool ARMFastISel::isLoadTypeLegal(const Type *Ty, MVT &VT) {
 673   if (isTypeLegal(Ty, VT)) return true;
 674
 675   // If this is a type than can be sign or zero-extended to a basic operation
 676   // go ahead and accept it now.
 677   if (VT == MVT::i8 || VT == MVT::i16)
 678     return true;
 679
 680   return false;
 681 }
 682
 683 // Computes the address to get to an object.
// Try to describe the pointer value Obj as an Address: a base (virtual
// register or frame index) plus a constant offset accumulated in Addr.Offset.
// Looks through bitcasts, no-op inttoptr/ptrtoint, constant-foldable GEPs,
// static allocas and global values; anything else is materialized into a
// register. Returns true when Addr has been filled in, false when the address
// cannot be handled here.
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
  // Some boilerplate from the X86 FastISel.
  const User *U = NULL;
  // UserOp1 is used as a "nothing to look through" marker; it hits the
  // default case of the switch below.
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    // (The static_cast only produces a lookup key for the alloca map.)
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
    default:
    break;
    case Instruction::BitCast: {
      // Look through bitcasts.
      return ARMComputeAddress(U->getOperand(0), Addr);
    }
    case Instruction::IntToPtr: {
      // Look past no-op inttoptrs.
      if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    }
    case Instruction::PtrToInt: {
      // Look past no-op ptrtoints.
      if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    }
    case Instruction::GetElementPtr: {
      // Keep a copy so a failed attempt to fold the base can be undone; the
      // running offset lives in a local until all indices are known to fold.
      Address SavedAddr = Addr;
      int TmpOffset = Addr.Offset;

      // Iterate through the GEP folding the constants into offsets where
      // we can.
      gep_type_iterator GTI = gep_type_begin(U);
      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
           i != e; ++i, ++GTI) {
        const Value *Op = *i;
        if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
          // Struct field: add the field's byte offset from the layout.
          const StructLayout *SL = TD.getStructLayout(STy);
          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          // Non-struct index: scale the index by the element alloc size.
          uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
          for (;;) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
              // Constant-offset addressing.
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (isa<AddOperator>(Op) &&
                (!isa<Instruction>(Op) ||
                 FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
                 == FuncInfo.MBB) &&
                isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
              // An add (in the same block) with a constant operand. Fold the
              // constant.
              ConstantInt *CI =
              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Op)->getOperand(0);
              continue;
            }
            // Unsupported
            // Addr itself has not been modified yet, so no restore is needed.
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
      Addr.Offset = TmpOffset;
      if (ARMComputeAddress(U->getOperand(0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

      unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      // Static allocas are addressed through their frame index.
      const AllocaInst *AI = cast<AllocaInst>(Obj);
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        Addr.BaseType = Address::FrameIndexBase;
        Addr.Base.FI = SI->second;
        return true;
      }
      break;
    }
  }

  // Materialize the global variable's address into a reg which can
  // then be used later to load the variable.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
    unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType()));
    if (Tmp == 0) return false;

    Addr.Base.Reg = Tmp;
    return true;
  }

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
  return Addr.Base.Reg != 0;
}
 805
 806 void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {
 807
 808   assert(VT.isSimple() && "Non-simple types are invalid here!");
 809
 810   bool needsLowering = false;
 811   switch (VT.getSimpleVT().SimpleTy) {
 812     default:
 813       assert(false && "Unhandled load/store type!");
 814     case MVT::i1:
 815     case MVT::i8:
 816     case MVT::i16:
 817     case MVT::i32:
 818       // Integer loads/stores handle 12-bit offsets.
 819       needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
 820       break;
 821     case MVT::f32:
 822     case MVT::f64:
 823       // Floating point operands handle 8-bit offsets.
 824       needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
 825       break;
 826   }
 827
 828   // If this is a stack pointer and the offset needs to be simplified then
 829   // put the alloca address into a register, set the base type back to
 830   // register and continue. This should almost never happen.
 831   if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
 832     TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
 833                               ARM::GPRRegisterClass;
 834     unsigned ResultReg = createResultReg(RC);
 835     unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
 836     AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
 837                             TII.get(Opc), ResultReg)
 838                             .addFrameIndex(Addr.Base.FI)
 839                             .addImm(0));
 840     Addr.Base.Reg = ResultReg;
 841     Addr.BaseType = Address::RegBase;
 842   }
 843
 844   // Since the offset is too large for the load/store instruction
 845   // get the reg+offset into a register.
 846   if (needsLowering) {
 847     Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
 848                                  /*Op0IsKill*/false, Addr.Offset, MVT::i32);
 849     Addr.Offset = 0;
 850   }
 851 }
 852
 853 void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
 854                                        const MachineInstrBuilder &MIB) {
 855   // addrmode5 output depends on the selection dag addressing dividing the
 856   // offset by 4 that it then later multiplies. Do this here as well.
 857   if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
 858       VT.getSimpleVT().SimpleTy == MVT::f64)
 859     Addr.Offset /= 4;
 860
 861   // Frame base works a bit differently. Handle it separately.
 862   if (Addr.BaseType == Address::FrameIndexBase) {
 863     int FI = Addr.Base.FI;
 864     int Offset = Addr.Offset;
 865     MachineMemOperand *MMO =
 866           FuncInfo.MF->getMachineMemOperand(
 867                                   MachinePointerInfo::getFixedStack(FI, Offset),
 868                                   MachineMemOperand::MOLoad,
 869                                   MFI.getObjectSize(FI),
 870                                   MFI.getObjectAlignment(FI));
 871     // Now add the rest of the operands.
 872     MIB.addFrameIndex(FI);
 873
 874     // ARM halfword load/stores need an additional operand.
 875     if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
 876
 877     MIB.addImm(Addr.Offset);
 878     MIB.addMemOperand(MMO);
 879   } else {
 880     // Now add the rest of the operands.
 881     MIB.addReg(Addr.Base.Reg);
 882
 883     // ARM halfword load/stores need an additional operand.
 884     if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
 885
 886     MIB.addImm(Addr.Offset);
 887   }
 888   AddOptionalDefs(MIB);
 889 }
 890
 891 bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) {
 892
 893   assert(VT.isSimple() && "Non-simple types are invalid here!");
 894   unsigned Opc;
 895   TargetRegisterClass *RC;
 896   switch (VT.getSimpleVT().SimpleTy) {
 897     // This is mostly going to be Neon/vector support.
 898     default: return false;
 899     case MVT::i16:
 900       Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
 901       RC = ARM::GPRRegisterClass;
 902       break;
 903     case MVT::i8:
 904       Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12;
 905       RC = ARM::GPRRegisterClass;
 906       break;
 907     case MVT::i32:
 908       Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12;
 909       RC = ARM::GPRRegisterClass;
 910       break;
 911     case MVT::f32:
 912       Opc = ARM::VLDRS;
 913       RC = TLI.getRegClassFor(VT);
 914       break;
 915     case MVT::f64:
 916       Opc = ARM::VLDRD;
 917       RC = TLI.getRegClassFor(VT);
 918       break;
 919   }
 920   // Simplify this down to something we can handle.
 921   ARMSimplifyAddress(Addr, VT);
 922
 923   // Create the base instruction, then add the operands.
 924   ResultReg = createResultReg(RC);
 925   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 926                                     TII.get(Opc), ResultReg);
 927   AddLoadStoreOperands(VT, Addr, MIB);
 928   return true;
 929 }
 930
 931 bool ARMFastISel::SelectLoad(const Instruction *I) {
 932   // Verify we have a legal type before going any further.
 933   MVT VT;
 934   if (!isLoadTypeLegal(I->getType(), VT))
 935     return false;
 936
 937   // See if we can handle this address.
 938   Address Addr;
 939   if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
 940
 941   unsigned ResultReg;
 942   if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
 943   UpdateValueMap(I, ResultReg);
 944   return true;
 945 }
 946
 947 bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
 948   unsigned StrOpc;
 949   switch (VT.getSimpleVT().SimpleTy) {
 950     // This is mostly going to be Neon/vector support.
 951     default: return false;
 952     case MVT::i1: {
 953       unsigned Res = createResultReg(isThumb ? ARM::tGPRRegisterClass :
 954                                                ARM::GPRRegisterClass);
 955       unsigned Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
 956       AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 957                               TII.get(Opc), Res)
 958                       .addReg(SrcReg).addImm(1));
 959       SrcReg = Res;
 960     } // Fallthrough here.
 961     case MVT::i8:
 962       StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRBi12;
 963       break;
 964     case MVT::i16:
 965       StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH;
 966       break;
 967     case MVT::i32:
 968       StrOpc = isThumb ? ARM::t2STRi12 : ARM::STRi12;
 969       break;
 970     case MVT::f32:
 971       if (!Subtarget->hasVFP2()) return false;
 972       StrOpc = ARM::VSTRS;
 973       break;
 974     case MVT::f64:
 975       if (!Subtarget->hasVFP2()) return false;
 976       StrOpc = ARM::VSTRD;
 977       break;
 978   }
 979   // Simplify this down to something we can handle.
 980   ARMSimplifyAddress(Addr, VT);
 981
 982   // Create the base instruction, then add the operands.
 983   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 984                                     TII.get(StrOpc))
 985                             .addReg(SrcReg, getKillRegState(true));
 986   AddLoadStoreOperands(VT, Addr, MIB);
 987   return true;
 988 }
 989
 990 bool ARMFastISel::SelectStore(const Instruction *I) {
 991   Value *Op0 = I->getOperand(0);
 992   unsigned SrcReg = 0;
 993
 994   // Verify we have a legal type before going any further.
 995   MVT VT;
 996   if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
 997     return false;
 998
 999   // Get the value to be stored into a register.
1000   SrcReg = getRegForValue(Op0);
1001   if (SrcReg == 0) return false;
1002
1003   // See if we can handle this address.
1004   Address Addr;
1005   if (!ARMComputeAddress(I->getOperand(1), Addr))
1006     return false;
1007
1008   if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
1009   return true;
1010 }
1011
1012 static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
1013   switch (Pred) {
1014     // Needs two compares...
1015     case CmpInst::FCMP_ONE:
1016     case CmpInst::FCMP_UEQ:
1017     default:
1018       // AL is our "false" for now. The other two need more compares.
1019       return ARMCC::AL;
1020     case CmpInst::ICMP_EQ:
1021     case CmpInst::FCMP_OEQ:
1022       return ARMCC::EQ;
1023     case CmpInst::ICMP_SGT:
1024     case CmpInst::FCMP_OGT:
1025       return ARMCC::GT;
1026     case CmpInst::ICMP_SGE:
1027     case CmpInst::FCMP_OGE:
1028       return ARMCC::GE;
1029     case CmpInst::ICMP_UGT:
1030     case CmpInst::FCMP_UGT:
1031       return ARMCC::HI;
1032     case CmpInst::FCMP_OLT:
1033       return ARMCC::MI;
1034     case CmpInst::ICMP_ULE:
1035     case CmpInst::FCMP_OLE:
1036       return ARMCC::LS;
1037     case CmpInst::FCMP_ORD:
1038       return ARMCC::VC;
1039     case CmpInst::FCMP_UNO:
1040       return ARMCC::VS;
1041     case CmpInst::FCMP_UGE:
1042       return ARMCC::PL;
1043     case CmpInst::ICMP_SLT:
1044     case CmpInst::FCMP_ULT:
1045       return ARMCC::LT;
1046     case CmpInst::ICMP_SLE:
1047     case CmpInst::FCMP_ULE:
1048       return ARMCC::LE;
1049     case CmpInst::FCMP_UNE:
1050     case CmpInst::ICMP_NE:
1051       return ARMCC::NE;
1052     case CmpInst::ICMP_UGE:
1053       return ARMCC::HS;
1054     case CmpInst::ICMP_ULT:
1055       return ARMCC::LO;
1056   }
1057 }
1058
1059 bool ARMFastISel::SelectBranch(const Instruction *I) {
1060   const BranchInst *BI = cast<BranchInst>(I);
1061   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1062   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1063
1064   // Simple branch support.
1065
1066   // If we can, avoid recomputing the compare - redoing it could lead to wonky
1067   // behavior.
1068   // TODO: Factor this out.
1069   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1070     MVT SourceVT;
1071     const Type *Ty = CI->getOperand(0)->getType();
1072     if (CI->hasOneUse() && (CI->getParent() == I->getParent())
1073         && isTypeLegal(Ty, SourceVT)) {
1074       bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
1075       if (isFloat && !Subtarget->hasVFP2())
1076         return false;
1077
1078       unsigned CmpOpc;
1079       switch (SourceVT.SimpleTy) {
1080         default: return false;
1081         // TODO: Verify compares.
1082         case MVT::f32:
1083           CmpOpc = ARM::VCMPES;
1084           break;
1085         case MVT::f64:
1086           CmpOpc = ARM::VCMPED;
1087           break;
1088         case MVT::i32:
1089           CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
1090           break;
1091       }
1092
1093       // Get the compare predicate.
1094       // Try to take advantage of fallthrough opportunities.
1095       CmpInst::Predicate Predicate = CI->getPredicate();
1096       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1097         std::swap(TBB, FBB);
1098         Predicate = CmpInst::getInversePredicate(Predicate);
1099       }
1100
1101       ARMCC::CondCodes ARMPred = getComparePred(Predicate);
1102
1103       // We may not handle every CC for now.
1104       if (ARMPred == ARMCC::AL) return false;
1105
1106       unsigned Arg1 = getRegForValue(CI->getOperand(0));
1107       if (Arg1 == 0) return false;
1108
1109       unsigned Arg2 = getRegForValue(CI->getOperand(1));
1110       if (Arg2 == 0) return false;
1111
1112       AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1113                               TII.get(CmpOpc))
1114                       .addReg(Arg1).addReg(Arg2));
1115
1116       // For floating point we need to move the result to a comparison register
1117       // that we can then use for branches.
1118       if (isFloat)
1119         AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1120                                 TII.get(ARM::FMSTAT)));
1121
1122       unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
1123       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
1124       .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
1125       FastEmitBranch(FBB, DL);
1126       FuncInfo.MBB->addSuccessor(TBB);
1127       return true;
1128     }
1129   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1130     MVT SourceVT;
1131     if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1132         (isTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
1133       unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
1134       unsigned OpReg = getRegForValue(TI->getOperand(0));
1135       AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1136                               TII.get(TstOpc))
1137                       .addReg(OpReg).addImm(1));
1138
1139       unsigned CCMode = ARMCC::NE;
1140       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1141         std::swap(TBB, FBB);
1142         CCMode = ARMCC::EQ;
1143       }
1144
1145       unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
1146       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
1147       .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
1148
1149       FastEmitBranch(FBB, DL);
1150       FuncInfo.MBB->addSuccessor(TBB);
1151       return true;
1152     }
1153   }
1154
1155   unsigned CmpReg = getRegForValue(BI->getCondition());
1156   if (CmpReg == 0) return false;
1157
1158   // We've been divorced from our compare!  Our block was split, and
1159   // now our compare lives in a predecessor block.  We musn't
1160   // re-compare here, as the children of the compare aren't guaranteed
1161   // live across the block boundary (we *could* check for this).
1162   // Regardless, the compare has been done in the predecessor block,
1163   // and it left a value for us in a virtual register.  Ergo, we test
1164   // the one-bit value left in the virtual register.
1165   unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
1166   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
1167                   .addReg(CmpReg).addImm(1));
1168
1169   unsigned CCMode = ARMCC::NE;
1170   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1171     std::swap(TBB, FBB);
1172     CCMode = ARMCC::EQ;
1173   }
1174
1175   unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
1176   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
1177                   .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
1178   FastEmitBranch(FBB, DL);
1179   FuncInfo.MBB->addSuccessor(TBB);
1180   return true;
1181 }
1182
1183 bool ARMFastISel::SelectCmp(const Instruction *I) {
1184   const CmpInst *CI = cast<CmpInst>(I);
1185
1186   MVT VT;
1187   const Type *Ty = CI->getOperand(0)->getType();
1188   if (!isTypeLegal(Ty, VT))
1189     return false;
1190
1191   bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
1192   if (isFloat && !Subtarget->hasVFP2())
1193     return false;
1194
1195   unsigned CmpOpc;
1196   unsigned CondReg;
1197   switch (VT.SimpleTy) {
1198     default: return false;
1199     // TODO: Verify compares.
1200     case MVT::f32:
1201       CmpOpc = ARM::VCMPES;
1202       CondReg = ARM::FPSCR;
1203       break;
1204     case MVT::f64:
1205       CmpOpc = ARM::VCMPED;
1206       CondReg = ARM::FPSCR;
1207       break;
1208     case MVT::i32:
1209       CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
1210       CondReg = ARM::CPSR;
1211       break;
1212   }
1213
1214   // Get the compare predicate.
1215   ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
1216
1217   // We may not handle every CC for now.
1218   if (ARMPred == ARMCC::AL) return false;
1219
1220   unsigned Arg1 = getRegForValue(CI->getOperand(0));
1221   if (Arg1 == 0) return false;
1222
1223   unsigned Arg2 = getRegForValue(CI->getOperand(1));
1224   if (Arg2 == 0) return false;
1225
1226   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
1227                   .addReg(Arg1).addReg(Arg2));
1228
1229   // For floating point we need to move the result to a comparison register
1230   // that we can then use for branches.
1231   if (isFloat)
1232     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1233                             TII.get(ARM::FMSTAT)));
1234
1235   // Now set a register based on the comparison. Explicitly set the predicates
1236   // here.
1237   unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi;
1238   TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass
1239                                     : ARM::GPRRegisterClass;
1240   unsigned DestReg = createResultReg(RC);
1241   Constant *Zero
1242     = ConstantInt::get(Type::getInt32Ty(*Context), 0);
1243   unsigned ZeroReg = TargetMaterializeConstant(Zero);
1244   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
1245           .addReg(ZeroReg).addImm(1)
1246           .addImm(ARMPred).addReg(CondReg);
1247
1248   UpdateValueMap(I, DestReg);
1249   return true;
1250 }
1251
1252 bool ARMFastISel::SelectFPExt(const Instruction *I) {
1253   // Make sure we have VFP and that we're extending float to double.
1254   if (!Subtarget->hasVFP2()) return false;
1255
1256   Value *V = I->getOperand(0);
1257   if (!I->getType()->isDoubleTy() ||
1258       !V->getType()->isFloatTy()) return false;
1259
1260   unsigned Op = getRegForValue(V);
1261   if (Op == 0) return false;
1262
1263   unsigned Result = createResultReg(ARM::DPRRegisterClass);
1264   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1265                           TII.get(ARM::VCVTDS), Result)
1266                   .addReg(Op));
1267   UpdateValueMap(I, Result);
1268   return true;
1269 }
1270
1271 bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
1272   // Make sure we have VFP and that we're truncating double to float.
1273   if (!Subtarget->hasVFP2()) return false;
1274
1275   Value *V = I->getOperand(0);
1276   if (!(I->getType()->isFloatTy() &&
1277         V->getType()->isDoubleTy())) return false;
1278
1279   unsigned Op = getRegForValue(V);
1280   if (Op == 0) return false;
1281
1282   unsigned Result = createResultReg(ARM::SPRRegisterClass);
1283   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1284                           TII.get(ARM::VCVTSD), Result)
1285                   .addReg(Op));
1286   UpdateValueMap(I, Result);
1287   return true;
1288 }
1289
1290 bool ARMFastISel::SelectSIToFP(const Instruction *I) {
1291   // Make sure we have VFP.
1292   if (!Subtarget->hasVFP2()) return false;
1293
1294   MVT DstVT;
1295   const Type *Ty = I->getType();
1296   if (!isTypeLegal(Ty, DstVT))
1297     return false;
1298
1299   // FIXME: Handle sign-extension where necessary.
1300   if (!I->getOperand(0)->getType()->isIntegerTy(32))
1301     return false;
1302
1303   unsigned Op = getRegForValue(I->getOperand(0));
1304   if (Op == 0) return false;
1305
1306   // The conversion routine works on fp-reg to fp-reg and the operand above
1307   // was an integer, move it to the fp registers if possible.
1308   unsigned FP = ARMMoveToFPReg(MVT::f32, Op);
1309   if (FP == 0) return false;
1310
1311   unsigned Opc;
1312   if (Ty->isFloatTy()) Opc = ARM::VSITOS;
1313   else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
1314   else return 0;
1315
1316   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
1317   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
1318                           ResultReg)
1319                   .addReg(FP));
1320   UpdateValueMap(I, ResultReg);
1321   return true;
1322 }
1323
1324 bool ARMFastISel::SelectFPToSI(const Instruction *I) {
1325   // Make sure we have VFP.
1326   if (!Subtarget->hasVFP2()) return false;
1327
1328   MVT DstVT;
1329   const Type *RetTy = I->getType();
1330   if (!isTypeLegal(RetTy, DstVT))
1331     return false;
1332
1333   unsigned Op = getRegForValue(I->getOperand(0));
1334   if (Op == 0) return false;
1335
1336   unsigned Opc;
1337   const Type *OpTy = I->getOperand(0)->getType();
1338   if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
1339   else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
1340   else return 0;
1341
1342   // f64->s32 or f32->s32 both need an intermediate f32 reg.
1343   unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
1344   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
1345                           ResultReg)
1346                   .addReg(Op));
1347
1348   // This result needs to be in an integer register, but the conversion only
1349   // takes place in fp-regs.
1350   unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
1351   if (IntReg == 0) return false;
1352
1353   UpdateValueMap(I, IntReg);
1354   return true;
1355 }
1356
1357 bool ARMFastISel::SelectSelect(const Instruction *I) {
1358   MVT VT;
1359   if (!isTypeLegal(I->getType(), VT))
1360     return false;
1361
1362   // Things need to be register sized for register moves.
1363   if (VT != MVT::i32) return false;
1364   const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
1365
1366   unsigned CondReg = getRegForValue(I->getOperand(0));
1367   if (CondReg == 0) return false;
1368   unsigned Op1Reg = getRegForValue(I->getOperand(1));
1369   if (Op1Reg == 0) return false;
1370   unsigned Op2Reg = getRegForValue(I->getOperand(2));
1371   if (Op2Reg == 0) return false;
1372
1373   unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
1374   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
1375                   .addReg(CondReg).addImm(1));
1376   unsigned ResultReg = createResultReg(RC);
1377   unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr;
1378   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
1379     .addReg(Op1Reg).addReg(Op2Reg)
1380     .addImm(ARMCC::EQ).addReg(ARM::CPSR);
1381   UpdateValueMap(I, ResultReg);
1382   return true;
1383 }
1384
1385 bool ARMFastISel::SelectSDiv(const Instruction *I) {
1386   MVT VT;
1387   const Type *Ty = I->getType();
1388   if (!isTypeLegal(Ty, VT))
1389     return false;
1390
1391   // If we have integer div support we should have selected this automagically.
1392   // In case we have a real miss go ahead and return false and we'll pick
1393   // it up later.
1394   if (Subtarget->hasDivide()) return false;
1395
1396   // Otherwise emit a libcall.
1397   RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1398   if (VT == MVT::i8)
1399     LC = RTLIB::SDIV_I8;
1400   else if (VT == MVT::i16)
1401     LC = RTLIB::SDIV_I16;
1402   else if (VT == MVT::i32)
1403     LC = RTLIB::SDIV_I32;
1404   else if (VT == MVT::i64)
1405     LC = RTLIB::SDIV_I64;
1406   else if (VT == MVT::i128)
1407     LC = RTLIB::SDIV_I128;
1408   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
1409
1410   return ARMEmitLibcall(I, LC);
1411 }
1412
1413 bool ARMFastISel::SelectSRem(const Instruction *I) {
1414   MVT VT;
1415   const Type *Ty = I->getType();
1416   if (!isTypeLegal(Ty, VT))
1417     return false;
1418
1419   RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1420   if (VT == MVT::i8)
1421     LC = RTLIB::SREM_I8;
1422   else if (VT == MVT::i16)
1423     LC = RTLIB::SREM_I16;
1424   else if (VT == MVT::i32)
1425     LC = RTLIB::SREM_I32;
1426   else if (VT == MVT::i64)
1427     LC = RTLIB::SREM_I64;
1428   else if (VT == MVT::i128)
1429     LC = RTLIB::SREM_I128;
1430   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
1431
1432   return ARMEmitLibcall(I, LC);
1433 }
1434
1435 bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
1436   EVT VT  = TLI.getValueType(I->getType(), true);
1437
1438   // We can get here in the case when we want to use NEON for our fp
1439   // operations, but can't figure out how to. Just use the vfp instructions
1440   // if we have them.
1441   // FIXME: It'd be nice to use NEON instructions.
1442   const Type *Ty = I->getType();
1443   bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
1444   if (isFloat && !Subtarget->hasVFP2())
1445     return false;
1446
1447   unsigned Op1 = getRegForValue(I->getOperand(0));
1448   if (Op1 == 0) return false;
1449
1450   unsigned Op2 = getRegForValue(I->getOperand(1));
1451   if (Op2 == 0) return false;
1452
1453   unsigned Opc;
1454   bool is64bit = VT == MVT::f64 || VT == MVT::i64;
1455   switch (ISDOpcode) {
1456     default: return false;
1457     case ISD::FADD:
1458       Opc = is64bit ? ARM::VADDD : ARM::VADDS;
1459       break;
1460     case ISD::FSUB:
1461       Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
1462       break;
1463     case ISD::FMUL:
1464       Opc = is64bit ? ARM::VMULD : ARM::VMULS;
1465       break;
1466   }
1467   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
1468   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1469                           TII.get(Opc), ResultReg)
1470                   .addReg(Op1).addReg(Op2));
1471   UpdateValueMap(I, ResultReg);
1472   return true;
1473 }
1474
1475 // Call Handling Code
1476
1477 bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src,
1478                                  EVT SrcVT, unsigned &ResultReg) {
1479   unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
1480                            Src, /*TODO: Kill=*/false);
1481
1482   if (RR != 0) {
1483     ResultReg = RR;
1484     return true;
1485   } else
1486     return false;
1487 }
1488
1489 // This is largely taken directly from CCAssignFnForNode - we don't support
1490 // varargs in FastISel so that part has been removed.
1491 // TODO: We may not support all of this.
1492 CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
1493   switch (CC) {
1494   default:
1495     llvm_unreachable("Unsupported calling convention");
1496   case CallingConv::Fast:
1497     // Ignore fastcc. Silence compiler warnings.
1498     (void)RetFastCC_ARM_APCS;
1499     (void)FastCC_ARM_APCS;
1500     // Fallthrough
1501   case CallingConv::C:
1502     // Use target triple & subtarget features to do actual dispatch.
1503     if (Subtarget->isAAPCS_ABI()) {
1504       if (Subtarget->hasVFP2() &&
1505           FloatABIType == FloatABI::Hard)
1506         return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
1507       else
1508         return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
1509     } else
1510         return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
1511   case CallingConv::ARM_AAPCS_VFP:
1512     return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
1513   case CallingConv::ARM_AAPCS:
1514     return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
1515   case CallingConv::ARM_APCS:
1516     return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
1517   }
1518 }
1519
1520 bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
1521                                   SmallVectorImpl<unsigned> &ArgRegs,
1522                                   SmallVectorImpl<MVT> &ArgVTs,
1523                                   SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1524                                   SmallVectorImpl<unsigned> &RegArgs,
1525                                   CallingConv::ID CC,
1526                                   unsigned &NumBytes) {
1527   SmallVector<CCValAssign, 16> ArgLocs;
1528   CCState CCInfo(CC, false, TM, ArgLocs, *Context);
1529   CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
1530
1531   // Get a count of how many bytes are to be pushed on the stack.
1532   NumBytes = CCInfo.getNextStackOffset();
1533
1534   // Issue CALLSEQ_START
1535   unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
1536   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1537                           TII.get(AdjStackDown))
1538                   .addImm(NumBytes));
1539
1540   // Process the args.
1541   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1542     CCValAssign &VA = ArgLocs[i];
1543     unsigned Arg = ArgRegs[VA.getValNo()];
1544     MVT ArgVT = ArgVTs[VA.getValNo()];
1545
1546     // We don't handle NEON/vector parameters yet.
1547     if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
1548       return false;
1549
1550     // Handle arg promotion, etc.
1551     switch (VA.getLocInfo()) {
1552       case CCValAssign::Full: break;
1553       case CCValAssign::SExt: {
1554         bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
1555                                          Arg, ArgVT, Arg);
1556         assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
1557         Emitted = true;
1558         ArgVT = VA.getLocVT();
1559         break;
1560       }
1561       case CCValAssign::ZExt: {
1562         bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
1563                                          Arg, ArgVT, Arg);
1564         assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
1565         Emitted = true;
1566         ArgVT = VA.getLocVT();
1567         break;
1568       }
1569       case CCValAssign::AExt: {
1570         bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
1571                                          Arg, ArgVT, Arg);
1572         if (!Emitted)
1573           Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
1574                                       Arg, ArgVT, Arg);
1575         if (!Emitted)
1576           Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
1577                                       Arg, ArgVT, Arg);
1578
1579         assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
1580         ArgVT = VA.getLocVT();
1581         break;
1582       }
1583       case CCValAssign::BCvt: {
1584         unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
1585                                  /*TODO: Kill=*/false);
1586         assert(BC != 0 && "Failed to emit a bitcast!");
1587         Arg = BC;
1588         ArgVT = VA.getLocVT();
1589         break;
1590       }
1591       default: llvm_unreachable("Unknown arg promotion!");
1592     }
1593
1594     // Now copy/store arg to correct locations.
1595     if (VA.isRegLoc() && !VA.needsCustom()) {
1596       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1597               VA.getLocReg())
1598       .addReg(Arg);
1599       RegArgs.push_back(VA.getLocReg());
1600     } else if (VA.needsCustom()) {
1601       // TODO: We need custom lowering for vector (v2f64) args.
1602       if (VA.getLocVT() != MVT::f64) return false;
1603
1604       CCValAssign &NextVA = ArgLocs[++i];
1605
1606       // TODO: Only handle register args for now.
1607       if(!(VA.isRegLoc() && NextVA.isRegLoc())) return false;
1608
1609       AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1610                               TII.get(ARM::VMOVRRD), VA.getLocReg())
1611                       .addReg(NextVA.getLocReg(), RegState::Define)
1612                       .addReg(Arg));
1613       RegArgs.push_back(VA.getLocReg());
1614       RegArgs.push_back(NextVA.getLocReg());
1615     } else {
1616       assert(VA.isMemLoc());
1617       // Need to store on the stack.
1618       Address Addr;
1619       Addr.BaseType = Address::RegBase;
1620       Addr.Base.Reg = ARM::SP;
1621       Addr.Offset = VA.getLocMemOffset();
1622
1623       if (!ARMEmitStore(ArgVT, Arg, Addr)) return false;
1624     }
1625   }
1626   return true;
1627 }
1628
1629 bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
1630                              const Instruction *I, CallingConv::ID CC,
1631                              unsigned &NumBytes) {
1632   // Issue CALLSEQ_END
1633   unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
1634   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1635                           TII.get(AdjStackUp))
1636                   .addImm(NumBytes).addImm(0));
1637
1638   // Now the return value.
1639   if (RetVT != MVT::isVoid) {
1640     SmallVector<CCValAssign, 16> RVLocs;
1641     CCState CCInfo(CC, false, TM, RVLocs, *Context);
1642     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));
1643
1644     // Copy all of the result registers out of their specified physreg.
1645     if (RVLocs.size() == 2 && RetVT == MVT::f64) {
1646       // For this move we copy into two registers and then move into the
1647       // double fp reg we want.
1648       EVT DestVT = RVLocs[0].getValVT();
1649       TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
1650       unsigned ResultReg = createResultReg(DstRC);
1651       AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1652                               TII.get(ARM::VMOVDRR), ResultReg)
1653                       .addReg(RVLocs[0].getLocReg())
1654                       .addReg(RVLocs[1].getLocReg()));
1655
1656       UsedRegs.push_back(RVLocs[0].getLocReg());
1657       UsedRegs.push_back(RVLocs[1].getLocReg());
1658
1659       // Finally update the result.
1660       UpdateValueMap(I, ResultReg);
1661     } else {
1662       assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
1663       EVT CopyVT = RVLocs[0].getValVT();
1664       TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
1665
1666       unsigned ResultReg = createResultReg(DstRC);
1667       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1668               ResultReg).addReg(RVLocs[0].getLocReg());
1669       UsedRegs.push_back(RVLocs[0].getLocReg());
1670
1671       // Finally update the result.
1672       UpdateValueMap(I, ResultReg);
1673     }
1674   }
1675
1676   return true;
1677 }
1678
1679 bool ARMFastISel::SelectRet(const Instruction *I) {
1680   const ReturnInst *Ret = cast<ReturnInst>(I);
1681   const Function &F = *I->getParent()->getParent();
1682
1683   if (!FuncInfo.CanLowerReturn)
1684     return false;
1685
1686   if (F.isVarArg())
1687     return false;
1688
1689   CallingConv::ID CC = F.getCallingConv();
1690   if (Ret->getNumOperands() > 0) {
1691     SmallVector<ISD::OutputArg, 4> Outs;
1692     GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
1693                   Outs, TLI);
1694
1695     // Analyze operands of the call, assigning locations to each operand.
1696     SmallVector<CCValAssign, 16> ValLocs;
1697     CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
1698     CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));
1699
1700     const Value *RV = Ret->getOperand(0);
1701     unsigned Reg = getRegForValue(RV);
1702     if (Reg == 0)
1703       return false;
1704
1705     // Only handle a single return value for now.
1706     if (ValLocs.size() != 1)
1707       return false;
1708
1709     CCValAssign &VA = ValLocs[0];
1710
1711     // Don't bother handling odd stuff for now.
1712     if (VA.getLocInfo() != CCValAssign::Full)
1713       return false;
1714     // Only handle register returns for now.
1715     if (!VA.isRegLoc())
1716       return false;
1717     // TODO: For now, don't try to handle cases where getLocInfo()
1718     // says Full but the types don't match.
1719     if (TLI.getValueType(RV->getType()) != VA.getValVT())
1720       return false;
1721
1722     // Make the copy.
1723     unsigned SrcReg = Reg + VA.getValNo();
1724     unsigned DstReg = VA.getLocReg();
1725     const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
1726     // Avoid a cross-class copy. This is very unlikely.
1727     if (!SrcRC->contains(DstReg))
1728       return false;
1729     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1730             DstReg).addReg(SrcReg);
1731
1732     // Mark the register as live out of the function.
1733     MRI.addLiveOut(VA.getLocReg());
1734   }
1735
1736   unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET;
1737   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1738                           TII.get(RetOpc)));
1739   return true;
1740 }
1741
1742 unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
1743
1744   // Darwin needs the r9 versions of the opcodes.
1745   bool isDarwin = Subtarget->isTargetDarwin();
1746   if (isThumb) {
1747     return isDarwin ? ARM::tBLr9 : ARM::tBL;
1748   } else  {
1749     return isDarwin ? ARM::BLr9 : ARM::BL;
1750   }
1751 }
1752
1753 // A quick function that will emit a call for a named libcall in F with the
1754 // vector of passed arguments for the Instruction in I. We can assume that we
1755 // can emit a call for any libcall we can produce. This is an abridged version
1756 // of the full call infrastructure since we won't need to worry about things
1757 // like computed function pointers or strange arguments at call sites.
1758 // TODO: Try to unify this and the normal call bits for ARM, then try to unify
1759 // with X86.
1760 bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
1761   CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
1762
1763   // Handle *simple* calls for now.
1764   const Type *RetTy = I->getType();
1765   MVT RetVT;
1766   if (RetTy->isVoidTy())
1767     RetVT = MVT::isVoid;
1768   else if (!isTypeLegal(RetTy, RetVT))
1769     return false;
1770
1771   // TODO: For now if we have long calls specified we don't handle the call.
1772   if (EnableARMLongCalls) return false;
1773
1774   // Set up the argument vectors.
1775   SmallVector<Value*, 8> Args;
1776   SmallVector<unsigned, 8> ArgRegs;
1777   SmallVector<MVT, 8> ArgVTs;
1778   SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
1779   Args.reserve(I->getNumOperands());
1780   ArgRegs.reserve(I->getNumOperands());
1781   ArgVTs.reserve(I->getNumOperands());
1782   ArgFlags.reserve(I->getNumOperands());
1783   for (unsigned i = 0; i < I->getNumOperands(); ++i) {
1784     Value *Op = I->getOperand(i);
1785     unsigned Arg = getRegForValue(Op);
1786     if (Arg == 0) return false;
1787
1788     const Type *ArgTy = Op->getType();
1789     MVT ArgVT;
1790     if (!isTypeLegal(ArgTy, ArgVT)) return false;
1791
1792     ISD::ArgFlagsTy Flags;
1793     unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
1794     Flags.setOrigAlign(OriginalAlignment);
1795
1796     Args.push_back(Op);
1797     ArgRegs.push_back(Arg);
1798     ArgVTs.push_back(ArgVT);
1799     ArgFlags.push_back(Flags);
1800   }
1801
1802   // Handle the arguments now that we've gotten them.
1803   SmallVector<unsigned, 4> RegArgs;
1804   unsigned NumBytes;
1805   if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
1806     return false;
1807
1808   // Issue the call, BLr9 for darwin, BL otherwise.
1809   // TODO: Turn this into the table of arm call ops.
1810   MachineInstrBuilder MIB;
1811   unsigned CallOpc = ARMSelectCallOp(NULL);
1812   if(isThumb)
1813     // Explicitly adding the predicate here.
1814     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1815                          TII.get(CallOpc)))
1816                          .addExternalSymbol(TLI.getLibcallName(Call));
1817   else
1818     // Explicitly adding the predicate here.
1819     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1820                          TII.get(CallOpc))
1821           .addExternalSymbol(TLI.getLibcallName(Call)));
1822
1823   // Add implicit physical register uses to the call.
1824   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
1825     MIB.addReg(RegArgs[i]);
1826
1827   // Finish off the call including any return values.
1828   SmallVector<unsigned, 4> UsedRegs;
1829   if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
1830
1831   // Set all unused physreg defs as dead.
1832   static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
1833
1834   return true;
1835 }
1836
1837 bool ARMFastISel::SelectCall(const Instruction *I) {
1838   const CallInst *CI = cast<CallInst>(I);
1839   const Value *Callee = CI->getCalledValue();
1840
1841   // Can't handle inline asm or worry about intrinsics yet.
1842   if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;
1843
1844   // Only handle global variable Callees.
1845   const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
1846   if (!GV)
1847     return false;
1848
1849   // Check the calling convention.
1850   ImmutableCallSite CS(CI);
1851   CallingConv::ID CC = CS.getCallingConv();
1852
1853   // TODO: Avoid some calling conventions?
1854
1855   // Let SDISel handle vararg functions.
1856   const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
1857   const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
1858   if (FTy->isVarArg())
1859     return false;
1860
1861   // Handle *simple* calls for now.
1862   const Type *RetTy = I->getType();
1863   MVT RetVT;
1864   if (RetTy->isVoidTy())
1865     RetVT = MVT::isVoid;
1866   else if (!isTypeLegal(RetTy, RetVT))
1867     return false;
1868
1869   // TODO: For now if we have long calls specified we don't handle the call.
1870   if (EnableARMLongCalls) return false;
1871
1872   // Set up the argument vectors.
1873   SmallVector<Value*, 8> Args;
1874   SmallVector<unsigned, 8> ArgRegs;
1875   SmallVector<MVT, 8> ArgVTs;
1876   SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
1877   Args.reserve(CS.arg_size());
1878   ArgRegs.reserve(CS.arg_size());
1879   ArgVTs.reserve(CS.arg_size());
1880   ArgFlags.reserve(CS.arg_size());
1881   for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
1882        i != e; ++i) {
1883     unsigned Arg = getRegForValue(*i);
1884
1885     if (Arg == 0)
1886       return false;
1887     ISD::ArgFlagsTy Flags;
1888     unsigned AttrInd = i - CS.arg_begin() + 1;
1889     if (CS.paramHasAttr(AttrInd, Attribute::SExt))
1890       Flags.setSExt();
1891     if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
1892       Flags.setZExt();
1893
1894          // FIXME: Only handle *easy* calls for now.
1895     if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
1896         CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
1897         CS.paramHasAttr(AttrInd, Attribute::Nest) ||
1898         CS.paramHasAttr(AttrInd, Attribute::ByVal))
1899       return false;
1900
1901     const Type *ArgTy = (*i)->getType();
1902     MVT ArgVT;
1903     if (!isTypeLegal(ArgTy, ArgVT))
1904       return false;
1905     unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
1906     Flags.setOrigAlign(OriginalAlignment);
1907
1908     Args.push_back(*i);
1909     ArgRegs.push_back(Arg);
1910     ArgVTs.push_back(ArgVT);
1911     ArgFlags.push_back(Flags);
1912   }
1913
1914   // Handle the arguments now that we've gotten them.
1915   SmallVector<unsigned, 4> RegArgs;
1916   unsigned NumBytes;
1917   if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
1918     return false;
1919
1920   // Issue the call, BLr9 for darwin, BL otherwise.
1921   // TODO: Turn this into the table of arm call ops.
1922   MachineInstrBuilder MIB;
1923   unsigned CallOpc = ARMSelectCallOp(GV);
1924   // Explicitly adding the predicate here.
1925   if(isThumb)
1926     // Explicitly adding the predicate here.
1927     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1928                          TII.get(CallOpc)))
1929           .addGlobalAddress(GV, 0, 0);
1930   else
1931     // Explicitly adding the predicate here.
1932     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1933                          TII.get(CallOpc))
1934           .addGlobalAddress(GV, 0, 0));
1935
1936   // Add implicit physical register uses to the call.
1937   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
1938     MIB.addReg(RegArgs[i]);
1939
1940   // Finish off the call including any return values.
1941   SmallVector<unsigned, 4> UsedRegs;
1942   if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
1943
1944   // Set all unused physreg defs as dead.
1945   static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
1946
1947   return true;
1948
1949 }
1950
1951 // TODO: SoftFP support.
1952 bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
1953
1954   switch (I->getOpcode()) {
1955     case Instruction::Load:
1956       return SelectLoad(I);
1957     case Instruction::Store:
1958       return SelectStore(I);
1959     case Instruction::Br:
1960       return SelectBranch(I);
1961     case Instruction::ICmp:
1962     case Instruction::FCmp:
1963       return SelectCmp(I);
1964     case Instruction::FPExt:
1965       return SelectFPExt(I);
1966     case Instruction::FPTrunc:
1967       return SelectFPTrunc(I);
1968     case Instruction::SIToFP:
1969       return SelectSIToFP(I);
1970     case Instruction::FPToSI:
1971       return SelectFPToSI(I);
1972     case Instruction::FAdd:
1973       return SelectBinaryOp(I, ISD::FADD);
1974     case Instruction::FSub:
1975       return SelectBinaryOp(I, ISD::FSUB);
1976     case Instruction::FMul:
1977       return SelectBinaryOp(I, ISD::FMUL);
1978     case Instruction::SDiv:
1979       return SelectSDiv(I);
1980     case Instruction::SRem:
1981       return SelectSRem(I);
1982     case Instruction::Call:
1983       return SelectCall(I);
1984     case Instruction::Select:
1985       return SelectSelect(I);
1986     case Instruction::Ret:
1987       return SelectRet(I);
1988     default: break;
1989   }
1990   return false;
1991 }
1992
1993 namespace llvm {
1994   llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
1995     // Completely untested on non-darwin.
1996     const TargetMachine &TM = funcInfo.MF->getTarget();
1997
1998     // Darwin and thumb1 only for now.
1999     const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
2000     if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() &&
2001         !DisableARMFastISel)
2002       return new ARMFastISel(funcInfo);
2003     return 0;
2004   }
2005 }