lib/Target/AArch64/AArch64FastISel.cpp

   1 //===-- AArch6464FastISel.cpp - AArch64 FastISel implementation -----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the AArch64-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // AArch64GenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AArch64.h"
  17 #include "AArch64Subtarget.h"
  18 #include "AArch64TargetMachine.h"
  19 #include "MCTargetDesc/AArch64AddressingModes.h"
  20 #include "llvm/Analysis/BranchProbabilityInfo.h"
  21 #include "llvm/CodeGen/CallingConvLower.h"
  22 #include "llvm/CodeGen/FastISel.h"
  23 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  24 #include "llvm/CodeGen/MachineConstantPool.h"
  25 #include "llvm/CodeGen/MachineFrameInfo.h"
  26 #include "llvm/CodeGen/MachineInstrBuilder.h"
  27 #include "llvm/CodeGen/MachineRegisterInfo.h"
  28 #include "llvm/IR/CallingConv.h"
  29 #include "llvm/IR/DataLayout.h"
  30 #include "llvm/IR/DerivedTypes.h"
  31 #include "llvm/IR/Function.h"
  32 #include "llvm/IR/GetElementPtrTypeIterator.h"
  33 #include "llvm/IR/GlobalAlias.h"
  34 #include "llvm/IR/GlobalVariable.h"
  35 #include "llvm/IR/Instructions.h"
  36 #include "llvm/IR/IntrinsicInst.h"
  37 #include "llvm/IR/Operator.h"
  38 #include "llvm/Support/CommandLine.h"
  39 using namespace llvm;
  40
  41 namespace {
  42
  43 class AArch64FastISel : public FastISel {
  44
  45   class Address {
  46   public:
  47     typedef enum {
  48       RegBase,
  49       FrameIndexBase
  50     } BaseKind;
  51
  52   private:
  53     BaseKind Kind;
  54     union {
  55       unsigned Reg;
  56       int FI;
  57     } Base;
  58     int64_t Offset;
  59     const GlobalValue *GV;
  60
  61   public:
  62     Address() : Kind(RegBase), Offset(0), GV(nullptr) { Base.Reg = 0; }
  63     void setKind(BaseKind K) { Kind = K; }
  64     BaseKind getKind() const { return Kind; }
  65     bool isRegBase() const { return Kind == RegBase; }
  66     bool isFIBase() const { return Kind == FrameIndexBase; }
  67     void setReg(unsigned Reg) {
  68       assert(isRegBase() && "Invalid base register access!");
  69       Base.Reg = Reg;
  70     }
  71     unsigned getReg() const {
  72       assert(isRegBase() && "Invalid base register access!");
  73       return Base.Reg;
  74     }
  75     void setFI(unsigned FI) {
  76       assert(isFIBase() && "Invalid base frame index  access!");
  77       Base.FI = FI;
  78     }
  79     unsigned getFI() const {
  80       assert(isFIBase() && "Invalid base frame index access!");
  81       return Base.FI;
  82     }
  83     void setOffset(int64_t O) { Offset = O; }
  84     int64_t getOffset() { return Offset; }
  85
  86     void setGlobalValue(const GlobalValue *G) { GV = G; }
  87     const GlobalValue *getGlobalValue() { return GV; }
  88
  89     bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); }
  90   };
  91
  92   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  93   /// make the right decision when generating code for different targets.
  94   const AArch64Subtarget *Subtarget;
  95   LLVMContext *Context;
  96
  97   bool FastLowerCall(CallLoweringInfo &CLI) override;
  98   bool FastLowerIntrinsicCall(const IntrinsicInst *II) override;
  99
 100 private:
 101   // Selection routines.
 102   bool SelectLoad(const Instruction *I);
 103   bool SelectStore(const Instruction *I);
 104   bool SelectBranch(const Instruction *I);
 105   bool SelectIndirectBr(const Instruction *I);
 106   bool SelectCmp(const Instruction *I);
 107   bool SelectSelect(const Instruction *I);
 108   bool SelectFPExt(const Instruction *I);
 109   bool SelectFPTrunc(const Instruction *I);
 110   bool SelectFPToInt(const Instruction *I, bool Signed);
 111   bool SelectIntToFP(const Instruction *I, bool Signed);
 112   bool SelectRem(const Instruction *I, unsigned ISDOpcode);
 113   bool SelectRet(const Instruction *I);
 114   bool SelectTrunc(const Instruction *I);
 115   bool SelectIntExt(const Instruction *I);
 116   bool SelectMul(const Instruction *I);
 117   bool SelectShift(const Instruction *I, bool IsLeftShift, bool IsArithmetic);
 118   bool SelectBitCast(const Instruction *I);
 119
 120   // Utility helper routines.
 121   bool isTypeLegal(Type *Ty, MVT &VT);
 122   bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
 123   bool ComputeAddress(const Value *Obj, Address &Addr);
 124   bool ComputeCallAddress(const Value *V, Address &Addr);
 125   bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
 126                        bool UseUnscaled);
 127   void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 128                             unsigned Flags, bool UseUnscaled);
 129   bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
 130   bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
 131                           unsigned Alignment);
 132   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
 133                          const Value *Cond);
 134
 135   // Emit functions.
 136   bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
 137   bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
 138                 bool UseUnscaled = false);
 139   bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
 140                  bool UseUnscaled = false);
 141   unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
 142   unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
 143   unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 144                        unsigned Op1, bool Op1IsKill);
 145   unsigned Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 146                          unsigned Op1, bool Op1IsKill);
 147   unsigned Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 148                          unsigned Op1, bool Op1IsKill);
 149   unsigned Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
 150   unsigned Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
 151   unsigned Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
 152
 153   unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
 154   unsigned AArch64MaterializeGV(const GlobalValue *GV);
 155
 156   // Call handling routines.
 157 private:
 158   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
 159   bool ProcessCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
 160                        unsigned &NumBytes);
 161   bool FinishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
 162
 163 public:
 164   // Backend specific FastISel code.
 165   unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
 166   unsigned TargetMaterializeConstant(const Constant *C) override;
 167
 168   explicit AArch64FastISel(FunctionLoweringInfo &funcInfo,
 169                          const TargetLibraryInfo *libInfo)
 170       : FastISel(funcInfo, libInfo) {
 171     Subtarget = &TM.getSubtarget<AArch64Subtarget>();
 172     Context = &funcInfo.Fn->getContext();
 173   }
 174
 175   bool TargetSelectInstruction(const Instruction *I) override;
 176
 177 #include "AArch64GenFastISel.inc"
 178 };
 179
 180 } // end anonymous namespace
 181
 182 #include "AArch64GenCallingConv.inc"
 183
 184 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
 185   if (CC == CallingConv::WebKit_JS)
 186     return CC_AArch64_WebKit_JS;
 187   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 188 }
 189
 190 unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
 191   assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
 192          "Alloca should always return a pointer.");
 193
 194   // Don't handle dynamic allocas.
 195   if (!FuncInfo.StaticAllocaMap.count(AI))
 196     return 0;
 197
 198   DenseMap<const AllocaInst *, int>::iterator SI =
 199       FuncInfo.StaticAllocaMap.find(AI);
 200
 201   if (SI != FuncInfo.StaticAllocaMap.end()) {
 202     unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
 203     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 204             ResultReg)
 205         .addFrameIndex(SI->second)
 206         .addImm(0)
 207         .addImm(0);
 208     return ResultReg;
 209   }
 210
 211   return 0;
 212 }
 213
 214 unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
 215   if (VT != MVT::f32 && VT != MVT::f64)
 216     return 0;
 217
 218   const APFloat Val = CFP->getValueAPF();
 219   bool is64bit = (VT == MVT::f64);
 220
 221   // This checks to see if we can use FMOV instructions to materialize
 222   // a constant, otherwise we have to materialize via the constant pool.
 223   if (TLI.isFPImmLegal(Val, VT)) {
 224     int Imm;
 225     unsigned Opc;
 226     if (is64bit) {
 227       Imm = AArch64_AM::getFP64Imm(Val);
 228       Opc = AArch64::FMOVDi;
 229     } else {
 230       Imm = AArch64_AM::getFP32Imm(Val);
 231       Opc = AArch64::FMOVSi;
 232     }
 233     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 234     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 235         .addImm(Imm);
 236     return ResultReg;
 237   }
 238
 239   // Materialize via constant pool.  MachineConstantPool wants an explicit
 240   // alignment.
 241   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
 242   if (Align == 0)
 243     Align = DL.getTypeAllocSize(CFP->getType());
 244
 245   unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 246   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 247   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 248           ADRPReg).addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGE);
 249
 250   unsigned Opc = is64bit ? AArch64::LDRDui : AArch64::LDRSui;
 251   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 252   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 253       .addReg(ADRPReg)
 254       .addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 255   return ResultReg;
 256 }
 257
 258 unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
 259   // We can't handle thread-local variables quickly yet.
 260   if (GV->isThreadLocal())
 261     return 0;
 262
 263   // MachO still uses GOT for large code-model accesses, but ELF requires
 264   // movz/movk sequences, which FastISel doesn't handle yet.
 265   if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
 266     return 0;
 267
 268   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
 269
 270   EVT DestEVT = TLI.getValueType(GV->getType(), true);
 271   if (!DestEVT.isSimple())
 272     return 0;
 273
 274   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 275   unsigned ResultReg;
 276
 277   if (OpFlags & AArch64II::MO_GOT) {
 278     // ADRP + LDRX
 279     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 280             ADRPReg)
 281         .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
 282
 283     ResultReg = createResultReg(&AArch64::GPR64RegClass);
 284     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
 285             ResultReg)
 286         .addReg(ADRPReg)
 287         .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
 288                           AArch64II::MO_NC);
 289   } else {
 290     // ADRP + ADDX
 291     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 292             ADRPReg).addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
 293
 294     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 295     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 296             ResultReg)
 297         .addReg(ADRPReg)
 298         .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
 299         .addImm(0);
 300   }
 301   return ResultReg;
 302 }
 303
 304 unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
 305   EVT CEVT = TLI.getValueType(C->getType(), true);
 306
 307   // Only handle simple types.
 308   if (!CEVT.isSimple())
 309     return 0;
 310   MVT VT = CEVT.getSimpleVT();
 311
 312   // FIXME: Handle ConstantInt.
 313   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 314     return AArch64MaterializeFP(CFP, VT);
 315   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
 316     return AArch64MaterializeGV(GV);
 317
 318   return 0;
 319 }
 320
 321 // Computes the address to get to an object.
 322 bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
 323   const User *U = nullptr;
 324   unsigned Opcode = Instruction::UserOp1;
 325   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
 326     // Don't walk into other basic blocks unless the object is an alloca from
 327     // another block, otherwise it may not have a virtual register assigned.
 328     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
 329         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 330       Opcode = I->getOpcode();
 331       U = I;
 332     }
 333   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
 334     Opcode = C->getOpcode();
 335     U = C;
 336   }
 337
 338   if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
 339     if (Ty->getAddressSpace() > 255)
 340       // Fast instruction selection doesn't support the special
 341       // address spaces.
 342       return false;
 343
 344   switch (Opcode) {
 345   default:
 346     break;
 347   case Instruction::BitCast: {
 348     // Look through bitcasts.
 349     return ComputeAddress(U->getOperand(0), Addr);
 350   }
 351   case Instruction::IntToPtr: {
 352     // Look past no-op inttoptrs.
 353     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 354       return ComputeAddress(U->getOperand(0), Addr);
 355     break;
 356   }
 357   case Instruction::PtrToInt: {
 358     // Look past no-op ptrtoints.
 359     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
 360       return ComputeAddress(U->getOperand(0), Addr);
 361     break;
 362   }
 363   case Instruction::GetElementPtr: {
 364     Address SavedAddr = Addr;
 365     uint64_t TmpOffset = Addr.getOffset();
 366
 367     // Iterate through the GEP folding the constants into offsets where
 368     // we can.
 369     gep_type_iterator GTI = gep_type_begin(U);
 370     for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
 371          ++i, ++GTI) {
 372       const Value *Op = *i;
 373       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
 374         const StructLayout *SL = DL.getStructLayout(STy);
 375         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
 376         TmpOffset += SL->getElementOffset(Idx);
 377       } else {
 378         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
 379         for (;;) {
 380           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
 381             // Constant-offset addressing.
 382             TmpOffset += CI->getSExtValue() * S;
 383             break;
 384           }
 385           if (canFoldAddIntoGEP(U, Op)) {
 386             // A compatible add with a constant operand. Fold the constant.
 387             ConstantInt *CI =
 388                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
 389             TmpOffset += CI->getSExtValue() * S;
 390             // Iterate on the other operand.
 391             Op = cast<AddOperator>(Op)->getOperand(0);
 392             continue;
 393           }
 394           // Unsupported
 395           goto unsupported_gep;
 396         }
 397       }
 398     }
 399
 400     // Try to grab the base operand now.
 401     Addr.setOffset(TmpOffset);
 402     if (ComputeAddress(U->getOperand(0), Addr))
 403       return true;
 404
 405     // We failed, restore everything and try the other options.
 406     Addr = SavedAddr;
 407
 408   unsupported_gep:
 409     break;
 410   }
 411   case Instruction::Alloca: {
 412     const AllocaInst *AI = cast<AllocaInst>(Obj);
 413     DenseMap<const AllocaInst *, int>::iterator SI =
 414         FuncInfo.StaticAllocaMap.find(AI);
 415     if (SI != FuncInfo.StaticAllocaMap.end()) {
 416       Addr.setKind(Address::FrameIndexBase);
 417       Addr.setFI(SI->second);
 418       return true;
 419     }
 420     break;
 421   }
 422   case Instruction::Add:
 423     // Adds of constants are common and easy enough.
 424     if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
 425       Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
 426       return ComputeAddress(U->getOperand(0), Addr);
 427     }
 428     break;
 429   }
 430
 431   // Try to get this in a register if nothing else has worked.
 432   if (!Addr.isValid())
 433     Addr.setReg(getRegForValue(Obj));
 434   return Addr.isValid();
 435 }
 436
 437 bool AArch64FastISel::ComputeCallAddress(const Value *V, Address &Addr) {
 438   const User *U = nullptr;
 439   unsigned Opcode = Instruction::UserOp1;
 440   bool InMBB = true;
 441
 442   if (const auto *I = dyn_cast<Instruction>(V)) {
 443     Opcode = I->getOpcode();
 444     U = I;
 445     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
 446   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
 447     Opcode = C->getOpcode();
 448     U = C;
 449   }
 450
 451   switch (Opcode) {
 452   default: break;
 453   case Instruction::BitCast:
 454     // Look past bitcasts if its operand is in the same BB.
 455     if (InMBB)
 456       return ComputeCallAddress(U->getOperand(0), Addr);
 457     break;
 458   case Instruction::IntToPtr:
 459     // Look past no-op inttoptrs if its operand is in the same BB.
 460     if (InMBB &&
 461         TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 462       return ComputeCallAddress(U->getOperand(0), Addr);
 463     break;
 464   case Instruction::PtrToInt:
 465     // Look past no-op ptrtoints if its operand is in the same BB.
 466     if (InMBB &&
 467         TLI.getValueType(U->getType()) == TLI.getPointerTy())
 468       return ComputeCallAddress(U->getOperand(0), Addr);
 469     break;
 470   }
 471
 472   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
 473     Addr.setGlobalValue(GV);
 474     return true;
 475   }
 476
 477   // If all else fails, try to materialize the value in a register.
 478   if (!Addr.getGlobalValue()) {
 479     Addr.setReg(getRegForValue(V));
 480     return Addr.getReg() != 0;
 481   }
 482
 483   return false;
 484 }
 485
 486
 487 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
 488   EVT evt = TLI.getValueType(Ty, true);
 489
 490   // Only handle simple types.
 491   if (evt == MVT::Other || !evt.isSimple())
 492     return false;
 493   VT = evt.getSimpleVT();
 494
 495   // This is a legal type, but it's not something we handle in fast-isel.
 496   if (VT == MVT::f128)
 497     return false;
 498
 499   // Handle all other legal types, i.e. a register that will directly hold this
 500   // value.
 501   return TLI.isTypeLegal(VT);
 502 }
 503
 504 bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
 505   if (isTypeLegal(Ty, VT))
 506     return true;
 507
 508   // If this is a type than can be sign or zero-extended to a basic operation
 509   // go ahead and accept it now. For stores, this reflects truncation.
 510   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
 511     return true;
 512
 513   return false;
 514 }
 515
 516 bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
 517                                       int64_t ScaleFactor, bool UseUnscaled) {
 518   bool needsLowering = false;
 519   int64_t Offset = Addr.getOffset();
 520   switch (VT.SimpleTy) {
 521   default:
 522     return false;
 523   case MVT::i1:
 524   case MVT::i8:
 525   case MVT::i16:
 526   case MVT::i32:
 527   case MVT::i64:
 528   case MVT::f32:
 529   case MVT::f64:
 530     if (!UseUnscaled)
 531       // Using scaled, 12-bit, unsigned immediate offsets.
 532       needsLowering = ((Offset & 0xfff) != Offset);
 533     else
 534       // Using unscaled, 9-bit, signed immediate offsets.
 535       needsLowering = (Offset > 256 || Offset < -256);
 536     break;
 537   }
 538
 539   //If this is a stack pointer and the offset needs to be simplified then put
 540   // the alloca address into a register, set the base type back to register and
 541   // continue. This should almost never happen.
 542   if (needsLowering && Addr.getKind() == Address::FrameIndexBase) {
 543     unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
 544     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 545             ResultReg)
 546         .addFrameIndex(Addr.getFI())
 547         .addImm(0)
 548         .addImm(0);
 549     Addr.setKind(Address::RegBase);
 550     Addr.setReg(ResultReg);
 551   }
 552
 553   // Since the offset is too large for the load/store instruction get the
 554   // reg+offset into a register.
 555   if (needsLowering) {
 556     uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor;
 557     unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false,
 558                                       UnscaledOffset, MVT::i64);
 559     if (ResultReg == 0)
 560       return false;
 561     Addr.setReg(ResultReg);
 562     Addr.setOffset(0);
 563   }
 564   return true;
 565 }
 566
 567 void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
 568                                            const MachineInstrBuilder &MIB,
 569                                            unsigned Flags, bool UseUnscaled) {
 570   int64_t Offset = Addr.getOffset();
 571   // Frame base works a bit differently. Handle it separately.
 572   if (Addr.getKind() == Address::FrameIndexBase) {
 573     int FI = Addr.getFI();
 574     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
 575     // and alignment should be based on the VT.
 576     MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
 577         MachinePointerInfo::getFixedStack(FI, Offset), Flags,
 578         MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
 579     // Now add the rest of the operands.
 580     MIB.addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
 581   } else {
 582     // Now add the rest of the operands.
 583     MIB.addReg(Addr.getReg());
 584     MIB.addImm(Offset);
 585   }
 586 }
 587
 588 bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
 589                                bool UseUnscaled) {
 590   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
 591   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
 592   if (!UseUnscaled && Addr.getOffset() < 0)
 593     UseUnscaled = true;
 594
 595   unsigned Opc;
 596   const TargetRegisterClass *RC;
 597   bool VTIsi1 = false;
 598   int64_t ScaleFactor = 0;
 599   switch (VT.SimpleTy) {
 600   default:
 601     return false;
 602   case MVT::i1:
 603     VTIsi1 = true;
 604   // Intentional fall-through.
 605   case MVT::i8:
 606     Opc = UseUnscaled ? AArch64::LDURBBi : AArch64::LDRBBui;
 607     RC = &AArch64::GPR32RegClass;
 608     ScaleFactor = 1;
 609     break;
 610   case MVT::i16:
 611     Opc = UseUnscaled ? AArch64::LDURHHi : AArch64::LDRHHui;
 612     RC = &AArch64::GPR32RegClass;
 613     ScaleFactor = 2;
 614     break;
 615   case MVT::i32:
 616     Opc = UseUnscaled ? AArch64::LDURWi : AArch64::LDRWui;
 617     RC = &AArch64::GPR32RegClass;
 618     ScaleFactor = 4;
 619     break;
 620   case MVT::i64:
 621     Opc = UseUnscaled ? AArch64::LDURXi : AArch64::LDRXui;
 622     RC = &AArch64::GPR64RegClass;
 623     ScaleFactor = 8;
 624     break;
 625   case MVT::f32:
 626     Opc = UseUnscaled ? AArch64::LDURSi : AArch64::LDRSui;
 627     RC = TLI.getRegClassFor(VT);
 628     ScaleFactor = 4;
 629     break;
 630   case MVT::f64:
 631     Opc = UseUnscaled ? AArch64::LDURDi : AArch64::LDRDui;
 632     RC = TLI.getRegClassFor(VT);
 633     ScaleFactor = 8;
 634     break;
 635   }
 636   // Scale the offset.
 637   if (!UseUnscaled) {
 638     int64_t Offset = Addr.getOffset();
 639     if (Offset & (ScaleFactor - 1))
 640       // Retry using an unscaled, 9-bit, signed immediate offset.
 641       return EmitLoad(VT, ResultReg, Addr, /*UseUnscaled*/ true);
 642
 643     Addr.setOffset(Offset / ScaleFactor);
 644   }
 645
 646   // Simplify this down to something we can handle.
 647   if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
 648     return false;
 649
 650   // Create the base instruction, then add the operands.
 651   ResultReg = createResultReg(RC);
 652   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 653                                     TII.get(Opc), ResultReg);
 654   AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, UseUnscaled);
 655
 656   // Loading an i1 requires special handling.
 657   if (VTIsi1) {
 658     MRI.constrainRegClass(ResultReg, &AArch64::GPR32RegClass);
 659     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
 660     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
 661             ANDReg)
 662         .addReg(ResultReg)
 663         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
 664     ResultReg = ANDReg;
 665   }
 666   return true;
 667 }
 668
 669 bool AArch64FastISel::SelectLoad(const Instruction *I) {
 670   MVT VT;
 671   // Verify we have a legal type before going any further.  Currently, we handle
 672   // simple types that will directly fit in a register (i32/f32/i64/f64) or
 673   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
 674   if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic())
 675     return false;
 676
 677   // See if we can handle this address.
 678   Address Addr;
 679   if (!ComputeAddress(I->getOperand(0), Addr))
 680     return false;
 681
 682   unsigned ResultReg;
 683   if (!EmitLoad(VT, ResultReg, Addr))
 684     return false;
 685
 686   UpdateValueMap(I, ResultReg);
 687   return true;
 688 }
 689
 690 bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
 691                                 bool UseUnscaled) {
 692   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
 693   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
 694   if (!UseUnscaled && Addr.getOffset() < 0)
 695     UseUnscaled = true;
 696
 697   unsigned StrOpc;
 698   bool VTIsi1 = false;
 699   int64_t ScaleFactor = 0;
 700   // Using scaled, 12-bit, unsigned immediate offsets.
 701   switch (VT.SimpleTy) {
 702   default:
 703     return false;
 704   case MVT::i1:
 705     VTIsi1 = true;
 706   case MVT::i8:
 707     StrOpc = UseUnscaled ? AArch64::STURBBi : AArch64::STRBBui;
 708     ScaleFactor = 1;
 709     break;
 710   case MVT::i16:
 711     StrOpc = UseUnscaled ? AArch64::STURHHi : AArch64::STRHHui;
 712     ScaleFactor = 2;
 713     break;
 714   case MVT::i32:
 715     StrOpc = UseUnscaled ? AArch64::STURWi : AArch64::STRWui;
 716     ScaleFactor = 4;
 717     break;
 718   case MVT::i64:
 719     StrOpc = UseUnscaled ? AArch64::STURXi : AArch64::STRXui;
 720     ScaleFactor = 8;
 721     break;
 722   case MVT::f32:
 723     StrOpc = UseUnscaled ? AArch64::STURSi : AArch64::STRSui;
 724     ScaleFactor = 4;
 725     break;
 726   case MVT::f64:
 727     StrOpc = UseUnscaled ? AArch64::STURDi : AArch64::STRDui;
 728     ScaleFactor = 8;
 729     break;
 730   }
 731   // Scale the offset.
 732   if (!UseUnscaled) {
 733     int64_t Offset = Addr.getOffset();
 734     if (Offset & (ScaleFactor - 1))
 735       // Retry using an unscaled, 9-bit, signed immediate offset.
 736       return EmitStore(VT, SrcReg, Addr, /*UseUnscaled*/ true);
 737
 738     Addr.setOffset(Offset / ScaleFactor);
 739   }
 740
 741   // Simplify this down to something we can handle.
 742   if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
 743     return false;
 744
 745   // Storing an i1 requires special handling.
 746   if (VTIsi1) {
 747     MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
 748     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
 749     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
 750             ANDReg)
 751         .addReg(SrcReg)
 752         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
 753     SrcReg = ANDReg;
 754   }
 755   // Create the base instruction, then add the operands.
 756   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 757                                     TII.get(StrOpc)).addReg(SrcReg);
 758   AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, UseUnscaled);
 759   return true;
 760 }
 761
 762 bool AArch64FastISel::SelectStore(const Instruction *I) {
 763   MVT VT;
 764   Value *Op0 = I->getOperand(0);
 765   // Verify we have a legal type before going any further.  Currently, we handle
 766   // simple types that will directly fit in a register (i32/f32/i64/f64) or
 767   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
 768   if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
 769       cast<StoreInst>(I)->isAtomic())
 770     return false;
 771
 772   // Get the value to be stored into a register.
 773   unsigned SrcReg = getRegForValue(Op0);
 774   if (SrcReg == 0)
 775     return false;
 776
 777   // See if we can handle this address.
 778   Address Addr;
 779   if (!ComputeAddress(I->getOperand(1), Addr))
 780     return false;
 781
 782   if (!EmitStore(VT, SrcReg, Addr))
 783     return false;
 784   return true;
 785 }
 786
 787 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
 788   switch (Pred) {
 789   case CmpInst::FCMP_ONE:
 790   case CmpInst::FCMP_UEQ:
 791   default:
 792     // AL is our "false" for now. The other two need more compares.
 793     return AArch64CC::AL;
 794   case CmpInst::ICMP_EQ:
 795   case CmpInst::FCMP_OEQ:
 796     return AArch64CC::EQ;
 797   case CmpInst::ICMP_SGT:
 798   case CmpInst::FCMP_OGT:
 799     return AArch64CC::GT;
 800   case CmpInst::ICMP_SGE:
 801   case CmpInst::FCMP_OGE:
 802     return AArch64CC::GE;
 803   case CmpInst::ICMP_UGT:
 804   case CmpInst::FCMP_UGT:
 805     return AArch64CC::HI;
 806   case CmpInst::FCMP_OLT:
 807     return AArch64CC::MI;
 808   case CmpInst::ICMP_ULE:
 809   case CmpInst::FCMP_OLE:
 810     return AArch64CC::LS;
 811   case CmpInst::FCMP_ORD:
 812     return AArch64CC::VC;
 813   case CmpInst::FCMP_UNO:
 814     return AArch64CC::VS;
 815   case CmpInst::FCMP_UGE:
 816     return AArch64CC::PL;
 817   case CmpInst::ICMP_SLT:
 818   case CmpInst::FCMP_ULT:
 819     return AArch64CC::LT;
 820   case CmpInst::ICMP_SLE:
 821   case CmpInst::FCMP_ULE:
 822     return AArch64CC::LE;
 823   case CmpInst::FCMP_UNE:
 824   case CmpInst::ICMP_NE:
 825     return AArch64CC::NE;
 826   case CmpInst::ICMP_UGE:
 827     return AArch64CC::HS;
 828   case CmpInst::ICMP_ULT:
 829     return AArch64CC::LO;
 830   }
 831 }
 832
 833 bool AArch64FastISel::SelectBranch(const Instruction *I) {
 834   const BranchInst *BI = cast<BranchInst>(I);
 835   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
 836   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
 837
 838   AArch64CC::CondCode CC = AArch64CC::NE;
 839   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
 840     if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
 841       // We may not handle every CC for now.
 842       CC = getCompareCC(CI->getPredicate());
 843       if (CC == AArch64CC::AL)
 844         return false;
 845
 846       // Emit the cmp.
 847       if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
 848         return false;
 849
 850       // Emit the branch.
 851       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
 852           .addImm(CC)
 853           .addMBB(TBB);
 854
 855       // Obtain the branch weight and add the TrueBB to the successor list.
 856       uint32_t BranchWeight = 0;
 857       if (FuncInfo.BPI)
 858         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
 859                                                   TBB->getBasicBlock());
 860       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
 861
 862       FastEmitBranch(FBB, DbgLoc);
 863       return true;
 864     }
 865   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
 866     MVT SrcVT;
 867     if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
 868         (isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) {
 869       unsigned CondReg = getRegForValue(TI->getOperand(0));
 870       if (CondReg == 0)
 871         return false;
 872
 873       // Issue an extract_subreg to get the lower 32-bits.
 874       if (SrcVT == MVT::i64)
 875         CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true,
 876                                              AArch64::sub_32);
 877
 878       MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
 879       unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
 880       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 881               TII.get(AArch64::ANDWri), ANDReg)
 882           .addReg(CondReg)
 883           .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
 884       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 885               TII.get(AArch64::SUBSWri))
 886           .addReg(ANDReg)
 887           .addReg(ANDReg)
 888           .addImm(0)
 889           .addImm(0);
 890
 891       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
 892         std::swap(TBB, FBB);
 893         CC = AArch64CC::EQ;
 894       }
 895       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
 896           .addImm(CC)
 897           .addMBB(TBB);
 898
 899       // Obtain the branch weight and add the TrueBB to the successor list.
 900       uint32_t BranchWeight = 0;
 901       if (FuncInfo.BPI)
 902         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
 903                                                   TBB->getBasicBlock());
 904       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
 905
 906       FastEmitBranch(FBB, DbgLoc);
 907       return true;
 908     }
 909   } else if (const ConstantInt *CI =
 910                  dyn_cast<ConstantInt>(BI->getCondition())) {
 911     uint64_t Imm = CI->getZExtValue();
 912     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
 913     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
 914         .addMBB(Target);
 915
 916     // Obtain the branch weight and add the target to the successor list.
 917     uint32_t BranchWeight = 0;
 918     if (FuncInfo.BPI)
 919       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
 920                                                  Target->getBasicBlock());
 921     FuncInfo.MBB->addSuccessor(Target, BranchWeight);
 922     return true;
 923   } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
 924     // Fake request the condition, otherwise the intrinsic might be completely
 925     // optimized away.
 926     unsigned CondReg = getRegForValue(BI->getCondition());
 927     if (!CondReg)
 928       return false;
 929
 930     // Emit the branch.
 931     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
 932       .addImm(CC)
 933       .addMBB(TBB);
 934
 935     // Obtain the branch weight and add the TrueBB to the successor list.
 936     uint32_t BranchWeight = 0;
 937     if (FuncInfo.BPI)
 938       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
 939                                                  TBB->getBasicBlock());
 940     FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
 941
 942     FastEmitBranch(FBB, DbgLoc);
 943     return true;
 944   }
 945
 946   unsigned CondReg = getRegForValue(BI->getCondition());
 947   if (CondReg == 0)
 948     return false;
 949
 950   // We've been divorced from our compare!  Our block was split, and
 951   // now our compare lives in a predecessor block.  We musn't
 952   // re-compare here, as the children of the compare aren't guaranteed
 953   // live across the block boundary (we *could* check for this).
 954   // Regardless, the compare has been done in the predecessor block,
 955   // and it left a value for us in a virtual register.  Ergo, we test
 956   // the one-bit value left in the virtual register.
 957   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri),
 958           AArch64::WZR)
 959       .addReg(CondReg)
 960       .addImm(0)
 961       .addImm(0);
 962
 963   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
 964     std::swap(TBB, FBB);
 965     CC = AArch64CC::EQ;
 966   }
 967
 968   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
 969       .addImm(CC)
 970       .addMBB(TBB);
 971
 972   // Obtain the branch weight and add the TrueBB to the successor list.
 973   uint32_t BranchWeight = 0;
 974   if (FuncInfo.BPI)
 975     BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
 976                                                TBB->getBasicBlock());
 977   FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
 978
 979   FastEmitBranch(FBB, DbgLoc);
 980   return true;
 981 }
 982
 983 bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
 984   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
 985   unsigned AddrReg = getRegForValue(BI->getOperand(0));
 986   if (AddrReg == 0)
 987     return false;
 988
 989   // Emit the indirect branch.
 990   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BR))
 991       .addReg(AddrReg);
 992
 993   // Make sure the CFG is up-to-date.
 994   for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
 995     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
 996
 997   return true;
 998 }
 999
1000 bool AArch64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) {
1001   Type *Ty = Src1Value->getType();
1002   EVT SrcEVT = TLI.getValueType(Ty, true);
1003   if (!SrcEVT.isSimple())
1004     return false;
1005   MVT SrcVT = SrcEVT.getSimpleVT();
1006
1007   // Check to see if the 2nd operand is a constant that we can encode directly
1008   // in the compare.
1009   uint64_t Imm;
1010   bool UseImm = false;
1011   bool isNegativeImm = false;
1012   if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
1013     if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
1014         SrcVT == MVT::i8 || SrcVT == MVT::i1) {
1015       const APInt &CIVal = ConstInt->getValue();
1016
1017       Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue();
1018       if (CIVal.isNegative()) {
1019         isNegativeImm = true;
1020         Imm = -Imm;
1021       }
1022       // FIXME: We can handle more immediates using shifts.
1023       UseImm = ((Imm & 0xfff) == Imm);
1024     }
1025   } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
1026     if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
1027       if (ConstFP->isZero() && !ConstFP->isNegative())
1028         UseImm = true;
1029   }
1030
1031   unsigned ZReg;
1032   unsigned CmpOpc;
1033   bool isICmp = true;
1034   bool needsExt = false;
1035   switch (SrcVT.SimpleTy) {
1036   default:
1037     return false;
1038   case MVT::i1:
1039   case MVT::i8:
1040   case MVT::i16:
1041     needsExt = true;
1042   // Intentional fall-through.
1043   case MVT::i32:
1044     ZReg = AArch64::WZR;
1045     if (UseImm)
1046       CmpOpc = isNegativeImm ? AArch64::ADDSWri : AArch64::SUBSWri;
1047     else
1048       CmpOpc = AArch64::SUBSWrr;
1049     break;
1050   case MVT::i64:
1051     ZReg = AArch64::XZR;
1052     if (UseImm)
1053       CmpOpc = isNegativeImm ? AArch64::ADDSXri : AArch64::SUBSXri;
1054     else
1055       CmpOpc = AArch64::SUBSXrr;
1056     break;
1057   case MVT::f32:
1058     isICmp = false;
1059     CmpOpc = UseImm ? AArch64::FCMPSri : AArch64::FCMPSrr;
1060     break;
1061   case MVT::f64:
1062     isICmp = false;
1063     CmpOpc = UseImm ? AArch64::FCMPDri : AArch64::FCMPDrr;
1064     break;
1065   }
1066
1067   unsigned SrcReg1 = getRegForValue(Src1Value);
1068   if (SrcReg1 == 0)
1069     return false;
1070
1071   unsigned SrcReg2;
1072   if (!UseImm) {
1073     SrcReg2 = getRegForValue(Src2Value);
1074     if (SrcReg2 == 0)
1075       return false;
1076   }
1077
1078   // We have i1, i8, or i16, we need to either zero extend or sign extend.
1079   if (needsExt) {
1080     SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
1081     if (SrcReg1 == 0)
1082       return false;
1083     if (!UseImm) {
1084       SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
1085       if (SrcReg2 == 0)
1086         return false;
1087     }
1088   }
1089
1090   if (isICmp) {
1091     if (UseImm)
1092       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1093           .addReg(ZReg)
1094           .addReg(SrcReg1)
1095           .addImm(Imm)
1096           .addImm(0);
1097     else
1098       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1099           .addReg(ZReg)
1100           .addReg(SrcReg1)
1101           .addReg(SrcReg2);
1102   } else {
1103     if (UseImm)
1104       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1105           .addReg(SrcReg1);
1106     else
1107       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1108           .addReg(SrcReg1)
1109           .addReg(SrcReg2);
1110   }
1111   return true;
1112 }
1113
1114 bool AArch64FastISel::SelectCmp(const Instruction *I) {
1115   const CmpInst *CI = cast<CmpInst>(I);
1116
1117   // We may not handle every CC for now.
1118   AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
1119   if (CC == AArch64CC::AL)
1120     return false;
1121
1122   // Emit the cmp.
1123   if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1124     return false;
1125
1126   // Now set a register based on the comparison.
1127   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
1128   unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
1129   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
1130           ResultReg)
1131       .addReg(AArch64::WZR)
1132       .addReg(AArch64::WZR)
1133       .addImm(invertedCC);
1134
1135   UpdateValueMap(I, ResultReg);
1136   return true;
1137 }
1138
1139 bool AArch64FastISel::SelectSelect(const Instruction *I) {
1140   const SelectInst *SI = cast<SelectInst>(I);
1141
1142   EVT DestEVT = TLI.getValueType(SI->getType(), true);
1143   if (!DestEVT.isSimple())
1144     return false;
1145
1146   MVT DestVT = DestEVT.getSimpleVT();
1147   if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
1148       DestVT != MVT::f64)
1149     return false;
1150
1151   unsigned SelectOpc;
1152   switch (DestVT.SimpleTy) {
1153   default: return false;
1154   case MVT::i32: SelectOpc = AArch64::CSELWr;    break;
1155   case MVT::i64: SelectOpc = AArch64::CSELXr;    break;
1156   case MVT::f32: SelectOpc = AArch64::FCSELSrrr; break;
1157   case MVT::f64: SelectOpc = AArch64::FCSELDrrr; break;
1158   }
1159
1160   const Value *Cond = SI->getCondition();
1161   bool NeedTest = true;
1162   AArch64CC::CondCode CC = AArch64CC::NE;
1163   if (foldXALUIntrinsic(CC, I, Cond))
1164     NeedTest = false;
1165
1166   unsigned CondReg = getRegForValue(Cond);
1167   if (!CondReg)
1168     return false;
1169   bool CondIsKill = hasTrivialKill(Cond);
1170
1171   if (NeedTest) {
1172     MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
1173     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
1174     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
1175             ANDReg)
1176       .addReg(CondReg, getKillRegState(CondIsKill))
1177       .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1178
1179     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri))
1180       .addReg(ANDReg)
1181       .addReg(ANDReg)
1182       .addImm(0)
1183       .addImm(0);
1184   }
1185
1186   unsigned TrueReg = getRegForValue(SI->getTrueValue());
1187   bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
1188
1189   unsigned FalseReg = getRegForValue(SI->getFalseValue());
1190   bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
1191
1192   if (!TrueReg || !FalseReg)
1193     return false;
1194
1195   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
1196   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc),
1197           ResultReg)
1198     .addReg(TrueReg, getKillRegState(TrueIsKill))
1199     .addReg(FalseReg, getKillRegState(FalseIsKill))
1200     .addImm(CC);
1201
1202   UpdateValueMap(I, ResultReg);
1203   return true;
1204 }
1205
1206 bool AArch64FastISel::SelectFPExt(const Instruction *I) {
1207   Value *V = I->getOperand(0);
1208   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
1209     return false;
1210
1211   unsigned Op = getRegForValue(V);
1212   if (Op == 0)
1213     return false;
1214
1215   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
1216   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
1217           ResultReg).addReg(Op);
1218   UpdateValueMap(I, ResultReg);
1219   return true;
1220 }
1221
1222 bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
1223   Value *V = I->getOperand(0);
1224   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
1225     return false;
1226
1227   unsigned Op = getRegForValue(V);
1228   if (Op == 0)
1229     return false;
1230
1231   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
1232   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
1233           ResultReg).addReg(Op);
1234   UpdateValueMap(I, ResultReg);
1235   return true;
1236 }
1237
1238 // FPToUI and FPToSI
1239 bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
1240   MVT DestVT;
1241   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1242     return false;
1243
1244   unsigned SrcReg = getRegForValue(I->getOperand(0));
1245   if (SrcReg == 0)
1246     return false;
1247
1248   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1249   if (SrcVT == MVT::f128)
1250     return false;
1251
1252   unsigned Opc;
1253   if (SrcVT == MVT::f64) {
1254     if (Signed)
1255       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
1256     else
1257       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
1258   } else {
1259     if (Signed)
1260       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
1261     else
1262       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
1263   }
1264   unsigned ResultReg = createResultReg(
1265       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
1266   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1267       .addReg(SrcReg);
1268   UpdateValueMap(I, ResultReg);
1269   return true;
1270 }
1271
1272 bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
1273   MVT DestVT;
1274   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1275     return false;
1276   assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
1277           "Unexpected value type.");
1278
1279   unsigned SrcReg = getRegForValue(I->getOperand(0));
1280   if (SrcReg == 0)
1281     return false;
1282
1283   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1284
1285   // Handle sign-extension.
1286   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
1287     SrcReg =
1288         EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
1289     if (SrcReg == 0)
1290       return false;
1291   }
1292
1293   MRI.constrainRegClass(SrcReg, SrcVT == MVT::i64 ? &AArch64::GPR64RegClass
1294                                                   : &AArch64::GPR32RegClass);
1295
1296   unsigned Opc;
1297   if (SrcVT == MVT::i64) {
1298     if (Signed)
1299       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
1300     else
1301       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
1302   } else {
1303     if (Signed)
1304       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
1305     else
1306       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
1307   }
1308
1309   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
1310   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1311       .addReg(SrcReg);
1312   UpdateValueMap(I, ResultReg);
1313   return true;
1314 }
1315
1316 bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI,
1317                                       SmallVectorImpl<MVT> &OutVTs,
1318                                       unsigned &NumBytes) {
1319   CallingConv::ID CC = CLI.CallConv;
1320   SmallVector<CCValAssign, 16> ArgLocs;
1321   CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
1322   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
1323
1324   // Get a count of how many bytes are to be pushed on the stack.
1325   NumBytes = CCInfo.getNextStackOffset();
1326
1327   // Issue CALLSEQ_START
1328   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
1329   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
1330     .addImm(NumBytes);
1331
1332   // Process the args.
1333   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1334     CCValAssign &VA = ArgLocs[i];
1335     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
1336     MVT ArgVT = OutVTs[VA.getValNo()];
1337
1338     unsigned ArgReg = getRegForValue(ArgVal);
1339     if (!ArgReg)
1340       return false;
1341
1342     // Handle arg promotion: SExt, ZExt, AExt.
1343     switch (VA.getLocInfo()) {
1344     case CCValAssign::Full:
1345       break;
1346     case CCValAssign::SExt: {
1347       MVT DestVT = VA.getLocVT();
1348       MVT SrcVT = ArgVT;
1349       ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
1350       if (!ArgReg)
1351         return false;
1352       break;
1353     }
1354     case CCValAssign::AExt:
1355     // Intentional fall-through.
1356     case CCValAssign::ZExt: {
1357       MVT DestVT = VA.getLocVT();
1358       MVT SrcVT = ArgVT;
1359       ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
1360       if (!ArgReg)
1361         return false;
1362       break;
1363     }
1364     default:
1365       llvm_unreachable("Unknown arg promotion!");
1366     }
1367
1368     // Now copy/store arg to correct locations.
1369     if (VA.isRegLoc() && !VA.needsCustom()) {
1370       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1371               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
1372       CLI.OutRegs.push_back(VA.getLocReg());
1373     } else if (VA.needsCustom()) {
1374       // FIXME: Handle custom args.
1375       return false;
1376     } else {
1377       assert(VA.isMemLoc() && "Assuming store on stack.");
1378
1379       // Don't emit stores for undef values.
1380       if (isa<UndefValue>(ArgVal))
1381         continue;
1382
1383       // Need to store on the stack.
1384       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
1385
1386       unsigned BEAlign = 0;
1387       if (ArgSize < 8 && !Subtarget->isLittleEndian())
1388         BEAlign = 8 - ArgSize;
1389
1390       Address Addr;
1391       Addr.setKind(Address::RegBase);
1392       Addr.setReg(AArch64::SP);
1393       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
1394
1395       if (!EmitStore(ArgVT, ArgReg, Addr))
1396         return false;
1397     }
1398   }
1399   return true;
1400 }
1401
1402 bool AArch64FastISel::FinishCall(CallLoweringInfo &CLI, MVT RetVT,
1403                                  unsigned NumBytes) {
1404   CallingConv::ID CC = CLI.CallConv;
1405
1406   // Issue CALLSEQ_END
1407   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
1408   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
1409     .addImm(NumBytes).addImm(0);
1410
1411   // Now the return value.
1412   if (RetVT != MVT::isVoid) {
1413     SmallVector<CCValAssign, 16> RVLocs;
1414     CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
1415     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
1416
1417     // Only handle a single return value.
1418     if (RVLocs.size() != 1)
1419       return false;
1420
1421     // Copy all of the result registers out of their specified physreg.
1422     MVT CopyVT = RVLocs[0].getValVT();
1423     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
1424     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1425             TII.get(TargetOpcode::COPY), ResultReg)
1426       .addReg(RVLocs[0].getLocReg());
1427     CLI.InRegs.push_back(RVLocs[0].getLocReg());
1428
1429     CLI.ResultReg = ResultReg;
1430     CLI.NumResultRegs = 1;
1431   }
1432
1433   return true;
1434 }
1435
1436 bool AArch64FastISel::FastLowerCall(CallLoweringInfo &CLI) {
1437   CallingConv::ID CC  = CLI.CallConv;
1438   bool IsVarArg       = CLI.IsVarArg;
1439   const Value *Callee = CLI.Callee;
1440   const char *SymName = CLI.SymName;
1441
1442   CodeModel::Model CM = TM.getCodeModel();
1443   // Only support the small and large code model.
1444   if (CM != CodeModel::Small && CM != CodeModel::Large)
1445     return false;
1446
1447   // FIXME: Add large code model support for ELF.
1448   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
1449     return false;
1450
1451   // Let SDISel handle vararg functions.
1452   if (IsVarArg)
1453     return false;
1454
1455   // FIXME: Only handle *simple* calls for now.
1456   MVT RetVT;
1457   if (CLI.RetTy->isVoidTy())
1458     RetVT = MVT::isVoid;
1459   else if (!isTypeLegal(CLI.RetTy, RetVT))
1460     return false;
1461
1462   for (auto Flag : CLI.OutFlags)
1463     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
1464       return false;
1465
1466   // Set up the argument vectors.
1467   SmallVector<MVT, 16> OutVTs;
1468   OutVTs.reserve(CLI.OutVals.size());
1469
1470   for (auto *Val : CLI.OutVals) {
1471     MVT VT;
1472     if (!isTypeLegal(Val->getType(), VT) &&
1473         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
1474       return false;
1475
1476     // We don't handle vector parameters yet.
1477     if (VT.isVector() || VT.getSizeInBits() > 64)
1478       return false;
1479
1480     OutVTs.push_back(VT);
1481   }
1482
1483   Address Addr;
1484   if (!ComputeCallAddress(Callee, Addr))
1485     return false;
1486
1487   // Handle the arguments now that we've gotten them.
1488   unsigned NumBytes;
1489   if (!ProcessCallArgs(CLI, OutVTs, NumBytes))
1490     return false;
1491
1492   // Issue the call.
1493   MachineInstrBuilder MIB;
1494   if (CM == CodeModel::Small) {
1495     unsigned CallOpc = Addr.getReg() ? AArch64::BLR : AArch64::BL;
1496     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
1497     if (SymName)
1498       MIB.addExternalSymbol(SymName, 0);
1499     else if (Addr.getGlobalValue())
1500       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
1501     else if (Addr.getReg())
1502       MIB.addReg(Addr.getReg());
1503     else
1504       return false;
1505   } else {
1506     unsigned CallReg = 0;
1507     if (SymName) {
1508       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
1509       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
1510               ADRPReg)
1511         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
1512
1513       CallReg = createResultReg(&AArch64::GPR64RegClass);
1514       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
1515               CallReg)
1516         .addReg(ADRPReg)
1517         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
1518                            AArch64II::MO_NC);
1519     } else if (Addr.getGlobalValue()) {
1520       CallReg = AArch64MaterializeGV(Addr.getGlobalValue());
1521     } else if (Addr.getReg())
1522       CallReg = Addr.getReg();
1523
1524     if (!CallReg)
1525       return false;
1526
1527     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1528                   TII.get(AArch64::BLR)).addReg(CallReg);
1529   }
1530
1531   // Add implicit physical register uses to the call.
1532   for (auto Reg : CLI.OutRegs)
1533     MIB.addReg(Reg, RegState::Implicit);
1534
1535   // Add a register mask with the call-preserved registers.
1536   // Proper defs for return values will be added by setPhysRegsDeadExcept().
1537   MIB.addRegMask(TRI.getCallPreservedMask(CC));
1538
1539   CLI.Call = MIB;
1540
1541   // Finish off the call including any return values.
1542   return FinishCall(CLI, RetVT, NumBytes);
1543 }
1544
1545 bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
1546   if (Alignment)
1547     return Len / Alignment <= 4;
1548   else
1549     return Len < 32;
1550 }
1551
1552 bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
1553                                          uint64_t Len, unsigned Alignment) {
1554   // Make sure we don't bloat code by inlining very large memcpy's.
1555   if (!IsMemCpySmall(Len, Alignment))
1556     return false;
1557
1558   int64_t UnscaledOffset = 0;
1559   Address OrigDest = Dest;
1560   Address OrigSrc = Src;
1561
1562   while (Len) {
1563     MVT VT;
1564     if (!Alignment || Alignment >= 8) {
1565       if (Len >= 8)
1566         VT = MVT::i64;
1567       else if (Len >= 4)
1568         VT = MVT::i32;
1569       else if (Len >= 2)
1570         VT = MVT::i16;
1571       else {
1572         VT = MVT::i8;
1573       }
1574     } else {
1575       // Bound based on alignment.
1576       if (Len >= 4 && Alignment == 4)
1577         VT = MVT::i32;
1578       else if (Len >= 2 && Alignment == 2)
1579         VT = MVT::i16;
1580       else {
1581         VT = MVT::i8;
1582       }
1583     }
1584
1585     bool RV;
1586     unsigned ResultReg;
1587     RV = EmitLoad(VT, ResultReg, Src);
1588     if (!RV)
1589       return false;
1590
1591     RV = EmitStore(VT, ResultReg, Dest);
1592     if (!RV)
1593       return false;
1594
1595     int64_t Size = VT.getSizeInBits() / 8;
1596     Len -= Size;
1597     UnscaledOffset += Size;
1598
1599     // We need to recompute the unscaled offset for each iteration.
1600     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
1601     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
1602   }
1603
1604   return true;
1605 }
1606
1607 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
1608 /// into the user. The condition code will only be updated on success.
1609 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
1610                                         const Instruction *I,
1611                                         const Value *Cond) {
1612   if (!isa<ExtractValueInst>(Cond))
1613     return false;
1614
1615   const auto *EV = cast<ExtractValueInst>(Cond);
1616   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
1617     return false;
1618
1619   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
1620   MVT RetVT;
1621   const Function *Callee = II->getCalledFunction();
1622   Type *RetTy =
1623   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
1624   if (!isTypeLegal(RetTy, RetVT))
1625     return false;
1626
1627   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1628     return false;
1629
1630   AArch64CC::CondCode TmpCC;
1631   switch (II->getIntrinsicID()) {
1632     default: return false;
1633     case Intrinsic::sadd_with_overflow:
1634     case Intrinsic::ssub_with_overflow: TmpCC = AArch64CC::VS; break;
1635     case Intrinsic::uadd_with_overflow: TmpCC = AArch64CC::HS; break;
1636     case Intrinsic::usub_with_overflow: TmpCC = AArch64CC::LO; break;
1637     case Intrinsic::smul_with_overflow:
1638     case Intrinsic::umul_with_overflow: TmpCC = AArch64CC::NE; break;
1639   }
1640
1641   // Check if both instructions are in the same basic block.
1642   if (II->getParent() != I->getParent())
1643     return false;
1644
1645   // Make sure nothing is in the way
1646   BasicBlock::const_iterator Start = I;
1647   BasicBlock::const_iterator End = II;
1648   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
1649     // We only expect extractvalue instructions between the intrinsic and the
1650     // instruction to be selected.
1651     if (!isa<ExtractValueInst>(Itr))
1652       return false;
1653
1654     // Check that the extractvalue operand comes from the intrinsic.
1655     const auto *EVI = cast<ExtractValueInst>(Itr);
1656     if (EVI->getAggregateOperand() != II)
1657       return false;
1658   }
1659
1660   CC = TmpCC;
1661   return true;
1662 }
1663
1664 bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
1665   // FIXME: Handle more intrinsics.
1666   switch (II->getIntrinsicID()) {
1667   default: return false;
1668   case Intrinsic::frameaddress: {
1669     MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
1670     MFI->setFrameAddressIsTaken(true);
1671
1672     const AArch64RegisterInfo *RegInfo =
1673         static_cast<const AArch64RegisterInfo *>(
1674             TM.getSubtargetImpl()->getRegisterInfo());
1675     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
1676     unsigned SrcReg = FramePtr;
1677
1678     // Recursively load frame address
1679     // ldr x0, [fp]
1680     // ldr x0, [x0]
1681     // ldr x0, [x0]
1682     // ...
1683     unsigned DestReg;
1684     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
1685     while (Depth--) {
1686       DestReg = createResultReg(&AArch64::GPR64RegClass);
1687       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1688               TII.get(AArch64::LDRXui), DestReg)
1689         .addReg(SrcReg).addImm(0);
1690       SrcReg = DestReg;
1691     }
1692
1693     UpdateValueMap(II, SrcReg);
1694     return true;
1695   }
1696   case Intrinsic::memcpy:
1697   case Intrinsic::memmove: {
1698     const auto *MTI = cast<MemTransferInst>(II);
1699     // Don't handle volatile.
1700     if (MTI->isVolatile())
1701       return false;
1702
1703     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
1704     // we would emit dead code because we don't currently handle memmoves.
1705     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
1706     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
1707       // Small memcpy's are common enough that we want to do them without a call
1708       // if possible.
1709       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
1710       unsigned Alignment = MTI->getAlignment();
1711       if (IsMemCpySmall(Len, Alignment)) {
1712         Address Dest, Src;
1713         if (!ComputeAddress(MTI->getRawDest(), Dest) ||
1714             !ComputeAddress(MTI->getRawSource(), Src))
1715           return false;
1716         if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
1717           return true;
1718       }
1719     }
1720
1721     if (!MTI->getLength()->getType()->isIntegerTy(64))
1722       return false;
1723
1724     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
1725       // Fast instruction selection doesn't support the special
1726       // address spaces.
1727       return false;
1728
1729     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
1730     return LowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
1731   }
1732   case Intrinsic::memset: {
1733     const MemSetInst *MSI = cast<MemSetInst>(II);
1734     // Don't handle volatile.
1735     if (MSI->isVolatile())
1736       return false;
1737
1738     if (!MSI->getLength()->getType()->isIntegerTy(64))
1739       return false;
1740
1741     if (MSI->getDestAddressSpace() > 255)
1742       // Fast instruction selection doesn't support the special
1743       // address spaces.
1744       return false;
1745
1746     return LowerCallTo(II, "memset", II->getNumArgOperands() - 2);
1747   }
1748   case Intrinsic::trap: {
1749     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
1750         .addImm(1);
1751     return true;
1752   }
1753   case Intrinsic::sqrt: {
1754     Type *RetTy = II->getCalledFunction()->getReturnType();
1755
1756     MVT VT;
1757     if (!isTypeLegal(RetTy, VT))
1758       return false;
1759
1760     unsigned Op0Reg = getRegForValue(II->getOperand(0));
1761     if (!Op0Reg)
1762       return false;
1763     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
1764
1765     unsigned ResultReg = FastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
1766     if (!ResultReg)
1767       return false;
1768
1769     UpdateValueMap(II, ResultReg);
1770     return true;
1771   }
1772   case Intrinsic::sadd_with_overflow:
1773   case Intrinsic::uadd_with_overflow:
1774   case Intrinsic::ssub_with_overflow:
1775   case Intrinsic::usub_with_overflow:
1776   case Intrinsic::smul_with_overflow:
1777   case Intrinsic::umul_with_overflow: {
1778     // This implements the basic lowering of the xalu with overflow intrinsics.
1779     const Function *Callee = II->getCalledFunction();
1780     auto *Ty = cast<StructType>(Callee->getReturnType());
1781     Type *RetTy = Ty->getTypeAtIndex(0U);
1782     Type *CondTy = Ty->getTypeAtIndex(1);
1783
1784     MVT VT;
1785     if (!isTypeLegal(RetTy, VT))
1786       return false;
1787
1788     if (VT != MVT::i32 && VT != MVT::i64)
1789       return false;
1790
1791     const Value *LHS = II->getArgOperand(0);
1792     const Value *RHS = II->getArgOperand(1);
1793     // Canonicalize immediate to the RHS.
1794     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
1795         isCommutativeIntrinsic(II))
1796       std::swap(LHS, RHS);
1797
1798     unsigned LHSReg = getRegForValue(LHS);
1799     if (!LHSReg)
1800       return false;
1801     bool LHSIsKill = hasTrivialKill(LHS);
1802
1803     // Check if the immediate can be encoded in the instruction and if we should
1804     // invert the instruction (adds -> subs) to handle negative immediates.
1805     bool UseImm = false;
1806     bool UseInverse = false;
1807     uint64_t Imm = 0;
1808     if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1809       if (C->isNegative()) {
1810         UseInverse = true;
1811         Imm = -(C->getSExtValue());
1812       } else
1813         Imm = C->getZExtValue();
1814
1815       if (isUInt<12>(Imm))
1816         UseImm = true;
1817
1818       UseInverse = UseImm && UseInverse;
1819     }
1820
1821     static const unsigned OpcTable[2][2][2] = {
1822       { {AArch64::ADDSWrr, AArch64::ADDSXrr},
1823         {AArch64::ADDSWri, AArch64::ADDSXri} },
1824       { {AArch64::SUBSWrr, AArch64::SUBSXrr},
1825         {AArch64::SUBSWri, AArch64::SUBSXri} }
1826     };
1827     unsigned Opc = 0;
1828     unsigned MulReg = 0;
1829     unsigned RHSReg = 0;
1830     bool RHSIsKill = false;
1831     AArch64CC::CondCode CC = AArch64CC::Invalid;
1832     bool Is64Bit = VT == MVT::i64;
1833     switch (II->getIntrinsicID()) {
1834     default: llvm_unreachable("Unexpected intrinsic!");
1835     case Intrinsic::sadd_with_overflow:
1836       Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
1837     case Intrinsic::uadd_with_overflow:
1838       Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::HS; break;
1839     case Intrinsic::ssub_with_overflow:
1840       Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
1841     case Intrinsic::usub_with_overflow:
1842       Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::LO; break;
1843     case Intrinsic::smul_with_overflow: {
1844       CC = AArch64CC::NE;
1845       RHSReg = getRegForValue(RHS);
1846       if (!RHSReg)
1847         return false;
1848       RHSIsKill = hasTrivialKill(RHS);
1849
1850       if (VT == MVT::i32) {
1851         MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1852         unsigned ShiftReg = Emit_LSR_ri(MVT::i64, MulReg, false, 32);
1853         MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
1854                                             AArch64::sub_32);
1855         ShiftReg = FastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
1856                                               AArch64::sub_32);
1857         unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
1858         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1859                 TII.get(AArch64::SUBSWrs), CmpReg)
1860           .addReg(ShiftReg, getKillRegState(true))
1861           .addReg(MulReg, getKillRegState(false))
1862           .addImm(159); // 159 <-> asr #31
1863       } else {
1864         assert(VT == MVT::i64 && "Unexpected value type.");
1865         MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1866         unsigned SMULHReg = FastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
1867                                         RHSReg, RHSIsKill);
1868         unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
1869         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1870                 TII.get(AArch64::SUBSXrs), CmpReg)
1871           .addReg(SMULHReg, getKillRegState(true))
1872           .addReg(MulReg, getKillRegState(false))
1873           .addImm(191); // 191 <-> asr #63
1874       }
1875       break;
1876     }
1877     case Intrinsic::umul_with_overflow: {
1878       CC = AArch64CC::NE;
1879       RHSReg = getRegForValue(RHS);
1880       if (!RHSReg)
1881         return false;
1882       RHSIsKill = hasTrivialKill(RHS);
1883
1884       if (VT == MVT::i32) {
1885         MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1886         unsigned CmpReg = createResultReg(TLI.getRegClassFor(MVT::i64));
1887         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1888                 TII.get(AArch64::SUBSXrs), CmpReg)
1889           .addReg(AArch64::XZR, getKillRegState(true))
1890           .addReg(MulReg, getKillRegState(false))
1891           .addImm(96); // 96 <-> lsr #32
1892         MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
1893                                             AArch64::sub_32);
1894       } else {
1895         assert(VT == MVT::i64 && "Unexpected value type.");
1896         MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1897         unsigned UMULHReg = FastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
1898                                         RHSReg, RHSIsKill);
1899         unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
1900         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1901                 TII.get(AArch64::SUBSXrr), CmpReg)
1902         .addReg(AArch64::XZR, getKillRegState(true))
1903         .addReg(UMULHReg, getKillRegState(false));
1904       }
1905       break;
1906     }
1907     }
1908
1909     if (!UseImm) {
1910       RHSReg = getRegForValue(RHS);
1911       if (!RHSReg)
1912         return false;
1913       RHSIsKill = hasTrivialKill(RHS);
1914     }
1915
1916     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
1917     if (Opc) {
1918       MachineInstrBuilder MIB;
1919       MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
1920                     ResultReg)
1921               .addReg(LHSReg, getKillRegState(LHSIsKill));
1922       if (UseImm) {
1923         MIB.addImm(Imm);
1924         MIB.addImm(0);
1925       } else
1926         MIB.addReg(RHSReg, getKillRegState(RHSIsKill));
1927     }
1928     else
1929       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1930               TII.get(TargetOpcode::COPY), ResultReg)
1931         .addReg(MulReg);
1932
1933     unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy);
1934     assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
1935     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
1936             ResultReg2)
1937       .addReg(AArch64::WZR, getKillRegState(true))
1938       .addReg(AArch64::WZR, getKillRegState(true))
1939       .addImm(getInvertedCondCode(CC));
1940
1941     UpdateValueMap(II, ResultReg, 2);
1942     return true;
1943   }
1944   }
1945   return false;
1946 }
1947
1948 bool AArch64FastISel::SelectRet(const Instruction *I) {
1949   const ReturnInst *Ret = cast<ReturnInst>(I);
1950   const Function &F = *I->getParent()->getParent();
1951
1952   if (!FuncInfo.CanLowerReturn)
1953     return false;
1954
1955   if (F.isVarArg())
1956     return false;
1957
1958   // Build a list of return value registers.
1959   SmallVector<unsigned, 4> RetRegs;
1960
1961   if (Ret->getNumOperands() > 0) {
1962     CallingConv::ID CC = F.getCallingConv();
1963     SmallVector<ISD::OutputArg, 4> Outs;
1964     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
1965
1966     // Analyze operands of the call, assigning locations to each operand.
1967     SmallVector<CCValAssign, 16> ValLocs;
1968     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
1969                    I->getContext());
1970     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
1971                                                      : RetCC_AArch64_AAPCS;
1972     CCInfo.AnalyzeReturn(Outs, RetCC);
1973
1974     // Only handle a single return value for now.
1975     if (ValLocs.size() != 1)
1976       return false;
1977
1978     CCValAssign &VA = ValLocs[0];
1979     const Value *RV = Ret->getOperand(0);
1980
1981     // Don't bother handling odd stuff for now.
1982     if (VA.getLocInfo() != CCValAssign::Full)
1983       return false;
1984     // Only handle register returns for now.
1985     if (!VA.isRegLoc())
1986       return false;
1987     unsigned Reg = getRegForValue(RV);
1988     if (Reg == 0)
1989       return false;
1990
1991     unsigned SrcReg = Reg + VA.getValNo();
1992     unsigned DestReg = VA.getLocReg();
1993     // Avoid a cross-class copy. This is very unlikely.
1994     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
1995       return false;
1996
1997     EVT RVEVT = TLI.getValueType(RV->getType());
1998     if (!RVEVT.isSimple())
1999       return false;
2000
2001     // Vectors (of > 1 lane) in big endian need tricky handling.
2002     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1)
2003       return false;
2004
2005     MVT RVVT = RVEVT.getSimpleVT();
2006     if (RVVT == MVT::f128)
2007       return false;
2008     MVT DestVT = VA.getValVT();
2009     // Special handling for extended integers.
2010     if (RVVT != DestVT) {
2011       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
2012         return false;
2013
2014       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
2015         return false;
2016
2017       bool isZExt = Outs[0].Flags.isZExt();
2018       SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
2019       if (SrcReg == 0)
2020         return false;
2021     }
2022
2023     // Make the copy.
2024     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2025             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
2026
2027     // Add register to return instruction.
2028     RetRegs.push_back(VA.getLocReg());
2029   }
2030
2031   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2032                                     TII.get(AArch64::RET_ReallyLR));
2033   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
2034     MIB.addReg(RetRegs[i], RegState::Implicit);
2035   return true;
2036 }
2037
2038 bool AArch64FastISel::SelectTrunc(const Instruction *I) {
2039   Type *DestTy = I->getType();
2040   Value *Op = I->getOperand(0);
2041   Type *SrcTy = Op->getType();
2042
2043   EVT SrcEVT = TLI.getValueType(SrcTy, true);
2044   EVT DestEVT = TLI.getValueType(DestTy, true);
2045   if (!SrcEVT.isSimple())
2046     return false;
2047   if (!DestEVT.isSimple())
2048     return false;
2049
2050   MVT SrcVT = SrcEVT.getSimpleVT();
2051   MVT DestVT = DestEVT.getSimpleVT();
2052
2053   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
2054       SrcVT != MVT::i8)
2055     return false;
2056   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
2057       DestVT != MVT::i1)
2058     return false;
2059
2060   unsigned SrcReg = getRegForValue(Op);
2061   if (!SrcReg)
2062     return false;
2063
2064   // If we're truncating from i64 to a smaller non-legal type then generate an
2065   // AND.  Otherwise, we know the high bits are undefined and a truncate doesn't
2066   // generate any code.
2067   if (SrcVT == MVT::i64) {
2068     uint64_t Mask = 0;
2069     switch (DestVT.SimpleTy) {
2070     default:
2071       // Trunc i64 to i32 is handled by the target-independent fast-isel.
2072       return false;
2073     case MVT::i1:
2074       Mask = 0x1;
2075       break;
2076     case MVT::i8:
2077       Mask = 0xff;
2078       break;
2079     case MVT::i16:
2080       Mask = 0xffff;
2081       break;
2082     }
2083     // Issue an extract_subreg to get the lower 32-bits.
2084     unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true,
2085                                                 AArch64::sub_32);
2086     MRI.constrainRegClass(Reg32, &AArch64::GPR32RegClass);
2087     // Create the AND instruction which performs the actual truncation.
2088     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
2089     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
2090             ANDReg)
2091         .addReg(Reg32)
2092         .addImm(AArch64_AM::encodeLogicalImmediate(Mask, 32));
2093     SrcReg = ANDReg;
2094   }
2095
2096   UpdateValueMap(I, SrcReg);
2097   return true;
2098 }
2099
2100 unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
2101   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
2102           DestVT == MVT::i64) &&
2103          "Unexpected value type.");
2104   // Handle i8 and i16 as i32.
2105   if (DestVT == MVT::i8 || DestVT == MVT::i16)
2106     DestVT = MVT::i32;
2107
2108   if (isZExt) {
2109     MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2110     unsigned ResultReg = createResultReg(&AArch64::GPR32spRegClass);
2111     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
2112             ResultReg)
2113         .addReg(SrcReg)
2114         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2115
2116     if (DestVT == MVT::i64) {
2117       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
2118       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
2119       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2120       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2121               TII.get(AArch64::SUBREG_TO_REG), Reg64)
2122           .addImm(0)
2123           .addReg(ResultReg)
2124           .addImm(AArch64::sub_32);
2125       ResultReg = Reg64;
2126     }
2127     return ResultReg;
2128   } else {
2129     if (DestVT == MVT::i64) {
2130       // FIXME: We're SExt i1 to i64.
2131       return 0;
2132     }
2133     unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
2134     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SBFMWri),
2135             ResultReg)
2136         .addReg(SrcReg)
2137         .addImm(0)
2138         .addImm(0);
2139     return ResultReg;
2140   }
2141 }
2142
2143 unsigned AArch64FastISel::Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2144                                       unsigned Op1, bool Op1IsKill) {
2145   unsigned Opc, ZReg;
2146   switch (RetVT.SimpleTy) {
2147   default: return 0;
2148   case MVT::i8:
2149   case MVT::i16:
2150   case MVT::i32:
2151     RetVT = MVT::i32;
2152     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
2153   case MVT::i64:
2154     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
2155   }
2156
2157   // Create the base instruction, then add the operands.
2158   unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
2159   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2160     .addReg(Op0, getKillRegState(Op0IsKill))
2161     .addReg(Op1, getKillRegState(Op1IsKill))
2162     .addReg(ZReg, getKillRegState(true));
2163
2164   return ResultReg;
2165 }
2166
2167 unsigned AArch64FastISel::Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2168                                         unsigned Op1, bool Op1IsKill) {
2169   if (RetVT != MVT::i64)
2170     return 0;
2171
2172   // Create the base instruction, then add the operands.
2173   unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
2174   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SMADDLrrr),
2175           ResultReg)
2176     .addReg(Op0, getKillRegState(Op0IsKill))
2177     .addReg(Op1, getKillRegState(Op1IsKill))
2178     .addReg(AArch64::XZR, getKillRegState(true));
2179
2180   return ResultReg;
2181 }
2182
2183 unsigned AArch64FastISel::Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2184                                         unsigned Op1, bool Op1IsKill) {
2185   if (RetVT != MVT::i64)
2186     return 0;
2187
2188   // Create the base instruction, then add the operands.
2189   unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
2190   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::UMADDLrrr),
2191           ResultReg)
2192     .addReg(Op0, getKillRegState(Op0IsKill))
2193     .addReg(Op1, getKillRegState(Op1IsKill))
2194     .addReg(AArch64::XZR, getKillRegState(true));
2195
2196   return ResultReg;
2197 }
2198
2199 unsigned AArch64FastISel::Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2200                                       uint64_t Shift) {
2201   unsigned Opc, ImmR, ImmS;
2202   switch (RetVT.SimpleTy) {
2203   default: return 0;
2204   case MVT::i8:
2205   case MVT::i16:
2206   case MVT::i32:
2207     RetVT = MVT::i32;
2208     Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 31 - Shift; break;
2209   case MVT::i64:
2210     Opc = AArch64::UBFMXri; ImmR = -Shift % 64; ImmS = 63 - Shift; break;
2211   }
2212
2213   return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, ImmR,
2214                           ImmS);
2215 }
2216
2217 unsigned AArch64FastISel::Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2218                                       uint64_t Shift) {
2219   unsigned Opc, ImmS;
2220   switch (RetVT.SimpleTy) {
2221   default: return 0;
2222   case MVT::i8:
2223   case MVT::i16:
2224   case MVT::i32:
2225     RetVT = MVT::i32;
2226     Opc = AArch64::UBFMWri; ImmS = 31; break;
2227   case MVT::i64:
2228     Opc = AArch64::UBFMXri; ImmS = 63; break;
2229   }
2230
2231   return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
2232                           ImmS);
2233 }
2234
2235 unsigned AArch64FastISel::Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2236                                       uint64_t Shift) {
2237   unsigned Opc, ImmS;
2238   switch (RetVT.SimpleTy) {
2239   default: return 0;
2240   case MVT::i8:
2241   case MVT::i16:
2242   case MVT::i32:
2243     RetVT = MVT::i32;
2244     Opc = AArch64::SBFMWri; ImmS = 31; break;
2245   case MVT::i64:
2246     Opc = AArch64::SBFMXri; ImmS = 63; break;
2247   }
2248
2249   return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
2250                           ImmS);
2251 }
2252
2253 unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
2254                                      bool isZExt) {
2255   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
2256
2257   // FastISel does not have plumbing to deal with extensions where the SrcVT or
2258   // DestVT are odd things, so test to make sure that they are both types we can
2259   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
2260   // bail out to SelectionDAG.
2261   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
2262        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
2263       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
2264        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
2265     return 0;
2266
2267   unsigned Opc;
2268   unsigned Imm = 0;
2269
2270   switch (SrcVT.SimpleTy) {
2271   default:
2272     return 0;
2273   case MVT::i1:
2274     return Emiti1Ext(SrcReg, DestVT, isZExt);
2275   case MVT::i8:
2276     if (DestVT == MVT::i64)
2277       Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2278     else
2279       Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
2280     Imm = 7;
2281     break;
2282   case MVT::i16:
2283     if (DestVT == MVT::i64)
2284       Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2285     else
2286       Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
2287     Imm = 15;
2288     break;
2289   case MVT::i32:
2290     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
2291     Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2292     Imm = 31;
2293     break;
2294   }
2295
2296   // Handle i8 and i16 as i32.
2297   if (DestVT == MVT::i8 || DestVT == MVT::i16)
2298     DestVT = MVT::i32;
2299   else if (DestVT == MVT::i64) {
2300     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2301     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2302             TII.get(AArch64::SUBREG_TO_REG), Src64)
2303         .addImm(0)
2304         .addReg(SrcReg)
2305         .addImm(AArch64::sub_32);
2306     SrcReg = Src64;
2307   }
2308
2309   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
2310   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2311       .addReg(SrcReg)
2312       .addImm(0)
2313       .addImm(Imm);
2314
2315   return ResultReg;
2316 }
2317
2318 bool AArch64FastISel::SelectIntExt(const Instruction *I) {
2319   // On ARM, in general, integer casts don't involve legal types; this code
2320   // handles promotable integers.  The high bits for a type smaller than
2321   // the register size are assumed to be undefined.
2322   Type *DestTy = I->getType();
2323   Value *Src = I->getOperand(0);
2324   Type *SrcTy = Src->getType();
2325
2326   bool isZExt = isa<ZExtInst>(I);
2327   unsigned SrcReg = getRegForValue(Src);
2328   if (!SrcReg)
2329     return false;
2330
2331   EVT SrcEVT = TLI.getValueType(SrcTy, true);
2332   EVT DestEVT = TLI.getValueType(DestTy, true);
2333   if (!SrcEVT.isSimple())
2334     return false;
2335   if (!DestEVT.isSimple())
2336     return false;
2337
2338   MVT SrcVT = SrcEVT.getSimpleVT();
2339   MVT DestVT = DestEVT.getSimpleVT();
2340   unsigned ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
2341   if (ResultReg == 0)
2342     return false;
2343   UpdateValueMap(I, ResultReg);
2344   return true;
2345 }
2346
2347 bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
2348   EVT DestEVT = TLI.getValueType(I->getType(), true);
2349   if (!DestEVT.isSimple())
2350     return false;
2351
2352   MVT DestVT = DestEVT.getSimpleVT();
2353   if (DestVT != MVT::i64 && DestVT != MVT::i32)
2354     return false;
2355
2356   unsigned DivOpc;
2357   bool is64bit = (DestVT == MVT::i64);
2358   switch (ISDOpcode) {
2359   default:
2360     return false;
2361   case ISD::SREM:
2362     DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
2363     break;
2364   case ISD::UREM:
2365     DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
2366     break;
2367   }
2368   unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
2369   unsigned Src0Reg = getRegForValue(I->getOperand(0));
2370   if (!Src0Reg)
2371     return false;
2372
2373   unsigned Src1Reg = getRegForValue(I->getOperand(1));
2374   if (!Src1Reg)
2375     return false;
2376
2377   unsigned QuotReg = createResultReg(TLI.getRegClassFor(DestVT));
2378   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), QuotReg)
2379       .addReg(Src0Reg)
2380       .addReg(Src1Reg);
2381   // The remainder is computed as numerator - (quotient * denominator) using the
2382   // MSUB instruction.
2383   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
2384   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), ResultReg)
2385       .addReg(QuotReg)
2386       .addReg(Src1Reg)
2387       .addReg(Src0Reg);
2388   UpdateValueMap(I, ResultReg);
2389   return true;
2390 }
2391
2392 bool AArch64FastISel::SelectMul(const Instruction *I) {
2393   EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2394   if (!SrcEVT.isSimple())
2395     return false;
2396   MVT SrcVT = SrcEVT.getSimpleVT();
2397
2398   // Must be simple value type.  Don't handle vectors.
2399   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
2400       SrcVT != MVT::i8)
2401     return false;
2402
2403   unsigned Src0Reg = getRegForValue(I->getOperand(0));
2404   if (!Src0Reg)
2405     return false;
2406   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
2407
2408   unsigned Src1Reg = getRegForValue(I->getOperand(1));
2409   if (!Src1Reg)
2410     return false;
2411   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
2412
2413   unsigned ResultReg =
2414     Emit_MUL_rr(SrcVT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
2415
2416   if (!ResultReg)
2417     return false;
2418
2419   UpdateValueMap(I, ResultReg);
2420   return true;
2421 }
2422
2423 bool AArch64FastISel::SelectShift(const Instruction *I, bool IsLeftShift,
2424                                   bool IsArithmetic) {
2425   EVT RetEVT = TLI.getValueType(I->getType(), true);
2426   if (!RetEVT.isSimple())
2427     return false;
2428   MVT RetVT = RetEVT.getSimpleVT();
2429
2430   if (!isa<ConstantInt>(I->getOperand(1)))
2431     return false;
2432
2433   unsigned Op0Reg = getRegForValue(I->getOperand(0));
2434   if (!Op0Reg)
2435     return false;
2436   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
2437
2438   uint64_t ShiftVal = cast<ConstantInt>(I->getOperand(1))->getZExtValue();
2439
2440   unsigned ResultReg;
2441   if (IsLeftShift)
2442     ResultReg = Emit_LSL_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2443   else {
2444     if (IsArithmetic)
2445       ResultReg = Emit_ASR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2446     else
2447       ResultReg = Emit_LSR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2448   }
2449
2450   if (!ResultReg)
2451     return false;
2452
2453   UpdateValueMap(I, ResultReg);
2454   return true;
2455 }
2456
2457 bool AArch64FastISel::SelectBitCast(const Instruction *I) {
2458   MVT RetVT, SrcVT;
2459
2460   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
2461     return false;
2462   if (!isTypeLegal(I->getType(), RetVT))
2463     return false;
2464
2465   unsigned Opc;
2466   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
2467     Opc = AArch64::FMOVWSr;
2468   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
2469     Opc = AArch64::FMOVXDr;
2470   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
2471     Opc = AArch64::FMOVSWr;
2472   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
2473     Opc = AArch64::FMOVDXr;
2474   else
2475     return false;
2476
2477   unsigned Op0Reg = getRegForValue(I->getOperand(0));
2478   if (!Op0Reg)
2479     return false;
2480   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
2481   unsigned ResultReg = FastEmitInst_r(Opc, TLI.getRegClassFor(RetVT),
2482                                       Op0Reg, Op0IsKill);
2483
2484   if (!ResultReg)
2485     return false;
2486
2487   UpdateValueMap(I, ResultReg);
2488   return true;
2489 }
2490
2491 bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) {
2492   switch (I->getOpcode()) {
2493   default:
2494     break;
2495   case Instruction::Load:
2496     return SelectLoad(I);
2497   case Instruction::Store:
2498     return SelectStore(I);
2499   case Instruction::Br:
2500     return SelectBranch(I);
2501   case Instruction::IndirectBr:
2502     return SelectIndirectBr(I);
2503   case Instruction::FCmp:
2504   case Instruction::ICmp:
2505     return SelectCmp(I);
2506   case Instruction::Select:
2507     return SelectSelect(I);
2508   case Instruction::FPExt:
2509     return SelectFPExt(I);
2510   case Instruction::FPTrunc:
2511     return SelectFPTrunc(I);
2512   case Instruction::FPToSI:
2513     return SelectFPToInt(I, /*Signed=*/true);
2514   case Instruction::FPToUI:
2515     return SelectFPToInt(I, /*Signed=*/false);
2516   case Instruction::SIToFP:
2517     return SelectIntToFP(I, /*Signed=*/true);
2518   case Instruction::UIToFP:
2519     return SelectIntToFP(I, /*Signed=*/false);
2520   case Instruction::SRem:
2521     return SelectRem(I, ISD::SREM);
2522   case Instruction::URem:
2523     return SelectRem(I, ISD::UREM);
2524   case Instruction::Ret:
2525     return SelectRet(I);
2526   case Instruction::Trunc:
2527     return SelectTrunc(I);
2528   case Instruction::ZExt:
2529   case Instruction::SExt:
2530     return SelectIntExt(I);
2531
2532   // FIXME: All of these should really be handled by the target-independent
2533   // selector -> improve FastISel tblgen.
2534   case Instruction::Mul:
2535     return SelectMul(I);
2536   case Instruction::Shl:
2537       return SelectShift(I, /*IsLeftShift=*/true, /*IsArithmetic=*/false);
2538   case Instruction::LShr:
2539     return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/false);
2540   case Instruction::AShr:
2541     return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/true);
2542   case Instruction::BitCast:
2543     return SelectBitCast(I);
2544   }
2545   return false;
2546   // Silence warnings.
2547   (void)&CC_AArch64_DarwinPCS_VarArg;
2548 }
2549
2550 namespace llvm {
2551 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo,
2552                                         const TargetLibraryInfo *libInfo) {
2553   return new AArch64FastISel(funcInfo, libInfo);
2554 }
2555 }