lib/Target/AArch64/AArch64FastISel.cpp

//===-- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the AArch64-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // AArch64GenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AArch64.h"
  17 #include "AArch64Subtarget.h"
  18 #include "AArch64TargetMachine.h"
  19 #include "MCTargetDesc/AArch64AddressingModes.h"
  20 #include "llvm/Analysis/BranchProbabilityInfo.h"
  21 #include "llvm/CodeGen/CallingConvLower.h"
  22 #include "llvm/CodeGen/FastISel.h"
  23 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  24 #include "llvm/CodeGen/MachineConstantPool.h"
  25 #include "llvm/CodeGen/MachineFrameInfo.h"
  26 #include "llvm/CodeGen/MachineInstrBuilder.h"
  27 #include "llvm/CodeGen/MachineRegisterInfo.h"
  28 #include "llvm/IR/CallingConv.h"
  29 #include "llvm/IR/DataLayout.h"
  30 #include "llvm/IR/DerivedTypes.h"
  31 #include "llvm/IR/Function.h"
  32 #include "llvm/IR/GetElementPtrTypeIterator.h"
  33 #include "llvm/IR/GlobalAlias.h"
  34 #include "llvm/IR/GlobalVariable.h"
  35 #include "llvm/IR/Instructions.h"
  36 #include "llvm/IR/IntrinsicInst.h"
  37 #include "llvm/IR/Operator.h"
  38 #include "llvm/Support/CommandLine.h"
  39 using namespace llvm;
  40
  41 namespace {
  42
  43 class AArch64FastISel : public FastISel {
  44   class Address {
  45   public:
  46     typedef enum {
  47       RegBase,
  48       FrameIndexBase
  49     } BaseKind;
  50
  51   private:
  52     BaseKind Kind;
  53     AArch64_AM::ShiftExtendType ExtType;
  54     union {
  55       unsigned Reg;
  56       int FI;
  57     } Base;
  58     unsigned OffsetReg;
  59     unsigned Shift;
  60     int64_t Offset;
  61     const GlobalValue *GV;
  62
  63   public:
  64     Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
  65       OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
  66     void setKind(BaseKind K) { Kind = K; }
  67     BaseKind getKind() const { return Kind; }
  68     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
  69     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
  70     bool isRegBase() const { return Kind == RegBase; }
  71     bool isFIBase() const { return Kind == FrameIndexBase; }
  72     void setReg(unsigned Reg) {
  73       assert(isRegBase() && "Invalid base register access!");
  74       Base.Reg = Reg;
  75     }
  76     unsigned getReg() const {
  77       assert(isRegBase() && "Invalid base register access!");
  78       return Base.Reg;
  79     }
  80     void setOffsetReg(unsigned Reg) {
  81       assert(isRegBase() && "Invalid offset register access!");
  82       OffsetReg = Reg;
  83     }
  84     unsigned getOffsetReg() const {
  85       assert(isRegBase() && "Invalid offset register access!");
  86       return OffsetReg;
  87     }
  88     void setFI(unsigned FI) {
  89       assert(isFIBase() && "Invalid base frame index  access!");
  90       Base.FI = FI;
  91     }
  92     unsigned getFI() const {
  93       assert(isFIBase() && "Invalid base frame index access!");
  94       return Base.FI;
  95     }
  96     void setOffset(int64_t O) { Offset = O; }
  97     int64_t getOffset() { return Offset; }
  98     void setShift(unsigned S) { Shift = S; }
  99     unsigned getShift() { return Shift; }
 100
 101     void setGlobalValue(const GlobalValue *G) { GV = G; }
 102     const GlobalValue *getGlobalValue() { return GV; }
 103   };
 104
 105   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
 106   /// make the right decision when generating code for different targets.
 107   const AArch64Subtarget *Subtarget;
 108   LLVMContext *Context;
 109
 110   bool FastLowerArguments() override;
 111   bool FastLowerCall(CallLoweringInfo &CLI) override;
 112   bool FastLowerIntrinsicCall(const IntrinsicInst *II) override;
 113
 114 private:
 115   // Selection routines.
 116   bool SelectLoad(const Instruction *I);
 117   bool SelectStore(const Instruction *I);
 118   bool SelectBranch(const Instruction *I);
 119   bool SelectIndirectBr(const Instruction *I);
 120   bool SelectCmp(const Instruction *I);
 121   bool SelectSelect(const Instruction *I);
 122   bool SelectFPExt(const Instruction *I);
 123   bool SelectFPTrunc(const Instruction *I);
 124   bool SelectFPToInt(const Instruction *I, bool Signed);
 125   bool SelectIntToFP(const Instruction *I, bool Signed);
 126   bool SelectRem(const Instruction *I, unsigned ISDOpcode);
 127   bool SelectRet(const Instruction *I);
 128   bool SelectTrunc(const Instruction *I);
 129   bool SelectIntExt(const Instruction *I);
 130   bool SelectMul(const Instruction *I);
 131   bool SelectShift(const Instruction *I, bool IsLeftShift, bool IsArithmetic);
 132   bool SelectBitCast(const Instruction *I);
 133
 134   // Utility helper routines.
 135   bool isTypeLegal(Type *Ty, MVT &VT);
 136   bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
 137   bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
 138   bool ComputeCallAddress(const Value *V, Address &Addr);
 139   bool SimplifyAddress(Address &Addr, MVT VT);
 140   void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 141                             unsigned Flags, unsigned ScaleFactor,
 142                             MachineMemOperand *MMO);
 143   bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
 144   bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
 145                           unsigned Alignment);
 146   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
 147                          const Value *Cond);
 148
 149   // Emit functions.
 150   bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
 151   bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
 152                 MachineMemOperand *MMO = nullptr);
 153   bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
 154                  MachineMemOperand *MMO = nullptr);
 155   unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
 156   unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
 157   unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 158                        unsigned Op1, bool Op1IsKill);
 159   unsigned Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 160                          unsigned Op1, bool Op1IsKill);
 161   unsigned Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 162                          unsigned Op1, bool Op1IsKill);
 163   unsigned Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
 164   unsigned Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
 165   unsigned Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
 166
 167   unsigned AArch64MaterializeInt(const ConstantInt *CI, MVT VT);
 168   unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
 169   unsigned AArch64MaterializeGV(const GlobalValue *GV);
 170
 171   // Call handling routines.
 172 private:
 173   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
 174   bool ProcessCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
 175                        unsigned &NumBytes);
 176   bool FinishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
 177
 178 public:
 179   // Backend specific FastISel code.
 180   unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
 181   unsigned TargetMaterializeConstant(const Constant *C) override;
 182
 183   explicit AArch64FastISel(FunctionLoweringInfo &funcInfo,
 184                          const TargetLibraryInfo *libInfo)
 185       : FastISel(funcInfo, libInfo) {
 186     Subtarget = &TM.getSubtarget<AArch64Subtarget>();
 187     Context = &funcInfo.Fn->getContext();
 188   }
 189
 190   bool TargetSelectInstruction(const Instruction *I) override;
 191
 192 #include "AArch64GenFastISel.inc"
 193 };
 194
 195 } // end anonymous namespace
 196
 197 #include "AArch64GenCallingConv.inc"
 198
 199 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
 200   if (CC == CallingConv::WebKit_JS)
 201     return CC_AArch64_WebKit_JS;
 202   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 203 }
 204
 205 unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
 206   assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
 207          "Alloca should always return a pointer.");
 208
 209   // Don't handle dynamic allocas.
 210   if (!FuncInfo.StaticAllocaMap.count(AI))
 211     return 0;
 212
 213   DenseMap<const AllocaInst *, int>::iterator SI =
 214       FuncInfo.StaticAllocaMap.find(AI);
 215
 216   if (SI != FuncInfo.StaticAllocaMap.end()) {
 217     unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
 218     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 219             ResultReg)
 220         .addFrameIndex(SI->second)
 221         .addImm(0)
 222         .addImm(0);
 223     return ResultReg;
 224   }
 225
 226   return 0;
 227 }
 228
 229 unsigned AArch64FastISel::AArch64MaterializeInt(const ConstantInt *CI, MVT VT) {
 230   if (VT > MVT::i64)
 231     return 0;
 232
 233   if (!CI->isZero())
 234     return FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
 235
 236   // Create a copy from the zero register to materialize a "0" value.
 237   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
 238                                                    : &AArch64::GPR32RegClass;
 239   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
 240   unsigned ResultReg = createResultReg(RC);
 241   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 242           TII.get(TargetOpcode::COPY), ResultReg)
 243     .addReg(ZeroReg, getKillRegState(true));
 244   return ResultReg;
 245 }
 246
 247 unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
 248   if (VT != MVT::f32 && VT != MVT::f64)
 249     return 0;
 250
 251   const APFloat Val = CFP->getValueAPF();
 252   bool Is64Bit = (VT == MVT::f64);
 253
 254   // This checks to see if we can use FMOV instructions to materialize
 255   // a constant, otherwise we have to materialize via the constant pool.
 256   if (TLI.isFPImmLegal(Val, VT)) {
 257     int Imm = Is64Bit ? AArch64_AM::getFP64Imm(Val)
 258                       : AArch64_AM::getFP32Imm(Val);
 259     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
 260     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 261     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 262       .addImm(Imm);
 263     return ResultReg;
 264   }
 265
 266   // Materialize via constant pool.  MachineConstantPool wants an explicit
 267   // alignment.
 268   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
 269   if (Align == 0)
 270     Align = DL.getTypeAllocSize(CFP->getType());
 271
 272   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 273   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 274   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 275           ADRPReg)
 276     .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
 277
 278   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
 279   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 280   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 281     .addReg(ADRPReg)
 282     .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 283   return ResultReg;
 284 }
 285
 286 unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
 287   // We can't handle thread-local variables quickly yet.
 288   if (GV->isThreadLocal())
 289     return 0;
 290
 291   // MachO still uses GOT for large code-model accesses, but ELF requires
 292   // movz/movk sequences, which FastISel doesn't handle yet.
 293   if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
 294     return 0;
 295
 296   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
 297
 298   EVT DestEVT = TLI.getValueType(GV->getType(), true);
 299   if (!DestEVT.isSimple())
 300     return 0;
 301
 302   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 303   unsigned ResultReg;
 304
 305   if (OpFlags & AArch64II::MO_GOT) {
 306     // ADRP + LDRX
 307     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 308             ADRPReg)
 309       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
 310
 311     ResultReg = createResultReg(&AArch64::GPR64RegClass);
 312     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
 313             ResultReg)
 314       .addReg(ADRPReg)
 315       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
 316                         AArch64II::MO_NC);
 317   } else {
 318     // ADRP + ADDX
 319     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 320             ADRPReg)
 321       .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
 322
 323     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 324     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 325             ResultReg)
 326       .addReg(ADRPReg)
 327       .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
 328       .addImm(0);
 329   }
 330   return ResultReg;
 331 }
 332
 333 unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
 334   EVT CEVT = TLI.getValueType(C->getType(), true);
 335
 336   // Only handle simple types.
 337   if (!CEVT.isSimple())
 338     return 0;
 339   MVT VT = CEVT.getSimpleVT();
 340
 341   if (const auto *CI = dyn_cast<ConstantInt>(C))
 342     return AArch64MaterializeInt(CI, VT);
 343   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 344     return AArch64MaterializeFP(CFP, VT);
 345   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
 346     return AArch64MaterializeGV(GV);
 347
 348   return 0;
 349 }
 350
 351 // Computes the address to get to an object.
 352 bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr, Type *Ty)
 353 {
 354   const User *U = nullptr;
 355   unsigned Opcode = Instruction::UserOp1;
 356   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
 357     // Don't walk into other basic blocks unless the object is an alloca from
 358     // another block, otherwise it may not have a virtual register assigned.
 359     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
 360         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 361       Opcode = I->getOpcode();
 362       U = I;
 363     }
 364   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
 365     Opcode = C->getOpcode();
 366     U = C;
 367   }
 368
 369   if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
 370     if (Ty->getAddressSpace() > 255)
 371       // Fast instruction selection doesn't support the special
 372       // address spaces.
 373       return false;
 374
 375   switch (Opcode) {
 376   default:
 377     break;
 378   case Instruction::BitCast: {
 379     // Look through bitcasts.
 380     return ComputeAddress(U->getOperand(0), Addr, Ty);
 381   }
 382   case Instruction::IntToPtr: {
 383     // Look past no-op inttoptrs.
 384     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 385       return ComputeAddress(U->getOperand(0), Addr, Ty);
 386     break;
 387   }
 388   case Instruction::PtrToInt: {
 389     // Look past no-op ptrtoints.
 390     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
 391       return ComputeAddress(U->getOperand(0), Addr, Ty);
 392     break;
 393   }
 394   case Instruction::GetElementPtr: {
 395     Address SavedAddr = Addr;
 396     uint64_t TmpOffset = Addr.getOffset();
 397
 398     // Iterate through the GEP folding the constants into offsets where
 399     // we can.
 400     gep_type_iterator GTI = gep_type_begin(U);
 401     for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
 402          ++i, ++GTI) {
 403       const Value *Op = *i;
 404       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
 405         const StructLayout *SL = DL.getStructLayout(STy);
 406         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
 407         TmpOffset += SL->getElementOffset(Idx);
 408       } else {
 409         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
 410         for (;;) {
 411           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
 412             // Constant-offset addressing.
 413             TmpOffset += CI->getSExtValue() * S;
 414             break;
 415           }
 416           if (canFoldAddIntoGEP(U, Op)) {
 417             // A compatible add with a constant operand. Fold the constant.
 418             ConstantInt *CI =
 419                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
 420             TmpOffset += CI->getSExtValue() * S;
 421             // Iterate on the other operand.
 422             Op = cast<AddOperator>(Op)->getOperand(0);
 423             continue;
 424           }
 425           // Unsupported
 426           goto unsupported_gep;
 427         }
 428       }
 429     }
 430
 431     // Try to grab the base operand now.
 432     Addr.setOffset(TmpOffset);
 433     if (ComputeAddress(U->getOperand(0), Addr, Ty))
 434       return true;
 435
 436     // We failed, restore everything and try the other options.
 437     Addr = SavedAddr;
 438
 439   unsupported_gep:
 440     break;
 441   }
 442   case Instruction::Alloca: {
 443     const AllocaInst *AI = cast<AllocaInst>(Obj);
 444     DenseMap<const AllocaInst *, int>::iterator SI =
 445         FuncInfo.StaticAllocaMap.find(AI);
 446     if (SI != FuncInfo.StaticAllocaMap.end()) {
 447       Addr.setKind(Address::FrameIndexBase);
 448       Addr.setFI(SI->second);
 449       return true;
 450     }
 451     break;
 452   }
 453   case Instruction::Add: {
 454     // Adds of constants are common and easy enough.
 455     const Value *LHS = U->getOperand(0);
 456     const Value *RHS = U->getOperand(1);
 457
 458     if (isa<ConstantInt>(LHS))
 459       std::swap(LHS, RHS);
 460
 461     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 462       Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
 463       return ComputeAddress(LHS, Addr, Ty);
 464     }
 465
 466     Address Backup = Addr;
 467     if (ComputeAddress(LHS, Addr, Ty) && ComputeAddress(RHS, Addr, Ty))
 468       return true;
 469     Addr = Backup;
 470
 471     break;
 472   }
 473   case Instruction::Shl:
 474     if (Addr.getOffsetReg())
 475       break;
 476
 477     if (const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
 478       unsigned Val = CI->getZExtValue();
 479       if (Val < 1 || Val > 3)
 480         break;
 481
 482       uint64_t NumBytes = 0;
 483       if (Ty && Ty->isSized()) {
 484         uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 485         NumBytes = NumBits / 8;
 486         if (!isPowerOf2_64(NumBits))
 487           NumBytes = 0;
 488       }
 489
 490       if (NumBytes != (1U << Val))
 491         break;
 492
 493       Addr.setShift(Val);
 494       Addr.setExtendType(AArch64_AM::LSL);
 495
 496       if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
 497         if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 498           U = I;
 499
 500       if (const auto *ZE = dyn_cast<ZExtInst>(U))
 501         if (ZE->getOperand(0)->getType()->isIntegerTy(32))
 502           Addr.setExtendType(AArch64_AM::UXTW);
 503
 504       if (const auto *SE = dyn_cast<SExtInst>(U))
 505         if (SE->getOperand(0)->getType()->isIntegerTy(32))
 506           Addr.setExtendType(AArch64_AM::SXTW);
 507
 508       unsigned Reg = getRegForValue(U->getOperand(0));
 509       if (!Reg)
 510         return false;
 511       Addr.setOffsetReg(Reg);
 512       return true;
 513     }
 514     break;
 515   }
 516
 517   if (Addr.getReg()) {
 518     if (!Addr.getOffsetReg()) {
 519       unsigned Reg = getRegForValue(Obj);
 520       if (!Reg)
 521         return false;
 522       Addr.setOffsetReg(Reg);
 523       return true;
 524     }
 525     return false;
 526   }
 527
 528   unsigned Reg = getRegForValue(Obj);
 529   if (!Reg)
 530     return false;
 531   Addr.setReg(Reg);
 532   return true;
 533 }
 534
 535 bool AArch64FastISel::ComputeCallAddress(const Value *V, Address &Addr) {
 536   const User *U = nullptr;
 537   unsigned Opcode = Instruction::UserOp1;
 538   bool InMBB = true;
 539
 540   if (const auto *I = dyn_cast<Instruction>(V)) {
 541     Opcode = I->getOpcode();
 542     U = I;
 543     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
 544   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
 545     Opcode = C->getOpcode();
 546     U = C;
 547   }
 548
 549   switch (Opcode) {
 550   default: break;
 551   case Instruction::BitCast:
 552     // Look past bitcasts if its operand is in the same BB.
 553     if (InMBB)
 554       return ComputeCallAddress(U->getOperand(0), Addr);
 555     break;
 556   case Instruction::IntToPtr:
 557     // Look past no-op inttoptrs if its operand is in the same BB.
 558     if (InMBB &&
 559         TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 560       return ComputeCallAddress(U->getOperand(0), Addr);
 561     break;
 562   case Instruction::PtrToInt:
 563     // Look past no-op ptrtoints if its operand is in the same BB.
 564     if (InMBB &&
 565         TLI.getValueType(U->getType()) == TLI.getPointerTy())
 566       return ComputeCallAddress(U->getOperand(0), Addr);
 567     break;
 568   }
 569
 570   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
 571     Addr.setGlobalValue(GV);
 572     return true;
 573   }
 574
 575   // If all else fails, try to materialize the value in a register.
 576   if (!Addr.getGlobalValue()) {
 577     Addr.setReg(getRegForValue(V));
 578     return Addr.getReg() != 0;
 579   }
 580
 581   return false;
 582 }
 583
 584
 585 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
 586   EVT evt = TLI.getValueType(Ty, true);
 587
 588   // Only handle simple types.
 589   if (evt == MVT::Other || !evt.isSimple())
 590     return false;
 591   VT = evt.getSimpleVT();
 592
 593   // This is a legal type, but it's not something we handle in fast-isel.
 594   if (VT == MVT::f128)
 595     return false;
 596
 597   // Handle all other legal types, i.e. a register that will directly hold this
 598   // value.
 599   return TLI.isTypeLegal(VT);
 600 }
 601
 602 bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
 603   if (isTypeLegal(Ty, VT))
 604     return true;
 605
 606   // If this is a type than can be sign or zero-extended to a basic operation
 607   // go ahead and accept it now. For stores, this reflects truncation.
 608   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
 609     return true;
 610
 611   return false;
 612 }
 613
 614 bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT) {
 615   unsigned ScaleFactor;
 616   switch (VT.SimpleTy) {
 617   default: return false;
 618   case MVT::i1:  // fall-through
 619   case MVT::i8:  ScaleFactor = 1; break;
 620   case MVT::i16: ScaleFactor = 2; break;
 621   case MVT::i32: // fall-through
 622   case MVT::f32: ScaleFactor = 4; break;
 623   case MVT::i64: // fall-through
 624   case MVT::f64: ScaleFactor = 8; break;
 625   }
 626
 627   bool ImmediateOffsetNeedsLowering = false;
 628   bool RegisterOffsetNeedsLowering = false;
 629   int64_t Offset = Addr.getOffset();
 630   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
 631     ImmediateOffsetNeedsLowering = true;
 632   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
 633            !isUInt<12>(Offset / ScaleFactor))
 634     ImmediateOffsetNeedsLowering = true;
 635
 636   // Cannot encode an offset register and an immediate offset in the same
 637   // instruction. Fold the immediate offset into the load/store instruction and
 638   // emit an additonal add to take care of the offset register.
 639   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() &&
 640       Addr.getOffsetReg())
 641     RegisterOffsetNeedsLowering = true;
 642
 643   // If this is a stack pointer and the offset needs to be simplified then put
 644   // the alloca address into a register, set the base type back to register and
 645   // continue. This should almost never happen.
 646   if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) {
 647     unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
 648     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 649             ResultReg)
 650       .addFrameIndex(Addr.getFI())
 651       .addImm(0)
 652       .addImm(0);
 653     Addr.setKind(Address::RegBase);
 654     Addr.setReg(ResultReg);
 655   }
 656
 657   if (RegisterOffsetNeedsLowering) {
 658     unsigned ResultReg = 0;
 659     if (Addr.getReg()) {
 660       ResultReg = createResultReg(&AArch64::GPR64RegClass);
 661       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 662               TII.get(AArch64::ADDXrs), ResultReg)
 663         .addReg(Addr.getReg())
 664         .addReg(Addr.getOffsetReg())
 665         .addImm(Addr.getShift());
 666     } else
 667       ResultReg = Emit_LSL_ri(MVT::i64, Addr.getOffsetReg(),
 668                               /*Op0IsKill=*/false, Addr.getShift());
 669     if (!ResultReg)
 670       return false;
 671
 672     Addr.setReg(ResultReg);
 673     Addr.setOffsetReg(0);
 674     Addr.setShift(0);
 675   }
 676
 677   // Since the offset is too large for the load/store instruction get the
 678   // reg+offset into a register.
 679   if (ImmediateOffsetNeedsLowering) {
 680     unsigned ResultReg = 0;
 681     if (Addr.getReg())
 682       ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(),
 683                                /*IsKill=*/false, Offset, MVT::i64);
 684     else
 685       ResultReg = FastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
 686
 687     if (!ResultReg)
 688       return false;
 689     Addr.setReg(ResultReg);
 690     Addr.setOffset(0);
 691   }
 692   return true;
 693 }
 694
 695 void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
 696                                            const MachineInstrBuilder &MIB,
 697                                            unsigned Flags,
 698                                            unsigned ScaleFactor,
 699                                            MachineMemOperand *MMO) {
 700   int64_t Offset = Addr.getOffset() / ScaleFactor;
 701   // Frame base works a bit differently. Handle it separately.
 702   if (Addr.isFIBase()) {
 703     int FI = Addr.getFI();
 704     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
 705     // and alignment should be based on the VT.
 706     MMO = FuncInfo.MF->getMachineMemOperand(
 707       MachinePointerInfo::getFixedStack(FI, Offset), Flags,
 708       MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
 709     // Now add the rest of the operands.
 710     MIB.addFrameIndex(FI).addImm(Offset);
 711   } else {
 712     assert(Addr.isRegBase() && "Unexpected address kind.");
 713     if (Addr.getOffsetReg()) {
 714       assert(Addr.getOffset() == 0 && "Unexpected offset");
 715       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
 716                       Addr.getExtendType() == AArch64_AM::SXTX;
 717       MIB.addReg(Addr.getReg());
 718       MIB.addReg(Addr.getOffsetReg());
 719       MIB.addImm(IsSigned);
 720       MIB.addImm(Addr.getShift() != 0);
 721     } else {
 722       MIB.addReg(Addr.getReg());
 723       MIB.addImm(Offset);
 724     }
 725   }
 726
 727   if (MMO)
 728     MIB.addMemOperand(MMO);
 729 }
 730
 731 bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
 732                                MachineMemOperand *MMO) {
 733   // Simplify this down to something we can handle.
 734   if (!SimplifyAddress(Addr, VT))
 735     return false;
 736
 737   unsigned ScaleFactor;
 738   switch (VT.SimpleTy) {
 739   default: llvm_unreachable("Unexpected value type.");
 740   case MVT::i1:  // fall-through
 741   case MVT::i8:  ScaleFactor = 1; break;
 742   case MVT::i16: ScaleFactor = 2; break;
 743   case MVT::i32: // fall-through
 744   case MVT::f32: ScaleFactor = 4; break;
 745   case MVT::i64: // fall-through
 746   case MVT::f64: ScaleFactor = 8; break;
 747   }
 748
 749   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
 750   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
 751   bool UseScaled = true;
 752   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
 753     UseScaled = false;
 754     ScaleFactor = 1;
 755   }
 756
 757   static const unsigned OpcTable[4][6] = {
 758     { AArch64::LDURBBi,  AArch64::LDURHHi,  AArch64::LDURWi,  AArch64::LDURXi,
 759       AArch64::LDURSi,   AArch64::LDURDi },
 760     { AArch64::LDRBBui,  AArch64::LDRHHui,  AArch64::LDRWui,  AArch64::LDRXui,
 761       AArch64::LDRSui,   AArch64::LDRDui },
 762     { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX,
 763       AArch64::LDRSroX,  AArch64::LDRDroX },
 764     { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW,
 765       AArch64::LDRSroW,  AArch64::LDRDroW }
 766   };
 767
 768   unsigned Opc;
 769   const TargetRegisterClass *RC;
 770   bool VTIsi1 = false;
 771   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
 772                       Addr.getOffsetReg();
 773   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
 774   if (Addr.getExtendType() == AArch64_AM::UXTW ||
 775       Addr.getExtendType() == AArch64_AM::SXTW)
 776     Idx++;
 777
 778   switch (VT.SimpleTy) {
 779   default: llvm_unreachable("Unexpected value type.");
 780   case MVT::i1:  VTIsi1 = true; // Intentional fall-through.
 781   case MVT::i8:  Opc = OpcTable[Idx][0]; RC = &AArch64::GPR32RegClass; break;
 782   case MVT::i16: Opc = OpcTable[Idx][1]; RC = &AArch64::GPR32RegClass; break;
 783   case MVT::i32: Opc = OpcTable[Idx][2]; RC = &AArch64::GPR32RegClass; break;
 784   case MVT::i64: Opc = OpcTable[Idx][3]; RC = &AArch64::GPR64RegClass; break;
 785   case MVT::f32: Opc = OpcTable[Idx][4]; RC = &AArch64::FPR32RegClass; break;
 786   case MVT::f64: Opc = OpcTable[Idx][5]; RC = &AArch64::FPR64RegClass; break;
 787   }
 788
 789   // Create the base instruction, then add the operands.
 790   ResultReg = createResultReg(RC);
 791   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 792                                     TII.get(Opc), ResultReg);
 793   AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
 794
 795   // Loading an i1 requires special handling.
 796   if (VTIsi1) {
 797     MRI.constrainRegClass(ResultReg, &AArch64::GPR32RegClass);
 798     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
 799     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
 800             ANDReg)
 801       .addReg(ResultReg)
 802       .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
 803     ResultReg = ANDReg;
 804   }
 805   return true;
 806 }
 807
 808 bool AArch64FastISel::SelectLoad(const Instruction *I) {
 809   MVT VT;
 810   // Verify we have a legal type before going any further.  Currently, we handle
 811   // simple types that will directly fit in a register (i32/f32/i64/f64) or
 812   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
 813   if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic())
 814     return false;
 815
 816   // See if we can handle this address.
 817   Address Addr;
 818   if (!ComputeAddress(I->getOperand(0), Addr, I->getType()))
 819     return false;
 820
 821   unsigned ResultReg;
 822   if (!EmitLoad(VT, ResultReg, Addr, createMachineMemOperandFor(I)))
 823     return false;
 824
 825   UpdateValueMap(I, ResultReg);
 826   return true;
 827 }
 828
 829 bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
 830                                 MachineMemOperand *MMO) {
 831   // Simplify this down to something we can handle.
 832   if (!SimplifyAddress(Addr, VT))
 833     return false;
 834
 835   unsigned ScaleFactor;
 836   switch (VT.SimpleTy) {
 837   default: llvm_unreachable("Unexpected value type.");
 838   case MVT::i1:  // fall-through
 839   case MVT::i8:  ScaleFactor = 1; break;
 840   case MVT::i16: ScaleFactor = 2; break;
 841   case MVT::i32: // fall-through
 842   case MVT::f32: ScaleFactor = 4; break;
 843   case MVT::i64: // fall-through
 844   case MVT::f64: ScaleFactor = 8; break;
 845   }
 846
 847   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
 848   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
 849   bool UseScaled = true;
 850   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
 851     UseScaled = false;
 852     ScaleFactor = 1;
 853   }
 854
 855
 856   static const unsigned OpcTable[4][6] = {
 857     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
 858       AArch64::STURSi,   AArch64::STURDi },
 859     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
 860       AArch64::STRSui,   AArch64::STRDui },
 861     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
 862       AArch64::STRSroX,  AArch64::STRDroX },
 863     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
 864       AArch64::STRSroW,  AArch64::STRDroW }
 865
 866   };
 867
 868   unsigned Opc;
 869   bool VTIsi1 = false;
 870   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
 871                       Addr.getOffsetReg();
 872   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
 873   if (Addr.getExtendType() == AArch64_AM::UXTW ||
 874       Addr.getExtendType() == AArch64_AM::SXTW)
 875     Idx++;
 876
 877   switch (VT.SimpleTy) {
 878   default: llvm_unreachable("Unexpected value type.");
 879   case MVT::i1:  VTIsi1 = true;
 880   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
 881   case MVT::i16: Opc = OpcTable[Idx][1]; break;
 882   case MVT::i32: Opc = OpcTable[Idx][2]; break;
 883   case MVT::i64: Opc = OpcTable[Idx][3]; break;
 884   case MVT::f32: Opc = OpcTable[Idx][4]; break;
 885   case MVT::f64: Opc = OpcTable[Idx][5]; break;
 886   }
 887
 888   // Storing an i1 requires special handling.
 889   if (VTIsi1) {
 890     MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
 891     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
 892     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
 893             ANDReg)
 894       .addReg(SrcReg)
 895       .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
 896     SrcReg = ANDReg;
 897   }
 898   // Create the base instruction, then add the operands.
 899   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 900                                     TII.get(Opc))
 901                               .addReg(SrcReg);
 902   AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
 903
 904   return true;
 905 }
 906
 907 bool AArch64FastISel::SelectStore(const Instruction *I) {
 908   MVT VT;
 909   Value *Op0 = I->getOperand(0);
 910   // Verify we have a legal type before going any further.  Currently, we handle
 911   // simple types that will directly fit in a register (i32/f32/i64/f64) or
 912   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
 913   if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
 914       cast<StoreInst>(I)->isAtomic())
 915     return false;
 916
 917   // Get the value to be stored into a register.
 918   unsigned SrcReg = getRegForValue(Op0);
 919   if (SrcReg == 0)
 920     return false;
 921
 922   // See if we can handle this address.
 923   Address Addr;
 924   if (!ComputeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
 925     return false;
 926
 927   if (!EmitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
 928     return false;
 929   return true;
 930 }
 931
 932 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
 933   switch (Pred) {
 934   case CmpInst::FCMP_ONE:
 935   case CmpInst::FCMP_UEQ:
 936   default:
 937     // AL is our "false" for now. The other two need more compares.
 938     return AArch64CC::AL;
 939   case CmpInst::ICMP_EQ:
 940   case CmpInst::FCMP_OEQ:
 941     return AArch64CC::EQ;
 942   case CmpInst::ICMP_SGT:
 943   case CmpInst::FCMP_OGT:
 944     return AArch64CC::GT;
 945   case CmpInst::ICMP_SGE:
 946   case CmpInst::FCMP_OGE:
 947     return AArch64CC::GE;
 948   case CmpInst::ICMP_UGT:
 949   case CmpInst::FCMP_UGT:
 950     return AArch64CC::HI;
 951   case CmpInst::FCMP_OLT:
 952     return AArch64CC::MI;
 953   case CmpInst::ICMP_ULE:
 954   case CmpInst::FCMP_OLE:
 955     return AArch64CC::LS;
 956   case CmpInst::FCMP_ORD:
 957     return AArch64CC::VC;
 958   case CmpInst::FCMP_UNO:
 959     return AArch64CC::VS;
 960   case CmpInst::FCMP_UGE:
 961     return AArch64CC::PL;
 962   case CmpInst::ICMP_SLT:
 963   case CmpInst::FCMP_ULT:
 964     return AArch64CC::LT;
 965   case CmpInst::ICMP_SLE:
 966   case CmpInst::FCMP_ULE:
 967     return AArch64CC::LE;
 968   case CmpInst::FCMP_UNE:
 969   case CmpInst::ICMP_NE:
 970     return AArch64CC::NE;
 971   case CmpInst::ICMP_UGE:
 972     return AArch64CC::HS;
 973   case CmpInst::ICMP_ULT:
 974     return AArch64CC::LO;
 975   }
 976 }
 977
 978 bool AArch64FastISel::SelectBranch(const Instruction *I) {
 979   const BranchInst *BI = cast<BranchInst>(I);
 980   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
 981   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
 982
 983   AArch64CC::CondCode CC = AArch64CC::NE;
 984   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
 985     if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
 986       // We may not handle every CC for now.
 987       CC = getCompareCC(CI->getPredicate());
 988       if (CC == AArch64CC::AL)
 989         return false;
 990
 991       // Emit the cmp.
 992       if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
 993         return false;
 994
 995       // Emit the branch.
 996       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
 997           .addImm(CC)
 998           .addMBB(TBB);
 999
1000       // Obtain the branch weight and add the TrueBB to the successor list.
1001       uint32_t BranchWeight = 0;
1002       if (FuncInfo.BPI)
1003         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1004                                                   TBB->getBasicBlock());
1005       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1006
1007       FastEmitBranch(FBB, DbgLoc);
1008       return true;
1009     }
1010   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1011     MVT SrcVT;
1012     if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1013         (isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) {
1014       unsigned CondReg = getRegForValue(TI->getOperand(0));
1015       if (CondReg == 0)
1016         return false;
1017
1018       // Issue an extract_subreg to get the lower 32-bits.
1019       if (SrcVT == MVT::i64)
1020         CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true,
1021                                              AArch64::sub_32);
1022
1023       MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
1024       unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
1025       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1026               TII.get(AArch64::ANDWri), ANDReg)
1027           .addReg(CondReg)
1028           .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1029       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1030               TII.get(AArch64::SUBSWri))
1031           .addReg(ANDReg)
1032           .addReg(ANDReg)
1033           .addImm(0)
1034           .addImm(0);
1035
1036       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1037         std::swap(TBB, FBB);
1038         CC = AArch64CC::EQ;
1039       }
1040       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1041           .addImm(CC)
1042           .addMBB(TBB);
1043
1044       // Obtain the branch weight and add the TrueBB to the successor list.
1045       uint32_t BranchWeight = 0;
1046       if (FuncInfo.BPI)
1047         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1048                                                   TBB->getBasicBlock());
1049       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1050
1051       FastEmitBranch(FBB, DbgLoc);
1052       return true;
1053     }
1054   } else if (const ConstantInt *CI =
1055                  dyn_cast<ConstantInt>(BI->getCondition())) {
1056     uint64_t Imm = CI->getZExtValue();
1057     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
1058     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
1059         .addMBB(Target);
1060
1061     // Obtain the branch weight and add the target to the successor list.
1062     uint32_t BranchWeight = 0;
1063     if (FuncInfo.BPI)
1064       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1065                                                  Target->getBasicBlock());
1066     FuncInfo.MBB->addSuccessor(Target, BranchWeight);
1067     return true;
1068   } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
1069     // Fake request the condition, otherwise the intrinsic might be completely
1070     // optimized away.
1071     unsigned CondReg = getRegForValue(BI->getCondition());
1072     if (!CondReg)
1073       return false;
1074
1075     // Emit the branch.
1076     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1077       .addImm(CC)
1078       .addMBB(TBB);
1079
1080     // Obtain the branch weight and add the TrueBB to the successor list.
1081     uint32_t BranchWeight = 0;
1082     if (FuncInfo.BPI)
1083       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1084                                                  TBB->getBasicBlock());
1085     FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1086
1087     FastEmitBranch(FBB, DbgLoc);
1088     return true;
1089   }
1090
1091   unsigned CondReg = getRegForValue(BI->getCondition());
1092   if (CondReg == 0)
1093     return false;
1094
1095   // We've been divorced from our compare!  Our block was split, and
1096   // now our compare lives in a predecessor block.  We musn't
1097   // re-compare here, as the children of the compare aren't guaranteed
1098   // live across the block boundary (we *could* check for this).
1099   // Regardless, the compare has been done in the predecessor block,
1100   // and it left a value for us in a virtual register.  Ergo, we test
1101   // the one-bit value left in the virtual register.
1102   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri),
1103           AArch64::WZR)
1104       .addReg(CondReg)
1105       .addImm(0)
1106       .addImm(0);
1107
1108   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1109     std::swap(TBB, FBB);
1110     CC = AArch64CC::EQ;
1111   }
1112
1113   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1114       .addImm(CC)
1115       .addMBB(TBB);
1116
1117   // Obtain the branch weight and add the TrueBB to the successor list.
1118   uint32_t BranchWeight = 0;
1119   if (FuncInfo.BPI)
1120     BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1121                                                TBB->getBasicBlock());
1122   FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1123
1124   FastEmitBranch(FBB, DbgLoc);
1125   return true;
1126 }
1127
1128 bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
1129   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
1130   unsigned AddrReg = getRegForValue(BI->getOperand(0));
1131   if (AddrReg == 0)
1132     return false;
1133
1134   // Emit the indirect branch.
1135   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BR))
1136       .addReg(AddrReg);
1137
1138   // Make sure the CFG is up-to-date.
1139   for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
1140     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
1141
1142   return true;
1143 }
1144
1145 bool AArch64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) {
1146   Type *Ty = Src1Value->getType();
1147   EVT SrcEVT = TLI.getValueType(Ty, true);
1148   if (!SrcEVT.isSimple())
1149     return false;
1150   MVT SrcVT = SrcEVT.getSimpleVT();
1151
1152   // Check to see if the 2nd operand is a constant that we can encode directly
1153   // in the compare.
1154   uint64_t Imm;
1155   bool UseImm = false;
1156   bool isNegativeImm = false;
1157   if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
1158     if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
1159         SrcVT == MVT::i8 || SrcVT == MVT::i1) {
1160       const APInt &CIVal = ConstInt->getValue();
1161
1162       Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue();
1163       if (CIVal.isNegative()) {
1164         isNegativeImm = true;
1165         Imm = -Imm;
1166       }
1167       // FIXME: We can handle more immediates using shifts.
1168       UseImm = ((Imm & 0xfff) == Imm);
1169     }
1170   } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
1171     if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
1172       if (ConstFP->isZero() && !ConstFP->isNegative())
1173         UseImm = true;
1174   }
1175
1176   unsigned ZReg;
1177   unsigned CmpOpc;
1178   bool isICmp = true;
1179   bool needsExt = false;
1180   switch (SrcVT.SimpleTy) {
1181   default:
1182     return false;
1183   case MVT::i1:
1184   case MVT::i8:
1185   case MVT::i16:
1186     needsExt = true;
1187   // Intentional fall-through.
1188   case MVT::i32:
1189     ZReg = AArch64::WZR;
1190     if (UseImm)
1191       CmpOpc = isNegativeImm ? AArch64::ADDSWri : AArch64::SUBSWri;
1192     else
1193       CmpOpc = AArch64::SUBSWrr;
1194     break;
1195   case MVT::i64:
1196     ZReg = AArch64::XZR;
1197     if (UseImm)
1198       CmpOpc = isNegativeImm ? AArch64::ADDSXri : AArch64::SUBSXri;
1199     else
1200       CmpOpc = AArch64::SUBSXrr;
1201     break;
1202   case MVT::f32:
1203     isICmp = false;
1204     CmpOpc = UseImm ? AArch64::FCMPSri : AArch64::FCMPSrr;
1205     break;
1206   case MVT::f64:
1207     isICmp = false;
1208     CmpOpc = UseImm ? AArch64::FCMPDri : AArch64::FCMPDrr;
1209     break;
1210   }
1211
1212   unsigned SrcReg1 = getRegForValue(Src1Value);
1213   if (SrcReg1 == 0)
1214     return false;
1215
1216   unsigned SrcReg2;
1217   if (!UseImm) {
1218     SrcReg2 = getRegForValue(Src2Value);
1219     if (SrcReg2 == 0)
1220       return false;
1221   }
1222
1223   // We have i1, i8, or i16, we need to either zero extend or sign extend.
1224   if (needsExt) {
1225     SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
1226     if (SrcReg1 == 0)
1227       return false;
1228     if (!UseImm) {
1229       SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
1230       if (SrcReg2 == 0)
1231         return false;
1232     }
1233   }
1234
1235   if (isICmp) {
1236     if (UseImm)
1237       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1238           .addReg(ZReg)
1239           .addReg(SrcReg1)
1240           .addImm(Imm)
1241           .addImm(0);
1242     else
1243       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1244           .addReg(ZReg)
1245           .addReg(SrcReg1)
1246           .addReg(SrcReg2);
1247   } else {
1248     if (UseImm)
1249       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1250           .addReg(SrcReg1);
1251     else
1252       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1253           .addReg(SrcReg1)
1254           .addReg(SrcReg2);
1255   }
1256   return true;
1257 }
1258
1259 bool AArch64FastISel::SelectCmp(const Instruction *I) {
1260   const CmpInst *CI = cast<CmpInst>(I);
1261
1262   // We may not handle every CC for now.
1263   AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
1264   if (CC == AArch64CC::AL)
1265     return false;
1266
1267   // Emit the cmp.
1268   if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1269     return false;
1270
1271   // Now set a register based on the comparison.
1272   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
1273   unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
1274   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
1275           ResultReg)
1276       .addReg(AArch64::WZR)
1277       .addReg(AArch64::WZR)
1278       .addImm(invertedCC);
1279
1280   UpdateValueMap(I, ResultReg);
1281   return true;
1282 }
1283
1284 bool AArch64FastISel::SelectSelect(const Instruction *I) {
1285   const SelectInst *SI = cast<SelectInst>(I);
1286
1287   EVT DestEVT = TLI.getValueType(SI->getType(), true);
1288   if (!DestEVT.isSimple())
1289     return false;
1290
1291   MVT DestVT = DestEVT.getSimpleVT();
1292   if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
1293       DestVT != MVT::f64)
1294     return false;
1295
1296   unsigned SelectOpc;
1297   switch (DestVT.SimpleTy) {
1298   default: return false;
1299   case MVT::i32: SelectOpc = AArch64::CSELWr;    break;
1300   case MVT::i64: SelectOpc = AArch64::CSELXr;    break;
1301   case MVT::f32: SelectOpc = AArch64::FCSELSrrr; break;
1302   case MVT::f64: SelectOpc = AArch64::FCSELDrrr; break;
1303   }
1304
1305   const Value *Cond = SI->getCondition();
1306   bool NeedTest = true;
1307   AArch64CC::CondCode CC = AArch64CC::NE;
1308   if (foldXALUIntrinsic(CC, I, Cond))
1309     NeedTest = false;
1310
1311   unsigned CondReg = getRegForValue(Cond);
1312   if (!CondReg)
1313     return false;
1314   bool CondIsKill = hasTrivialKill(Cond);
1315
1316   if (NeedTest) {
1317     MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
1318     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
1319     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
1320             ANDReg)
1321       .addReg(CondReg, getKillRegState(CondIsKill))
1322       .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1323
1324     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri))
1325       .addReg(ANDReg)
1326       .addReg(ANDReg)
1327       .addImm(0)
1328       .addImm(0);
1329   }
1330
1331   unsigned TrueReg = getRegForValue(SI->getTrueValue());
1332   bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
1333
1334   unsigned FalseReg = getRegForValue(SI->getFalseValue());
1335   bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
1336
1337   if (!TrueReg || !FalseReg)
1338     return false;
1339
1340   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
1341   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc),
1342           ResultReg)
1343     .addReg(TrueReg, getKillRegState(TrueIsKill))
1344     .addReg(FalseReg, getKillRegState(FalseIsKill))
1345     .addImm(CC);
1346
1347   UpdateValueMap(I, ResultReg);
1348   return true;
1349 }
1350
1351 bool AArch64FastISel::SelectFPExt(const Instruction *I) {
1352   Value *V = I->getOperand(0);
1353   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
1354     return false;
1355
1356   unsigned Op = getRegForValue(V);
1357   if (Op == 0)
1358     return false;
1359
1360   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
1361   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
1362           ResultReg).addReg(Op);
1363   UpdateValueMap(I, ResultReg);
1364   return true;
1365 }
1366
1367 bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
1368   Value *V = I->getOperand(0);
1369   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
1370     return false;
1371
1372   unsigned Op = getRegForValue(V);
1373   if (Op == 0)
1374     return false;
1375
1376   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
1377   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
1378           ResultReg).addReg(Op);
1379   UpdateValueMap(I, ResultReg);
1380   return true;
1381 }
1382
1383 // FPToUI and FPToSI
1384 bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
1385   MVT DestVT;
1386   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1387     return false;
1388
1389   unsigned SrcReg = getRegForValue(I->getOperand(0));
1390   if (SrcReg == 0)
1391     return false;
1392
1393   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1394   if (SrcVT == MVT::f128)
1395     return false;
1396
1397   unsigned Opc;
1398   if (SrcVT == MVT::f64) {
1399     if (Signed)
1400       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
1401     else
1402       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
1403   } else {
1404     if (Signed)
1405       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
1406     else
1407       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
1408   }
1409   unsigned ResultReg = createResultReg(
1410       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
1411   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1412       .addReg(SrcReg);
1413   UpdateValueMap(I, ResultReg);
1414   return true;
1415 }
1416
1417 bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
1418   MVT DestVT;
1419   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1420     return false;
1421   assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
1422           "Unexpected value type.");
1423
1424   unsigned SrcReg = getRegForValue(I->getOperand(0));
1425   if (SrcReg == 0)
1426     return false;
1427
1428   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1429
1430   // Handle sign-extension.
1431   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
1432     SrcReg =
1433         EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
1434     if (SrcReg == 0)
1435       return false;
1436   }
1437
1438   MRI.constrainRegClass(SrcReg, SrcVT == MVT::i64 ? &AArch64::GPR64RegClass
1439                                                   : &AArch64::GPR32RegClass);
1440
1441   unsigned Opc;
1442   if (SrcVT == MVT::i64) {
1443     if (Signed)
1444       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
1445     else
1446       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
1447   } else {
1448     if (Signed)
1449       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
1450     else
1451       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
1452   }
1453
1454   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
1455   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1456       .addReg(SrcReg);
1457   UpdateValueMap(I, ResultReg);
1458   return true;
1459 }
1460
1461 bool AArch64FastISel::FastLowerArguments() {
1462   if (!FuncInfo.CanLowerReturn)
1463     return false;
1464
1465   const Function *F = FuncInfo.Fn;
1466   if (F->isVarArg())
1467     return false;
1468
1469   CallingConv::ID CC = F->getCallingConv();
1470   if (CC != CallingConv::C)
1471     return false;
1472
1473   // Only handle simple cases like i1/i8/i16/i32/i64/f32/f64 of up to 8 GPR and
1474   // FPR each.
1475   unsigned GPRCnt = 0;
1476   unsigned FPRCnt = 0;
1477   unsigned Idx = 0;
1478   for (auto const &Arg : F->args()) {
1479     // The first argument is at index 1.
1480     ++Idx;
1481     if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
1482         F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
1483         F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
1484         F->getAttributes().hasAttribute(Idx, Attribute::Nest))
1485       return false;
1486
1487     Type *ArgTy = Arg.getType();
1488     if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
1489       return false;
1490
1491     EVT ArgVT = TLI.getValueType(ArgTy);
1492     if (!ArgVT.isSimple()) return false;
1493     switch (ArgVT.getSimpleVT().SimpleTy) {
1494     default: return false;
1495     case MVT::i1:
1496     case MVT::i8:
1497     case MVT::i16:
1498     case MVT::i32:
1499     case MVT::i64:
1500       ++GPRCnt;
1501       break;
1502     case MVT::f16:
1503     case MVT::f32:
1504     case MVT::f64:
1505       ++FPRCnt;
1506       break;
1507     }
1508
1509     if (GPRCnt > 8 || FPRCnt > 8)
1510       return false;
1511   }
1512
1513   static const MCPhysReg Registers[5][8] = {
1514     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
1515       AArch64::W5, AArch64::W6, AArch64::W7 },
1516     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
1517       AArch64::X5, AArch64::X6, AArch64::X7 },
1518     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
1519       AArch64::H5, AArch64::H6, AArch64::H7 },
1520     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
1521       AArch64::S5, AArch64::S6, AArch64::S7 },
1522     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
1523       AArch64::D5, AArch64::D6, AArch64::D7 }
1524   };
1525
1526   unsigned GPRIdx = 0;
1527   unsigned FPRIdx = 0;
1528   for (auto const &Arg : F->args()) {
1529     MVT VT = TLI.getSimpleValueType(Arg.getType());
1530     unsigned SrcReg;
1531     switch (VT.SimpleTy) {
1532     default: llvm_unreachable("Unexpected value type.");
1533     case MVT::i1:
1534     case MVT::i8:
1535     case MVT::i16: VT = MVT::i32; // fall-through
1536     case MVT::i32: SrcReg = Registers[0][GPRIdx++]; break;
1537     case MVT::i64: SrcReg = Registers[1][GPRIdx++]; break;
1538     case MVT::f16: SrcReg = Registers[2][FPRIdx++]; break;
1539     case MVT::f32: SrcReg = Registers[3][FPRIdx++]; break;
1540     case MVT::f64: SrcReg = Registers[4][FPRIdx++]; break;
1541     }
1542
1543     // Skip unused arguments.
1544     if (Arg.use_empty()) {
1545       UpdateValueMap(&Arg, 0);
1546       continue;
1547     }
1548
1549     const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
1550     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
1551     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
1552     // Without this, EmitLiveInCopies may eliminate the livein if its only
1553     // use is a bitcast (which isn't turned into an instruction).
1554     unsigned ResultReg = createResultReg(RC);
1555     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1556             TII.get(TargetOpcode::COPY), ResultReg)
1557       .addReg(DstReg, getKillRegState(true));
1558     UpdateValueMap(&Arg, ResultReg);
1559   }
1560   return true;
1561 }
1562
1563 bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI,
1564                                       SmallVectorImpl<MVT> &OutVTs,
1565                                       unsigned &NumBytes) {
1566   CallingConv::ID CC = CLI.CallConv;
1567   SmallVector<CCValAssign, 16> ArgLocs;
1568   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
1569   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
1570
1571   // Get a count of how many bytes are to be pushed on the stack.
1572   NumBytes = CCInfo.getNextStackOffset();
1573
1574   // Issue CALLSEQ_START
1575   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
1576   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
1577     .addImm(NumBytes);
1578
1579   // Process the args.
1580   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1581     CCValAssign &VA = ArgLocs[i];
1582     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
1583     MVT ArgVT = OutVTs[VA.getValNo()];
1584
1585     unsigned ArgReg = getRegForValue(ArgVal);
1586     if (!ArgReg)
1587       return false;
1588
1589     // Handle arg promotion: SExt, ZExt, AExt.
1590     switch (VA.getLocInfo()) {
1591     case CCValAssign::Full:
1592       break;
1593     case CCValAssign::SExt: {
1594       MVT DestVT = VA.getLocVT();
1595       MVT SrcVT = ArgVT;
1596       ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
1597       if (!ArgReg)
1598         return false;
1599       break;
1600     }
1601     case CCValAssign::AExt:
1602     // Intentional fall-through.
1603     case CCValAssign::ZExt: {
1604       MVT DestVT = VA.getLocVT();
1605       MVT SrcVT = ArgVT;
1606       ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
1607       if (!ArgReg)
1608         return false;
1609       break;
1610     }
1611     default:
1612       llvm_unreachable("Unknown arg promotion!");
1613     }
1614
1615     // Now copy/store arg to correct locations.
1616     if (VA.isRegLoc() && !VA.needsCustom()) {
1617       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1618               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
1619       CLI.OutRegs.push_back(VA.getLocReg());
1620     } else if (VA.needsCustom()) {
1621       // FIXME: Handle custom args.
1622       return false;
1623     } else {
1624       assert(VA.isMemLoc() && "Assuming store on stack.");
1625
1626       // Don't emit stores for undef values.
1627       if (isa<UndefValue>(ArgVal))
1628         continue;
1629
1630       // Need to store on the stack.
1631       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
1632
1633       unsigned BEAlign = 0;
1634       if (ArgSize < 8 && !Subtarget->isLittleEndian())
1635         BEAlign = 8 - ArgSize;
1636
1637       Address Addr;
1638       Addr.setKind(Address::RegBase);
1639       Addr.setReg(AArch64::SP);
1640       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
1641
1642       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
1643       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
1644         MachinePointerInfo::getStack(Addr.getOffset()),
1645         MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
1646
1647       if (!EmitStore(ArgVT, ArgReg, Addr, MMO))
1648         return false;
1649     }
1650   }
1651   return true;
1652 }
1653
1654 bool AArch64FastISel::FinishCall(CallLoweringInfo &CLI, MVT RetVT,
1655                                  unsigned NumBytes) {
1656   CallingConv::ID CC = CLI.CallConv;
1657
1658   // Issue CALLSEQ_END
1659   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
1660   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
1661     .addImm(NumBytes).addImm(0);
1662
1663   // Now the return value.
1664   if (RetVT != MVT::isVoid) {
1665     SmallVector<CCValAssign, 16> RVLocs;
1666     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
1667     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
1668
1669     // Only handle a single return value.
1670     if (RVLocs.size() != 1)
1671       return false;
1672
1673     // Copy all of the result registers out of their specified physreg.
1674     MVT CopyVT = RVLocs[0].getValVT();
1675     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
1676     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1677             TII.get(TargetOpcode::COPY), ResultReg)
1678       .addReg(RVLocs[0].getLocReg());
1679     CLI.InRegs.push_back(RVLocs[0].getLocReg());
1680
1681     CLI.ResultReg = ResultReg;
1682     CLI.NumResultRegs = 1;
1683   }
1684
1685   return true;
1686 }
1687
1688 bool AArch64FastISel::FastLowerCall(CallLoweringInfo &CLI) {
1689   CallingConv::ID CC  = CLI.CallConv;
1690   bool IsTailCall     = CLI.IsTailCall;
1691   bool IsVarArg       = CLI.IsVarArg;
1692   const Value *Callee = CLI.Callee;
1693   const char *SymName = CLI.SymName;
1694
1695   // Allow SelectionDAG isel to handle tail calls.
1696   if (IsTailCall)
1697     return false;
1698
1699   CodeModel::Model CM = TM.getCodeModel();
1700   // Only support the small and large code model.
1701   if (CM != CodeModel::Small && CM != CodeModel::Large)
1702     return false;
1703
1704   // FIXME: Add large code model support for ELF.
1705   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
1706     return false;
1707
1708   // Let SDISel handle vararg functions.
1709   if (IsVarArg)
1710     return false;
1711
1712   // FIXME: Only handle *simple* calls for now.
1713   MVT RetVT;
1714   if (CLI.RetTy->isVoidTy())
1715     RetVT = MVT::isVoid;
1716   else if (!isTypeLegal(CLI.RetTy, RetVT))
1717     return false;
1718
1719   for (auto Flag : CLI.OutFlags)
1720     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
1721       return false;
1722
1723   // Set up the argument vectors.
1724   SmallVector<MVT, 16> OutVTs;
1725   OutVTs.reserve(CLI.OutVals.size());
1726
1727   for (auto *Val : CLI.OutVals) {
1728     MVT VT;
1729     if (!isTypeLegal(Val->getType(), VT) &&
1730         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
1731       return false;
1732
1733     // We don't handle vector parameters yet.
1734     if (VT.isVector() || VT.getSizeInBits() > 64)
1735       return false;
1736
1737     OutVTs.push_back(VT);
1738   }
1739
1740   Address Addr;
1741   if (!ComputeCallAddress(Callee, Addr))
1742     return false;
1743
1744   // Handle the arguments now that we've gotten them.
1745   unsigned NumBytes;
1746   if (!ProcessCallArgs(CLI, OutVTs, NumBytes))
1747     return false;
1748
1749   // Issue the call.
1750   MachineInstrBuilder MIB;
1751   if (CM == CodeModel::Small) {
1752     unsigned CallOpc = Addr.getReg() ? AArch64::BLR : AArch64::BL;
1753     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
1754     if (SymName)
1755       MIB.addExternalSymbol(SymName, 0);
1756     else if (Addr.getGlobalValue())
1757       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
1758     else if (Addr.getReg())
1759       MIB.addReg(Addr.getReg());
1760     else
1761       return false;
1762   } else {
1763     unsigned CallReg = 0;
1764     if (SymName) {
1765       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
1766       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
1767               ADRPReg)
1768         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
1769
1770       CallReg = createResultReg(&AArch64::GPR64RegClass);
1771       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
1772               CallReg)
1773         .addReg(ADRPReg)
1774         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
1775                            AArch64II::MO_NC);
1776     } else if (Addr.getGlobalValue()) {
1777       CallReg = AArch64MaterializeGV(Addr.getGlobalValue());
1778     } else if (Addr.getReg())
1779       CallReg = Addr.getReg();
1780
1781     if (!CallReg)
1782       return false;
1783
1784     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1785                   TII.get(AArch64::BLR)).addReg(CallReg);
1786   }
1787
1788   // Add implicit physical register uses to the call.
1789   for (auto Reg : CLI.OutRegs)
1790     MIB.addReg(Reg, RegState::Implicit);
1791
1792   // Add a register mask with the call-preserved registers.
1793   // Proper defs for return values will be added by setPhysRegsDeadExcept().
1794   MIB.addRegMask(TRI.getCallPreservedMask(CC));
1795
1796   CLI.Call = MIB;
1797
1798   // Finish off the call including any return values.
1799   return FinishCall(CLI, RetVT, NumBytes);
1800 }
1801
1802 bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
1803   if (Alignment)
1804     return Len / Alignment <= 4;
1805   else
1806     return Len < 32;
1807 }
1808
1809 bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
1810                                          uint64_t Len, unsigned Alignment) {
1811   // Make sure we don't bloat code by inlining very large memcpy's.
1812   if (!IsMemCpySmall(Len, Alignment))
1813     return false;
1814
1815   int64_t UnscaledOffset = 0;
1816   Address OrigDest = Dest;
1817   Address OrigSrc = Src;
1818
1819   while (Len) {
1820     MVT VT;
1821     if (!Alignment || Alignment >= 8) {
1822       if (Len >= 8)
1823         VT = MVT::i64;
1824       else if (Len >= 4)
1825         VT = MVT::i32;
1826       else if (Len >= 2)
1827         VT = MVT::i16;
1828       else {
1829         VT = MVT::i8;
1830       }
1831     } else {
1832       // Bound based on alignment.
1833       if (Len >= 4 && Alignment == 4)
1834         VT = MVT::i32;
1835       else if (Len >= 2 && Alignment == 2)
1836         VT = MVT::i16;
1837       else {
1838         VT = MVT::i8;
1839       }
1840     }
1841
1842     bool RV;
1843     unsigned ResultReg;
1844     RV = EmitLoad(VT, ResultReg, Src);
1845     if (!RV)
1846       return false;
1847
1848     RV = EmitStore(VT, ResultReg, Dest);
1849     if (!RV)
1850       return false;
1851
1852     int64_t Size = VT.getSizeInBits() / 8;
1853     Len -= Size;
1854     UnscaledOffset += Size;
1855
1856     // We need to recompute the unscaled offset for each iteration.
1857     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
1858     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
1859   }
1860
1861   return true;
1862 }
1863
1864 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
1865 /// into the user. The condition code will only be updated on success.
1866 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
1867                                         const Instruction *I,
1868                                         const Value *Cond) {
1869   if (!isa<ExtractValueInst>(Cond))
1870     return false;
1871
1872   const auto *EV = cast<ExtractValueInst>(Cond);
1873   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
1874     return false;
1875
1876   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
1877   MVT RetVT;
1878   const Function *Callee = II->getCalledFunction();
1879   Type *RetTy =
1880   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
1881   if (!isTypeLegal(RetTy, RetVT))
1882     return false;
1883
1884   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1885     return false;
1886
1887   AArch64CC::CondCode TmpCC;
1888   switch (II->getIntrinsicID()) {
1889     default: return false;
1890     case Intrinsic::sadd_with_overflow:
1891     case Intrinsic::ssub_with_overflow: TmpCC = AArch64CC::VS; break;
1892     case Intrinsic::uadd_with_overflow: TmpCC = AArch64CC::HS; break;
1893     case Intrinsic::usub_with_overflow: TmpCC = AArch64CC::LO; break;
1894     case Intrinsic::smul_with_overflow:
1895     case Intrinsic::umul_with_overflow: TmpCC = AArch64CC::NE; break;
1896   }
1897
1898   // Check if both instructions are in the same basic block.
1899   if (II->getParent() != I->getParent())
1900     return false;
1901
1902   // Make sure nothing is in the way
1903   BasicBlock::const_iterator Start = I;
1904   BasicBlock::const_iterator End = II;
1905   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
1906     // We only expect extractvalue instructions between the intrinsic and the
1907     // instruction to be selected.
1908     if (!isa<ExtractValueInst>(Itr))
1909       return false;
1910
1911     // Check that the extractvalue operand comes from the intrinsic.
1912     const auto *EVI = cast<ExtractValueInst>(Itr);
1913     if (EVI->getAggregateOperand() != II)
1914       return false;
1915   }
1916
1917   CC = TmpCC;
1918   return true;
1919 }
1920
1921 bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
1922   // FIXME: Handle more intrinsics.
1923   switch (II->getIntrinsicID()) {
1924   default: return false;
1925   case Intrinsic::frameaddress: {
1926     MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
1927     MFI->setFrameAddressIsTaken(true);
1928
1929     const AArch64RegisterInfo *RegInfo =
1930         static_cast<const AArch64RegisterInfo *>(
1931             TM.getSubtargetImpl()->getRegisterInfo());
1932     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
1933     unsigned SrcReg = FramePtr;
1934
1935     // Recursively load frame address
1936     // ldr x0, [fp]
1937     // ldr x0, [x0]
1938     // ldr x0, [x0]
1939     // ...
1940     unsigned DestReg;
1941     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
1942     while (Depth--) {
1943       DestReg = createResultReg(&AArch64::GPR64RegClass);
1944       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1945               TII.get(AArch64::LDRXui), DestReg)
1946         .addReg(SrcReg).addImm(0);
1947       SrcReg = DestReg;
1948     }
1949
1950     UpdateValueMap(II, SrcReg);
1951     return true;
1952   }
1953   case Intrinsic::memcpy:
1954   case Intrinsic::memmove: {
1955     const auto *MTI = cast<MemTransferInst>(II);
1956     // Don't handle volatile.
1957     if (MTI->isVolatile())
1958       return false;
1959
1960     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
1961     // we would emit dead code because we don't currently handle memmoves.
1962     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
1963     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
1964       // Small memcpy's are common enough that we want to do them without a call
1965       // if possible.
1966       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
1967       unsigned Alignment = MTI->getAlignment();
1968       if (IsMemCpySmall(Len, Alignment)) {
1969         Address Dest, Src;
1970         if (!ComputeAddress(MTI->getRawDest(), Dest) ||
1971             !ComputeAddress(MTI->getRawSource(), Src))
1972           return false;
1973         if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
1974           return true;
1975       }
1976     }
1977
1978     if (!MTI->getLength()->getType()->isIntegerTy(64))
1979       return false;
1980
1981     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
1982       // Fast instruction selection doesn't support the special
1983       // address spaces.
1984       return false;
1985
1986     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
1987     return LowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
1988   }
1989   case Intrinsic::memset: {
1990     const MemSetInst *MSI = cast<MemSetInst>(II);
1991     // Don't handle volatile.
1992     if (MSI->isVolatile())
1993       return false;
1994
1995     if (!MSI->getLength()->getType()->isIntegerTy(64))
1996       return false;
1997
1998     if (MSI->getDestAddressSpace() > 255)
1999       // Fast instruction selection doesn't support the special
2000       // address spaces.
2001       return false;
2002
2003     return LowerCallTo(II, "memset", II->getNumArgOperands() - 2);
2004   }
2005   case Intrinsic::trap: {
2006     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
2007         .addImm(1);
2008     return true;
2009   }
2010   case Intrinsic::sqrt: {
2011     Type *RetTy = II->getCalledFunction()->getReturnType();
2012
2013     MVT VT;
2014     if (!isTypeLegal(RetTy, VT))
2015       return false;
2016
2017     unsigned Op0Reg = getRegForValue(II->getOperand(0));
2018     if (!Op0Reg)
2019       return false;
2020     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
2021
2022     unsigned ResultReg = FastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
2023     if (!ResultReg)
2024       return false;
2025
2026     UpdateValueMap(II, ResultReg);
2027     return true;
2028   }
2029   case Intrinsic::sadd_with_overflow:
2030   case Intrinsic::uadd_with_overflow:
2031   case Intrinsic::ssub_with_overflow:
2032   case Intrinsic::usub_with_overflow:
2033   case Intrinsic::smul_with_overflow:
2034   case Intrinsic::umul_with_overflow: {
2035     // This implements the basic lowering of the xalu with overflow intrinsics.
2036     const Function *Callee = II->getCalledFunction();
2037     auto *Ty = cast<StructType>(Callee->getReturnType());
2038     Type *RetTy = Ty->getTypeAtIndex(0U);
2039     Type *CondTy = Ty->getTypeAtIndex(1);
2040
2041     MVT VT;
2042     if (!isTypeLegal(RetTy, VT))
2043       return false;
2044
2045     if (VT != MVT::i32 && VT != MVT::i64)
2046       return false;
2047
2048     const Value *LHS = II->getArgOperand(0);
2049     const Value *RHS = II->getArgOperand(1);
2050     // Canonicalize immediate to the RHS.
2051     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2052         isCommutativeIntrinsic(II))
2053       std::swap(LHS, RHS);
2054
2055     unsigned LHSReg = getRegForValue(LHS);
2056     if (!LHSReg)
2057       return false;
2058     bool LHSIsKill = hasTrivialKill(LHS);
2059
2060     // Check if the immediate can be encoded in the instruction and if we should
2061     // invert the instruction (adds -> subs) to handle negative immediates.
2062     bool UseImm = false;
2063     bool UseInverse = false;
2064     uint64_t Imm = 0;
2065     if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
2066       if (C->isNegative()) {
2067         UseInverse = true;
2068         Imm = -(C->getSExtValue());
2069       } else
2070         Imm = C->getZExtValue();
2071
2072       if (isUInt<12>(Imm))
2073         UseImm = true;
2074
2075       UseInverse = UseImm && UseInverse;
2076     }
2077
2078     static const unsigned OpcTable[2][2][2] = {
2079       { {AArch64::ADDSWrr, AArch64::ADDSXrr},
2080         {AArch64::ADDSWri, AArch64::ADDSXri} },
2081       { {AArch64::SUBSWrr, AArch64::SUBSXrr},
2082         {AArch64::SUBSWri, AArch64::SUBSXri} }
2083     };
2084     unsigned Opc = 0;
2085     unsigned MulReg = 0;
2086     unsigned RHSReg = 0;
2087     bool RHSIsKill = false;
2088     AArch64CC::CondCode CC = AArch64CC::Invalid;
2089     bool Is64Bit = VT == MVT::i64;
2090     switch (II->getIntrinsicID()) {
2091     default: llvm_unreachable("Unexpected intrinsic!");
2092     case Intrinsic::sadd_with_overflow:
2093       Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
2094     case Intrinsic::uadd_with_overflow:
2095       Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::HS; break;
2096     case Intrinsic::ssub_with_overflow:
2097       Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
2098     case Intrinsic::usub_with_overflow:
2099       Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::LO; break;
2100     case Intrinsic::smul_with_overflow: {
2101       CC = AArch64CC::NE;
2102       RHSReg = getRegForValue(RHS);
2103       if (!RHSReg)
2104         return false;
2105       RHSIsKill = hasTrivialKill(RHS);
2106
2107       if (VT == MVT::i32) {
2108         MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2109         unsigned ShiftReg = Emit_LSR_ri(MVT::i64, MulReg, false, 32);
2110         MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
2111                                             AArch64::sub_32);
2112         ShiftReg = FastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
2113                                               AArch64::sub_32);
2114         unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
2115         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2116                 TII.get(AArch64::SUBSWrs), CmpReg)
2117           .addReg(ShiftReg, getKillRegState(true))
2118           .addReg(MulReg, getKillRegState(false))
2119           .addImm(159); // 159 <-> asr #31
2120       } else {
2121         assert(VT == MVT::i64 && "Unexpected value type.");
2122         MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2123         unsigned SMULHReg = FastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
2124                                         RHSReg, RHSIsKill);
2125         unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
2126         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2127                 TII.get(AArch64::SUBSXrs), CmpReg)
2128           .addReg(SMULHReg, getKillRegState(true))
2129           .addReg(MulReg, getKillRegState(false))
2130           .addImm(191); // 191 <-> asr #63
2131       }
2132       break;
2133     }
2134     case Intrinsic::umul_with_overflow: {
2135       CC = AArch64CC::NE;
2136       RHSReg = getRegForValue(RHS);
2137       if (!RHSReg)
2138         return false;
2139       RHSIsKill = hasTrivialKill(RHS);
2140
2141       if (VT == MVT::i32) {
2142         MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2143         unsigned CmpReg = createResultReg(TLI.getRegClassFor(MVT::i64));
2144         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2145                 TII.get(AArch64::SUBSXrs), CmpReg)
2146           .addReg(AArch64::XZR, getKillRegState(true))
2147           .addReg(MulReg, getKillRegState(false))
2148           .addImm(96); // 96 <-> lsr #32
2149         MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
2150                                             AArch64::sub_32);
2151       } else {
2152         assert(VT == MVT::i64 && "Unexpected value type.");
2153         MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2154         unsigned UMULHReg = FastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
2155                                         RHSReg, RHSIsKill);
2156         unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
2157         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2158                 TII.get(AArch64::SUBSXrr), CmpReg)
2159         .addReg(AArch64::XZR, getKillRegState(true))
2160         .addReg(UMULHReg, getKillRegState(false));
2161       }
2162       break;
2163     }
2164     }
2165
2166     if (!UseImm) {
2167       RHSReg = getRegForValue(RHS);
2168       if (!RHSReg)
2169         return false;
2170       RHSIsKill = hasTrivialKill(RHS);
2171     }
2172
2173     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
2174     if (Opc) {
2175       MachineInstrBuilder MIB;
2176       MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
2177                     ResultReg)
2178               .addReg(LHSReg, getKillRegState(LHSIsKill));
2179       if (UseImm) {
2180         MIB.addImm(Imm);
2181         MIB.addImm(0);
2182       } else
2183         MIB.addReg(RHSReg, getKillRegState(RHSIsKill));
2184     }
2185     else
2186       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2187               TII.get(TargetOpcode::COPY), ResultReg)
2188         .addReg(MulReg);
2189
2190     unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy);
2191     assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2192     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2193             ResultReg2)
2194       .addReg(AArch64::WZR, getKillRegState(true))
2195       .addReg(AArch64::WZR, getKillRegState(true))
2196       .addImm(getInvertedCondCode(CC));
2197
2198     UpdateValueMap(II, ResultReg, 2);
2199     return true;
2200   }
2201   }
2202   return false;
2203 }
2204
2205 bool AArch64FastISel::SelectRet(const Instruction *I) {
2206   const ReturnInst *Ret = cast<ReturnInst>(I);
2207   const Function &F = *I->getParent()->getParent();
2208
2209   if (!FuncInfo.CanLowerReturn)
2210     return false;
2211
2212   if (F.isVarArg())
2213     return false;
2214
2215   // Build a list of return value registers.
2216   SmallVector<unsigned, 4> RetRegs;
2217
2218   if (Ret->getNumOperands() > 0) {
2219     CallingConv::ID CC = F.getCallingConv();
2220     SmallVector<ISD::OutputArg, 4> Outs;
2221     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
2222
2223     // Analyze operands of the call, assigning locations to each operand.
2224     SmallVector<CCValAssign, 16> ValLocs;
2225     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
2226     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
2227                                                      : RetCC_AArch64_AAPCS;
2228     CCInfo.AnalyzeReturn(Outs, RetCC);
2229
2230     // Only handle a single return value for now.
2231     if (ValLocs.size() != 1)
2232       return false;
2233
2234     CCValAssign &VA = ValLocs[0];
2235     const Value *RV = Ret->getOperand(0);
2236
2237     // Don't bother handling odd stuff for now.
2238     if (VA.getLocInfo() != CCValAssign::Full)
2239       return false;
2240     // Only handle register returns for now.
2241     if (!VA.isRegLoc())
2242       return false;
2243     unsigned Reg = getRegForValue(RV);
2244     if (Reg == 0)
2245       return false;
2246
2247     unsigned SrcReg = Reg + VA.getValNo();
2248     unsigned DestReg = VA.getLocReg();
2249     // Avoid a cross-class copy. This is very unlikely.
2250     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
2251       return false;
2252
2253     EVT RVEVT = TLI.getValueType(RV->getType());
2254     if (!RVEVT.isSimple())
2255       return false;
2256
2257     // Vectors (of > 1 lane) in big endian need tricky handling.
2258     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1)
2259       return false;
2260
2261     MVT RVVT = RVEVT.getSimpleVT();
2262     if (RVVT == MVT::f128)
2263       return false;
2264     MVT DestVT = VA.getValVT();
2265     // Special handling for extended integers.
2266     if (RVVT != DestVT) {
2267       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
2268         return false;
2269
2270       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
2271         return false;
2272
2273       bool isZExt = Outs[0].Flags.isZExt();
2274       SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
2275       if (SrcReg == 0)
2276         return false;
2277     }
2278
2279     // Make the copy.
2280     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2281             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
2282
2283     // Add register to return instruction.
2284     RetRegs.push_back(VA.getLocReg());
2285   }
2286
2287   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2288                                     TII.get(AArch64::RET_ReallyLR));
2289   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
2290     MIB.addReg(RetRegs[i], RegState::Implicit);
2291   return true;
2292 }
2293
2294 bool AArch64FastISel::SelectTrunc(const Instruction *I) {
2295   Type *DestTy = I->getType();
2296   Value *Op = I->getOperand(0);
2297   Type *SrcTy = Op->getType();
2298
2299   EVT SrcEVT = TLI.getValueType(SrcTy, true);
2300   EVT DestEVT = TLI.getValueType(DestTy, true);
2301   if (!SrcEVT.isSimple())
2302     return false;
2303   if (!DestEVT.isSimple())
2304     return false;
2305
2306   MVT SrcVT = SrcEVT.getSimpleVT();
2307   MVT DestVT = DestEVT.getSimpleVT();
2308
2309   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
2310       SrcVT != MVT::i8)
2311     return false;
2312   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
2313       DestVT != MVT::i1)
2314     return false;
2315
2316   unsigned SrcReg = getRegForValue(Op);
2317   if (!SrcReg)
2318     return false;
2319
2320   // If we're truncating from i64 to a smaller non-legal type then generate an
2321   // AND.  Otherwise, we know the high bits are undefined and a truncate doesn't
2322   // generate any code.
2323   if (SrcVT == MVT::i64) {
2324     uint64_t Mask = 0;
2325     switch (DestVT.SimpleTy) {
2326     default:
2327       // Trunc i64 to i32 is handled by the target-independent fast-isel.
2328       return false;
2329     case MVT::i1:
2330       Mask = 0x1;
2331       break;
2332     case MVT::i8:
2333       Mask = 0xff;
2334       break;
2335     case MVT::i16:
2336       Mask = 0xffff;
2337       break;
2338     }
2339     // Issue an extract_subreg to get the lower 32-bits.
2340     unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true,
2341                                                 AArch64::sub_32);
2342     MRI.constrainRegClass(Reg32, &AArch64::GPR32RegClass);
2343     // Create the AND instruction which performs the actual truncation.
2344     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
2345     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
2346             ANDReg)
2347         .addReg(Reg32)
2348         .addImm(AArch64_AM::encodeLogicalImmediate(Mask, 32));
2349     SrcReg = ANDReg;
2350   }
2351
2352   UpdateValueMap(I, SrcReg);
2353   return true;
2354 }
2355
2356 unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
2357   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
2358           DestVT == MVT::i64) &&
2359          "Unexpected value type.");
2360   // Handle i8 and i16 as i32.
2361   if (DestVT == MVT::i8 || DestVT == MVT::i16)
2362     DestVT = MVT::i32;
2363
2364   if (isZExt) {
2365     MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2366     unsigned ResultReg = createResultReg(&AArch64::GPR32spRegClass);
2367     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
2368             ResultReg)
2369         .addReg(SrcReg)
2370         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2371
2372     if (DestVT == MVT::i64) {
2373       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
2374       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
2375       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2376       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2377               TII.get(AArch64::SUBREG_TO_REG), Reg64)
2378           .addImm(0)
2379           .addReg(ResultReg)
2380           .addImm(AArch64::sub_32);
2381       ResultReg = Reg64;
2382     }
2383     return ResultReg;
2384   } else {
2385     if (DestVT == MVT::i64) {
2386       // FIXME: We're SExt i1 to i64.
2387       return 0;
2388     }
2389     unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
2390     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SBFMWri),
2391             ResultReg)
2392         .addReg(SrcReg)
2393         .addImm(0)
2394         .addImm(0);
2395     return ResultReg;
2396   }
2397 }
2398
2399 unsigned AArch64FastISel::Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2400                                       unsigned Op1, bool Op1IsKill) {
2401   unsigned Opc, ZReg;
2402   switch (RetVT.SimpleTy) {
2403   default: return 0;
2404   case MVT::i8:
2405   case MVT::i16:
2406   case MVT::i32:
2407     RetVT = MVT::i32;
2408     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
2409   case MVT::i64:
2410     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
2411   }
2412
2413   // Create the base instruction, then add the operands.
2414   unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
2415   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2416     .addReg(Op0, getKillRegState(Op0IsKill))
2417     .addReg(Op1, getKillRegState(Op1IsKill))
2418     .addReg(ZReg, getKillRegState(true));
2419
2420   return ResultReg;
2421 }
2422
2423 unsigned AArch64FastISel::Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2424                                         unsigned Op1, bool Op1IsKill) {
2425   if (RetVT != MVT::i64)
2426     return 0;
2427
2428   // Create the base instruction, then add the operands.
2429   unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
2430   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SMADDLrrr),
2431           ResultReg)
2432     .addReg(Op0, getKillRegState(Op0IsKill))
2433     .addReg(Op1, getKillRegState(Op1IsKill))
2434     .addReg(AArch64::XZR, getKillRegState(true));
2435
2436   return ResultReg;
2437 }
2438
2439 unsigned AArch64FastISel::Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2440                                         unsigned Op1, bool Op1IsKill) {
2441   if (RetVT != MVT::i64)
2442     return 0;
2443
2444   // Create the base instruction, then add the operands.
2445   unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
2446   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::UMADDLrrr),
2447           ResultReg)
2448     .addReg(Op0, getKillRegState(Op0IsKill))
2449     .addReg(Op1, getKillRegState(Op1IsKill))
2450     .addReg(AArch64::XZR, getKillRegState(true));
2451
2452   return ResultReg;
2453 }
2454
2455 unsigned AArch64FastISel::Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2456                                       uint64_t Shift) {
2457   unsigned Opc, ImmR, ImmS;
2458   switch (RetVT.SimpleTy) {
2459   default: return 0;
2460   case MVT::i8:
2461     Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS =  7 - Shift; break;
2462   case MVT::i16:
2463     Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 15 - Shift; break;
2464   case MVT::i32:
2465     Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 31 - Shift; break;
2466   case MVT::i64:
2467     Opc = AArch64::UBFMXri; ImmR = -Shift % 64; ImmS = 63 - Shift; break;
2468   }
2469
2470   RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
2471   return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, ImmR,
2472                           ImmS);
2473 }
2474
2475 unsigned AArch64FastISel::Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2476                                       uint64_t Shift) {
2477   unsigned Opc, ImmS;
2478   switch (RetVT.SimpleTy) {
2479   default: return 0;
2480   case MVT::i8:  Opc = AArch64::UBFMWri; ImmS =  7; break;
2481   case MVT::i16: Opc = AArch64::UBFMWri; ImmS = 15; break;
2482   case MVT::i32: Opc = AArch64::UBFMWri; ImmS = 31; break;
2483   case MVT::i64: Opc = AArch64::UBFMXri; ImmS = 63; break;
2484   }
2485
2486   RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
2487   return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
2488                           ImmS);
2489 }
2490
2491 unsigned AArch64FastISel::Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2492                                       uint64_t Shift) {
2493   unsigned Opc, ImmS;
2494   switch (RetVT.SimpleTy) {
2495   default: return 0;
2496   case MVT::i8:  Opc = AArch64::SBFMWri; ImmS =  7; break;
2497   case MVT::i16: Opc = AArch64::SBFMWri; ImmS = 15; break;
2498   case MVT::i32: Opc = AArch64::SBFMWri; ImmS = 31; break;
2499   case MVT::i64: Opc = AArch64::SBFMXri; ImmS = 63; break;
2500   }
2501
2502   RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
2503   return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
2504                           ImmS);
2505 }
2506
2507 unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
2508                                      bool isZExt) {
2509   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
2510
2511   // FastISel does not have plumbing to deal with extensions where the SrcVT or
2512   // DestVT are odd things, so test to make sure that they are both types we can
2513   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
2514   // bail out to SelectionDAG.
2515   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
2516        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
2517       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
2518        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
2519     return 0;
2520
2521   unsigned Opc;
2522   unsigned Imm = 0;
2523
2524   switch (SrcVT.SimpleTy) {
2525   default:
2526     return 0;
2527   case MVT::i1:
2528     return Emiti1Ext(SrcReg, DestVT, isZExt);
2529   case MVT::i8:
2530     if (DestVT == MVT::i64)
2531       Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2532     else
2533       Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
2534     Imm = 7;
2535     break;
2536   case MVT::i16:
2537     if (DestVT == MVT::i64)
2538       Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2539     else
2540       Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
2541     Imm = 15;
2542     break;
2543   case MVT::i32:
2544     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
2545     Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2546     Imm = 31;
2547     break;
2548   }
2549
2550   // Handle i8 and i16 as i32.
2551   if (DestVT == MVT::i8 || DestVT == MVT::i16)
2552     DestVT = MVT::i32;
2553   else if (DestVT == MVT::i64) {
2554     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2555     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2556             TII.get(AArch64::SUBREG_TO_REG), Src64)
2557         .addImm(0)
2558         .addReg(SrcReg)
2559         .addImm(AArch64::sub_32);
2560     SrcReg = Src64;
2561   }
2562
2563   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
2564   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2565       .addReg(SrcReg)
2566       .addImm(0)
2567       .addImm(Imm);
2568
2569   return ResultReg;
2570 }
2571
2572 bool AArch64FastISel::SelectIntExt(const Instruction *I) {
2573   // On ARM, in general, integer casts don't involve legal types; this code
2574   // handles promotable integers.  The high bits for a type smaller than
2575   // the register size are assumed to be undefined.
2576   Type *DestTy = I->getType();
2577   Value *Src = I->getOperand(0);
2578   Type *SrcTy = Src->getType();
2579
2580   bool isZExt = isa<ZExtInst>(I);
2581   unsigned SrcReg = getRegForValue(Src);
2582   if (!SrcReg)
2583     return false;
2584
2585   EVT SrcEVT = TLI.getValueType(SrcTy, true);
2586   EVT DestEVT = TLI.getValueType(DestTy, true);
2587   if (!SrcEVT.isSimple())
2588     return false;
2589   if (!DestEVT.isSimple())
2590     return false;
2591
2592   MVT SrcVT = SrcEVT.getSimpleVT();
2593   MVT DestVT = DestEVT.getSimpleVT();
2594   unsigned ResultReg = 0;
2595
2596   // Check if it is an argument and if it is already zero/sign-extended.
2597   if (const auto *Arg = dyn_cast<Argument>(Src)) {
2598     if ((isZExt && Arg->hasZExtAttr()) || (!isZExt && Arg->hasSExtAttr())) {
2599       if (DestVT == MVT::i64) {
2600         ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
2601         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2602                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
2603           .addImm(0)
2604           .addReg(SrcReg)
2605           .addImm(AArch64::sub_32);
2606       } else
2607         ResultReg = SrcReg;
2608     }
2609   }
2610
2611   if (!ResultReg)
2612     ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
2613
2614   if (!ResultReg)
2615     return false;
2616
2617   UpdateValueMap(I, ResultReg);
2618   return true;
2619 }
2620
2621 bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
2622   EVT DestEVT = TLI.getValueType(I->getType(), true);
2623   if (!DestEVT.isSimple())
2624     return false;
2625
2626   MVT DestVT = DestEVT.getSimpleVT();
2627   if (DestVT != MVT::i64 && DestVT != MVT::i32)
2628     return false;
2629
2630   unsigned DivOpc;
2631   bool is64bit = (DestVT == MVT::i64);
2632   switch (ISDOpcode) {
2633   default:
2634     return false;
2635   case ISD::SREM:
2636     DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
2637     break;
2638   case ISD::UREM:
2639     DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
2640     break;
2641   }
2642   unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
2643   unsigned Src0Reg = getRegForValue(I->getOperand(0));
2644   if (!Src0Reg)
2645     return false;
2646
2647   unsigned Src1Reg = getRegForValue(I->getOperand(1));
2648   if (!Src1Reg)
2649     return false;
2650
2651   unsigned QuotReg = createResultReg(TLI.getRegClassFor(DestVT));
2652   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), QuotReg)
2653       .addReg(Src0Reg)
2654       .addReg(Src1Reg);
2655   // The remainder is computed as numerator - (quotient * denominator) using the
2656   // MSUB instruction.
2657   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
2658   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), ResultReg)
2659       .addReg(QuotReg)
2660       .addReg(Src1Reg)
2661       .addReg(Src0Reg);
2662   UpdateValueMap(I, ResultReg);
2663   return true;
2664 }
2665
2666 bool AArch64FastISel::SelectMul(const Instruction *I) {
2667   EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2668   if (!SrcEVT.isSimple())
2669     return false;
2670   MVT SrcVT = SrcEVT.getSimpleVT();
2671
2672   // Must be simple value type.  Don't handle vectors.
2673   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
2674       SrcVT != MVT::i8)
2675     return false;
2676
2677   unsigned Src0Reg = getRegForValue(I->getOperand(0));
2678   if (!Src0Reg)
2679     return false;
2680   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
2681
2682   unsigned Src1Reg = getRegForValue(I->getOperand(1));
2683   if (!Src1Reg)
2684     return false;
2685   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
2686
2687   unsigned ResultReg =
2688     Emit_MUL_rr(SrcVT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
2689
2690   if (!ResultReg)
2691     return false;
2692
2693   UpdateValueMap(I, ResultReg);
2694   return true;
2695 }
2696
2697 bool AArch64FastISel::SelectShift(const Instruction *I, bool IsLeftShift,
2698                                   bool IsArithmetic) {
2699   EVT RetEVT = TLI.getValueType(I->getType(), true);
2700   if (!RetEVT.isSimple())
2701     return false;
2702   MVT RetVT = RetEVT.getSimpleVT();
2703
2704   if (!isa<ConstantInt>(I->getOperand(1)))
2705     return false;
2706
2707   unsigned Op0Reg = getRegForValue(I->getOperand(0));
2708   if (!Op0Reg)
2709     return false;
2710   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
2711
2712   uint64_t ShiftVal = cast<ConstantInt>(I->getOperand(1))->getZExtValue();
2713
2714   unsigned ResultReg;
2715   if (IsLeftShift)
2716     ResultReg = Emit_LSL_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2717   else {
2718     if (IsArithmetic)
2719       ResultReg = Emit_ASR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2720     else
2721       ResultReg = Emit_LSR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2722   }
2723
2724   if (!ResultReg)
2725     return false;
2726
2727   UpdateValueMap(I, ResultReg);
2728   return true;
2729 }
2730
2731 bool AArch64FastISel::SelectBitCast(const Instruction *I) {
2732   MVT RetVT, SrcVT;
2733
2734   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
2735     return false;
2736   if (!isTypeLegal(I->getType(), RetVT))
2737     return false;
2738
2739   unsigned Opc;
2740   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
2741     Opc = AArch64::FMOVWSr;
2742   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
2743     Opc = AArch64::FMOVXDr;
2744   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
2745     Opc = AArch64::FMOVSWr;
2746   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
2747     Opc = AArch64::FMOVDXr;
2748   else
2749     return false;
2750
2751   unsigned Op0Reg = getRegForValue(I->getOperand(0));
2752   if (!Op0Reg)
2753     return false;
2754   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
2755   unsigned ResultReg = FastEmitInst_r(Opc, TLI.getRegClassFor(RetVT),
2756                                       Op0Reg, Op0IsKill);
2757
2758   if (!ResultReg)
2759     return false;
2760
2761   UpdateValueMap(I, ResultReg);
2762   return true;
2763 }
2764
2765 bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) {
2766   switch (I->getOpcode()) {
2767   default:
2768     break;
2769   case Instruction::Load:
2770     return SelectLoad(I);
2771   case Instruction::Store:
2772     return SelectStore(I);
2773   case Instruction::Br:
2774     return SelectBranch(I);
2775   case Instruction::IndirectBr:
2776     return SelectIndirectBr(I);
2777   case Instruction::FCmp:
2778   case Instruction::ICmp:
2779     return SelectCmp(I);
2780   case Instruction::Select:
2781     return SelectSelect(I);
2782   case Instruction::FPExt:
2783     return SelectFPExt(I);
2784   case Instruction::FPTrunc:
2785     return SelectFPTrunc(I);
2786   case Instruction::FPToSI:
2787     return SelectFPToInt(I, /*Signed=*/true);
2788   case Instruction::FPToUI:
2789     return SelectFPToInt(I, /*Signed=*/false);
2790   case Instruction::SIToFP:
2791     return SelectIntToFP(I, /*Signed=*/true);
2792   case Instruction::UIToFP:
2793     return SelectIntToFP(I, /*Signed=*/false);
2794   case Instruction::SRem:
2795     return SelectRem(I, ISD::SREM);
2796   case Instruction::URem:
2797     return SelectRem(I, ISD::UREM);
2798   case Instruction::Ret:
2799     return SelectRet(I);
2800   case Instruction::Trunc:
2801     return SelectTrunc(I);
2802   case Instruction::ZExt:
2803   case Instruction::SExt:
2804     return SelectIntExt(I);
2805
2806   // FIXME: All of these should really be handled by the target-independent
2807   // selector -> improve FastISel tblgen.
2808   case Instruction::Mul:
2809     return SelectMul(I);
2810   case Instruction::Shl:
2811       return SelectShift(I, /*IsLeftShift=*/true, /*IsArithmetic=*/false);
2812   case Instruction::LShr:
2813     return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/false);
2814   case Instruction::AShr:
2815     return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/true);
2816   case Instruction::BitCast:
2817     return SelectBitCast(I);
2818   }
2819   return false;
2820   // Silence warnings.
2821   (void)&CC_AArch64_DarwinPCS_VarArg;
2822 }
2823
2824 namespace llvm {
2825 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo,
2826                                         const TargetLibraryInfo *libInfo) {
2827   return new AArch64FastISel(funcInfo, libInfo);
2828 }
2829 }