lib/Target/AArch64/AArch64FastISel.cpp

   1 //===-- AArch6464FastISel.cpp - AArch64 FastISel implementation -----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the AArch64-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // AArch64GenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AArch64.h"
  17 #include "AArch64Subtarget.h"
  18 #include "AArch64TargetMachine.h"
  19 #include "MCTargetDesc/AArch64AddressingModes.h"
  20 #include "llvm/Analysis/BranchProbabilityInfo.h"
  21 #include "llvm/CodeGen/CallingConvLower.h"
  22 #include "llvm/CodeGen/FastISel.h"
  23 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  24 #include "llvm/CodeGen/MachineConstantPool.h"
  25 #include "llvm/CodeGen/MachineFrameInfo.h"
  26 #include "llvm/CodeGen/MachineInstrBuilder.h"
  27 #include "llvm/CodeGen/MachineRegisterInfo.h"
  28 #include "llvm/IR/CallingConv.h"
  29 #include "llvm/IR/DataLayout.h"
  30 #include "llvm/IR/DerivedTypes.h"
  31 #include "llvm/IR/Function.h"
  32 #include "llvm/IR/GetElementPtrTypeIterator.h"
  33 #include "llvm/IR/GlobalAlias.h"
  34 #include "llvm/IR/GlobalVariable.h"
  35 #include "llvm/IR/Instructions.h"
  36 #include "llvm/IR/IntrinsicInst.h"
  37 #include "llvm/IR/Operator.h"
  38 #include "llvm/Support/CommandLine.h"
  39 using namespace llvm;
  40
  41 namespace {
  42
  43 class AArch64FastISel : public FastISel {
  44   class Address {
  45   public:
  46     typedef enum {
  47       RegBase,
  48       FrameIndexBase
  49     } BaseKind;
  50
  51   private:
  52     BaseKind Kind;
  53     AArch64_AM::ShiftExtendType ExtType;
  54     union {
  55       unsigned Reg;
  56       int FI;
  57     } Base;
  58     unsigned OffsetReg;
  59     unsigned Shift;
  60     int64_t Offset;
  61     const GlobalValue *GV;
  62
  63   public:
  64     Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
  65       OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
  66     void setKind(BaseKind K) { Kind = K; }
  67     BaseKind getKind() const { return Kind; }
  68     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
  69     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
  70     bool isRegBase() const { return Kind == RegBase; }
  71     bool isFIBase() const { return Kind == FrameIndexBase; }
  72     void setReg(unsigned Reg) {
  73       assert(isRegBase() && "Invalid base register access!");
  74       Base.Reg = Reg;
  75     }
  76     unsigned getReg() const {
  77       assert(isRegBase() && "Invalid base register access!");
  78       return Base.Reg;
  79     }
  80     void setOffsetReg(unsigned Reg) {
  81       assert(isRegBase() && "Invalid offset register access!");
  82       OffsetReg = Reg;
  83     }
  84     unsigned getOffsetReg() const {
  85       assert(isRegBase() && "Invalid offset register access!");
  86       return OffsetReg;
  87     }
  88     void setFI(unsigned FI) {
  89       assert(isFIBase() && "Invalid base frame index  access!");
  90       Base.FI = FI;
  91     }
  92     unsigned getFI() const {
  93       assert(isFIBase() && "Invalid base frame index access!");
  94       return Base.FI;
  95     }
  96     void setOffset(int64_t O) { Offset = O; }
  97     int64_t getOffset() { return Offset; }
  98     void setShift(unsigned S) { Shift = S; }
  99     unsigned getShift() { return Shift; }
 100
 101     void setGlobalValue(const GlobalValue *G) { GV = G; }
 102     const GlobalValue *getGlobalValue() { return GV; }
 103   };
 104
 105   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
 106   /// make the right decision when generating code for different targets.
 107   const AArch64Subtarget *Subtarget;
 108   LLVMContext *Context;
 109
 110   bool FastLowerArguments() override;
 111   bool FastLowerCall(CallLoweringInfo &CLI) override;
 112   bool FastLowerIntrinsicCall(const IntrinsicInst *II) override;
 113
 114 private:
 115   // Selection routines.
 116   bool SelectLoad(const Instruction *I);
 117   bool SelectStore(const Instruction *I);
 118   bool SelectBranch(const Instruction *I);
 119   bool SelectIndirectBr(const Instruction *I);
 120   bool SelectCmp(const Instruction *I);
 121   bool SelectSelect(const Instruction *I);
 122   bool SelectFPExt(const Instruction *I);
 123   bool SelectFPTrunc(const Instruction *I);
 124   bool SelectFPToInt(const Instruction *I, bool Signed);
 125   bool SelectIntToFP(const Instruction *I, bool Signed);
 126   bool SelectRem(const Instruction *I, unsigned ISDOpcode);
 127   bool SelectRet(const Instruction *I);
 128   bool SelectTrunc(const Instruction *I);
 129   bool SelectIntExt(const Instruction *I);
 130   bool SelectMul(const Instruction *I);
 131   bool SelectShift(const Instruction *I, bool IsLeftShift, bool IsArithmetic);
 132   bool SelectBitCast(const Instruction *I);
 133
 134   // Utility helper routines.
 135   bool isTypeLegal(Type *Ty, MVT &VT);
 136   bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
 137   bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
 138   bool ComputeCallAddress(const Value *V, Address &Addr);
 139   bool SimplifyAddress(Address &Addr, MVT VT);
 140   void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 141                             unsigned Flags, unsigned ScaleFactor,
 142                             MachineMemOperand *MMO);
 143   bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
 144   bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
 145                           unsigned Alignment);
 146   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
 147                          const Value *Cond);
 148
 149   // Emit functions.
 150   bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
 151   bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
 152                 MachineMemOperand *MMO = nullptr);
 153   bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
 154                  MachineMemOperand *MMO = nullptr);
 155   unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
 156   unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
 157   unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 158                        unsigned Op1, bool Op1IsKill);
 159   unsigned Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 160                          unsigned Op1, bool Op1IsKill);
 161   unsigned Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 162                          unsigned Op1, bool Op1IsKill);
 163   unsigned Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
 164   unsigned Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
 165   unsigned Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
 166
 167   unsigned AArch64MaterializeInt(const ConstantInt *CI, MVT VT);
 168   unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
 169   unsigned AArch64MaterializeGV(const GlobalValue *GV);
 170
 171   // Call handling routines.
 172 private:
 173   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
 174   bool ProcessCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
 175                        unsigned &NumBytes);
 176   bool FinishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
 177
 178 public:
 179   // Backend specific FastISel code.
 180   unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
 181   unsigned TargetMaterializeConstant(const Constant *C) override;
 182
 183   explicit AArch64FastISel(FunctionLoweringInfo &funcInfo,
 184                          const TargetLibraryInfo *libInfo)
 185       : FastISel(funcInfo, libInfo) {
 186     Subtarget = &TM.getSubtarget<AArch64Subtarget>();
 187     Context = &funcInfo.Fn->getContext();
 188   }
 189
 190   bool TargetSelectInstruction(const Instruction *I) override;
 191
 192 #include "AArch64GenFastISel.inc"
 193 };
 194
 195 } // end anonymous namespace
 196
 197 #include "AArch64GenCallingConv.inc"
 198
 199 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
 200   if (CC == CallingConv::WebKit_JS)
 201     return CC_AArch64_WebKit_JS;
 202   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 203 }
 204
 205 unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
 206   assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
 207          "Alloca should always return a pointer.");
 208
 209   // Don't handle dynamic allocas.
 210   if (!FuncInfo.StaticAllocaMap.count(AI))
 211     return 0;
 212
 213   DenseMap<const AllocaInst *, int>::iterator SI =
 214       FuncInfo.StaticAllocaMap.find(AI);
 215
 216   if (SI != FuncInfo.StaticAllocaMap.end()) {
 217     unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
 218     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 219             ResultReg)
 220         .addFrameIndex(SI->second)
 221         .addImm(0)
 222         .addImm(0);
 223     return ResultReg;
 224   }
 225
 226   return 0;
 227 }
 228
 229 unsigned AArch64FastISel::AArch64MaterializeInt(const ConstantInt *CI, MVT VT) {
 230   if (VT > MVT::i64)
 231     return 0;
 232
 233   if (!CI->isZero())
 234     return FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
 235
 236   // Create a copy from the zero register to materialize a "0" value.
 237   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
 238                                                    : &AArch64::GPR32RegClass;
 239   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
 240   unsigned ResultReg = createResultReg(RC);
 241   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 242           TII.get(TargetOpcode::COPY), ResultReg)
 243     .addReg(ZeroReg, getKillRegState(true));
 244   return ResultReg;
 245 }
 246
 247 unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
 248   if (VT != MVT::f32 && VT != MVT::f64)
 249     return 0;
 250
 251   const APFloat Val = CFP->getValueAPF();
 252   bool Is64Bit = (VT == MVT::f64);
 253
 254   // This checks to see if we can use FMOV instructions to materialize
 255   // a constant, otherwise we have to materialize via the constant pool.
 256   if (TLI.isFPImmLegal(Val, VT)) {
 257     int Imm = Is64Bit ? AArch64_AM::getFP64Imm(Val)
 258                       : AArch64_AM::getFP32Imm(Val);
 259     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
 260     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 261     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 262       .addImm(Imm);
 263     return ResultReg;
 264   }
 265
 266   // Materialize via constant pool.  MachineConstantPool wants an explicit
 267   // alignment.
 268   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
 269   if (Align == 0)
 270     Align = DL.getTypeAllocSize(CFP->getType());
 271
 272   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 273   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 274   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 275           ADRPReg)
 276     .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
 277
 278   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
 279   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 280   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 281     .addReg(ADRPReg)
 282     .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 283   return ResultReg;
 284 }
 285
 286 unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
 287   // We can't handle thread-local variables quickly yet.
 288   if (GV->isThreadLocal())
 289     return 0;
 290
 291   // MachO still uses GOT for large code-model accesses, but ELF requires
 292   // movz/movk sequences, which FastISel doesn't handle yet.
 293   if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
 294     return 0;
 295
 296   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
 297
 298   EVT DestEVT = TLI.getValueType(GV->getType(), true);
 299   if (!DestEVT.isSimple())
 300     return 0;
 301
 302   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 303   unsigned ResultReg;
 304
 305   if (OpFlags & AArch64II::MO_GOT) {
 306     // ADRP + LDRX
 307     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 308             ADRPReg)
 309       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
 310
 311     ResultReg = createResultReg(&AArch64::GPR64RegClass);
 312     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
 313             ResultReg)
 314       .addReg(ADRPReg)
 315       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
 316                         AArch64II::MO_NC);
 317   } else {
 318     // ADRP + ADDX
 319     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 320             ADRPReg)
 321       .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
 322
 323     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 324     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 325             ResultReg)
 326       .addReg(ADRPReg)
 327       .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
 328       .addImm(0);
 329   }
 330   return ResultReg;
 331 }
 332
 333 unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
 334   EVT CEVT = TLI.getValueType(C->getType(), true);
 335
 336   // Only handle simple types.
 337   if (!CEVT.isSimple())
 338     return 0;
 339   MVT VT = CEVT.getSimpleVT();
 340
 341   if (const auto *CI = dyn_cast<ConstantInt>(C))
 342     return AArch64MaterializeInt(CI, VT);
 343   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 344     return AArch64MaterializeFP(CFP, VT);
 345   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
 346     return AArch64MaterializeGV(GV);
 347
 348   return 0;
 349 }
 350
 351 // Computes the address to get to an object.
 352 bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr, Type *Ty)
 353 {
 354   const User *U = nullptr;
 355   unsigned Opcode = Instruction::UserOp1;
 356   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
 357     // Don't walk into other basic blocks unless the object is an alloca from
 358     // another block, otherwise it may not have a virtual register assigned.
 359     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
 360         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 361       Opcode = I->getOpcode();
 362       U = I;
 363     }
 364   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
 365     Opcode = C->getOpcode();
 366     U = C;
 367   }
 368
 369   if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
 370     if (Ty->getAddressSpace() > 255)
 371       // Fast instruction selection doesn't support the special
 372       // address spaces.
 373       return false;
 374
 375   switch (Opcode) {
 376   default:
 377     break;
 378   case Instruction::BitCast: {
 379     // Look through bitcasts.
 380     return ComputeAddress(U->getOperand(0), Addr, Ty);
 381   }
 382   case Instruction::IntToPtr: {
 383     // Look past no-op inttoptrs.
 384     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 385       return ComputeAddress(U->getOperand(0), Addr, Ty);
 386     break;
 387   }
 388   case Instruction::PtrToInt: {
 389     // Look past no-op ptrtoints.
 390     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
 391       return ComputeAddress(U->getOperand(0), Addr, Ty);
 392     break;
 393   }
 394   case Instruction::GetElementPtr: {
 395     Address SavedAddr = Addr;
 396     uint64_t TmpOffset = Addr.getOffset();
 397
 398     // Iterate through the GEP folding the constants into offsets where
 399     // we can.
 400     gep_type_iterator GTI = gep_type_begin(U);
 401     for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
 402          ++i, ++GTI) {
 403       const Value *Op = *i;
 404       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
 405         const StructLayout *SL = DL.getStructLayout(STy);
 406         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
 407         TmpOffset += SL->getElementOffset(Idx);
 408       } else {
 409         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
 410         for (;;) {
 411           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
 412             // Constant-offset addressing.
 413             TmpOffset += CI->getSExtValue() * S;
 414             break;
 415           }
 416           if (canFoldAddIntoGEP(U, Op)) {
 417             // A compatible add with a constant operand. Fold the constant.
 418             ConstantInt *CI =
 419                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
 420             TmpOffset += CI->getSExtValue() * S;
 421             // Iterate on the other operand.
 422             Op = cast<AddOperator>(Op)->getOperand(0);
 423             continue;
 424           }
 425           // Unsupported
 426           goto unsupported_gep;
 427         }
 428       }
 429     }
 430
 431     // Try to grab the base operand now.
 432     Addr.setOffset(TmpOffset);
 433     if (ComputeAddress(U->getOperand(0), Addr, Ty))
 434       return true;
 435
 436     // We failed, restore everything and try the other options.
 437     Addr = SavedAddr;
 438
 439   unsupported_gep:
 440     break;
 441   }
 442   case Instruction::Alloca: {
 443     const AllocaInst *AI = cast<AllocaInst>(Obj);
 444     DenseMap<const AllocaInst *, int>::iterator SI =
 445         FuncInfo.StaticAllocaMap.find(AI);
 446     if (SI != FuncInfo.StaticAllocaMap.end()) {
 447       Addr.setKind(Address::FrameIndexBase);
 448       Addr.setFI(SI->second);
 449       return true;
 450     }
 451     break;
 452   }
 453   case Instruction::Add: {
 454     // Adds of constants are common and easy enough.
 455     const Value *LHS = U->getOperand(0);
 456     const Value *RHS = U->getOperand(1);
 457
 458     if (isa<ConstantInt>(LHS))
 459       std::swap(LHS, RHS);
 460
 461     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 462       Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
 463       return ComputeAddress(LHS, Addr, Ty);
 464     }
 465
 466     Address Backup = Addr;
 467     if (ComputeAddress(LHS, Addr, Ty) && ComputeAddress(RHS, Addr, Ty))
 468       return true;
 469     Addr = Backup;
 470
 471     break;
 472   }
 473   case Instruction::Shl:
 474     if (Addr.getOffsetReg())
 475       break;
 476
 477     if (const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
 478       unsigned Val = CI->getZExtValue();
 479       if (Val < 1 || Val > 3)
 480         break;
 481
 482       uint64_t NumBytes = 0;
 483       if (Ty && Ty->isSized()) {
 484         uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 485         NumBytes = NumBits / 8;
 486         if (!isPowerOf2_64(NumBits))
 487           NumBytes = 0;
 488       }
 489
 490       if (NumBytes != (1 << Val))
 491         break;
 492
 493       Addr.setShift(Val);
 494       Addr.setExtendType(AArch64_AM::LSL);
 495
 496       if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
 497         if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 498           U = I;
 499
 500       if (const auto *ZE = dyn_cast<ZExtInst>(U))
 501         if (ZE->getOperand(0)->getType()->isIntegerTy(32))
 502           Addr.setExtendType(AArch64_AM::UXTW);
 503
 504       if (const auto *SE = dyn_cast<SExtInst>(U))
 505         if (SE->getOperand(0)->getType()->isIntegerTy(32))
 506           Addr.setExtendType(AArch64_AM::SXTW);
 507
 508       unsigned Reg = getRegForValue(U->getOperand(0));
 509       if (!Reg)
 510         return false;
 511       Addr.setOffsetReg(Reg);
 512       return true;
 513     }
 514     break;
 515   }
 516
 517   if (Addr.getReg()) {
 518     if (!Addr.getOffsetReg()) {
 519       unsigned Reg = getRegForValue(Obj);
 520       if (!Reg)
 521         return false;
 522       Addr.setOffsetReg(Reg);
 523       return true;
 524     }
 525     return false;
 526   }
 527
 528   unsigned Reg = getRegForValue(Obj);
 529   if (!Reg)
 530     return false;
 531   Addr.setReg(Reg);
 532   return true;
 533 }
 534
 535 bool AArch64FastISel::ComputeCallAddress(const Value *V, Address &Addr) {
 536   const User *U = nullptr;
 537   unsigned Opcode = Instruction::UserOp1;
 538   bool InMBB = true;
 539
 540   if (const auto *I = dyn_cast<Instruction>(V)) {
 541     Opcode = I->getOpcode();
 542     U = I;
 543     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
 544   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
 545     Opcode = C->getOpcode();
 546     U = C;
 547   }
 548
 549   switch (Opcode) {
 550   default: break;
 551   case Instruction::BitCast:
 552     // Look past bitcasts if its operand is in the same BB.
 553     if (InMBB)
 554       return ComputeCallAddress(U->getOperand(0), Addr);
 555     break;
 556   case Instruction::IntToPtr:
 557     // Look past no-op inttoptrs if its operand is in the same BB.
 558     if (InMBB &&
 559         TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 560       return ComputeCallAddress(U->getOperand(0), Addr);
 561     break;
 562   case Instruction::PtrToInt:
 563     // Look past no-op ptrtoints if its operand is in the same BB.
 564     if (InMBB &&
 565         TLI.getValueType(U->getType()) == TLI.getPointerTy())
 566       return ComputeCallAddress(U->getOperand(0), Addr);
 567     break;
 568   }
 569
 570   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
 571     Addr.setGlobalValue(GV);
 572     return true;
 573   }
 574
 575   // If all else fails, try to materialize the value in a register.
 576   if (!Addr.getGlobalValue()) {
 577     Addr.setReg(getRegForValue(V));
 578     return Addr.getReg() != 0;
 579   }
 580
 581   return false;
 582 }
 583
 584
 585 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
 586   EVT evt = TLI.getValueType(Ty, true);
 587
 588   // Only handle simple types.
 589   if (evt == MVT::Other || !evt.isSimple())
 590     return false;
 591   VT = evt.getSimpleVT();
 592
 593   // This is a legal type, but it's not something we handle in fast-isel.
 594   if (VT == MVT::f128)
 595     return false;
 596
 597   // Handle all other legal types, i.e. a register that will directly hold this
 598   // value.
 599   return TLI.isTypeLegal(VT);
 600 }
 601
 602 bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
 603   if (isTypeLegal(Ty, VT))
 604     return true;
 605
 606   // If this is a type than can be sign or zero-extended to a basic operation
 607   // go ahead and accept it now. For stores, this reflects truncation.
 608   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
 609     return true;
 610
 611   return false;
 612 }
 613
 614 bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT) {
 615   unsigned ScaleFactor;
 616   switch (VT.SimpleTy) {
 617   default: return false;
 618   case MVT::i1:  // fall-through
 619   case MVT::i8:  ScaleFactor = 1; break;
 620   case MVT::i16: ScaleFactor = 2; break;
 621   case MVT::i32: // fall-through
 622   case MVT::f32: ScaleFactor = 4; break;
 623   case MVT::i64: // fall-through
 624   case MVT::f64: ScaleFactor = 8; break;
 625   }
 626
 627   bool ImmediateOffsetNeedsLowering = false;
 628   bool RegisterOffsetNeedsLowering = false;
 629   int64_t Offset = Addr.getOffset();
 630   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
 631     ImmediateOffsetNeedsLowering = true;
 632   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
 633            !isUInt<12>(Offset / ScaleFactor))
 634     ImmediateOffsetNeedsLowering = true;
 635
 636   // Cannot encode an offset register and an immediate offset in the same
 637   // instruction. Fold the immediate offset into the load/store instruction and
 638   // emit an additonal add to take care of the offset register.
 639   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() &&
 640       Addr.getOffsetReg())
 641     RegisterOffsetNeedsLowering = true;
 642
 643   // If this is a stack pointer and the offset needs to be simplified then put
 644   // the alloca address into a register, set the base type back to register and
 645   // continue. This should almost never happen.
 646   if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) {
 647     unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
 648     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 649             ResultReg)
 650       .addFrameIndex(Addr.getFI())
 651       .addImm(0)
 652       .addImm(0);
 653     Addr.setKind(Address::RegBase);
 654     Addr.setReg(ResultReg);
 655   }
 656
 657   if (RegisterOffsetNeedsLowering) {
 658     unsigned ResultReg = 0;
 659     if (Addr.getReg()) {
 660       ResultReg = createResultReg(&AArch64::GPR64RegClass);
 661       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 662               TII.get(AArch64::ADDXrs), ResultReg)
 663         .addReg(Addr.getReg())
 664         .addReg(Addr.getOffsetReg())
 665         .addImm(Addr.getShift());
 666     } else
 667       ResultReg = Emit_LSL_ri(MVT::i64, Addr.getOffsetReg(),
 668                               /*Op0IsKill=*/false, Addr.getShift());
 669     if (!ResultReg)
 670       return false;
 671
 672     Addr.setReg(ResultReg);
 673     Addr.setOffsetReg(0);
 674     Addr.setShift(0);
 675   }
 676
 677   // Since the offset is too large for the load/store instruction get the
 678   // reg+offset into a register.
 679   if (ImmediateOffsetNeedsLowering) {
 680     unsigned ResultReg = 0;
 681     if (Addr.getReg())
 682       ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(),
 683                                /*IsKill=*/false, Offset, MVT::i64);
 684     else
 685       ResultReg = FastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
 686
 687     if (!ResultReg)
 688       return false;
 689     Addr.setReg(ResultReg);
 690     Addr.setOffset(0);
 691   }
 692   return true;
 693 }
 694
 695 void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
 696                                            const MachineInstrBuilder &MIB,
 697                                            unsigned Flags,
 698                                            unsigned ScaleFactor,
 699                                            MachineMemOperand *MMO) {
 700   int64_t Offset = Addr.getOffset() / ScaleFactor;
 701   // Frame base works a bit differently. Handle it separately.
 702   if (Addr.isFIBase()) {
 703     int FI = Addr.getFI();
 704     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
 705     // and alignment should be based on the VT.
 706     MMO = FuncInfo.MF->getMachineMemOperand(
 707       MachinePointerInfo::getFixedStack(FI, Offset), Flags,
 708       MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
 709     // Now add the rest of the operands.
 710     MIB.addFrameIndex(FI).addImm(Offset);
 711   } else {
 712     assert(Addr.isRegBase() && "Unexpected address kind.");
 713     if (Addr.getOffsetReg()) {
 714       assert(Addr.getOffset() == 0 && "Unexpected offset");
 715       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
 716                       Addr.getExtendType() == AArch64_AM::SXTX;
 717       MIB.addReg(Addr.getReg());
 718       MIB.addReg(Addr.getOffsetReg());
 719       MIB.addImm(IsSigned);
 720       MIB.addImm(Addr.getShift() != 0);
 721     } else {
 722       MIB.addReg(Addr.getReg());
 723       MIB.addImm(Offset);
 724     }
 725   }
 726
 727   if (MMO)
 728     MIB.addMemOperand(MMO);
 729 }
 730
 731 bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
 732                                MachineMemOperand *MMO) {
 733   // Simplify this down to something we can handle.
 734   if (!SimplifyAddress(Addr, VT))
 735     return false;
 736
 737   unsigned ScaleFactor;
 738   switch (VT.SimpleTy) {
 739   default: llvm_unreachable("Unexpected value type.");
 740   case MVT::i1:  // fall-through
 741   case MVT::i8:  ScaleFactor = 1; break;
 742   case MVT::i16: ScaleFactor = 2; break;
 743   case MVT::i32: // fall-through
 744   case MVT::f32: ScaleFactor = 4; break;
 745   case MVT::i64: // fall-through
 746   case MVT::f64: ScaleFactor = 8; break;
 747   }
 748
 749   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
 750   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
 751   bool UseScaled = true;
 752   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
 753     UseScaled = false;
 754     ScaleFactor = 1;
 755   }
 756
 757   static const unsigned OpcTable[4][6] = {
 758     { AArch64::LDURBBi,  AArch64::LDURHHi,  AArch64::LDURWi,  AArch64::LDURXi,
 759       AArch64::LDURSi,   AArch64::LDURDi },
 760     { AArch64::LDRBBui,  AArch64::LDRHHui,  AArch64::LDRWui,  AArch64::LDRXui,
 761       AArch64::LDRSui,   AArch64::LDRDui },
 762     { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX,
 763       AArch64::LDRSroX,  AArch64::LDRDroX },
 764     { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW,
 765       AArch64::LDRSroW,  AArch64::LDRDroW }
 766   };
 767
 768   unsigned Opc;
 769   const TargetRegisterClass *RC;
 770   bool VTIsi1 = false;
 771   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
 772                       Addr.getOffsetReg();
 773   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
 774   if (Addr.getExtendType() == AArch64_AM::UXTW ||
 775       Addr.getExtendType() == AArch64_AM::SXTW)
 776     Idx++;
 777
 778   switch (VT.SimpleTy) {
 779   default: llvm_unreachable("Unexpected value type.");
 780   case MVT::i1:  VTIsi1 = true; // Intentional fall-through.
 781   case MVT::i8:  Opc = OpcTable[Idx][0]; RC = &AArch64::GPR32RegClass; break;
 782   case MVT::i16: Opc = OpcTable[Idx][1]; RC = &AArch64::GPR32RegClass; break;
 783   case MVT::i32: Opc = OpcTable[Idx][2]; RC = &AArch64::GPR32RegClass; break;
 784   case MVT::i64: Opc = OpcTable[Idx][3]; RC = &AArch64::GPR64RegClass; break;
 785   case MVT::f32: Opc = OpcTable[Idx][4]; RC = &AArch64::FPR32RegClass; break;
 786   case MVT::f64: Opc = OpcTable[Idx][5]; RC = &AArch64::FPR64RegClass; break;
 787   }
 788
 789   // Create the base instruction, then add the operands.
 790   ResultReg = createResultReg(RC);
 791   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 792                                     TII.get(Opc), ResultReg);
 793   AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
 794
 795   // Loading an i1 requires special handling.
 796   if (VTIsi1) {
 797     MRI.constrainRegClass(ResultReg, &AArch64::GPR32RegClass);
 798     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
 799     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
 800             ANDReg)
 801       .addReg(ResultReg)
 802       .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
 803     ResultReg = ANDReg;
 804   }
 805   return true;
 806 }
 807
 808 bool AArch64FastISel::SelectLoad(const Instruction *I) {
 809   MVT VT;
 810   // Verify we have a legal type before going any further.  Currently, we handle
 811   // simple types that will directly fit in a register (i32/f32/i64/f64) or
 812   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
 813   if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic())
 814     return false;
 815
 816   // See if we can handle this address.
 817   Address Addr;
 818   if (!ComputeAddress(I->getOperand(0), Addr, I->getType()))
 819     return false;
 820
 821   unsigned ResultReg;
 822   if (!EmitLoad(VT, ResultReg, Addr, createMachineMemOperandFor(I)))
 823     return false;
 824
 825   UpdateValueMap(I, ResultReg);
 826   return true;
 827 }
 828
 829 bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
 830                                 MachineMemOperand *MMO) {
 831   // Simplify this down to something we can handle.
 832   if (!SimplifyAddress(Addr, VT))
 833     return false;
 834
 835   unsigned ScaleFactor;
 836   switch (VT.SimpleTy) {
 837   default: llvm_unreachable("Unexpected value type.");
 838   case MVT::i1:  // fall-through
 839   case MVT::i8:  ScaleFactor = 1; break;
 840   case MVT::i16: ScaleFactor = 2; break;
 841   case MVT::i32: // fall-through
 842   case MVT::f32: ScaleFactor = 4; break;
 843   case MVT::i64: // fall-through
 844   case MVT::f64: ScaleFactor = 8; break;
 845   }
 846
 847   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
 848   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
 849   bool UseScaled = true;
 850   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
 851     UseScaled = false;
 852     ScaleFactor = 1;
 853   }
 854
 855
 856   static const unsigned OpcTable[4][6] = {
 857     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
 858       AArch64::STURSi,   AArch64::STURDi },
 859     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
 860       AArch64::STRSui,   AArch64::STRDui },
 861     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
 862       AArch64::STRSroX,  AArch64::STRDroX },
 863     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
 864       AArch64::STRSroW,  AArch64::STRDroW }
 865
 866   };
 867
 868   unsigned Opc;
 869   bool VTIsi1 = false;
 870   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
 871                       Addr.getOffsetReg();
 872   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
 873   if (Addr.getExtendType() == AArch64_AM::UXTW ||
 874       Addr.getExtendType() == AArch64_AM::SXTW)
 875     Idx++;
 876
 877   switch (VT.SimpleTy) {
 878   default: llvm_unreachable("Unexpected value type.");
 879   case MVT::i1:  VTIsi1 = true;
 880   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
 881   case MVT::i16: Opc = OpcTable[Idx][1]; break;
 882   case MVT::i32: Opc = OpcTable[Idx][2]; break;
 883   case MVT::i64: Opc = OpcTable[Idx][3]; break;
 884   case MVT::f32: Opc = OpcTable[Idx][4]; break;
 885   case MVT::f64: Opc = OpcTable[Idx][5]; break;
 886   }
 887
 888   // Storing an i1 requires special handling.
 889   if (VTIsi1) {
 890     MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
 891     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
 892     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
 893             ANDReg)
 894       .addReg(SrcReg)
 895       .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
 896     SrcReg = ANDReg;
 897   }
 898   // Create the base instruction, then add the operands.
 899   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 900                                     TII.get(Opc))
 901                               .addReg(SrcReg);
 902   AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
 903
 904   return true;
 905 }
 906
 907 bool AArch64FastISel::SelectStore(const Instruction *I) {
 908   MVT VT;
 909   Value *Op0 = I->getOperand(0);
 910   // Verify we have a legal type before going any further.  Currently, we handle
 911   // simple types that will directly fit in a register (i32/f32/i64/f64) or
 912   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
 913   if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
 914       cast<StoreInst>(I)->isAtomic())
 915     return false;
 916
 917   // Get the value to be stored into a register.
 918   unsigned SrcReg = getRegForValue(Op0);
 919   if (SrcReg == 0)
 920     return false;
 921
 922   // See if we can handle this address.
 923   Address Addr;
 924   if (!ComputeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
 925     return false;
 926
 927   if (!EmitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
 928     return false;
 929   return true;
 930 }
 931
 932 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
 933   switch (Pred) {
 934   case CmpInst::FCMP_ONE:
 935   case CmpInst::FCMP_UEQ:
 936   default:
 937     // AL is our "false" for now. The other two need more compares.
 938     return AArch64CC::AL;
 939   case CmpInst::ICMP_EQ:
 940   case CmpInst::FCMP_OEQ:
 941     return AArch64CC::EQ;
 942   case CmpInst::ICMP_SGT:
 943   case CmpInst::FCMP_OGT:
 944     return AArch64CC::GT;
 945   case CmpInst::ICMP_SGE:
 946   case CmpInst::FCMP_OGE:
 947     return AArch64CC::GE;
 948   case CmpInst::ICMP_UGT:
 949   case CmpInst::FCMP_UGT:
 950     return AArch64CC::HI;
 951   case CmpInst::FCMP_OLT:
 952     return AArch64CC::MI;
 953   case CmpInst::ICMP_ULE:
 954   case CmpInst::FCMP_OLE:
 955     return AArch64CC::LS;
 956   case CmpInst::FCMP_ORD:
 957     return AArch64CC::VC;
 958   case CmpInst::FCMP_UNO:
 959     return AArch64CC::VS;
 960   case CmpInst::FCMP_UGE:
 961     return AArch64CC::PL;
 962   case CmpInst::ICMP_SLT:
 963   case CmpInst::FCMP_ULT:
 964     return AArch64CC::LT;
 965   case CmpInst::ICMP_SLE:
 966   case CmpInst::FCMP_ULE:
 967     return AArch64CC::LE;
 968   case CmpInst::FCMP_UNE:
 969   case CmpInst::ICMP_NE:
 970     return AArch64CC::NE;
 971   case CmpInst::ICMP_UGE:
 972     return AArch64CC::HS;
 973   case CmpInst::ICMP_ULT:
 974     return AArch64CC::LO;
 975   }
 976 }
 977
 978 bool AArch64FastISel::SelectBranch(const Instruction *I) {
 979   const BranchInst *BI = cast<BranchInst>(I);
 980   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
 981   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
 982
 983   AArch64CC::CondCode CC = AArch64CC::NE;
 984   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
 985     if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
 986       // We may not handle every CC for now.
 987       CC = getCompareCC(CI->getPredicate());
 988       if (CC == AArch64CC::AL)
 989         return false;
 990
 991       // Emit the cmp.
 992       if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
 993         return false;
 994
 995       // Emit the branch.
 996       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
 997           .addImm(CC)
 998           .addMBB(TBB);
 999
1000       // Obtain the branch weight and add the TrueBB to the successor list.
1001       uint32_t BranchWeight = 0;
1002       if (FuncInfo.BPI)
1003         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1004                                                   TBB->getBasicBlock());
1005       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1006
1007       FastEmitBranch(FBB, DbgLoc);
1008       return true;
1009     }
1010   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1011     MVT SrcVT;
1012     if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1013         (isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) {
1014       unsigned CondReg = getRegForValue(TI->getOperand(0));
1015       if (CondReg == 0)
1016         return false;
1017
1018       // Issue an extract_subreg to get the lower 32-bits.
1019       if (SrcVT == MVT::i64)
1020         CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true,
1021                                              AArch64::sub_32);
1022
1023       MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
1024       unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
1025       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1026               TII.get(AArch64::ANDWri), ANDReg)
1027           .addReg(CondReg)
1028           .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1029       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1030               TII.get(AArch64::SUBSWri))
1031           .addReg(ANDReg)
1032           .addReg(ANDReg)
1033           .addImm(0)
1034           .addImm(0);
1035
1036       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1037         std::swap(TBB, FBB);
1038         CC = AArch64CC::EQ;
1039       }
1040       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1041           .addImm(CC)
1042           .addMBB(TBB);
1043
1044       // Obtain the branch weight and add the TrueBB to the successor list.
1045       uint32_t BranchWeight = 0;
1046       if (FuncInfo.BPI)
1047         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1048                                                   TBB->getBasicBlock());
1049       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1050
1051       FastEmitBranch(FBB, DbgLoc);
1052       return true;
1053     }
1054   } else if (const ConstantInt *CI =
1055                  dyn_cast<ConstantInt>(BI->getCondition())) {
1056     uint64_t Imm = CI->getZExtValue();
1057     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
1058     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
1059         .addMBB(Target);
1060
1061     // Obtain the branch weight and add the target to the successor list.
1062     uint32_t BranchWeight = 0;
1063     if (FuncInfo.BPI)
1064       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1065                                                  Target->getBasicBlock());
1066     FuncInfo.MBB->addSuccessor(Target, BranchWeight);
1067     return true;
1068   } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
1069     // Fake request the condition, otherwise the intrinsic might be completely
1070     // optimized away.
1071     unsigned CondReg = getRegForValue(BI->getCondition());
1072     if (!CondReg)
1073       return false;
1074
1075     // Emit the branch.
1076     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1077       .addImm(CC)
1078       .addMBB(TBB);
1079
1080     // Obtain the branch weight and add the TrueBB to the successor list.
1081     uint32_t BranchWeight = 0;
1082     if (FuncInfo.BPI)
1083       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1084                                                  TBB->getBasicBlock());
1085     FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1086
1087     FastEmitBranch(FBB, DbgLoc);
1088     return true;
1089   }
1090
1091   unsigned CondReg = getRegForValue(BI->getCondition());
1092   if (CondReg == 0)
1093     return false;
1094
1095   // We've been divorced from our compare!  Our block was split, and
1096   // now our compare lives in a predecessor block.  We musn't
1097   // re-compare here, as the children of the compare aren't guaranteed
1098   // live across the block boundary (we *could* check for this).
1099   // Regardless, the compare has been done in the predecessor block,
1100   // and it left a value for us in a virtual register.  Ergo, we test
1101   // the one-bit value left in the virtual register.
1102   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri),
1103           AArch64::WZR)
1104       .addReg(CondReg)
1105       .addImm(0)
1106       .addImm(0);
1107
1108   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1109     std::swap(TBB, FBB);
1110     CC = AArch64CC::EQ;
1111   }
1112
1113   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1114       .addImm(CC)
1115       .addMBB(TBB);
1116
1117   // Obtain the branch weight and add the TrueBB to the successor list.
1118   uint32_t BranchWeight = 0;
1119   if (FuncInfo.BPI)
1120     BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1121                                                TBB->getBasicBlock());
1122   FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1123
1124   FastEmitBranch(FBB, DbgLoc);
1125   return true;
1126 }
1127
1128 bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
1129   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
1130   unsigned AddrReg = getRegForValue(BI->getOperand(0));
1131   if (AddrReg == 0)
1132     return false;
1133
1134   // Emit the indirect branch.
1135   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BR))
1136       .addReg(AddrReg);
1137
1138   // Make sure the CFG is up-to-date.
1139   for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
1140     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
1141
1142   return true;
1143 }
1144
1145 bool AArch64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) {
1146   Type *Ty = Src1Value->getType();
1147   EVT SrcEVT = TLI.getValueType(Ty, true);
1148   if (!SrcEVT.isSimple())
1149     return false;
1150   MVT SrcVT = SrcEVT.getSimpleVT();
1151
1152   // Check to see if the 2nd operand is a constant that we can encode directly
1153   // in the compare.
1154   uint64_t Imm;
1155   bool UseImm = false;
1156   bool isNegativeImm = false;
1157   if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
1158     if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
1159         SrcVT == MVT::i8 || SrcVT == MVT::i1) {
1160       const APInt &CIVal = ConstInt->getValue();
1161
1162       Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue();
1163       if (CIVal.isNegative()) {
1164         isNegativeImm = true;
1165         Imm = -Imm;
1166       }
1167       // FIXME: We can handle more immediates using shifts.
1168       UseImm = ((Imm & 0xfff) == Imm);
1169     }
1170   } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
1171     if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
1172       if (ConstFP->isZero() && !ConstFP->isNegative())
1173         UseImm = true;
1174   }
1175
1176   unsigned ZReg;
1177   unsigned CmpOpc;
1178   bool isICmp = true;
1179   bool needsExt = false;
1180   switch (SrcVT.SimpleTy) {
1181   default:
1182     return false;
1183   case MVT::i1:
1184   case MVT::i8:
1185   case MVT::i16:
1186     needsExt = true;
1187   // Intentional fall-through.
1188   case MVT::i32:
1189     ZReg = AArch64::WZR;
1190     if (UseImm)
1191       CmpOpc = isNegativeImm ? AArch64::ADDSWri : AArch64::SUBSWri;
1192     else
1193       CmpOpc = AArch64::SUBSWrr;
1194     break;
1195   case MVT::i64:
1196     ZReg = AArch64::XZR;
1197     if (UseImm)
1198       CmpOpc = isNegativeImm ? AArch64::ADDSXri : AArch64::SUBSXri;
1199     else
1200       CmpOpc = AArch64::SUBSXrr;
1201     break;
1202   case MVT::f32:
1203     isICmp = false;
1204     CmpOpc = UseImm ? AArch64::FCMPSri : AArch64::FCMPSrr;
1205     break;
1206   case MVT::f64:
1207     isICmp = false;
1208     CmpOpc = UseImm ? AArch64::FCMPDri : AArch64::FCMPDrr;
1209     break;
1210   }
1211
1212   unsigned SrcReg1 = getRegForValue(Src1Value);
1213   if (SrcReg1 == 0)
1214     return false;
1215
1216   unsigned SrcReg2;
1217   if (!UseImm) {
1218     SrcReg2 = getRegForValue(Src2Value);
1219     if (SrcReg2 == 0)
1220       return false;
1221   }
1222
1223   // We have i1, i8, or i16, we need to either zero extend or sign extend.
1224   if (needsExt) {
1225     SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
1226     if (SrcReg1 == 0)
1227       return false;
1228     if (!UseImm) {
1229       SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
1230       if (SrcReg2 == 0)
1231         return false;
1232     }
1233   }
1234
1235   if (isICmp) {
1236     if (UseImm)
1237       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1238           .addReg(ZReg)
1239           .addReg(SrcReg1)
1240           .addImm(Imm)
1241           .addImm(0);
1242     else
1243       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1244           .addReg(ZReg)
1245           .addReg(SrcReg1)
1246           .addReg(SrcReg2);
1247   } else {
1248     if (UseImm)
1249       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1250           .addReg(SrcReg1);
1251     else
1252       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1253           .addReg(SrcReg1)
1254           .addReg(SrcReg2);
1255   }
1256   return true;
1257 }
1258
1259 bool AArch64FastISel::SelectCmp(const Instruction *I) {
1260   const CmpInst *CI = cast<CmpInst>(I);
1261
1262   // We may not handle every CC for now.
1263   AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
1264   if (CC == AArch64CC::AL)
1265     return false;
1266
1267   // Emit the cmp.
1268   if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1269     return false;
1270
1271   // Now set a register based on the comparison.
1272   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
1273   unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
1274   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
1275           ResultReg)
1276       .addReg(AArch64::WZR)
1277       .addReg(AArch64::WZR)
1278       .addImm(invertedCC);
1279
1280   UpdateValueMap(I, ResultReg);
1281   return true;
1282 }
1283
1284 bool AArch64FastISel::SelectSelect(const Instruction *I) {
1285   const SelectInst *SI = cast<SelectInst>(I);
1286
1287   EVT DestEVT = TLI.getValueType(SI->getType(), true);
1288   if (!DestEVT.isSimple())
1289     return false;
1290
1291   MVT DestVT = DestEVT.getSimpleVT();
1292   if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
1293       DestVT != MVT::f64)
1294     return false;
1295
1296   unsigned SelectOpc;
1297   switch (DestVT.SimpleTy) {
1298   default: return false;
1299   case MVT::i32: SelectOpc = AArch64::CSELWr;    break;
1300   case MVT::i64: SelectOpc = AArch64::CSELXr;    break;
1301   case MVT::f32: SelectOpc = AArch64::FCSELSrrr; break;
1302   case MVT::f64: SelectOpc = AArch64::FCSELDrrr; break;
1303   }
1304
1305   const Value *Cond = SI->getCondition();
1306   bool NeedTest = true;
1307   AArch64CC::CondCode CC = AArch64CC::NE;
1308   if (foldXALUIntrinsic(CC, I, Cond))
1309     NeedTest = false;
1310
1311   unsigned CondReg = getRegForValue(Cond);
1312   if (!CondReg)
1313     return false;
1314   bool CondIsKill = hasTrivialKill(Cond);
1315
1316   if (NeedTest) {
1317     MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
1318     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
1319     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
1320             ANDReg)
1321       .addReg(CondReg, getKillRegState(CondIsKill))
1322       .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1323
1324     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri))
1325       .addReg(ANDReg)
1326       .addReg(ANDReg)
1327       .addImm(0)
1328       .addImm(0);
1329   }
1330
1331   unsigned TrueReg = getRegForValue(SI->getTrueValue());
1332   bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
1333
1334   unsigned FalseReg = getRegForValue(SI->getFalseValue());
1335   bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
1336
1337   if (!TrueReg || !FalseReg)
1338     return false;
1339
1340   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
1341   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc),
1342           ResultReg)
1343     .addReg(TrueReg, getKillRegState(TrueIsKill))
1344     .addReg(FalseReg, getKillRegState(FalseIsKill))
1345     .addImm(CC);
1346
1347   UpdateValueMap(I, ResultReg);
1348   return true;
1349 }
1350
1351 bool AArch64FastISel::SelectFPExt(const Instruction *I) {
1352   Value *V = I->getOperand(0);
1353   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
1354     return false;
1355
1356   unsigned Op = getRegForValue(V);
1357   if (Op == 0)
1358     return false;
1359
1360   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
1361   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
1362           ResultReg).addReg(Op);
1363   UpdateValueMap(I, ResultReg);
1364   return true;
1365 }
1366
1367 bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
1368   Value *V = I->getOperand(0);
1369   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
1370     return false;
1371
1372   unsigned Op = getRegForValue(V);
1373   if (Op == 0)
1374     return false;
1375
1376   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
1377   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
1378           ResultReg).addReg(Op);
1379   UpdateValueMap(I, ResultReg);
1380   return true;
1381 }
1382
1383 // FPToUI and FPToSI
1384 bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
1385   MVT DestVT;
1386   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1387     return false;
1388
1389   unsigned SrcReg = getRegForValue(I->getOperand(0));
1390   if (SrcReg == 0)
1391     return false;
1392
1393   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1394   if (SrcVT == MVT::f128)
1395     return false;
1396
1397   unsigned Opc;
1398   if (SrcVT == MVT::f64) {
1399     if (Signed)
1400       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
1401     else
1402       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
1403   } else {
1404     if (Signed)
1405       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
1406     else
1407       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
1408   }
1409   unsigned ResultReg = createResultReg(
1410       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
1411   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1412       .addReg(SrcReg);
1413   UpdateValueMap(I, ResultReg);
1414   return true;
1415 }
1416
1417 bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
1418   MVT DestVT;
1419   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1420     return false;
1421   assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
1422           "Unexpected value type.");
1423
1424   unsigned SrcReg = getRegForValue(I->getOperand(0));
1425   if (SrcReg == 0)
1426     return false;
1427
1428   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1429
1430   // Handle sign-extension.
1431   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
1432     SrcReg =
1433         EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
1434     if (SrcReg == 0)
1435       return false;
1436   }
1437
1438   MRI.constrainRegClass(SrcReg, SrcVT == MVT::i64 ? &AArch64::GPR64RegClass
1439                                                   : &AArch64::GPR32RegClass);
1440
1441   unsigned Opc;
1442   if (SrcVT == MVT::i64) {
1443     if (Signed)
1444       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
1445     else
1446       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
1447   } else {
1448     if (Signed)
1449       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
1450     else
1451       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
1452   }
1453
1454   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
1455   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1456       .addReg(SrcReg);
1457   UpdateValueMap(I, ResultReg);
1458   return true;
1459 }
1460
1461 bool AArch64FastISel::FastLowerArguments() {
1462   if (!FuncInfo.CanLowerReturn)
1463     return false;
1464
1465   const Function *F = FuncInfo.Fn;
1466   if (F->isVarArg())
1467     return false;
1468
1469   CallingConv::ID CC = F->getCallingConv();
1470   if (CC != CallingConv::C)
1471     return false;
1472
1473   // Only handle simple cases like i1/i8/i16/i32/i64/f32/f64 of up to 8 GPR and
1474   // FPR each.
1475   unsigned GPRCnt = 0;
1476   unsigned FPRCnt = 0;
1477   unsigned Idx = 0;
1478   for (auto const &Arg : F->args()) {
1479     // The first argument is at index 1.
1480     ++Idx;
1481     if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
1482         F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
1483         F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
1484         F->getAttributes().hasAttribute(Idx, Attribute::Nest))
1485       return false;
1486
1487     Type *ArgTy = Arg.getType();
1488     if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
1489       return false;
1490
1491     EVT ArgVT = TLI.getValueType(ArgTy);
1492     if (!ArgVT.isSimple()) return false;
1493     switch (ArgVT.getSimpleVT().SimpleTy) {
1494     default: return false;
1495     case MVT::i1:
1496     case MVT::i8:
1497     case MVT::i16:
1498     case MVT::i32:
1499     case MVT::i64:
1500       ++GPRCnt;
1501       break;
1502     case MVT::f16:
1503     case MVT::f32:
1504     case MVT::f64:
1505       ++FPRCnt;
1506       break;
1507     }
1508
1509     if (GPRCnt > 8 || FPRCnt > 8)
1510       return false;
1511   }
1512
1513   static const MCPhysReg Registers[5][8] = {
1514     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
1515       AArch64::W5, AArch64::W6, AArch64::W7 },
1516     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
1517       AArch64::X5, AArch64::X6, AArch64::X7 },
1518     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
1519       AArch64::H5, AArch64::H6, AArch64::H7 },
1520     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
1521       AArch64::S5, AArch64::S6, AArch64::S7 },
1522     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
1523       AArch64::D5, AArch64::D6, AArch64::D7 }
1524   };
1525
1526   unsigned GPRIdx = 0;
1527   unsigned FPRIdx = 0;
1528   for (auto const &Arg : F->args()) {
1529     MVT VT = TLI.getSimpleValueType(Arg.getType());
1530     unsigned SrcReg;
1531     switch (VT.SimpleTy) {
1532     default: llvm_unreachable("Unexpected value type.");
1533     case MVT::i1:
1534     case MVT::i8:
1535     case MVT::i16: VT = MVT::i32; // fall-through
1536     case MVT::i32: SrcReg = Registers[0][GPRIdx++]; break;
1537     case MVT::i64: SrcReg = Registers[1][GPRIdx++]; break;
1538     case MVT::f16: SrcReg = Registers[2][FPRIdx++]; break;
1539     case MVT::f32: SrcReg = Registers[3][FPRIdx++]; break;
1540     case MVT::f64: SrcReg = Registers[4][FPRIdx++]; break;
1541     }
1542
1543     // Skip unused arguments.
1544     if (Arg.use_empty()) {
1545       UpdateValueMap(&Arg, 0);
1546       continue;
1547     }
1548
1549     const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
1550     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
1551     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
1552     // Without this, EmitLiveInCopies may eliminate the livein if its only
1553     // use is a bitcast (which isn't turned into an instruction).
1554     unsigned ResultReg = createResultReg(RC);
1555     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1556             TII.get(TargetOpcode::COPY), ResultReg)
1557       .addReg(DstReg, getKillRegState(true));
1558     UpdateValueMap(&Arg, ResultReg);
1559   }
1560   return true;
1561 }
1562
1563 bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI,
1564                                       SmallVectorImpl<MVT> &OutVTs,
1565                                       unsigned &NumBytes) {
1566   CallingConv::ID CC = CLI.CallConv;
1567   SmallVector<CCValAssign, 16> ArgLocs;
1568   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
1569   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
1570
1571   // Get a count of how many bytes are to be pushed on the stack.
1572   NumBytes = CCInfo.getNextStackOffset();
1573
1574   // Issue CALLSEQ_START
1575   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
1576   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
1577     .addImm(NumBytes);
1578
1579   // Process the args.
1580   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1581     CCValAssign &VA = ArgLocs[i];
1582     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
1583     MVT ArgVT = OutVTs[VA.getValNo()];
1584
1585     unsigned ArgReg = getRegForValue(ArgVal);
1586     if (!ArgReg)
1587       return false;
1588
1589     // Handle arg promotion: SExt, ZExt, AExt.
1590     switch (VA.getLocInfo()) {
1591     case CCValAssign::Full:
1592       break;
1593     case CCValAssign::SExt: {
1594       MVT DestVT = VA.getLocVT();
1595       MVT SrcVT = ArgVT;
1596       ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
1597       if (!ArgReg)
1598         return false;
1599       break;
1600     }
1601     case CCValAssign::AExt:
1602     // Intentional fall-through.
1603     case CCValAssign::ZExt: {
1604       MVT DestVT = VA.getLocVT();
1605       MVT SrcVT = ArgVT;
1606       ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
1607       if (!ArgReg)
1608         return false;
1609       break;
1610     }
1611     default:
1612       llvm_unreachable("Unknown arg promotion!");
1613     }
1614
1615     // Now copy/store arg to correct locations.
1616     if (VA.isRegLoc() && !VA.needsCustom()) {
1617       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1618               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
1619       CLI.OutRegs.push_back(VA.getLocReg());
1620     } else if (VA.needsCustom()) {
1621       // FIXME: Handle custom args.
1622       return false;
1623     } else {
1624       assert(VA.isMemLoc() && "Assuming store on stack.");
1625
1626       // Don't emit stores for undef values.
1627       if (isa<UndefValue>(ArgVal))
1628         continue;
1629
1630       // Need to store on the stack.
1631       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
1632
1633       unsigned BEAlign = 0;
1634       if (ArgSize < 8 && !Subtarget->isLittleEndian())
1635         BEAlign = 8 - ArgSize;
1636
1637       Address Addr;
1638       Addr.setKind(Address::RegBase);
1639       Addr.setReg(AArch64::SP);
1640       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
1641
1642       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
1643       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
1644         MachinePointerInfo::getStack(Addr.getOffset()),
1645         MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
1646
1647       if (!EmitStore(ArgVT, ArgReg, Addr, MMO))
1648         return false;
1649     }
1650   }
1651   return true;
1652 }
1653
1654 bool AArch64FastISel::FinishCall(CallLoweringInfo &CLI, MVT RetVT,
1655                                  unsigned NumBytes) {
1656   CallingConv::ID CC = CLI.CallConv;
1657
1658   // Issue CALLSEQ_END
1659   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
1660   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
1661     .addImm(NumBytes).addImm(0);
1662
1663   // Now the return value.
1664   if (RetVT != MVT::isVoid) {
1665     SmallVector<CCValAssign, 16> RVLocs;
1666     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
1667     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
1668
1669     // Only handle a single return value.
1670     if (RVLocs.size() != 1)
1671       return false;
1672
1673     // Copy all of the result registers out of their specified physreg.
1674     MVT CopyVT = RVLocs[0].getValVT();
1675     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
1676     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1677             TII.get(TargetOpcode::COPY), ResultReg)
1678       .addReg(RVLocs[0].getLocReg());
1679     CLI.InRegs.push_back(RVLocs[0].getLocReg());
1680
1681     CLI.ResultReg = ResultReg;
1682     CLI.NumResultRegs = 1;
1683   }
1684
1685   return true;
1686 }
1687
1688 bool AArch64FastISel::FastLowerCall(CallLoweringInfo &CLI) {
1689   CallingConv::ID CC  = CLI.CallConv;
1690   bool IsVarArg       = CLI.IsVarArg;
1691   const Value *Callee = CLI.Callee;
1692   const char *SymName = CLI.SymName;
1693
1694   CodeModel::Model CM = TM.getCodeModel();
1695   // Only support the small and large code model.
1696   if (CM != CodeModel::Small && CM != CodeModel::Large)
1697     return false;
1698
1699   // FIXME: Add large code model support for ELF.
1700   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
1701     return false;
1702
1703   // Let SDISel handle vararg functions.
1704   if (IsVarArg)
1705     return false;
1706
1707   // FIXME: Only handle *simple* calls for now.
1708   MVT RetVT;
1709   if (CLI.RetTy->isVoidTy())
1710     RetVT = MVT::isVoid;
1711   else if (!isTypeLegal(CLI.RetTy, RetVT))
1712     return false;
1713
1714   for (auto Flag : CLI.OutFlags)
1715     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
1716       return false;
1717
1718   // Set up the argument vectors.
1719   SmallVector<MVT, 16> OutVTs;
1720   OutVTs.reserve(CLI.OutVals.size());
1721
1722   for (auto *Val : CLI.OutVals) {
1723     MVT VT;
1724     if (!isTypeLegal(Val->getType(), VT) &&
1725         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
1726       return false;
1727
1728     // We don't handle vector parameters yet.
1729     if (VT.isVector() || VT.getSizeInBits() > 64)
1730       return false;
1731
1732     OutVTs.push_back(VT);
1733   }
1734
1735   Address Addr;
1736   if (!ComputeCallAddress(Callee, Addr))
1737     return false;
1738
1739   // Handle the arguments now that we've gotten them.
1740   unsigned NumBytes;
1741   if (!ProcessCallArgs(CLI, OutVTs, NumBytes))
1742     return false;
1743
1744   // Issue the call.
1745   MachineInstrBuilder MIB;
1746   if (CM == CodeModel::Small) {
1747     unsigned CallOpc = Addr.getReg() ? AArch64::BLR : AArch64::BL;
1748     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
1749     if (SymName)
1750       MIB.addExternalSymbol(SymName, 0);
1751     else if (Addr.getGlobalValue())
1752       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
1753     else if (Addr.getReg())
1754       MIB.addReg(Addr.getReg());
1755     else
1756       return false;
1757   } else {
1758     unsigned CallReg = 0;
1759     if (SymName) {
1760       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
1761       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
1762               ADRPReg)
1763         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
1764
1765       CallReg = createResultReg(&AArch64::GPR64RegClass);
1766       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
1767               CallReg)
1768         .addReg(ADRPReg)
1769         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
1770                            AArch64II::MO_NC);
1771     } else if (Addr.getGlobalValue()) {
1772       CallReg = AArch64MaterializeGV(Addr.getGlobalValue());
1773     } else if (Addr.getReg())
1774       CallReg = Addr.getReg();
1775
1776     if (!CallReg)
1777       return false;
1778
1779     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1780                   TII.get(AArch64::BLR)).addReg(CallReg);
1781   }
1782
1783   // Add implicit physical register uses to the call.
1784   for (auto Reg : CLI.OutRegs)
1785     MIB.addReg(Reg, RegState::Implicit);
1786
1787   // Add a register mask with the call-preserved registers.
1788   // Proper defs for return values will be added by setPhysRegsDeadExcept().
1789   MIB.addRegMask(TRI.getCallPreservedMask(CC));
1790
1791   CLI.Call = MIB;
1792
1793   // Finish off the call including any return values.
1794   return FinishCall(CLI, RetVT, NumBytes);
1795 }
1796
1797 bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
1798   if (Alignment)
1799     return Len / Alignment <= 4;
1800   else
1801     return Len < 32;
1802 }
1803
1804 bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
1805                                          uint64_t Len, unsigned Alignment) {
1806   // Make sure we don't bloat code by inlining very large memcpy's.
1807   if (!IsMemCpySmall(Len, Alignment))
1808     return false;
1809
1810   int64_t UnscaledOffset = 0;
1811   Address OrigDest = Dest;
1812   Address OrigSrc = Src;
1813
1814   while (Len) {
1815     MVT VT;
1816     if (!Alignment || Alignment >= 8) {
1817       if (Len >= 8)
1818         VT = MVT::i64;
1819       else if (Len >= 4)
1820         VT = MVT::i32;
1821       else if (Len >= 2)
1822         VT = MVT::i16;
1823       else {
1824         VT = MVT::i8;
1825       }
1826     } else {
1827       // Bound based on alignment.
1828       if (Len >= 4 && Alignment == 4)
1829         VT = MVT::i32;
1830       else if (Len >= 2 && Alignment == 2)
1831         VT = MVT::i16;
1832       else {
1833         VT = MVT::i8;
1834       }
1835     }
1836
1837     bool RV;
1838     unsigned ResultReg;
1839     RV = EmitLoad(VT, ResultReg, Src);
1840     if (!RV)
1841       return false;
1842
1843     RV = EmitStore(VT, ResultReg, Dest);
1844     if (!RV)
1845       return false;
1846
1847     int64_t Size = VT.getSizeInBits() / 8;
1848     Len -= Size;
1849     UnscaledOffset += Size;
1850
1851     // We need to recompute the unscaled offset for each iteration.
1852     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
1853     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
1854   }
1855
1856   return true;
1857 }
1858
1859 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
1860 /// into the user. The condition code will only be updated on success.
1861 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
1862                                         const Instruction *I,
1863                                         const Value *Cond) {
1864   if (!isa<ExtractValueInst>(Cond))
1865     return false;
1866
1867   const auto *EV = cast<ExtractValueInst>(Cond);
1868   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
1869     return false;
1870
1871   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
1872   MVT RetVT;
1873   const Function *Callee = II->getCalledFunction();
1874   Type *RetTy =
1875   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
1876   if (!isTypeLegal(RetTy, RetVT))
1877     return false;
1878
1879   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1880     return false;
1881
1882   AArch64CC::CondCode TmpCC;
1883   switch (II->getIntrinsicID()) {
1884     default: return false;
1885     case Intrinsic::sadd_with_overflow:
1886     case Intrinsic::ssub_with_overflow: TmpCC = AArch64CC::VS; break;
1887     case Intrinsic::uadd_with_overflow: TmpCC = AArch64CC::HS; break;
1888     case Intrinsic::usub_with_overflow: TmpCC = AArch64CC::LO; break;
1889     case Intrinsic::smul_with_overflow:
1890     case Intrinsic::umul_with_overflow: TmpCC = AArch64CC::NE; break;
1891   }
1892
1893   // Check if both instructions are in the same basic block.
1894   if (II->getParent() != I->getParent())
1895     return false;
1896
1897   // Make sure nothing is in the way
1898   BasicBlock::const_iterator Start = I;
1899   BasicBlock::const_iterator End = II;
1900   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
1901     // We only expect extractvalue instructions between the intrinsic and the
1902     // instruction to be selected.
1903     if (!isa<ExtractValueInst>(Itr))
1904       return false;
1905
1906     // Check that the extractvalue operand comes from the intrinsic.
1907     const auto *EVI = cast<ExtractValueInst>(Itr);
1908     if (EVI->getAggregateOperand() != II)
1909       return false;
1910   }
1911
1912   CC = TmpCC;
1913   return true;
1914 }
1915
1916 bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
1917   // FIXME: Handle more intrinsics.
1918   switch (II->getIntrinsicID()) {
1919   default: return false;
1920   case Intrinsic::frameaddress: {
1921     MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
1922     MFI->setFrameAddressIsTaken(true);
1923
1924     const AArch64RegisterInfo *RegInfo =
1925         static_cast<const AArch64RegisterInfo *>(
1926             TM.getSubtargetImpl()->getRegisterInfo());
1927     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
1928     unsigned SrcReg = FramePtr;
1929
1930     // Recursively load frame address
1931     // ldr x0, [fp]
1932     // ldr x0, [x0]
1933     // ldr x0, [x0]
1934     // ...
1935     unsigned DestReg;
1936     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
1937     while (Depth--) {
1938       DestReg = createResultReg(&AArch64::GPR64RegClass);
1939       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1940               TII.get(AArch64::LDRXui), DestReg)
1941         .addReg(SrcReg).addImm(0);
1942       SrcReg = DestReg;
1943     }
1944
1945     UpdateValueMap(II, SrcReg);
1946     return true;
1947   }
1948   case Intrinsic::memcpy:
1949   case Intrinsic::memmove: {
1950     const auto *MTI = cast<MemTransferInst>(II);
1951     // Don't handle volatile.
1952     if (MTI->isVolatile())
1953       return false;
1954
1955     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
1956     // we would emit dead code because we don't currently handle memmoves.
1957     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
1958     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
1959       // Small memcpy's are common enough that we want to do them without a call
1960       // if possible.
1961       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
1962       unsigned Alignment = MTI->getAlignment();
1963       if (IsMemCpySmall(Len, Alignment)) {
1964         Address Dest, Src;
1965         if (!ComputeAddress(MTI->getRawDest(), Dest) ||
1966             !ComputeAddress(MTI->getRawSource(), Src))
1967           return false;
1968         if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
1969           return true;
1970       }
1971     }
1972
1973     if (!MTI->getLength()->getType()->isIntegerTy(64))
1974       return false;
1975
1976     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
1977       // Fast instruction selection doesn't support the special
1978       // address spaces.
1979       return false;
1980
1981     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
1982     return LowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
1983   }
1984   case Intrinsic::memset: {
1985     const MemSetInst *MSI = cast<MemSetInst>(II);
1986     // Don't handle volatile.
1987     if (MSI->isVolatile())
1988       return false;
1989
1990     if (!MSI->getLength()->getType()->isIntegerTy(64))
1991       return false;
1992
1993     if (MSI->getDestAddressSpace() > 255)
1994       // Fast instruction selection doesn't support the special
1995       // address spaces.
1996       return false;
1997
1998     return LowerCallTo(II, "memset", II->getNumArgOperands() - 2);
1999   }
2000   case Intrinsic::trap: {
2001     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
2002         .addImm(1);
2003     return true;
2004   }
2005   case Intrinsic::sqrt: {
2006     Type *RetTy = II->getCalledFunction()->getReturnType();
2007
2008     MVT VT;
2009     if (!isTypeLegal(RetTy, VT))
2010       return false;
2011
2012     unsigned Op0Reg = getRegForValue(II->getOperand(0));
2013     if (!Op0Reg)
2014       return false;
2015     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
2016
2017     unsigned ResultReg = FastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
2018     if (!ResultReg)
2019       return false;
2020
2021     UpdateValueMap(II, ResultReg);
2022     return true;
2023   }
2024   case Intrinsic::sadd_with_overflow:
2025   case Intrinsic::uadd_with_overflow:
2026   case Intrinsic::ssub_with_overflow:
2027   case Intrinsic::usub_with_overflow:
2028   case Intrinsic::smul_with_overflow:
2029   case Intrinsic::umul_with_overflow: {
2030     // This implements the basic lowering of the xalu with overflow intrinsics.
2031     const Function *Callee = II->getCalledFunction();
2032     auto *Ty = cast<StructType>(Callee->getReturnType());
2033     Type *RetTy = Ty->getTypeAtIndex(0U);
2034     Type *CondTy = Ty->getTypeAtIndex(1);
2035
2036     MVT VT;
2037     if (!isTypeLegal(RetTy, VT))
2038       return false;
2039
2040     if (VT != MVT::i32 && VT != MVT::i64)
2041       return false;
2042
2043     const Value *LHS = II->getArgOperand(0);
2044     const Value *RHS = II->getArgOperand(1);
2045     // Canonicalize immediate to the RHS.
2046     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2047         isCommutativeIntrinsic(II))
2048       std::swap(LHS, RHS);
2049
2050     unsigned LHSReg = getRegForValue(LHS);
2051     if (!LHSReg)
2052       return false;
2053     bool LHSIsKill = hasTrivialKill(LHS);
2054
2055     // Check if the immediate can be encoded in the instruction and if we should
2056     // invert the instruction (adds -> subs) to handle negative immediates.
2057     bool UseImm = false;
2058     bool UseInverse = false;
2059     uint64_t Imm = 0;
2060     if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
2061       if (C->isNegative()) {
2062         UseInverse = true;
2063         Imm = -(C->getSExtValue());
2064       } else
2065         Imm = C->getZExtValue();
2066
2067       if (isUInt<12>(Imm))
2068         UseImm = true;
2069
2070       UseInverse = UseImm && UseInverse;
2071     }
2072
2073     static const unsigned OpcTable[2][2][2] = {
2074       { {AArch64::ADDSWrr, AArch64::ADDSXrr},
2075         {AArch64::ADDSWri, AArch64::ADDSXri} },
2076       { {AArch64::SUBSWrr, AArch64::SUBSXrr},
2077         {AArch64::SUBSWri, AArch64::SUBSXri} }
2078     };
2079     unsigned Opc = 0;
2080     unsigned MulReg = 0;
2081     unsigned RHSReg = 0;
2082     bool RHSIsKill = false;
2083     AArch64CC::CondCode CC = AArch64CC::Invalid;
2084     bool Is64Bit = VT == MVT::i64;
2085     switch (II->getIntrinsicID()) {
2086     default: llvm_unreachable("Unexpected intrinsic!");
2087     case Intrinsic::sadd_with_overflow:
2088       Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
2089     case Intrinsic::uadd_with_overflow:
2090       Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::HS; break;
2091     case Intrinsic::ssub_with_overflow:
2092       Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
2093     case Intrinsic::usub_with_overflow:
2094       Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::LO; break;
2095     case Intrinsic::smul_with_overflow: {
2096       CC = AArch64CC::NE;
2097       RHSReg = getRegForValue(RHS);
2098       if (!RHSReg)
2099         return false;
2100       RHSIsKill = hasTrivialKill(RHS);
2101
2102       if (VT == MVT::i32) {
2103         MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2104         unsigned ShiftReg = Emit_LSR_ri(MVT::i64, MulReg, false, 32);
2105         MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
2106                                             AArch64::sub_32);
2107         ShiftReg = FastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
2108                                               AArch64::sub_32);
2109         unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
2110         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2111                 TII.get(AArch64::SUBSWrs), CmpReg)
2112           .addReg(ShiftReg, getKillRegState(true))
2113           .addReg(MulReg, getKillRegState(false))
2114           .addImm(159); // 159 <-> asr #31
2115       } else {
2116         assert(VT == MVT::i64 && "Unexpected value type.");
2117         MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2118         unsigned SMULHReg = FastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
2119                                         RHSReg, RHSIsKill);
2120         unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
2121         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2122                 TII.get(AArch64::SUBSXrs), CmpReg)
2123           .addReg(SMULHReg, getKillRegState(true))
2124           .addReg(MulReg, getKillRegState(false))
2125           .addImm(191); // 191 <-> asr #63
2126       }
2127       break;
2128     }
2129     case Intrinsic::umul_with_overflow: {
2130       CC = AArch64CC::NE;
2131       RHSReg = getRegForValue(RHS);
2132       if (!RHSReg)
2133         return false;
2134       RHSIsKill = hasTrivialKill(RHS);
2135
2136       if (VT == MVT::i32) {
2137         MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2138         unsigned CmpReg = createResultReg(TLI.getRegClassFor(MVT::i64));
2139         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2140                 TII.get(AArch64::SUBSXrs), CmpReg)
2141           .addReg(AArch64::XZR, getKillRegState(true))
2142           .addReg(MulReg, getKillRegState(false))
2143           .addImm(96); // 96 <-> lsr #32
2144         MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
2145                                             AArch64::sub_32);
2146       } else {
2147         assert(VT == MVT::i64 && "Unexpected value type.");
2148         MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2149         unsigned UMULHReg = FastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
2150                                         RHSReg, RHSIsKill);
2151         unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
2152         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2153                 TII.get(AArch64::SUBSXrr), CmpReg)
2154         .addReg(AArch64::XZR, getKillRegState(true))
2155         .addReg(UMULHReg, getKillRegState(false));
2156       }
2157       break;
2158     }
2159     }
2160
2161     if (!UseImm) {
2162       RHSReg = getRegForValue(RHS);
2163       if (!RHSReg)
2164         return false;
2165       RHSIsKill = hasTrivialKill(RHS);
2166     }
2167
2168     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
2169     if (Opc) {
2170       MachineInstrBuilder MIB;
2171       MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
2172                     ResultReg)
2173               .addReg(LHSReg, getKillRegState(LHSIsKill));
2174       if (UseImm) {
2175         MIB.addImm(Imm);
2176         MIB.addImm(0);
2177       } else
2178         MIB.addReg(RHSReg, getKillRegState(RHSIsKill));
2179     }
2180     else
2181       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2182               TII.get(TargetOpcode::COPY), ResultReg)
2183         .addReg(MulReg);
2184
2185     unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy);
2186     assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2187     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2188             ResultReg2)
2189       .addReg(AArch64::WZR, getKillRegState(true))
2190       .addReg(AArch64::WZR, getKillRegState(true))
2191       .addImm(getInvertedCondCode(CC));
2192
2193     UpdateValueMap(II, ResultReg, 2);
2194     return true;
2195   }
2196   }
2197   return false;
2198 }
2199
2200 bool AArch64FastISel::SelectRet(const Instruction *I) {
2201   const ReturnInst *Ret = cast<ReturnInst>(I);
2202   const Function &F = *I->getParent()->getParent();
2203
2204   if (!FuncInfo.CanLowerReturn)
2205     return false;
2206
2207   if (F.isVarArg())
2208     return false;
2209
2210   // Build a list of return value registers.
2211   SmallVector<unsigned, 4> RetRegs;
2212
2213   if (Ret->getNumOperands() > 0) {
2214     CallingConv::ID CC = F.getCallingConv();
2215     SmallVector<ISD::OutputArg, 4> Outs;
2216     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
2217
2218     // Analyze operands of the call, assigning locations to each operand.
2219     SmallVector<CCValAssign, 16> ValLocs;
2220     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
2221     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
2222                                                      : RetCC_AArch64_AAPCS;
2223     CCInfo.AnalyzeReturn(Outs, RetCC);
2224
2225     // Only handle a single return value for now.
2226     if (ValLocs.size() != 1)
2227       return false;
2228
2229     CCValAssign &VA = ValLocs[0];
2230     const Value *RV = Ret->getOperand(0);
2231
2232     // Don't bother handling odd stuff for now.
2233     if (VA.getLocInfo() != CCValAssign::Full)
2234       return false;
2235     // Only handle register returns for now.
2236     if (!VA.isRegLoc())
2237       return false;
2238     unsigned Reg = getRegForValue(RV);
2239     if (Reg == 0)
2240       return false;
2241
2242     unsigned SrcReg = Reg + VA.getValNo();
2243     unsigned DestReg = VA.getLocReg();
2244     // Avoid a cross-class copy. This is very unlikely.
2245     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
2246       return false;
2247
2248     EVT RVEVT = TLI.getValueType(RV->getType());
2249     if (!RVEVT.isSimple())
2250       return false;
2251
2252     // Vectors (of > 1 lane) in big endian need tricky handling.
2253     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1)
2254       return false;
2255
2256     MVT RVVT = RVEVT.getSimpleVT();
2257     if (RVVT == MVT::f128)
2258       return false;
2259     MVT DestVT = VA.getValVT();
2260     // Special handling for extended integers.
2261     if (RVVT != DestVT) {
2262       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
2263         return false;
2264
2265       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
2266         return false;
2267
2268       bool isZExt = Outs[0].Flags.isZExt();
2269       SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
2270       if (SrcReg == 0)
2271         return false;
2272     }
2273
2274     // Make the copy.
2275     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2276             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
2277
2278     // Add register to return instruction.
2279     RetRegs.push_back(VA.getLocReg());
2280   }
2281
2282   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2283                                     TII.get(AArch64::RET_ReallyLR));
2284   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
2285     MIB.addReg(RetRegs[i], RegState::Implicit);
2286   return true;
2287 }
2288
2289 bool AArch64FastISel::SelectTrunc(const Instruction *I) {
2290   Type *DestTy = I->getType();
2291   Value *Op = I->getOperand(0);
2292   Type *SrcTy = Op->getType();
2293
2294   EVT SrcEVT = TLI.getValueType(SrcTy, true);
2295   EVT DestEVT = TLI.getValueType(DestTy, true);
2296   if (!SrcEVT.isSimple())
2297     return false;
2298   if (!DestEVT.isSimple())
2299     return false;
2300
2301   MVT SrcVT = SrcEVT.getSimpleVT();
2302   MVT DestVT = DestEVT.getSimpleVT();
2303
2304   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
2305       SrcVT != MVT::i8)
2306     return false;
2307   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
2308       DestVT != MVT::i1)
2309     return false;
2310
2311   unsigned SrcReg = getRegForValue(Op);
2312   if (!SrcReg)
2313     return false;
2314
2315   // If we're truncating from i64 to a smaller non-legal type then generate an
2316   // AND.  Otherwise, we know the high bits are undefined and a truncate doesn't
2317   // generate any code.
2318   if (SrcVT == MVT::i64) {
2319     uint64_t Mask = 0;
2320     switch (DestVT.SimpleTy) {
2321     default:
2322       // Trunc i64 to i32 is handled by the target-independent fast-isel.
2323       return false;
2324     case MVT::i1:
2325       Mask = 0x1;
2326       break;
2327     case MVT::i8:
2328       Mask = 0xff;
2329       break;
2330     case MVT::i16:
2331       Mask = 0xffff;
2332       break;
2333     }
2334     // Issue an extract_subreg to get the lower 32-bits.
2335     unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true,
2336                                                 AArch64::sub_32);
2337     MRI.constrainRegClass(Reg32, &AArch64::GPR32RegClass);
2338     // Create the AND instruction which performs the actual truncation.
2339     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
2340     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
2341             ANDReg)
2342         .addReg(Reg32)
2343         .addImm(AArch64_AM::encodeLogicalImmediate(Mask, 32));
2344     SrcReg = ANDReg;
2345   }
2346
2347   UpdateValueMap(I, SrcReg);
2348   return true;
2349 }
2350
2351 unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
2352   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
2353           DestVT == MVT::i64) &&
2354          "Unexpected value type.");
2355   // Handle i8 and i16 as i32.
2356   if (DestVT == MVT::i8 || DestVT == MVT::i16)
2357     DestVT = MVT::i32;
2358
2359   if (isZExt) {
2360     MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2361     unsigned ResultReg = createResultReg(&AArch64::GPR32spRegClass);
2362     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
2363             ResultReg)
2364         .addReg(SrcReg)
2365         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2366
2367     if (DestVT == MVT::i64) {
2368       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
2369       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
2370       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2371       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2372               TII.get(AArch64::SUBREG_TO_REG), Reg64)
2373           .addImm(0)
2374           .addReg(ResultReg)
2375           .addImm(AArch64::sub_32);
2376       ResultReg = Reg64;
2377     }
2378     return ResultReg;
2379   } else {
2380     if (DestVT == MVT::i64) {
2381       // FIXME: We're SExt i1 to i64.
2382       return 0;
2383     }
2384     unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
2385     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SBFMWri),
2386             ResultReg)
2387         .addReg(SrcReg)
2388         .addImm(0)
2389         .addImm(0);
2390     return ResultReg;
2391   }
2392 }
2393
2394 unsigned AArch64FastISel::Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2395                                       unsigned Op1, bool Op1IsKill) {
2396   unsigned Opc, ZReg;
2397   switch (RetVT.SimpleTy) {
2398   default: return 0;
2399   case MVT::i8:
2400   case MVT::i16:
2401   case MVT::i32:
2402     RetVT = MVT::i32;
2403     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
2404   case MVT::i64:
2405     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
2406   }
2407
2408   // Create the base instruction, then add the operands.
2409   unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
2410   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2411     .addReg(Op0, getKillRegState(Op0IsKill))
2412     .addReg(Op1, getKillRegState(Op1IsKill))
2413     .addReg(ZReg, getKillRegState(true));
2414
2415   return ResultReg;
2416 }
2417
2418 unsigned AArch64FastISel::Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2419                                         unsigned Op1, bool Op1IsKill) {
2420   if (RetVT != MVT::i64)
2421     return 0;
2422
2423   // Create the base instruction, then add the operands.
2424   unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
2425   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SMADDLrrr),
2426           ResultReg)
2427     .addReg(Op0, getKillRegState(Op0IsKill))
2428     .addReg(Op1, getKillRegState(Op1IsKill))
2429     .addReg(AArch64::XZR, getKillRegState(true));
2430
2431   return ResultReg;
2432 }
2433
2434 unsigned AArch64FastISel::Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2435                                         unsigned Op1, bool Op1IsKill) {
2436   if (RetVT != MVT::i64)
2437     return 0;
2438
2439   // Create the base instruction, then add the operands.
2440   unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
2441   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::UMADDLrrr),
2442           ResultReg)
2443     .addReg(Op0, getKillRegState(Op0IsKill))
2444     .addReg(Op1, getKillRegState(Op1IsKill))
2445     .addReg(AArch64::XZR, getKillRegState(true));
2446
2447   return ResultReg;
2448 }
2449
2450 unsigned AArch64FastISel::Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2451                                       uint64_t Shift) {
2452   unsigned Opc, ImmR, ImmS;
2453   switch (RetVT.SimpleTy) {
2454   default: return 0;
2455   case MVT::i8:
2456     Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS =  7 - Shift; break;
2457   case MVT::i16:
2458     Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 15 - Shift; break;
2459   case MVT::i32:
2460     Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 31 - Shift; break;
2461   case MVT::i64:
2462     Opc = AArch64::UBFMXri; ImmR = -Shift % 64; ImmS = 63 - Shift; break;
2463   }
2464
2465   RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
2466   return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, ImmR,
2467                           ImmS);
2468 }
2469
2470 unsigned AArch64FastISel::Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2471                                       uint64_t Shift) {
2472   unsigned Opc, ImmS;
2473   switch (RetVT.SimpleTy) {
2474   default: return 0;
2475   case MVT::i8:  Opc = AArch64::UBFMWri; ImmS =  7; break;
2476   case MVT::i16: Opc = AArch64::UBFMWri; ImmS = 15; break;
2477   case MVT::i32: Opc = AArch64::UBFMWri; ImmS = 31; break;
2478   case MVT::i64: Opc = AArch64::UBFMXri; ImmS = 63; break;
2479   }
2480
2481   RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
2482   return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
2483                           ImmS);
2484 }
2485
2486 unsigned AArch64FastISel::Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2487                                       uint64_t Shift) {
2488   unsigned Opc, ImmS;
2489   switch (RetVT.SimpleTy) {
2490   default: return 0;
2491   case MVT::i8:  Opc = AArch64::SBFMWri; ImmS =  7; break;
2492   case MVT::i16: Opc = AArch64::SBFMWri; ImmS = 15; break;
2493   case MVT::i32: Opc = AArch64::SBFMWri; ImmS = 31; break;
2494   case MVT::i64: Opc = AArch64::SBFMXri; ImmS = 63; break;
2495   }
2496
2497   RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
2498   return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
2499                           ImmS);
2500 }
2501
2502 unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
2503                                      bool isZExt) {
2504   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
2505
2506   // FastISel does not have plumbing to deal with extensions where the SrcVT or
2507   // DestVT are odd things, so test to make sure that they are both types we can
2508   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
2509   // bail out to SelectionDAG.
2510   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
2511        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
2512       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
2513        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
2514     return 0;
2515
2516   unsigned Opc;
2517   unsigned Imm = 0;
2518
2519   switch (SrcVT.SimpleTy) {
2520   default:
2521     return 0;
2522   case MVT::i1:
2523     return Emiti1Ext(SrcReg, DestVT, isZExt);
2524   case MVT::i8:
2525     if (DestVT == MVT::i64)
2526       Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2527     else
2528       Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
2529     Imm = 7;
2530     break;
2531   case MVT::i16:
2532     if (DestVT == MVT::i64)
2533       Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2534     else
2535       Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
2536     Imm = 15;
2537     break;
2538   case MVT::i32:
2539     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
2540     Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2541     Imm = 31;
2542     break;
2543   }
2544
2545   // Handle i8 and i16 as i32.
2546   if (DestVT == MVT::i8 || DestVT == MVT::i16)
2547     DestVT = MVT::i32;
2548   else if (DestVT == MVT::i64) {
2549     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2550     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2551             TII.get(AArch64::SUBREG_TO_REG), Src64)
2552         .addImm(0)
2553         .addReg(SrcReg)
2554         .addImm(AArch64::sub_32);
2555     SrcReg = Src64;
2556   }
2557
2558   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
2559   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2560       .addReg(SrcReg)
2561       .addImm(0)
2562       .addImm(Imm);
2563
2564   return ResultReg;
2565 }
2566
2567 bool AArch64FastISel::SelectIntExt(const Instruction *I) {
2568   // On ARM, in general, integer casts don't involve legal types; this code
2569   // handles promotable integers.  The high bits for a type smaller than
2570   // the register size are assumed to be undefined.
2571   Type *DestTy = I->getType();
2572   Value *Src = I->getOperand(0);
2573   Type *SrcTy = Src->getType();
2574
2575   bool isZExt = isa<ZExtInst>(I);
2576   unsigned SrcReg = getRegForValue(Src);
2577   if (!SrcReg)
2578     return false;
2579
2580   EVT SrcEVT = TLI.getValueType(SrcTy, true);
2581   EVT DestEVT = TLI.getValueType(DestTy, true);
2582   if (!SrcEVT.isSimple())
2583     return false;
2584   if (!DestEVT.isSimple())
2585     return false;
2586
2587   MVT SrcVT = SrcEVT.getSimpleVT();
2588   MVT DestVT = DestEVT.getSimpleVT();
2589   unsigned ResultReg = 0;
2590
2591   // Check if it is an argument and if it is already zero/sign-extended.
2592   if (const auto *Arg = dyn_cast<Argument>(Src)) {
2593     if ((isZExt && Arg->hasZExtAttr()) || (!isZExt && Arg->hasSExtAttr())) {
2594       if (DestVT == MVT::i64) {
2595         ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
2596         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2597                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
2598           .addImm(0)
2599           .addReg(SrcReg)
2600           .addImm(AArch64::sub_32);
2601       } else
2602         ResultReg = SrcReg;
2603     }
2604   }
2605
2606   if (!ResultReg)
2607     ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
2608
2609   if (!ResultReg)
2610     return false;
2611
2612   UpdateValueMap(I, ResultReg);
2613   return true;
2614 }
2615
2616 bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
2617   EVT DestEVT = TLI.getValueType(I->getType(), true);
2618   if (!DestEVT.isSimple())
2619     return false;
2620
2621   MVT DestVT = DestEVT.getSimpleVT();
2622   if (DestVT != MVT::i64 && DestVT != MVT::i32)
2623     return false;
2624
2625   unsigned DivOpc;
2626   bool is64bit = (DestVT == MVT::i64);
2627   switch (ISDOpcode) {
2628   default:
2629     return false;
2630   case ISD::SREM:
2631     DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
2632     break;
2633   case ISD::UREM:
2634     DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
2635     break;
2636   }
2637   unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
2638   unsigned Src0Reg = getRegForValue(I->getOperand(0));
2639   if (!Src0Reg)
2640     return false;
2641
2642   unsigned Src1Reg = getRegForValue(I->getOperand(1));
2643   if (!Src1Reg)
2644     return false;
2645
2646   unsigned QuotReg = createResultReg(TLI.getRegClassFor(DestVT));
2647   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), QuotReg)
2648       .addReg(Src0Reg)
2649       .addReg(Src1Reg);
2650   // The remainder is computed as numerator - (quotient * denominator) using the
2651   // MSUB instruction.
2652   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
2653   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), ResultReg)
2654       .addReg(QuotReg)
2655       .addReg(Src1Reg)
2656       .addReg(Src0Reg);
2657   UpdateValueMap(I, ResultReg);
2658   return true;
2659 }
2660
2661 bool AArch64FastISel::SelectMul(const Instruction *I) {
2662   EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2663   if (!SrcEVT.isSimple())
2664     return false;
2665   MVT SrcVT = SrcEVT.getSimpleVT();
2666
2667   // Must be simple value type.  Don't handle vectors.
2668   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
2669       SrcVT != MVT::i8)
2670     return false;
2671
2672   unsigned Src0Reg = getRegForValue(I->getOperand(0));
2673   if (!Src0Reg)
2674     return false;
2675   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
2676
2677   unsigned Src1Reg = getRegForValue(I->getOperand(1));
2678   if (!Src1Reg)
2679     return false;
2680   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
2681
2682   unsigned ResultReg =
2683     Emit_MUL_rr(SrcVT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
2684
2685   if (!ResultReg)
2686     return false;
2687
2688   UpdateValueMap(I, ResultReg);
2689   return true;
2690 }
2691
2692 bool AArch64FastISel::SelectShift(const Instruction *I, bool IsLeftShift,
2693                                   bool IsArithmetic) {
2694   EVT RetEVT = TLI.getValueType(I->getType(), true);
2695   if (!RetEVT.isSimple())
2696     return false;
2697   MVT RetVT = RetEVT.getSimpleVT();
2698
2699   if (!isa<ConstantInt>(I->getOperand(1)))
2700     return false;
2701
2702   unsigned Op0Reg = getRegForValue(I->getOperand(0));
2703   if (!Op0Reg)
2704     return false;
2705   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
2706
2707   uint64_t ShiftVal = cast<ConstantInt>(I->getOperand(1))->getZExtValue();
2708
2709   unsigned ResultReg;
2710   if (IsLeftShift)
2711     ResultReg = Emit_LSL_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2712   else {
2713     if (IsArithmetic)
2714       ResultReg = Emit_ASR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2715     else
2716       ResultReg = Emit_LSR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2717   }
2718
2719   if (!ResultReg)
2720     return false;
2721
2722   UpdateValueMap(I, ResultReg);
2723   return true;
2724 }
2725
2726 bool AArch64FastISel::SelectBitCast(const Instruction *I) {
2727   MVT RetVT, SrcVT;
2728
2729   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
2730     return false;
2731   if (!isTypeLegal(I->getType(), RetVT))
2732     return false;
2733
2734   unsigned Opc;
2735   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
2736     Opc = AArch64::FMOVWSr;
2737   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
2738     Opc = AArch64::FMOVXDr;
2739   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
2740     Opc = AArch64::FMOVSWr;
2741   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
2742     Opc = AArch64::FMOVDXr;
2743   else
2744     return false;
2745
2746   unsigned Op0Reg = getRegForValue(I->getOperand(0));
2747   if (!Op0Reg)
2748     return false;
2749   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
2750   unsigned ResultReg = FastEmitInst_r(Opc, TLI.getRegClassFor(RetVT),
2751                                       Op0Reg, Op0IsKill);
2752
2753   if (!ResultReg)
2754     return false;
2755
2756   UpdateValueMap(I, ResultReg);
2757   return true;
2758 }
2759
2760 bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) {
2761   switch (I->getOpcode()) {
2762   default:
2763     break;
2764   case Instruction::Load:
2765     return SelectLoad(I);
2766   case Instruction::Store:
2767     return SelectStore(I);
2768   case Instruction::Br:
2769     return SelectBranch(I);
2770   case Instruction::IndirectBr:
2771     return SelectIndirectBr(I);
2772   case Instruction::FCmp:
2773   case Instruction::ICmp:
2774     return SelectCmp(I);
2775   case Instruction::Select:
2776     return SelectSelect(I);
2777   case Instruction::FPExt:
2778     return SelectFPExt(I);
2779   case Instruction::FPTrunc:
2780     return SelectFPTrunc(I);
2781   case Instruction::FPToSI:
2782     return SelectFPToInt(I, /*Signed=*/true);
2783   case Instruction::FPToUI:
2784     return SelectFPToInt(I, /*Signed=*/false);
2785   case Instruction::SIToFP:
2786     return SelectIntToFP(I, /*Signed=*/true);
2787   case Instruction::UIToFP:
2788     return SelectIntToFP(I, /*Signed=*/false);
2789   case Instruction::SRem:
2790     return SelectRem(I, ISD::SREM);
2791   case Instruction::URem:
2792     return SelectRem(I, ISD::UREM);
2793   case Instruction::Ret:
2794     return SelectRet(I);
2795   case Instruction::Trunc:
2796     return SelectTrunc(I);
2797   case Instruction::ZExt:
2798   case Instruction::SExt:
2799     return SelectIntExt(I);
2800
2801   // FIXME: All of these should really be handled by the target-independent
2802   // selector -> improve FastISel tblgen.
2803   case Instruction::Mul:
2804     return SelectMul(I);
2805   case Instruction::Shl:
2806       return SelectShift(I, /*IsLeftShift=*/true, /*IsArithmetic=*/false);
2807   case Instruction::LShr:
2808     return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/false);
2809   case Instruction::AShr:
2810     return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/true);
2811   case Instruction::BitCast:
2812     return SelectBitCast(I);
2813   }
2814   return false;
2815   // Silence warnings.
2816   (void)&CC_AArch64_DarwinPCS_VarArg;
2817 }
2818
2819 namespace llvm {
2820 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo,
2821                                         const TargetLibraryInfo *libInfo) {
2822   return new AArch64FastISel(funcInfo, libInfo);
2823 }
2824 }