lib/Target/AArch64/AArch64FastISel.cpp

   1 //===-- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the AArch64-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // AArch64GenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AArch64.h"
  17 #include "AArch64Subtarget.h"
  18 #include "AArch64TargetMachine.h"
  19 #include "MCTargetDesc/AArch64AddressingModes.h"
  20 #include "llvm/Analysis/BranchProbabilityInfo.h"
  21 #include "llvm/CodeGen/CallingConvLower.h"
  22 #include "llvm/CodeGen/FastISel.h"
  23 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  24 #include "llvm/CodeGen/MachineConstantPool.h"
  25 #include "llvm/CodeGen/MachineFrameInfo.h"
  26 #include "llvm/CodeGen/MachineInstrBuilder.h"
  27 #include "llvm/CodeGen/MachineRegisterInfo.h"
  28 #include "llvm/IR/CallingConv.h"
  29 #include "llvm/IR/DataLayout.h"
  30 #include "llvm/IR/DerivedTypes.h"
  31 #include "llvm/IR/Function.h"
  32 #include "llvm/IR/GetElementPtrTypeIterator.h"
  33 #include "llvm/IR/GlobalAlias.h"
  34 #include "llvm/IR/GlobalVariable.h"
  35 #include "llvm/IR/Instructions.h"
  36 #include "llvm/IR/IntrinsicInst.h"
  37 #include "llvm/IR/Operator.h"
  38 #include "llvm/Support/CommandLine.h"
  39 using namespace llvm;
  40
  41 namespace {
  42
  43 class AArch64FastISel : public FastISel {
  44   class Address {
  45   public:
  46     typedef enum {
  47       RegBase,
  48       FrameIndexBase
  49     } BaseKind;
  50
  51   private:
  52     BaseKind Kind;
  53     AArch64_AM::ShiftExtendType ExtType;
  54     union {
  55       unsigned Reg;
  56       int FI;
  57     } Base;
  58     unsigned OffsetReg;
  59     unsigned Shift;
  60     int64_t Offset;
  61     const GlobalValue *GV;
  62
  63   public:
  64     Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
  65       OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
  66     void setKind(BaseKind K) { Kind = K; }
  67     BaseKind getKind() const { return Kind; }
  68     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
  69     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
  70     bool isRegBase() const { return Kind == RegBase; }
  71     bool isFIBase() const { return Kind == FrameIndexBase; }
  72     void setReg(unsigned Reg) {
  73       assert(isRegBase() && "Invalid base register access!");
  74       Base.Reg = Reg;
  75     }
  76     unsigned getReg() const {
  77       assert(isRegBase() && "Invalid base register access!");
  78       return Base.Reg;
  79     }
  80     void setOffsetReg(unsigned Reg) {
  81       assert(isRegBase() && "Invalid offset register access!");
  82       OffsetReg = Reg;
  83     }
  84     unsigned getOffsetReg() const {
  85       assert(isRegBase() && "Invalid offset register access!");
  86       return OffsetReg;
  87     }
  88     void setFI(unsigned FI) {
  89       assert(isFIBase() && "Invalid base frame index  access!");
  90       Base.FI = FI;
  91     }
  92     unsigned getFI() const {
  93       assert(isFIBase() && "Invalid base frame index access!");
  94       return Base.FI;
  95     }
  96     void setOffset(int64_t O) { Offset = O; }
  97     int64_t getOffset() { return Offset; }
  98     void setShift(unsigned S) { Shift = S; }
  99     unsigned getShift() { return Shift; }
 100
 101     void setGlobalValue(const GlobalValue *G) { GV = G; }
 102     const GlobalValue *getGlobalValue() { return GV; }
 103   };
 104
 105   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
 106   /// make the right decision when generating code for different targets.
 107   const AArch64Subtarget *Subtarget;
 108   LLVMContext *Context;
 109
 110   bool fastLowerArguments() override;
 111   bool fastLowerCall(CallLoweringInfo &CLI) override;
 112   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
 113
 114 private:
 115   // Selection routines.
 116   bool selectAddSub(const Instruction *I);
 117   bool selectLogicalOp(const Instruction *I, unsigned ISDOpcode);
 118   bool SelectLoad(const Instruction *I);
 119   bool SelectStore(const Instruction *I);
 120   bool SelectBranch(const Instruction *I);
 121   bool SelectIndirectBr(const Instruction *I);
 122   bool SelectCmp(const Instruction *I);
 123   bool SelectSelect(const Instruction *I);
 124   bool SelectFPExt(const Instruction *I);
 125   bool SelectFPTrunc(const Instruction *I);
 126   bool SelectFPToInt(const Instruction *I, bool Signed);
 127   bool SelectIntToFP(const Instruction *I, bool Signed);
 128   bool SelectRem(const Instruction *I, unsigned ISDOpcode);
 129   bool SelectRet(const Instruction *I);
 130   bool SelectTrunc(const Instruction *I);
 131   bool SelectIntExt(const Instruction *I);
 132   bool SelectMul(const Instruction *I);
 133   bool SelectShift(const Instruction *I);
 134   bool SelectBitCast(const Instruction *I);
 135
 136   // Utility helper routines.
 137   bool isTypeLegal(Type *Ty, MVT &VT);
 138   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
 139   bool isValueAvailable(const Value *V) const;
 140   bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
 141   bool ComputeCallAddress(const Value *V, Address &Addr);
 142   bool SimplifyAddress(Address &Addr, MVT VT);
 143   void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 144                             unsigned Flags, unsigned ScaleFactor,
 145                             MachineMemOperand *MMO);
 146   bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
 147   bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
 148                           unsigned Alignment);
 149   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
 150                          const Value *Cond);
 151
 152   // Emit helper routines.
 153   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
 154                       const Value *RHS, bool SetFlags = false,
 155                       bool WantResult = true,  bool IsZExt = false);
 156   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
 157                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 158                          bool SetFlags = false, bool WantResult = true);
 159   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
 160                          bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
 161                          bool WantResult = true);
 162   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
 163                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 164                          AArch64_AM::ShiftExtendType ShiftType,
 165                          uint64_t ShiftImm, bool SetFlags = false,
 166                          bool WantResult = true);
 167   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
 168                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 169                           AArch64_AM::ShiftExtendType ExtType,
 170                           uint64_t ShiftImm, bool SetFlags = false,
 171                          bool WantResult = true);
 172
 173   // Emit functions.
 174   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
 175   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
 176   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 177   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
 178   bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
 179                 MachineMemOperand *MMO = nullptr);
 180   bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
 181                  MachineMemOperand *MMO = nullptr);
 182   unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
 183   unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
 184   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
 185                    bool SetFlags = false, bool WantResult = true,
 186                    bool IsZExt = false);
 187   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
 188                    bool SetFlags = false, bool WantResult = true,
 189                    bool IsZExt = false);
 190   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 191                        unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
 192   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 193                        unsigned RHSReg, bool RHSIsKill,
 194                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
 195                        bool WantResult = true);
 196   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
 197                          const Value *RHS);
 198   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 199                             bool LHSIsKill, uint64_t Imm);
 200   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 201                             bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 202                             uint64_t ShiftImm);
 203   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 204   unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 205                        unsigned Op1, bool Op1IsKill);
 206   unsigned Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 207                          unsigned Op1, bool Op1IsKill);
 208   unsigned Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 209                          unsigned Op1, bool Op1IsKill);
 210   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 211                       unsigned Op1Reg, bool Op1IsKill);
 212   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 213                       uint64_t Imm, bool IsZExt = true);
 214   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 215                       unsigned Op1Reg, bool Op1IsKill);
 216   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 217                       uint64_t Imm, bool IsZExt = true);
 218   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 219                       unsigned Op1Reg, bool Op1IsKill);
 220   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 221                       uint64_t Imm, bool IsZExt = false);
 222
 223   unsigned AArch64MaterializeInt(const ConstantInt *CI, MVT VT);
 224   unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
 225   unsigned AArch64MaterializeGV(const GlobalValue *GV);
 226
 227   // Call handling routines.
 228 private:
 229   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
 230   bool ProcessCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
 231                        unsigned &NumBytes);
 232   bool FinishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
 233
 234 public:
 235   // Backend specific FastISel code.
 236   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
 237   unsigned fastMaterializeConstant(const Constant *C) override;
 238   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
 239
 240   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
 241                          const TargetLibraryInfo *LibInfo)
 242       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
 243     Subtarget = &TM.getSubtarget<AArch64Subtarget>();
 244     Context = &FuncInfo.Fn->getContext();
 245   }
 246
 247   bool fastSelectInstruction(const Instruction *I) override;
 248
 249 #include "AArch64GenFastISel.inc"
 250 };
 251
 252 } // end anonymous namespace
 253
 254 #include "AArch64GenCallingConv.inc"
 255
 256 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
 257   if (CC == CallingConv::WebKit_JS)
 258     return CC_AArch64_WebKit_JS;
 259   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 260 }
 261
 262 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
 263   assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
 264          "Alloca should always return a pointer.");
 265
 266   // Don't handle dynamic allocas.
 267   if (!FuncInfo.StaticAllocaMap.count(AI))
 268     return 0;
 269
 270   DenseMap<const AllocaInst *, int>::iterator SI =
 271       FuncInfo.StaticAllocaMap.find(AI);
 272
 273   if (SI != FuncInfo.StaticAllocaMap.end()) {
 274     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 275     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 276             ResultReg)
 277         .addFrameIndex(SI->second)
 278         .addImm(0)
 279         .addImm(0);
 280     return ResultReg;
 281   }
 282
 283   return 0;
 284 }
 285
 286 unsigned AArch64FastISel::AArch64MaterializeInt(const ConstantInt *CI, MVT VT) {
 287   if (VT > MVT::i64)
 288     return 0;
 289
 290   if (!CI->isZero())
 291     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
 292
 293   // Create a copy from the zero register to materialize a "0" value.
 294   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
 295                                                    : &AArch64::GPR32RegClass;
 296   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
 297   unsigned ResultReg = createResultReg(RC);
 298   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
 299           ResultReg).addReg(ZeroReg, getKillRegState(true));
 300   return ResultReg;
 301 }
 302
 303 unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
 304   // Positive zero (+0.0) has to be materialized with a fmov from the zero
 305   // register, because the immediate version of fmov cannot encode zero.
 306   if (CFP->isNullValue())
 307     return fastMaterializeFloatZero(CFP);
 308
 309   if (VT != MVT::f32 && VT != MVT::f64)
 310     return 0;
 311
 312   const APFloat Val = CFP->getValueAPF();
 313   bool Is64Bit = (VT == MVT::f64);
 314   // This checks to see if we can use FMOV instructions to materialize
 315   // a constant, otherwise we have to materialize via the constant pool.
 316   if (TLI.isFPImmLegal(Val, VT)) {
 317     int Imm =
 318         Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
 319     assert((Imm != -1) && "Cannot encode floating-point constant.");
 320     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
 321     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
 322   }
 323
 324   // Materialize via constant pool.  MachineConstantPool wants an explicit
 325   // alignment.
 326   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
 327   if (Align == 0)
 328     Align = DL.getTypeAllocSize(CFP->getType());
 329
 330   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 331   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 332   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 333           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
 334
 335   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
 336   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 337   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 338       .addReg(ADRPReg)
 339       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 340   return ResultReg;
 341 }
 342
 343 unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
 344   // We can't handle thread-local variables quickly yet.
 345   if (GV->isThreadLocal())
 346     return 0;
 347
 348   // MachO still uses GOT for large code-model accesses, but ELF requires
 349   // movz/movk sequences, which FastISel doesn't handle yet.
 350   if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
 351     return 0;
 352
 353   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
 354
 355   EVT DestEVT = TLI.getValueType(GV->getType(), true);
 356   if (!DestEVT.isSimple())
 357     return 0;
 358
 359   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 360   unsigned ResultReg;
 361
 362   if (OpFlags & AArch64II::MO_GOT) {
 363     // ADRP + LDRX
 364     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 365             ADRPReg)
 366       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
 367
 368     ResultReg = createResultReg(&AArch64::GPR64RegClass);
 369     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
 370             ResultReg)
 371       .addReg(ADRPReg)
 372       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
 373                         AArch64II::MO_NC);
 374   } else if (OpFlags & AArch64II::MO_CONSTPOOL) {
 375     // We can't handle addresses loaded from a constant pool quickly yet.
 376     return 0;
 377   } else {
 378     // ADRP + ADDX
 379     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 380             ADRPReg)
 381       .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
 382
 383     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 384     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 385             ResultReg)
 386       .addReg(ADRPReg)
 387       .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
 388       .addImm(0);
 389   }
 390   return ResultReg;
 391 }
 392
 393 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
 394   EVT CEVT = TLI.getValueType(C->getType(), true);
 395
 396   // Only handle simple types.
 397   if (!CEVT.isSimple())
 398     return 0;
 399   MVT VT = CEVT.getSimpleVT();
 400
 401   if (const auto *CI = dyn_cast<ConstantInt>(C))
 402     return AArch64MaterializeInt(CI, VT);
 403   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 404     return AArch64MaterializeFP(CFP, VT);
 405   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
 406     return AArch64MaterializeGV(GV);
 407
 408   return 0;
 409 }
 410
 411 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
 412   assert(CFP->isNullValue() &&
 413          "Floating-point constant is not a positive zero.");
 414   MVT VT;
 415   if (!isTypeLegal(CFP->getType(), VT))
 416     return 0;
 417
 418   if (VT != MVT::f32 && VT != MVT::f64)
 419     return 0;
 420
 421   bool Is64Bit = (VT == MVT::f64);
 422   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
 423   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
 424   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
 425 }
 426
 427 // Computes the address to get to an object.
 428 bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr, Type *Ty)
 429 {
 430   const User *U = nullptr;
 431   unsigned Opcode = Instruction::UserOp1;
 432   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
 433     // Don't walk into other basic blocks unless the object is an alloca from
 434     // another block, otherwise it may not have a virtual register assigned.
 435     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
 436         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 437       Opcode = I->getOpcode();
 438       U = I;
 439     }
 440   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
 441     Opcode = C->getOpcode();
 442     U = C;
 443   }
 444
 445   if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
 446     if (Ty->getAddressSpace() > 255)
 447       // Fast instruction selection doesn't support the special
 448       // address spaces.
 449       return false;
 450
 451   switch (Opcode) {
 452   default:
 453     break;
 454   case Instruction::BitCast: {
 455     // Look through bitcasts.
 456     return ComputeAddress(U->getOperand(0), Addr, Ty);
 457   }
 458   case Instruction::IntToPtr: {
 459     // Look past no-op inttoptrs.
 460     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 461       return ComputeAddress(U->getOperand(0), Addr, Ty);
 462     break;
 463   }
 464   case Instruction::PtrToInt: {
 465     // Look past no-op ptrtoints.
 466     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
 467       return ComputeAddress(U->getOperand(0), Addr, Ty);
 468     break;
 469   }
 470   case Instruction::GetElementPtr: {
 471     Address SavedAddr = Addr;
 472     uint64_t TmpOffset = Addr.getOffset();
 473
 474     // Iterate through the GEP folding the constants into offsets where
 475     // we can.
 476     gep_type_iterator GTI = gep_type_begin(U);
 477     for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
 478          ++i, ++GTI) {
 479       const Value *Op = *i;
 480       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
 481         const StructLayout *SL = DL.getStructLayout(STy);
 482         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
 483         TmpOffset += SL->getElementOffset(Idx);
 484       } else {
 485         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
 486         for (;;) {
 487           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
 488             // Constant-offset addressing.
 489             TmpOffset += CI->getSExtValue() * S;
 490             break;
 491           }
 492           if (canFoldAddIntoGEP(U, Op)) {
 493             // A compatible add with a constant operand. Fold the constant.
 494             ConstantInt *CI =
 495                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
 496             TmpOffset += CI->getSExtValue() * S;
 497             // Iterate on the other operand.
 498             Op = cast<AddOperator>(Op)->getOperand(0);
 499             continue;
 500           }
 501           // Unsupported
 502           goto unsupported_gep;
 503         }
 504       }
 505     }
 506
 507     // Try to grab the base operand now.
 508     Addr.setOffset(TmpOffset);
 509     if (ComputeAddress(U->getOperand(0), Addr, Ty))
 510       return true;
 511
 512     // We failed, restore everything and try the other options.
 513     Addr = SavedAddr;
 514
 515   unsupported_gep:
 516     break;
 517   }
 518   case Instruction::Alloca: {
 519     const AllocaInst *AI = cast<AllocaInst>(Obj);
 520     DenseMap<const AllocaInst *, int>::iterator SI =
 521         FuncInfo.StaticAllocaMap.find(AI);
 522     if (SI != FuncInfo.StaticAllocaMap.end()) {
 523       Addr.setKind(Address::FrameIndexBase);
 524       Addr.setFI(SI->second);
 525       return true;
 526     }
 527     break;
 528   }
 529   case Instruction::Add: {
 530     // Adds of constants are common and easy enough.
 531     const Value *LHS = U->getOperand(0);
 532     const Value *RHS = U->getOperand(1);
 533
 534     if (isa<ConstantInt>(LHS))
 535       std::swap(LHS, RHS);
 536
 537     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 538       Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
 539       return ComputeAddress(LHS, Addr, Ty);
 540     }
 541
 542     Address Backup = Addr;
 543     if (ComputeAddress(LHS, Addr, Ty) && ComputeAddress(RHS, Addr, Ty))
 544       return true;
 545     Addr = Backup;
 546
 547     break;
 548   }
 549   case Instruction::Shl:
 550     if (Addr.getOffsetReg())
 551       break;
 552
 553     if (const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
 554       unsigned Val = CI->getZExtValue();
 555       if (Val < 1 || Val > 3)
 556         break;
 557
 558       uint64_t NumBytes = 0;
 559       if (Ty && Ty->isSized()) {
 560         uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 561         NumBytes = NumBits / 8;
 562         if (!isPowerOf2_64(NumBits))
 563           NumBytes = 0;
 564       }
 565
 566       if (NumBytes != (1ULL << Val))
 567         break;
 568
 569       Addr.setShift(Val);
 570       Addr.setExtendType(AArch64_AM::LSL);
 571
 572       if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
 573         if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 574           U = I;
 575
 576       if (const auto *ZE = dyn_cast<ZExtInst>(U))
 577         if (ZE->getOperand(0)->getType()->isIntegerTy(32))
 578           Addr.setExtendType(AArch64_AM::UXTW);
 579
 580       if (const auto *SE = dyn_cast<SExtInst>(U))
 581         if (SE->getOperand(0)->getType()->isIntegerTy(32))
 582           Addr.setExtendType(AArch64_AM::SXTW);
 583
 584       unsigned Reg = getRegForValue(U->getOperand(0));
 585       if (!Reg)
 586         return false;
 587       Addr.setOffsetReg(Reg);
 588       return true;
 589     }
 590     break;
 591   }
 592
 593   if (Addr.getReg()) {
 594     if (!Addr.getOffsetReg()) {
 595       unsigned Reg = getRegForValue(Obj);
 596       if (!Reg)
 597         return false;
 598       Addr.setOffsetReg(Reg);
 599       return true;
 600     }
 601     return false;
 602   }
 603
 604   unsigned Reg = getRegForValue(Obj);
 605   if (!Reg)
 606     return false;
 607   Addr.setReg(Reg);
 608   return true;
 609 }
 610
 611 bool AArch64FastISel::ComputeCallAddress(const Value *V, Address &Addr) {
 612   const User *U = nullptr;
 613   unsigned Opcode = Instruction::UserOp1;
 614   bool InMBB = true;
 615
 616   if (const auto *I = dyn_cast<Instruction>(V)) {
 617     Opcode = I->getOpcode();
 618     U = I;
 619     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
 620   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
 621     Opcode = C->getOpcode();
 622     U = C;
 623   }
 624
 625   switch (Opcode) {
 626   default: break;
 627   case Instruction::BitCast:
 628     // Look past bitcasts if its operand is in the same BB.
 629     if (InMBB)
 630       return ComputeCallAddress(U->getOperand(0), Addr);
 631     break;
 632   case Instruction::IntToPtr:
 633     // Look past no-op inttoptrs if its operand is in the same BB.
 634     if (InMBB &&
 635         TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 636       return ComputeCallAddress(U->getOperand(0), Addr);
 637     break;
 638   case Instruction::PtrToInt:
 639     // Look past no-op ptrtoints if its operand is in the same BB.
 640     if (InMBB &&
 641         TLI.getValueType(U->getType()) == TLI.getPointerTy())
 642       return ComputeCallAddress(U->getOperand(0), Addr);
 643     break;
 644   }
 645
 646   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
 647     Addr.setGlobalValue(GV);
 648     return true;
 649   }
 650
 651   // If all else fails, try to materialize the value in a register.
 652   if (!Addr.getGlobalValue()) {
 653     Addr.setReg(getRegForValue(V));
 654     return Addr.getReg() != 0;
 655   }
 656
 657   return false;
 658 }
 659
 660
 661 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
 662   EVT evt = TLI.getValueType(Ty, true);
 663
 664   // Only handle simple types.
 665   if (evt == MVT::Other || !evt.isSimple())
 666     return false;
 667   VT = evt.getSimpleVT();
 668
 669   // This is a legal type, but it's not something we handle in fast-isel.
 670   if (VT == MVT::f128)
 671     return false;
 672
 673   // Handle all other legal types, i.e. a register that will directly hold this
 674   // value.
 675   return TLI.isTypeLegal(VT);
 676 }
 677
 678 /// \brief Determine if the value type is supported by FastISel.
 679 ///
 680 /// FastISel for AArch64 can handle more value types than are legal. This adds
 681 /// simple value type such as i1, i8, and i16.
 682 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
 683   if (Ty->isVectorTy() && !IsVectorAllowed)
 684     return false;
 685
 686   if (isTypeLegal(Ty, VT))
 687     return true;
 688
 689   // If this is a type than can be sign or zero-extended to a basic operation
 690   // go ahead and accept it now.
 691   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
 692     return true;
 693
 694   return false;
 695 }
 696
 697 bool AArch64FastISel::isValueAvailable(const Value *V) const {
 698   if (!isa<Instruction>(V))
 699     return true;
 700
 701   const auto *I = cast<Instruction>(V);
 702   if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 703     return true;
 704
 705   return false;
 706 }
 707
 708 bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT) {
 709   unsigned ScaleFactor;
 710   switch (VT.SimpleTy) {
 711   default: return false;
 712   case MVT::i1:  // fall-through
 713   case MVT::i8:  ScaleFactor = 1; break;
 714   case MVT::i16: ScaleFactor = 2; break;
 715   case MVT::i32: // fall-through
 716   case MVT::f32: ScaleFactor = 4; break;
 717   case MVT::i64: // fall-through
 718   case MVT::f64: ScaleFactor = 8; break;
 719   }
 720
 721   bool ImmediateOffsetNeedsLowering = false;
 722   bool RegisterOffsetNeedsLowering = false;
 723   int64_t Offset = Addr.getOffset();
 724   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
 725     ImmediateOffsetNeedsLowering = true;
 726   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
 727            !isUInt<12>(Offset / ScaleFactor))
 728     ImmediateOffsetNeedsLowering = true;
 729
 730   // Cannot encode an offset register and an immediate offset in the same
 731   // instruction. Fold the immediate offset into the load/store instruction and
 732   // emit an additonal add to take care of the offset register.
 733   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() &&
 734       Addr.getOffsetReg())
 735     RegisterOffsetNeedsLowering = true;
 736
 737   // Cannot encode zero register as base.
 738   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
 739     RegisterOffsetNeedsLowering = true;
 740
 741   // If this is a stack pointer and the offset needs to be simplified then put
 742   // the alloca address into a register, set the base type back to register and
 743   // continue. This should almost never happen.
 744   if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) {
 745     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 746     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 747             ResultReg)
 748       .addFrameIndex(Addr.getFI())
 749       .addImm(0)
 750       .addImm(0);
 751     Addr.setKind(Address::RegBase);
 752     Addr.setReg(ResultReg);
 753   }
 754
 755   if (RegisterOffsetNeedsLowering) {
 756     unsigned ResultReg = 0;
 757     if (Addr.getReg()) {
 758       if (Addr.getExtendType() == AArch64_AM::SXTW ||
 759           Addr.getExtendType() == AArch64_AM::UXTW   )
 760         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
 761                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
 762                                   /*TODO:IsKill=*/false, Addr.getExtendType(),
 763                                   Addr.getShift());
 764       else
 765         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
 766                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
 767                                   /*TODO:IsKill=*/false, AArch64_AM::LSL,
 768                                   Addr.getShift());
 769     } else {
 770       if (Addr.getExtendType() == AArch64_AM::UXTW)
 771         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
 772                                /*Op0IsKill=*/false, Addr.getShift(),
 773                                /*IsZExt=*/true);
 774       else if (Addr.getExtendType() == AArch64_AM::SXTW)
 775         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
 776                                /*Op0IsKill=*/false, Addr.getShift(),
 777                                /*IsZExt=*/false);
 778       else
 779         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
 780                                /*Op0IsKill=*/false, Addr.getShift());
 781     }
 782     if (!ResultReg)
 783       return false;
 784
 785     Addr.setReg(ResultReg);
 786     Addr.setOffsetReg(0);
 787     Addr.setShift(0);
 788     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
 789   }
 790
 791   // Since the offset is too large for the load/store instruction get the
 792   // reg+offset into a register.
 793   if (ImmediateOffsetNeedsLowering) {
 794     unsigned ResultReg = 0;
 795     if (Addr.getReg())
 796       ResultReg = fastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(),
 797                                /*IsKill=*/false, Offset, MVT::i64);
 798     else
 799       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
 800
 801     if (!ResultReg)
 802       return false;
 803     Addr.setReg(ResultReg);
 804     Addr.setOffset(0);
 805   }
 806   return true;
 807 }
 808
 809 void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
 810                                            const MachineInstrBuilder &MIB,
 811                                            unsigned Flags,
 812                                            unsigned ScaleFactor,
 813                                            MachineMemOperand *MMO) {
 814   int64_t Offset = Addr.getOffset() / ScaleFactor;
 815   // Frame base works a bit differently. Handle it separately.
 816   if (Addr.isFIBase()) {
 817     int FI = Addr.getFI();
 818     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
 819     // and alignment should be based on the VT.
 820     MMO = FuncInfo.MF->getMachineMemOperand(
 821       MachinePointerInfo::getFixedStack(FI, Offset), Flags,
 822       MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
 823     // Now add the rest of the operands.
 824     MIB.addFrameIndex(FI).addImm(Offset);
 825   } else {
 826     assert(Addr.isRegBase() && "Unexpected address kind.");
 827     const MCInstrDesc &II = MIB->getDesc();
 828     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
 829     Addr.setReg(
 830       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
 831     Addr.setOffsetReg(
 832       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
 833     if (Addr.getOffsetReg()) {
 834       assert(Addr.getOffset() == 0 && "Unexpected offset");
 835       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
 836                       Addr.getExtendType() == AArch64_AM::SXTX;
 837       MIB.addReg(Addr.getReg());
 838       MIB.addReg(Addr.getOffsetReg());
 839       MIB.addImm(IsSigned);
 840       MIB.addImm(Addr.getShift() != 0);
 841     } else {
 842       MIB.addReg(Addr.getReg());
 843       MIB.addImm(Offset);
 844     }
 845   }
 846
 847   if (MMO)
 848     MIB.addMemOperand(MMO);
 849 }
 850
 851 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
 852                                      const Value *RHS, bool SetFlags,
 853                                      bool WantResult,  bool IsZExt) {
 854   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
 855   bool NeedExtend = false;
 856   switch (RetVT.SimpleTy) {
 857   default:
 858     return 0;
 859   case MVT::i1:
 860     NeedExtend = true;
 861     break;
 862   case MVT::i8:
 863     NeedExtend = true;
 864     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
 865     break;
 866   case MVT::i16:
 867     NeedExtend = true;
 868     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
 869     break;
 870   case MVT::i32:  // fall-through
 871   case MVT::i64:
 872     break;
 873   }
 874   MVT SrcVT = RetVT;
 875   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
 876
 877   // Canonicalize immediates to the RHS first.
 878   if (UseAdd && isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
 879     std::swap(LHS, RHS);
 880
 881   // Canonicalize shift immediate to the RHS.
 882   if (UseAdd && isValueAvailable(LHS))
 883     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
 884       if (isa<ConstantInt>(SI->getOperand(1)))
 885         if (SI->getOpcode() == Instruction::Shl  ||
 886             SI->getOpcode() == Instruction::LShr ||
 887             SI->getOpcode() == Instruction::AShr   )
 888           std::swap(LHS, RHS);
 889
 890   unsigned LHSReg = getRegForValue(LHS);
 891   if (!LHSReg)
 892     return 0;
 893   bool LHSIsKill = hasTrivialKill(LHS);
 894
 895   if (NeedExtend)
 896     LHSReg = EmitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
 897
 898   unsigned ResultReg = 0;
 899   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
 900     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
 901     if (C->isNegative())
 902       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
 903                                 SetFlags, WantResult);
 904     else
 905       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
 906                                 WantResult);
 907   }
 908   if (ResultReg)
 909     return ResultReg;
 910
 911   // Only extend the RHS within the instruction if there is a valid extend type.
 912   if (ExtendType != AArch64_AM::InvalidShiftExtend && isValueAvailable(RHS)) {
 913     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
 914       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
 915         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
 916           unsigned RHSReg = getRegForValue(SI->getOperand(0));
 917           if (!RHSReg)
 918             return 0;
 919           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
 920           return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
 921                                RHSIsKill, ExtendType, C->getZExtValue(),
 922                                SetFlags, WantResult);
 923         }
 924     unsigned RHSReg = getRegForValue(RHS);
 925     if (!RHSReg)
 926       return 0;
 927     bool RHSIsKill = hasTrivialKill(RHS);
 928     return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
 929                          ExtendType, 0, SetFlags, WantResult);
 930   }
 931
 932   // Check if the shift can be folded into the instruction.
 933   if (isValueAvailable(RHS))
 934     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
 935       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
 936         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
 937         switch (SI->getOpcode()) {
 938         default: break;
 939         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
 940         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
 941         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
 942         }
 943         uint64_t ShiftVal = C->getZExtValue();
 944         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
 945           unsigned RHSReg = getRegForValue(SI->getOperand(0));
 946           if (!RHSReg)
 947             return 0;
 948           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
 949           return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
 950                                RHSIsKill, ShiftType, ShiftVal, SetFlags,
 951                                WantResult);
 952         }
 953       }
 954     }
 955
 956   unsigned RHSReg = getRegForValue(RHS);
 957   if (!RHSReg)
 958     return 0;
 959   bool RHSIsKill = hasTrivialKill(RHS);
 960
 961   if (NeedExtend)
 962     RHSReg = EmitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
 963
 964   return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
 965                        SetFlags, WantResult);
 966 }
 967
 968 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
 969                                         bool LHSIsKill, unsigned RHSReg,
 970                                         bool RHSIsKill, bool SetFlags,
 971                                         bool WantResult) {
 972   assert(LHSReg && RHSReg && "Invalid register number.");
 973
 974   if (RetVT != MVT::i32 && RetVT != MVT::i64)
 975     return 0;
 976
 977   static const unsigned OpcTable[2][2][2] = {
 978     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
 979       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
 980     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
 981       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
 982   };
 983   bool Is64Bit = RetVT == MVT::i64;
 984   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
 985   const TargetRegisterClass *RC =
 986       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
 987   unsigned ResultReg;
 988   if (WantResult)
 989     ResultReg = createResultReg(RC);
 990   else
 991     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
 992
 993   const MCInstrDesc &II = TII.get(Opc);
 994   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
 995   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
 996   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
 997       .addReg(LHSReg, getKillRegState(LHSIsKill))
 998       .addReg(RHSReg, getKillRegState(RHSIsKill));
 999   return ResultReg;
1000 }
1001
1002 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1003                                         bool LHSIsKill, uint64_t Imm,
1004                                         bool SetFlags, bool WantResult) {
1005   assert(LHSReg && "Invalid register number.");
1006
1007   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1008     return 0;
1009
1010   unsigned ShiftImm;
1011   if (isUInt<12>(Imm))
1012     ShiftImm = 0;
1013   else if ((Imm & 0xfff000) == Imm) {
1014     ShiftImm = 12;
1015     Imm >>= 12;
1016   } else
1017     return 0;
1018
1019   static const unsigned OpcTable[2][2][2] = {
1020     { { AArch64::SUBWri,  AArch64::SUBXri  },
1021       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1022     { { AArch64::SUBSWri, AArch64::SUBSXri },
1023       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1024   };
1025   bool Is64Bit = RetVT == MVT::i64;
1026   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1027   const TargetRegisterClass *RC;
1028   if (SetFlags)
1029     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1030   else
1031     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1032   unsigned ResultReg;
1033   if (WantResult)
1034     ResultReg = createResultReg(RC);
1035   else
1036     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1037
1038   const MCInstrDesc &II = TII.get(Opc);
1039   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1040   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1041       .addReg(LHSReg, getKillRegState(LHSIsKill))
1042       .addImm(Imm)
1043       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1044   return ResultReg;
1045 }
1046
1047 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1048                                         bool LHSIsKill, unsigned RHSReg,
1049                                         bool RHSIsKill,
1050                                         AArch64_AM::ShiftExtendType ShiftType,
1051                                         uint64_t ShiftImm, bool SetFlags,
1052                                         bool WantResult) {
1053   assert(LHSReg && RHSReg && "Invalid register number.");
1054
1055   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1056     return 0;
1057
1058   static const unsigned OpcTable[2][2][2] = {
1059     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1060       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1061     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1062       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1063   };
1064   bool Is64Bit = RetVT == MVT::i64;
1065   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1066   const TargetRegisterClass *RC =
1067       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1068   unsigned ResultReg;
1069   if (WantResult)
1070     ResultReg = createResultReg(RC);
1071   else
1072     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1073
1074   const MCInstrDesc &II = TII.get(Opc);
1075   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1076   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1077   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1078       .addReg(LHSReg, getKillRegState(LHSIsKill))
1079       .addReg(RHSReg, getKillRegState(RHSIsKill))
1080       .addImm(getShifterImm(ShiftType, ShiftImm));
1081   return ResultReg;
1082 }
1083
1084 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1085                                         bool LHSIsKill, unsigned RHSReg,
1086                                         bool RHSIsKill,
1087                                         AArch64_AM::ShiftExtendType ExtType,
1088                                         uint64_t ShiftImm, bool SetFlags,
1089                                         bool WantResult) {
1090   assert(LHSReg && RHSReg && "Invalid register number.");
1091
1092   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1093     return 0;
1094
1095   static const unsigned OpcTable[2][2][2] = {
1096     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1097       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1098     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1099       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1100   };
1101   bool Is64Bit = RetVT == MVT::i64;
1102   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1103   const TargetRegisterClass *RC = nullptr;
1104   if (SetFlags)
1105     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1106   else
1107     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1108   unsigned ResultReg;
1109   if (WantResult)
1110     ResultReg = createResultReg(RC);
1111   else
1112     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1113
1114   const MCInstrDesc &II = TII.get(Opc);
1115   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1116   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1117   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1118       .addReg(LHSReg, getKillRegState(LHSIsKill))
1119       .addReg(RHSReg, getKillRegState(RHSIsKill))
1120       .addImm(getArithExtendImm(ExtType, ShiftImm));
1121   return ResultReg;
1122 }
1123
1124 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1125   Type *Ty = LHS->getType();
1126   EVT EVT = TLI.getValueType(Ty, true);
1127   if (!EVT.isSimple())
1128     return false;
1129   MVT VT = EVT.getSimpleVT();
1130
1131   switch (VT.SimpleTy) {
1132   default:
1133     return false;
1134   case MVT::i1:
1135   case MVT::i8:
1136   case MVT::i16:
1137   case MVT::i32:
1138   case MVT::i64:
1139     return emitICmp(VT, LHS, RHS, IsZExt);
1140   case MVT::f32:
1141   case MVT::f64:
1142     return emitFCmp(VT, LHS, RHS);
1143   }
1144 }
1145
1146 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1147                                bool IsZExt) {
1148   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1149                  IsZExt) != 0;
1150 }
1151
1152 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1153                                   uint64_t Imm) {
1154   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1155                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1156 }
1157
1158 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1159   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1160     return false;
1161
1162   // Check to see if the 2nd operand is a constant that we can encode directly
1163   // in the compare.
1164   bool UseImm = false;
1165   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1166     if (CFP->isZero() && !CFP->isNegative())
1167       UseImm = true;
1168
1169   unsigned LHSReg = getRegForValue(LHS);
1170   if (!LHSReg)
1171     return false;
1172   bool LHSIsKill = hasTrivialKill(LHS);
1173
1174   if (UseImm) {
1175     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1176     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1177         .addReg(LHSReg, getKillRegState(LHSIsKill));
1178     return true;
1179   }
1180
1181   unsigned RHSReg = getRegForValue(RHS);
1182   if (!RHSReg)
1183     return false;
1184   bool RHSIsKill = hasTrivialKill(RHS);
1185
1186   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1187   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1188       .addReg(LHSReg, getKillRegState(LHSIsKill))
1189       .addReg(RHSReg, getKillRegState(RHSIsKill));
1190   return true;
1191 }
1192
1193 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1194                                   bool SetFlags, bool WantResult, bool IsZExt) {
1195   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1196                     IsZExt);
1197 }
1198
1199 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1200                                   bool SetFlags, bool WantResult, bool IsZExt) {
1201   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1202                     IsZExt);
1203 }
1204
1205 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1206                                       bool LHSIsKill, unsigned RHSReg,
1207                                       bool RHSIsKill, bool WantResult) {
1208   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1209                        RHSIsKill, /*SetFlags=*/true, WantResult);
1210 }
1211
1212 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1213                                       bool LHSIsKill, unsigned RHSReg,
1214                                       bool RHSIsKill,
1215                                       AArch64_AM::ShiftExtendType ShiftType,
1216                                       uint64_t ShiftImm, bool WantResult) {
1217   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1218                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1219                        WantResult);
1220 }
1221
1222 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1223                                         const Value *LHS, const Value *RHS) {
1224   // Canonicalize immediates to the RHS first.
1225   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1226     std::swap(LHS, RHS);
1227
1228   // Canonicalize shift immediate to the RHS.
1229   if (isValueAvailable(LHS))
1230     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1231       if (isa<ConstantInt>(SI->getOperand(1)))
1232         if (SI->getOpcode() == Instruction::Shl)
1233           std::swap(LHS, RHS);
1234
1235   unsigned LHSReg = getRegForValue(LHS);
1236   if (!LHSReg)
1237     return 0;
1238   bool LHSIsKill = hasTrivialKill(LHS);
1239
1240   unsigned ResultReg = 0;
1241   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1242     uint64_t Imm = C->getZExtValue();
1243     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1244   }
1245   if (ResultReg)
1246     return ResultReg;
1247
1248   // Check if the shift can be folded into the instruction.
1249   if (isValueAvailable(RHS))
1250     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1251       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1252         if (SI->getOpcode() == Instruction::Shl) {
1253           uint64_t ShiftVal = C->getZExtValue();
1254           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1255           if (!RHSReg)
1256             return 0;
1257           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1258           return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1259                                   RHSIsKill, ShiftVal);
1260         }
1261
1262   unsigned RHSReg = getRegForValue(RHS);
1263   if (!RHSReg)
1264     return 0;
1265   bool RHSIsKill = hasTrivialKill(RHS);
1266
1267   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1268   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1269   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1270     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1271     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1272   }
1273   return ResultReg;
1274 }
1275
1276 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1277                                            unsigned LHSReg, bool LHSIsKill,
1278                                            uint64_t Imm) {
1279   assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1280          "ISD nodes are not consecutive!");
1281   static const unsigned OpcTable[3][2] = {
1282     { AArch64::ANDWri, AArch64::ANDXri },
1283     { AArch64::ORRWri, AArch64::ORRXri },
1284     { AArch64::EORWri, AArch64::EORXri }
1285   };
1286   const TargetRegisterClass *RC;
1287   unsigned Opc;
1288   unsigned RegSize;
1289   switch (RetVT.SimpleTy) {
1290   default:
1291     return 0;
1292   case MVT::i1:
1293   case MVT::i8:
1294   case MVT::i16:
1295   case MVT::i32: {
1296     unsigned Idx = ISDOpc - ISD::AND;
1297     Opc = OpcTable[Idx][0];
1298     RC = &AArch64::GPR32spRegClass;
1299     RegSize = 32;
1300     break;
1301   }
1302   case MVT::i64:
1303     Opc = OpcTable[ISDOpc - ISD::AND][1];
1304     RC = &AArch64::GPR64spRegClass;
1305     RegSize = 64;
1306     break;
1307   }
1308
1309   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1310     return 0;
1311
1312   unsigned ResultReg =
1313       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1314                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1315   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1316     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1317     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1318   }
1319   return ResultReg;
1320 }
1321
1322 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1323                                            unsigned LHSReg, bool LHSIsKill,
1324                                            unsigned RHSReg, bool RHSIsKill,
1325                                            uint64_t ShiftImm) {
1326   assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1327          "ISD nodes are not consecutive!");
1328   static const unsigned OpcTable[3][2] = {
1329     { AArch64::ANDWrs, AArch64::ANDXrs },
1330     { AArch64::ORRWrs, AArch64::ORRXrs },
1331     { AArch64::EORWrs, AArch64::EORXrs }
1332   };
1333   const TargetRegisterClass *RC;
1334   unsigned Opc;
1335   switch (RetVT.SimpleTy) {
1336   default:
1337     return 0;
1338   case MVT::i1:
1339   case MVT::i8:
1340   case MVT::i16:
1341   case MVT::i32:
1342     Opc = OpcTable[ISDOpc - ISD::AND][0];
1343     RC = &AArch64::GPR32RegClass;
1344     break;
1345   case MVT::i64:
1346     Opc = OpcTable[ISDOpc - ISD::AND][1];
1347     RC = &AArch64::GPR64RegClass;
1348     break;
1349   }
1350   unsigned ResultReg =
1351       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1352                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1353   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1354     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1355     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1356   }
1357   return ResultReg;
1358 }
1359
1360 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1361                                      uint64_t Imm) {
1362   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1363 }
1364
1365 bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
1366                                MachineMemOperand *MMO) {
1367   // Simplify this down to something we can handle.
1368   if (!SimplifyAddress(Addr, VT))
1369     return false;
1370
1371   unsigned ScaleFactor;
1372   switch (VT.SimpleTy) {
1373   default: llvm_unreachable("Unexpected value type.");
1374   case MVT::i1:  // fall-through
1375   case MVT::i8:  ScaleFactor = 1; break;
1376   case MVT::i16: ScaleFactor = 2; break;
1377   case MVT::i32: // fall-through
1378   case MVT::f32: ScaleFactor = 4; break;
1379   case MVT::i64: // fall-through
1380   case MVT::f64: ScaleFactor = 8; break;
1381   }
1382
1383   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1384   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1385   bool UseScaled = true;
1386   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1387     UseScaled = false;
1388     ScaleFactor = 1;
1389   }
1390
1391   static const unsigned OpcTable[4][6] = {
1392     { AArch64::LDURBBi,  AArch64::LDURHHi,  AArch64::LDURWi,  AArch64::LDURXi,
1393       AArch64::LDURSi,   AArch64::LDURDi },
1394     { AArch64::LDRBBui,  AArch64::LDRHHui,  AArch64::LDRWui,  AArch64::LDRXui,
1395       AArch64::LDRSui,   AArch64::LDRDui },
1396     { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX,
1397       AArch64::LDRSroX,  AArch64::LDRDroX },
1398     { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW,
1399       AArch64::LDRSroW,  AArch64::LDRDroW }
1400   };
1401
1402   unsigned Opc;
1403   const TargetRegisterClass *RC;
1404   bool VTIsi1 = false;
1405   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1406                       Addr.getOffsetReg();
1407   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1408   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1409       Addr.getExtendType() == AArch64_AM::SXTW)
1410     Idx++;
1411
1412   switch (VT.SimpleTy) {
1413   default: llvm_unreachable("Unexpected value type.");
1414   case MVT::i1:  VTIsi1 = true; // Intentional fall-through.
1415   case MVT::i8:  Opc = OpcTable[Idx][0]; RC = &AArch64::GPR32RegClass; break;
1416   case MVT::i16: Opc = OpcTable[Idx][1]; RC = &AArch64::GPR32RegClass; break;
1417   case MVT::i32: Opc = OpcTable[Idx][2]; RC = &AArch64::GPR32RegClass; break;
1418   case MVT::i64: Opc = OpcTable[Idx][3]; RC = &AArch64::GPR64RegClass; break;
1419   case MVT::f32: Opc = OpcTable[Idx][4]; RC = &AArch64::FPR32RegClass; break;
1420   case MVT::f64: Opc = OpcTable[Idx][5]; RC = &AArch64::FPR64RegClass; break;
1421   }
1422
1423   // Create the base instruction, then add the operands.
1424   ResultReg = createResultReg(RC);
1425   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1426                                     TII.get(Opc), ResultReg);
1427   AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1428
1429   // Loading an i1 requires special handling.
1430   if (VTIsi1) {
1431     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1432     assert(ANDReg && "Unexpected AND instruction emission failure.");
1433     ResultReg = ANDReg;
1434   }
1435   return true;
1436 }
1437
1438 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1439   MVT VT;
1440   if (!isTypeSupported(I->getType(), VT))
1441     return false;
1442
1443   unsigned ResultReg;
1444   if (I->getOpcode() == Instruction::Add)
1445     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1446   else if (I->getOpcode() == Instruction::Sub)
1447     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1448   else
1449     llvm_unreachable("Unexpected instruction.");
1450
1451   assert(ResultReg && "Couldn't select Add/Sub instruction.");
1452   updateValueMap(I, ResultReg);
1453   return true;
1454 }
1455
1456 bool AArch64FastISel::selectLogicalOp(const Instruction *I, unsigned ISDOpc) {
1457   MVT VT;
1458   if (!isTypeSupported(I->getType(), VT))
1459     return false;
1460
1461   unsigned ResultReg =
1462       emitLogicalOp(ISDOpc, VT, I->getOperand(0), I->getOperand(1));
1463   if (!ResultReg)
1464     return false;
1465
1466   updateValueMap(I, ResultReg);
1467   return true;
1468 }
1469
1470 bool AArch64FastISel::SelectLoad(const Instruction *I) {
1471   MVT VT;
1472   // Verify we have a legal type before going any further.  Currently, we handle
1473   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1474   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1475   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1476       cast<LoadInst>(I)->isAtomic())
1477     return false;
1478
1479   // See if we can handle this address.
1480   Address Addr;
1481   if (!ComputeAddress(I->getOperand(0), Addr, I->getType()))
1482     return false;
1483
1484   unsigned ResultReg;
1485   if (!EmitLoad(VT, ResultReg, Addr, createMachineMemOperandFor(I)))
1486     return false;
1487
1488   updateValueMap(I, ResultReg);
1489   return true;
1490 }
1491
1492 bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
1493                                 MachineMemOperand *MMO) {
1494   // Simplify this down to something we can handle.
1495   if (!SimplifyAddress(Addr, VT))
1496     return false;
1497
1498   unsigned ScaleFactor;
1499   switch (VT.SimpleTy) {
1500   default: llvm_unreachable("Unexpected value type.");
1501   case MVT::i1:  // fall-through
1502   case MVT::i8:  ScaleFactor = 1; break;
1503   case MVT::i16: ScaleFactor = 2; break;
1504   case MVT::i32: // fall-through
1505   case MVT::f32: ScaleFactor = 4; break;
1506   case MVT::i64: // fall-through
1507   case MVT::f64: ScaleFactor = 8; break;
1508   }
1509
1510   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1511   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1512   bool UseScaled = true;
1513   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1514     UseScaled = false;
1515     ScaleFactor = 1;
1516   }
1517
1518
1519   static const unsigned OpcTable[4][6] = {
1520     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
1521       AArch64::STURSi,   AArch64::STURDi },
1522     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
1523       AArch64::STRSui,   AArch64::STRDui },
1524     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
1525       AArch64::STRSroX,  AArch64::STRDroX },
1526     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
1527       AArch64::STRSroW,  AArch64::STRDroW }
1528
1529   };
1530
1531   unsigned Opc;
1532   bool VTIsi1 = false;
1533   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1534                       Addr.getOffsetReg();
1535   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1536   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1537       Addr.getExtendType() == AArch64_AM::SXTW)
1538     Idx++;
1539
1540   switch (VT.SimpleTy) {
1541   default: llvm_unreachable("Unexpected value type.");
1542   case MVT::i1:  VTIsi1 = true;
1543   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
1544   case MVT::i16: Opc = OpcTable[Idx][1]; break;
1545   case MVT::i32: Opc = OpcTable[Idx][2]; break;
1546   case MVT::i64: Opc = OpcTable[Idx][3]; break;
1547   case MVT::f32: Opc = OpcTable[Idx][4]; break;
1548   case MVT::f64: Opc = OpcTable[Idx][5]; break;
1549   }
1550
1551   // Storing an i1 requires special handling.
1552   if (VTIsi1 && SrcReg != AArch64::WZR) {
1553     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
1554     assert(ANDReg && "Unexpected AND instruction emission failure.");
1555     SrcReg = ANDReg;
1556   }
1557   // Create the base instruction, then add the operands.
1558   const MCInstrDesc &II = TII.get(Opc);
1559   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
1560   MachineInstrBuilder MIB =
1561       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
1562   AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
1563
1564   return true;
1565 }
1566
1567 bool AArch64FastISel::SelectStore(const Instruction *I) {
1568   MVT VT;
1569   const Value *Op0 = I->getOperand(0);
1570   // Verify we have a legal type before going any further.  Currently, we handle
1571   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1572   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1573   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) ||
1574       cast<StoreInst>(I)->isAtomic())
1575     return false;
1576
1577   // Get the value to be stored into a register. Use the zero register directly
1578   // when possible to avoid an unnecessary copy and a wasted register.
1579   unsigned SrcReg = 0;
1580   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
1581     if (CI->isZero())
1582       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1583   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
1584     if (CF->isZero() && !CF->isNegative()) {
1585       VT = MVT::getIntegerVT(VT.getSizeInBits());
1586       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1587     }
1588   }
1589
1590   if (!SrcReg)
1591     SrcReg = getRegForValue(Op0);
1592
1593   if (!SrcReg)
1594     return false;
1595
1596   // See if we can handle this address.
1597   Address Addr;
1598   if (!ComputeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
1599     return false;
1600
1601   if (!EmitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
1602     return false;
1603   return true;
1604 }
1605
1606 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
1607   switch (Pred) {
1608   case CmpInst::FCMP_ONE:
1609   case CmpInst::FCMP_UEQ:
1610   default:
1611     // AL is our "false" for now. The other two need more compares.
1612     return AArch64CC::AL;
1613   case CmpInst::ICMP_EQ:
1614   case CmpInst::FCMP_OEQ:
1615     return AArch64CC::EQ;
1616   case CmpInst::ICMP_SGT:
1617   case CmpInst::FCMP_OGT:
1618     return AArch64CC::GT;
1619   case CmpInst::ICMP_SGE:
1620   case CmpInst::FCMP_OGE:
1621     return AArch64CC::GE;
1622   case CmpInst::ICMP_UGT:
1623   case CmpInst::FCMP_UGT:
1624     return AArch64CC::HI;
1625   case CmpInst::FCMP_OLT:
1626     return AArch64CC::MI;
1627   case CmpInst::ICMP_ULE:
1628   case CmpInst::FCMP_OLE:
1629     return AArch64CC::LS;
1630   case CmpInst::FCMP_ORD:
1631     return AArch64CC::VC;
1632   case CmpInst::FCMP_UNO:
1633     return AArch64CC::VS;
1634   case CmpInst::FCMP_UGE:
1635     return AArch64CC::PL;
1636   case CmpInst::ICMP_SLT:
1637   case CmpInst::FCMP_ULT:
1638     return AArch64CC::LT;
1639   case CmpInst::ICMP_SLE:
1640   case CmpInst::FCMP_ULE:
1641     return AArch64CC::LE;
1642   case CmpInst::FCMP_UNE:
1643   case CmpInst::ICMP_NE:
1644     return AArch64CC::NE;
1645   case CmpInst::ICMP_UGE:
1646     return AArch64CC::HS;
1647   case CmpInst::ICMP_ULT:
1648     return AArch64CC::LO;
1649   }
1650 }
1651
1652 bool AArch64FastISel::SelectBranch(const Instruction *I) {
1653   const BranchInst *BI = cast<BranchInst>(I);
1654   if (BI->isUnconditional()) {
1655     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
1656     fastEmitBranch(MSucc, BI->getDebugLoc());
1657     return true;
1658   }
1659
1660   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1661   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1662
1663   AArch64CC::CondCode CC = AArch64CC::NE;
1664   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1665     if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
1666       // We may not handle every CC for now.
1667       CC = getCompareCC(CI->getPredicate());
1668       if (CC == AArch64CC::AL)
1669         return false;
1670
1671       // Emit the cmp.
1672       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1673         return false;
1674
1675       // Emit the branch.
1676       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1677           .addImm(CC)
1678           .addMBB(TBB);
1679
1680       // Obtain the branch weight and add the TrueBB to the successor list.
1681       uint32_t BranchWeight = 0;
1682       if (FuncInfo.BPI)
1683         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1684                                                   TBB->getBasicBlock());
1685       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1686
1687       fastEmitBranch(FBB, DbgLoc);
1688       return true;
1689     }
1690   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1691     MVT SrcVT;
1692     if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1693         (isTypeSupported(TI->getOperand(0)->getType(), SrcVT))) {
1694       unsigned CondReg = getRegForValue(TI->getOperand(0));
1695       if (!CondReg)
1696         return false;
1697       bool CondIsKill = hasTrivialKill(TI->getOperand(0));
1698
1699       // Issue an extract_subreg to get the lower 32-bits.
1700       if (SrcVT == MVT::i64) {
1701         CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
1702                                              AArch64::sub_32);
1703         CondIsKill = true;
1704       }
1705
1706       unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
1707       assert(ANDReg && "Unexpected AND instruction emission failure.");
1708       emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
1709
1710       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1711         std::swap(TBB, FBB);
1712         CC = AArch64CC::EQ;
1713       }
1714       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1715           .addImm(CC)
1716           .addMBB(TBB);
1717
1718       // Obtain the branch weight and add the TrueBB to the successor list.
1719       uint32_t BranchWeight = 0;
1720       if (FuncInfo.BPI)
1721         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1722                                                   TBB->getBasicBlock());
1723       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1724
1725       fastEmitBranch(FBB, DbgLoc);
1726       return true;
1727     }
1728   } else if (const ConstantInt *CI =
1729                  dyn_cast<ConstantInt>(BI->getCondition())) {
1730     uint64_t Imm = CI->getZExtValue();
1731     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
1732     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
1733         .addMBB(Target);
1734
1735     // Obtain the branch weight and add the target to the successor list.
1736     uint32_t BranchWeight = 0;
1737     if (FuncInfo.BPI)
1738       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1739                                                  Target->getBasicBlock());
1740     FuncInfo.MBB->addSuccessor(Target, BranchWeight);
1741     return true;
1742   } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
1743     // Fake request the condition, otherwise the intrinsic might be completely
1744     // optimized away.
1745     unsigned CondReg = getRegForValue(BI->getCondition());
1746     if (!CondReg)
1747       return false;
1748
1749     // Emit the branch.
1750     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1751       .addImm(CC)
1752       .addMBB(TBB);
1753
1754     // Obtain the branch weight and add the TrueBB to the successor list.
1755     uint32_t BranchWeight = 0;
1756     if (FuncInfo.BPI)
1757       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1758                                                  TBB->getBasicBlock());
1759     FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1760
1761     fastEmitBranch(FBB, DbgLoc);
1762     return true;
1763   }
1764
1765   unsigned CondReg = getRegForValue(BI->getCondition());
1766   if (CondReg == 0)
1767     return false;
1768   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
1769
1770   // We've been divorced from our compare!  Our block was split, and
1771   // now our compare lives in a predecessor block.  We musn't
1772   // re-compare here, as the children of the compare aren't guaranteed
1773   // live across the block boundary (we *could* check for this).
1774   // Regardless, the compare has been done in the predecessor block,
1775   // and it left a value for us in a virtual register.  Ergo, we test
1776   // the one-bit value left in the virtual register.
1777   emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0);
1778
1779   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1780     std::swap(TBB, FBB);
1781     CC = AArch64CC::EQ;
1782   }
1783
1784   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1785       .addImm(CC)
1786       .addMBB(TBB);
1787
1788   // Obtain the branch weight and add the TrueBB to the successor list.
1789   uint32_t BranchWeight = 0;
1790   if (FuncInfo.BPI)
1791     BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1792                                                TBB->getBasicBlock());
1793   FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1794
1795   fastEmitBranch(FBB, DbgLoc);
1796   return true;
1797 }
1798
1799 bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
1800   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
1801   unsigned AddrReg = getRegForValue(BI->getOperand(0));
1802   if (AddrReg == 0)
1803     return false;
1804
1805   // Emit the indirect branch.
1806   const MCInstrDesc &II = TII.get(AArch64::BR);
1807   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
1808   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
1809
1810   // Make sure the CFG is up-to-date.
1811   for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
1812     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
1813
1814   return true;
1815 }
1816
1817 bool AArch64FastISel::SelectCmp(const Instruction *I) {
1818   const CmpInst *CI = cast<CmpInst>(I);
1819
1820   // Try to optimize or fold the cmp.
1821   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1822   unsigned ResultReg = 0;
1823   switch (Predicate) {
1824   default:
1825     break;
1826   case CmpInst::FCMP_FALSE:
1827     ResultReg = createResultReg(&AArch64::GPR32RegClass);
1828     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1829             TII.get(TargetOpcode::COPY), ResultReg)
1830         .addReg(AArch64::WZR, getKillRegState(true));
1831     break;
1832   case CmpInst::FCMP_TRUE:
1833     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
1834     break;
1835   }
1836
1837   if (ResultReg) {
1838     updateValueMap(I, ResultReg);
1839     return true;
1840   }
1841
1842   // Emit the cmp.
1843   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1844     return false;
1845
1846   ResultReg = createResultReg(&AArch64::GPR32RegClass);
1847
1848   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
1849   // condition codes are inverted, because they are used by CSINC.
1850   static unsigned CondCodeTable[2][2] = {
1851     { AArch64CC::NE, AArch64CC::VC },
1852     { AArch64CC::PL, AArch64CC::LE }
1853   };
1854   unsigned *CondCodes = nullptr;
1855   switch (Predicate) {
1856   default:
1857     break;
1858   case CmpInst::FCMP_UEQ:
1859     CondCodes = &CondCodeTable[0][0];
1860     break;
1861   case CmpInst::FCMP_ONE:
1862     CondCodes = &CondCodeTable[1][0];
1863     break;
1864   }
1865
1866   if (CondCodes) {
1867     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
1868     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
1869             TmpReg1)
1870         .addReg(AArch64::WZR, getKillRegState(true))
1871         .addReg(AArch64::WZR, getKillRegState(true))
1872         .addImm(CondCodes[0]);
1873     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
1874             ResultReg)
1875         .addReg(TmpReg1, getKillRegState(true))
1876         .addReg(AArch64::WZR, getKillRegState(true))
1877         .addImm(CondCodes[1]);
1878
1879     updateValueMap(I, ResultReg);
1880     return true;
1881   }
1882
1883   // Now set a register based on the comparison.
1884   AArch64CC::CondCode CC = getCompareCC(Predicate);
1885   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
1886   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
1887   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
1888           ResultReg)
1889       .addReg(AArch64::WZR, getKillRegState(true))
1890       .addReg(AArch64::WZR, getKillRegState(true))
1891       .addImm(invertedCC);
1892
1893   updateValueMap(I, ResultReg);
1894   return true;
1895 }
1896
1897 bool AArch64FastISel::SelectSelect(const Instruction *I) {
1898   const SelectInst *SI = cast<SelectInst>(I);
1899
1900   EVT DestEVT = TLI.getValueType(SI->getType(), true);
1901   if (!DestEVT.isSimple())
1902     return false;
1903
1904   MVT DestVT = DestEVT.getSimpleVT();
1905   if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
1906       DestVT != MVT::f64)
1907     return false;
1908
1909   unsigned SelectOpc;
1910   const TargetRegisterClass *RC = nullptr;
1911   switch (DestVT.SimpleTy) {
1912   default: return false;
1913   case MVT::i32:
1914     SelectOpc = AArch64::CSELWr;    RC = &AArch64::GPR32RegClass; break;
1915   case MVT::i64:
1916     SelectOpc = AArch64::CSELXr;    RC = &AArch64::GPR64RegClass; break;
1917   case MVT::f32:
1918     SelectOpc = AArch64::FCSELSrrr; RC = &AArch64::FPR32RegClass; break;
1919   case MVT::f64:
1920     SelectOpc = AArch64::FCSELDrrr; RC = &AArch64::FPR64RegClass; break;
1921   }
1922
1923   const Value *Cond = SI->getCondition();
1924   bool NeedTest = true;
1925   AArch64CC::CondCode CC = AArch64CC::NE;
1926   if (foldXALUIntrinsic(CC, I, Cond))
1927     NeedTest = false;
1928
1929   unsigned CondReg = getRegForValue(Cond);
1930   if (!CondReg)
1931     return false;
1932   bool CondIsKill = hasTrivialKill(Cond);
1933
1934   if (NeedTest) {
1935     unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
1936     assert(ANDReg && "Unexpected AND instruction emission failure.");
1937     emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
1938   }
1939
1940   unsigned TrueReg = getRegForValue(SI->getTrueValue());
1941   bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
1942
1943   unsigned FalseReg = getRegForValue(SI->getFalseValue());
1944   bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
1945
1946   if (!TrueReg || !FalseReg)
1947     return false;
1948
1949   unsigned ResultReg = fastEmitInst_rri(SelectOpc, RC, TrueReg, TrueIsKill,
1950                                         FalseReg, FalseIsKill, CC);
1951   updateValueMap(I, ResultReg);
1952   return true;
1953 }
1954
1955 bool AArch64FastISel::SelectFPExt(const Instruction *I) {
1956   Value *V = I->getOperand(0);
1957   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
1958     return false;
1959
1960   unsigned Op = getRegForValue(V);
1961   if (Op == 0)
1962     return false;
1963
1964   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
1965   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
1966           ResultReg).addReg(Op);
1967   updateValueMap(I, ResultReg);
1968   return true;
1969 }
1970
1971 bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
1972   Value *V = I->getOperand(0);
1973   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
1974     return false;
1975
1976   unsigned Op = getRegForValue(V);
1977   if (Op == 0)
1978     return false;
1979
1980   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
1981   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
1982           ResultReg).addReg(Op);
1983   updateValueMap(I, ResultReg);
1984   return true;
1985 }
1986
1987 // FPToUI and FPToSI
1988 bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
1989   MVT DestVT;
1990   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1991     return false;
1992
1993   unsigned SrcReg = getRegForValue(I->getOperand(0));
1994   if (SrcReg == 0)
1995     return false;
1996
1997   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1998   if (SrcVT == MVT::f128)
1999     return false;
2000
2001   unsigned Opc;
2002   if (SrcVT == MVT::f64) {
2003     if (Signed)
2004       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2005     else
2006       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2007   } else {
2008     if (Signed)
2009       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2010     else
2011       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2012   }
2013   unsigned ResultReg = createResultReg(
2014       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2015   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2016       .addReg(SrcReg);
2017   updateValueMap(I, ResultReg);
2018   return true;
2019 }
2020
2021 bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
2022   MVT DestVT;
2023   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2024     return false;
2025   assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2026           "Unexpected value type.");
2027
2028   unsigned SrcReg = getRegForValue(I->getOperand(0));
2029   if (!SrcReg)
2030     return false;
2031   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2032
2033   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2034
2035   // Handle sign-extension.
2036   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2037     SrcReg =
2038         EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2039     if (!SrcReg)
2040       return false;
2041     SrcIsKill = true;
2042   }
2043
2044   unsigned Opc;
2045   if (SrcVT == MVT::i64) {
2046     if (Signed)
2047       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2048     else
2049       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2050   } else {
2051     if (Signed)
2052       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2053     else
2054       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2055   }
2056
2057   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2058                                       SrcIsKill);
2059   updateValueMap(I, ResultReg);
2060   return true;
2061 }
2062
2063 bool AArch64FastISel::fastLowerArguments() {
2064   if (!FuncInfo.CanLowerReturn)
2065     return false;
2066
2067   const Function *F = FuncInfo.Fn;
2068   if (F->isVarArg())
2069     return false;
2070
2071   CallingConv::ID CC = F->getCallingConv();
2072   if (CC != CallingConv::C)
2073     return false;
2074
2075   // Only handle simple cases like i1/i8/i16/i32/i64/f32/f64 of up to 8 GPR and
2076   // FPR each.
2077   unsigned GPRCnt = 0;
2078   unsigned FPRCnt = 0;
2079   unsigned Idx = 0;
2080   for (auto const &Arg : F->args()) {
2081     // The first argument is at index 1.
2082     ++Idx;
2083     if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
2084         F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
2085         F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
2086         F->getAttributes().hasAttribute(Idx, Attribute::Nest))
2087       return false;
2088
2089     Type *ArgTy = Arg.getType();
2090     if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
2091       return false;
2092
2093     EVT ArgVT = TLI.getValueType(ArgTy);
2094     if (!ArgVT.isSimple()) return false;
2095     switch (ArgVT.getSimpleVT().SimpleTy) {
2096     default: return false;
2097     case MVT::i1:
2098     case MVT::i8:
2099     case MVT::i16:
2100     case MVT::i32:
2101     case MVT::i64:
2102       ++GPRCnt;
2103       break;
2104     case MVT::f16:
2105     case MVT::f32:
2106     case MVT::f64:
2107       ++FPRCnt;
2108       break;
2109     }
2110
2111     if (GPRCnt > 8 || FPRCnt > 8)
2112       return false;
2113   }
2114
2115   static const MCPhysReg Registers[5][8] = {
2116     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2117       AArch64::W5, AArch64::W6, AArch64::W7 },
2118     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2119       AArch64::X5, AArch64::X6, AArch64::X7 },
2120     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2121       AArch64::H5, AArch64::H6, AArch64::H7 },
2122     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2123       AArch64::S5, AArch64::S6, AArch64::S7 },
2124     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2125       AArch64::D5, AArch64::D6, AArch64::D7 }
2126   };
2127
2128   unsigned GPRIdx = 0;
2129   unsigned FPRIdx = 0;
2130   for (auto const &Arg : F->args()) {
2131     MVT VT = TLI.getSimpleValueType(Arg.getType());
2132     unsigned SrcReg;
2133     const TargetRegisterClass *RC = nullptr;
2134     switch (VT.SimpleTy) {
2135     default: llvm_unreachable("Unexpected value type.");
2136     case MVT::i1:
2137     case MVT::i8:
2138     case MVT::i16: VT = MVT::i32; // fall-through
2139     case MVT::i32:
2140       SrcReg = Registers[0][GPRIdx++]; RC = &AArch64::GPR32RegClass; break;
2141     case MVT::i64:
2142       SrcReg = Registers[1][GPRIdx++]; RC = &AArch64::GPR64RegClass; break;
2143     case MVT::f16:
2144       SrcReg = Registers[2][FPRIdx++]; RC = &AArch64::FPR16RegClass; break;
2145     case MVT::f32:
2146       SrcReg = Registers[3][FPRIdx++]; RC = &AArch64::FPR32RegClass; break;
2147     case MVT::f64:
2148       SrcReg = Registers[4][FPRIdx++]; RC = &AArch64::FPR64RegClass; break;
2149     }
2150
2151     // Skip unused arguments.
2152     if (Arg.use_empty()) {
2153       updateValueMap(&Arg, 0);
2154       continue;
2155     }
2156
2157     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2158     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2159     // Without this, EmitLiveInCopies may eliminate the livein if its only
2160     // use is a bitcast (which isn't turned into an instruction).
2161     unsigned ResultReg = createResultReg(RC);
2162     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2163             TII.get(TargetOpcode::COPY), ResultReg)
2164         .addReg(DstReg, getKillRegState(true));
2165     updateValueMap(&Arg, ResultReg);
2166   }
2167   return true;
2168 }
2169
2170 bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI,
2171                                       SmallVectorImpl<MVT> &OutVTs,
2172                                       unsigned &NumBytes) {
2173   CallingConv::ID CC = CLI.CallConv;
2174   SmallVector<CCValAssign, 16> ArgLocs;
2175   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2176   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2177
2178   // Get a count of how many bytes are to be pushed on the stack.
2179   NumBytes = CCInfo.getNextStackOffset();
2180
2181   // Issue CALLSEQ_START
2182   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
2183   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
2184     .addImm(NumBytes);
2185
2186   // Process the args.
2187   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2188     CCValAssign &VA = ArgLocs[i];
2189     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
2190     MVT ArgVT = OutVTs[VA.getValNo()];
2191
2192     unsigned ArgReg = getRegForValue(ArgVal);
2193     if (!ArgReg)
2194       return false;
2195
2196     // Handle arg promotion: SExt, ZExt, AExt.
2197     switch (VA.getLocInfo()) {
2198     case CCValAssign::Full:
2199       break;
2200     case CCValAssign::SExt: {
2201       MVT DestVT = VA.getLocVT();
2202       MVT SrcVT = ArgVT;
2203       ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
2204       if (!ArgReg)
2205         return false;
2206       break;
2207     }
2208     case CCValAssign::AExt:
2209     // Intentional fall-through.
2210     case CCValAssign::ZExt: {
2211       MVT DestVT = VA.getLocVT();
2212       MVT SrcVT = ArgVT;
2213       ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
2214       if (!ArgReg)
2215         return false;
2216       break;
2217     }
2218     default:
2219       llvm_unreachable("Unknown arg promotion!");
2220     }
2221
2222     // Now copy/store arg to correct locations.
2223     if (VA.isRegLoc() && !VA.needsCustom()) {
2224       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2225               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
2226       CLI.OutRegs.push_back(VA.getLocReg());
2227     } else if (VA.needsCustom()) {
2228       // FIXME: Handle custom args.
2229       return false;
2230     } else {
2231       assert(VA.isMemLoc() && "Assuming store on stack.");
2232
2233       // Don't emit stores for undef values.
2234       if (isa<UndefValue>(ArgVal))
2235         continue;
2236
2237       // Need to store on the stack.
2238       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
2239
2240       unsigned BEAlign = 0;
2241       if (ArgSize < 8 && !Subtarget->isLittleEndian())
2242         BEAlign = 8 - ArgSize;
2243
2244       Address Addr;
2245       Addr.setKind(Address::RegBase);
2246       Addr.setReg(AArch64::SP);
2247       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
2248
2249       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
2250       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
2251         MachinePointerInfo::getStack(Addr.getOffset()),
2252         MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
2253
2254       if (!EmitStore(ArgVT, ArgReg, Addr, MMO))
2255         return false;
2256     }
2257   }
2258   return true;
2259 }
2260
2261 bool AArch64FastISel::FinishCall(CallLoweringInfo &CLI, MVT RetVT,
2262                                  unsigned NumBytes) {
2263   CallingConv::ID CC = CLI.CallConv;
2264
2265   // Issue CALLSEQ_END
2266   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
2267   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
2268     .addImm(NumBytes).addImm(0);
2269
2270   // Now the return value.
2271   if (RetVT != MVT::isVoid) {
2272     SmallVector<CCValAssign, 16> RVLocs;
2273     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
2274     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
2275
2276     // Only handle a single return value.
2277     if (RVLocs.size() != 1)
2278       return false;
2279
2280     // Copy all of the result registers out of their specified physreg.
2281     MVT CopyVT = RVLocs[0].getValVT();
2282     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
2283     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2284             TII.get(TargetOpcode::COPY), ResultReg)
2285         .addReg(RVLocs[0].getLocReg());
2286     CLI.InRegs.push_back(RVLocs[0].getLocReg());
2287
2288     CLI.ResultReg = ResultReg;
2289     CLI.NumResultRegs = 1;
2290   }
2291
2292   return true;
2293 }
2294
2295 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
2296   CallingConv::ID CC  = CLI.CallConv;
2297   bool IsTailCall     = CLI.IsTailCall;
2298   bool IsVarArg       = CLI.IsVarArg;
2299   const Value *Callee = CLI.Callee;
2300   const char *SymName = CLI.SymName;
2301
2302   // Allow SelectionDAG isel to handle tail calls.
2303   if (IsTailCall)
2304     return false;
2305
2306   CodeModel::Model CM = TM.getCodeModel();
2307   // Only support the small and large code model.
2308   if (CM != CodeModel::Small && CM != CodeModel::Large)
2309     return false;
2310
2311   // FIXME: Add large code model support for ELF.
2312   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
2313     return false;
2314
2315   // Let SDISel handle vararg functions.
2316   if (IsVarArg)
2317     return false;
2318
2319   // FIXME: Only handle *simple* calls for now.
2320   MVT RetVT;
2321   if (CLI.RetTy->isVoidTy())
2322     RetVT = MVT::isVoid;
2323   else if (!isTypeLegal(CLI.RetTy, RetVT))
2324     return false;
2325
2326   for (auto Flag : CLI.OutFlags)
2327     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
2328       return false;
2329
2330   // Set up the argument vectors.
2331   SmallVector<MVT, 16> OutVTs;
2332   OutVTs.reserve(CLI.OutVals.size());
2333
2334   for (auto *Val : CLI.OutVals) {
2335     MVT VT;
2336     if (!isTypeLegal(Val->getType(), VT) &&
2337         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
2338       return false;
2339
2340     // We don't handle vector parameters yet.
2341     if (VT.isVector() || VT.getSizeInBits() > 64)
2342       return false;
2343
2344     OutVTs.push_back(VT);
2345   }
2346
2347   Address Addr;
2348   if (!ComputeCallAddress(Callee, Addr))
2349     return false;
2350
2351   // Handle the arguments now that we've gotten them.
2352   unsigned NumBytes;
2353   if (!ProcessCallArgs(CLI, OutVTs, NumBytes))
2354     return false;
2355
2356   // Issue the call.
2357   MachineInstrBuilder MIB;
2358   if (CM == CodeModel::Small) {
2359     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
2360     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
2361     if (SymName)
2362       MIB.addExternalSymbol(SymName, 0);
2363     else if (Addr.getGlobalValue())
2364       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
2365     else if (Addr.getReg()) {
2366       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
2367       MIB.addReg(Reg);
2368     } else
2369       return false;
2370   } else {
2371     unsigned CallReg = 0;
2372     if (SymName) {
2373       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
2374       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
2375               ADRPReg)
2376         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
2377
2378       CallReg = createResultReg(&AArch64::GPR64RegClass);
2379       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
2380               CallReg)
2381         .addReg(ADRPReg)
2382         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
2383                            AArch64II::MO_NC);
2384     } else if (Addr.getGlobalValue()) {
2385       CallReg = AArch64MaterializeGV(Addr.getGlobalValue());
2386     } else if (Addr.getReg())
2387       CallReg = Addr.getReg();
2388
2389     if (!CallReg)
2390       return false;
2391
2392     const MCInstrDesc &II = TII.get(AArch64::BLR);
2393     CallReg = constrainOperandRegClass(II, CallReg, 0);
2394     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
2395   }
2396
2397   // Add implicit physical register uses to the call.
2398   for (auto Reg : CLI.OutRegs)
2399     MIB.addReg(Reg, RegState::Implicit);
2400
2401   // Add a register mask with the call-preserved registers.
2402   // Proper defs for return values will be added by setPhysRegsDeadExcept().
2403   MIB.addRegMask(TRI.getCallPreservedMask(CC));
2404
2405   CLI.Call = MIB;
2406
2407   // Finish off the call including any return values.
2408   return FinishCall(CLI, RetVT, NumBytes);
2409 }
2410
2411 bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
2412   if (Alignment)
2413     return Len / Alignment <= 4;
2414   else
2415     return Len < 32;
2416 }
2417
2418 bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
2419                                          uint64_t Len, unsigned Alignment) {
2420   // Make sure we don't bloat code by inlining very large memcpy's.
2421   if (!IsMemCpySmall(Len, Alignment))
2422     return false;
2423
2424   int64_t UnscaledOffset = 0;
2425   Address OrigDest = Dest;
2426   Address OrigSrc = Src;
2427
2428   while (Len) {
2429     MVT VT;
2430     if (!Alignment || Alignment >= 8) {
2431       if (Len >= 8)
2432         VT = MVT::i64;
2433       else if (Len >= 4)
2434         VT = MVT::i32;
2435       else if (Len >= 2)
2436         VT = MVT::i16;
2437       else {
2438         VT = MVT::i8;
2439       }
2440     } else {
2441       // Bound based on alignment.
2442       if (Len >= 4 && Alignment == 4)
2443         VT = MVT::i32;
2444       else if (Len >= 2 && Alignment == 2)
2445         VT = MVT::i16;
2446       else {
2447         VT = MVT::i8;
2448       }
2449     }
2450
2451     bool RV;
2452     unsigned ResultReg;
2453     RV = EmitLoad(VT, ResultReg, Src);
2454     if (!RV)
2455       return false;
2456
2457     RV = EmitStore(VT, ResultReg, Dest);
2458     if (!RV)
2459       return false;
2460
2461     int64_t Size = VT.getSizeInBits() / 8;
2462     Len -= Size;
2463     UnscaledOffset += Size;
2464
2465     // We need to recompute the unscaled offset for each iteration.
2466     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
2467     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
2468   }
2469
2470   return true;
2471 }
2472
2473 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
2474 /// into the user. The condition code will only be updated on success.
2475 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
2476                                         const Instruction *I,
2477                                         const Value *Cond) {
2478   if (!isa<ExtractValueInst>(Cond))
2479     return false;
2480
2481   const auto *EV = cast<ExtractValueInst>(Cond);
2482   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
2483     return false;
2484
2485   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
2486   MVT RetVT;
2487   const Function *Callee = II->getCalledFunction();
2488   Type *RetTy =
2489   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
2490   if (!isTypeLegal(RetTy, RetVT))
2491     return false;
2492
2493   if (RetVT != MVT::i32 && RetVT != MVT::i64)
2494     return false;
2495
2496   AArch64CC::CondCode TmpCC;
2497   switch (II->getIntrinsicID()) {
2498     default: return false;
2499     case Intrinsic::sadd_with_overflow:
2500     case Intrinsic::ssub_with_overflow: TmpCC = AArch64CC::VS; break;
2501     case Intrinsic::uadd_with_overflow: TmpCC = AArch64CC::HS; break;
2502     case Intrinsic::usub_with_overflow: TmpCC = AArch64CC::LO; break;
2503     case Intrinsic::smul_with_overflow:
2504     case Intrinsic::umul_with_overflow: TmpCC = AArch64CC::NE; break;
2505   }
2506
2507   // Check if both instructions are in the same basic block.
2508   if (II->getParent() != I->getParent())
2509     return false;
2510
2511   // Make sure nothing is in the way
2512   BasicBlock::const_iterator Start = I;
2513   BasicBlock::const_iterator End = II;
2514   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
2515     // We only expect extractvalue instructions between the intrinsic and the
2516     // instruction to be selected.
2517     if (!isa<ExtractValueInst>(Itr))
2518       return false;
2519
2520     // Check that the extractvalue operand comes from the intrinsic.
2521     const auto *EVI = cast<ExtractValueInst>(Itr);
2522     if (EVI->getAggregateOperand() != II)
2523       return false;
2524   }
2525
2526   CC = TmpCC;
2527   return true;
2528 }
2529
2530 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2531   // FIXME: Handle more intrinsics.
2532   switch (II->getIntrinsicID()) {
2533   default: return false;
2534   case Intrinsic::frameaddress: {
2535     MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
2536     MFI->setFrameAddressIsTaken(true);
2537
2538     const AArch64RegisterInfo *RegInfo =
2539         static_cast<const AArch64RegisterInfo *>(
2540             TM.getSubtargetImpl()->getRegisterInfo());
2541     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
2542     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2543     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2544             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
2545     // Recursively load frame address
2546     // ldr x0, [fp]
2547     // ldr x0, [x0]
2548     // ldr x0, [x0]
2549     // ...
2550     unsigned DestReg;
2551     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2552     while (Depth--) {
2553       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
2554                                 SrcReg, /*IsKill=*/true, 0);
2555       assert(DestReg && "Unexpected LDR instruction emission failure.");
2556       SrcReg = DestReg;
2557     }
2558
2559     updateValueMap(II, SrcReg);
2560     return true;
2561   }
2562   case Intrinsic::memcpy:
2563   case Intrinsic::memmove: {
2564     const auto *MTI = cast<MemTransferInst>(II);
2565     // Don't handle volatile.
2566     if (MTI->isVolatile())
2567       return false;
2568
2569     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
2570     // we would emit dead code because we don't currently handle memmoves.
2571     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
2572     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
2573       // Small memcpy's are common enough that we want to do them without a call
2574       // if possible.
2575       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
2576       unsigned Alignment = MTI->getAlignment();
2577       if (IsMemCpySmall(Len, Alignment)) {
2578         Address Dest, Src;
2579         if (!ComputeAddress(MTI->getRawDest(), Dest) ||
2580             !ComputeAddress(MTI->getRawSource(), Src))
2581           return false;
2582         if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
2583           return true;
2584       }
2585     }
2586
2587     if (!MTI->getLength()->getType()->isIntegerTy(64))
2588       return false;
2589
2590     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
2591       // Fast instruction selection doesn't support the special
2592       // address spaces.
2593       return false;
2594
2595     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
2596     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
2597   }
2598   case Intrinsic::memset: {
2599     const MemSetInst *MSI = cast<MemSetInst>(II);
2600     // Don't handle volatile.
2601     if (MSI->isVolatile())
2602       return false;
2603
2604     if (!MSI->getLength()->getType()->isIntegerTy(64))
2605       return false;
2606
2607     if (MSI->getDestAddressSpace() > 255)
2608       // Fast instruction selection doesn't support the special
2609       // address spaces.
2610       return false;
2611
2612     return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
2613   }
2614   case Intrinsic::trap: {
2615     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
2616         .addImm(1);
2617     return true;
2618   }
2619   case Intrinsic::sqrt: {
2620     Type *RetTy = II->getCalledFunction()->getReturnType();
2621
2622     MVT VT;
2623     if (!isTypeLegal(RetTy, VT))
2624       return false;
2625
2626     unsigned Op0Reg = getRegForValue(II->getOperand(0));
2627     if (!Op0Reg)
2628       return false;
2629     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
2630
2631     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
2632     if (!ResultReg)
2633       return false;
2634
2635     updateValueMap(II, ResultReg);
2636     return true;
2637   }
2638   case Intrinsic::sadd_with_overflow:
2639   case Intrinsic::uadd_with_overflow:
2640   case Intrinsic::ssub_with_overflow:
2641   case Intrinsic::usub_with_overflow:
2642   case Intrinsic::smul_with_overflow:
2643   case Intrinsic::umul_with_overflow: {
2644     // This implements the basic lowering of the xalu with overflow intrinsics.
2645     const Function *Callee = II->getCalledFunction();
2646     auto *Ty = cast<StructType>(Callee->getReturnType());
2647     Type *RetTy = Ty->getTypeAtIndex(0U);
2648
2649     MVT VT;
2650     if (!isTypeLegal(RetTy, VT))
2651       return false;
2652
2653     if (VT != MVT::i32 && VT != MVT::i64)
2654       return false;
2655
2656     const Value *LHS = II->getArgOperand(0);
2657     const Value *RHS = II->getArgOperand(1);
2658     // Canonicalize immediate to the RHS.
2659     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2660         isCommutativeIntrinsic(II))
2661       std::swap(LHS, RHS);
2662
2663     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
2664     AArch64CC::CondCode CC = AArch64CC::Invalid;
2665     switch (II->getIntrinsicID()) {
2666     default: llvm_unreachable("Unexpected intrinsic!");
2667     case Intrinsic::sadd_with_overflow:
2668       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
2669       CC = AArch64CC::VS;
2670       break;
2671     case Intrinsic::uadd_with_overflow:
2672       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
2673       CC = AArch64CC::HS;
2674       break;
2675     case Intrinsic::ssub_with_overflow:
2676       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
2677       CC = AArch64CC::VS;
2678       break;
2679     case Intrinsic::usub_with_overflow:
2680       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
2681       CC = AArch64CC::LO;
2682       break;
2683     case Intrinsic::smul_with_overflow: {
2684       CC = AArch64CC::NE;
2685       unsigned LHSReg = getRegForValue(LHS);
2686       if (!LHSReg)
2687         return false;
2688       bool LHSIsKill = hasTrivialKill(LHS);
2689
2690       unsigned RHSReg = getRegForValue(RHS);
2691       if (!RHSReg)
2692         return false;
2693       bool RHSIsKill = hasTrivialKill(RHS);
2694
2695       if (VT == MVT::i32) {
2696         MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2697         unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
2698                                        /*IsKill=*/false, 32);
2699         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
2700                                             AArch64::sub_32);
2701         ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
2702                                               AArch64::sub_32);
2703         emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
2704                     AArch64_AM::ASR, 31, /*WantResult=*/false);
2705       } else {
2706         assert(VT == MVT::i64 && "Unexpected value type.");
2707         MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2708         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
2709                                         RHSReg, RHSIsKill);
2710         emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
2711                     AArch64_AM::ASR, 63, /*WantResult=*/false);
2712       }
2713       break;
2714     }
2715     case Intrinsic::umul_with_overflow: {
2716       CC = AArch64CC::NE;
2717       unsigned LHSReg = getRegForValue(LHS);
2718       if (!LHSReg)
2719         return false;
2720       bool LHSIsKill = hasTrivialKill(LHS);
2721
2722       unsigned RHSReg = getRegForValue(RHS);
2723       if (!RHSReg)
2724         return false;
2725       bool RHSIsKill = hasTrivialKill(RHS);
2726
2727       if (VT == MVT::i32) {
2728         MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2729         emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
2730                     /*IsKill=*/false, AArch64_AM::LSR, 32,
2731                     /*WantResult=*/false);
2732         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
2733                                             AArch64::sub_32);
2734       } else {
2735         assert(VT == MVT::i64 && "Unexpected value type.");
2736         MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2737         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
2738                                         RHSReg, RHSIsKill);
2739         emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
2740                     /*IsKill=*/false, /*WantResult=*/false);
2741       }
2742       break;
2743     }
2744     }
2745
2746     if (MulReg) {
2747       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
2748       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2749               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
2750     }
2751
2752     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
2753                                   AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
2754                                   /*IsKill=*/true, getInvertedCondCode(CC));
2755     assert((ResultReg1 + 1) == ResultReg2 &&
2756            "Nonconsecutive result registers.");
2757     updateValueMap(II, ResultReg1, 2);
2758     return true;
2759   }
2760   }
2761   return false;
2762 }
2763
2764 bool AArch64FastISel::SelectRet(const Instruction *I) {
2765   const ReturnInst *Ret = cast<ReturnInst>(I);
2766   const Function &F = *I->getParent()->getParent();
2767
2768   if (!FuncInfo.CanLowerReturn)
2769     return false;
2770
2771   if (F.isVarArg())
2772     return false;
2773
2774   // Build a list of return value registers.
2775   SmallVector<unsigned, 4> RetRegs;
2776
2777   if (Ret->getNumOperands() > 0) {
2778     CallingConv::ID CC = F.getCallingConv();
2779     SmallVector<ISD::OutputArg, 4> Outs;
2780     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
2781
2782     // Analyze operands of the call, assigning locations to each operand.
2783     SmallVector<CCValAssign, 16> ValLocs;
2784     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
2785     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
2786                                                      : RetCC_AArch64_AAPCS;
2787     CCInfo.AnalyzeReturn(Outs, RetCC);
2788
2789     // Only handle a single return value for now.
2790     if (ValLocs.size() != 1)
2791       return false;
2792
2793     CCValAssign &VA = ValLocs[0];
2794     const Value *RV = Ret->getOperand(0);
2795
2796     // Don't bother handling odd stuff for now.
2797     if (VA.getLocInfo() != CCValAssign::Full)
2798       return false;
2799     // Only handle register returns for now.
2800     if (!VA.isRegLoc())
2801       return false;
2802     unsigned Reg = getRegForValue(RV);
2803     if (Reg == 0)
2804       return false;
2805
2806     unsigned SrcReg = Reg + VA.getValNo();
2807     unsigned DestReg = VA.getLocReg();
2808     // Avoid a cross-class copy. This is very unlikely.
2809     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
2810       return false;
2811
2812     EVT RVEVT = TLI.getValueType(RV->getType());
2813     if (!RVEVT.isSimple())
2814       return false;
2815
2816     // Vectors (of > 1 lane) in big endian need tricky handling.
2817     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1)
2818       return false;
2819
2820     MVT RVVT = RVEVT.getSimpleVT();
2821     if (RVVT == MVT::f128)
2822       return false;
2823     MVT DestVT = VA.getValVT();
2824     // Special handling for extended integers.
2825     if (RVVT != DestVT) {
2826       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
2827         return false;
2828
2829       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
2830         return false;
2831
2832       bool isZExt = Outs[0].Flags.isZExt();
2833       SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
2834       if (SrcReg == 0)
2835         return false;
2836     }
2837
2838     // Make the copy.
2839     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2840             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
2841
2842     // Add register to return instruction.
2843     RetRegs.push_back(VA.getLocReg());
2844   }
2845
2846   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2847                                     TII.get(AArch64::RET_ReallyLR));
2848   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
2849     MIB.addReg(RetRegs[i], RegState::Implicit);
2850   return true;
2851 }
2852
2853 bool AArch64FastISel::SelectTrunc(const Instruction *I) {
2854   Type *DestTy = I->getType();
2855   Value *Op = I->getOperand(0);
2856   Type *SrcTy = Op->getType();
2857
2858   EVT SrcEVT = TLI.getValueType(SrcTy, true);
2859   EVT DestEVT = TLI.getValueType(DestTy, true);
2860   if (!SrcEVT.isSimple())
2861     return false;
2862   if (!DestEVT.isSimple())
2863     return false;
2864
2865   MVT SrcVT = SrcEVT.getSimpleVT();
2866   MVT DestVT = DestEVT.getSimpleVT();
2867
2868   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
2869       SrcVT != MVT::i8)
2870     return false;
2871   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
2872       DestVT != MVT::i1)
2873     return false;
2874
2875   unsigned SrcReg = getRegForValue(Op);
2876   if (!SrcReg)
2877     return false;
2878   bool SrcIsKill = hasTrivialKill(Op);
2879
2880   // If we're truncating from i64 to a smaller non-legal type then generate an
2881   // AND. Otherwise, we know the high bits are undefined and a truncate only
2882   // generate a COPY. We cannot mark the source register also as result
2883   // register, because this can incorrectly transfer the kill flag onto the
2884   // source register.
2885   unsigned ResultReg;
2886   if (SrcVT == MVT::i64) {
2887     uint64_t Mask = 0;
2888     switch (DestVT.SimpleTy) {
2889     default:
2890       // Trunc i64 to i32 is handled by the target-independent fast-isel.
2891       return false;
2892     case MVT::i1:
2893       Mask = 0x1;
2894       break;
2895     case MVT::i8:
2896       Mask = 0xff;
2897       break;
2898     case MVT::i16:
2899       Mask = 0xffff;
2900       break;
2901     }
2902     // Issue an extract_subreg to get the lower 32-bits.
2903     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2904                                                 AArch64::sub_32);
2905     // Create the AND instruction which performs the actual truncation.
2906     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
2907     assert(ResultReg && "Unexpected AND instruction emission failure.");
2908   } else {
2909     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2910     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2911             TII.get(TargetOpcode::COPY), ResultReg)
2912         .addReg(SrcReg, getKillRegState(SrcIsKill));
2913   }
2914
2915   updateValueMap(I, ResultReg);
2916   return true;
2917 }
2918
2919 unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
2920   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
2921           DestVT == MVT::i64) &&
2922          "Unexpected value type.");
2923   // Handle i8 and i16 as i32.
2924   if (DestVT == MVT::i8 || DestVT == MVT::i16)
2925     DestVT = MVT::i32;
2926
2927   if (isZExt) {
2928     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2929     assert(ResultReg && "Unexpected AND instruction emission failure.");
2930     if (DestVT == MVT::i64) {
2931       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
2932       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
2933       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2934       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2935               TII.get(AArch64::SUBREG_TO_REG), Reg64)
2936           .addImm(0)
2937           .addReg(ResultReg)
2938           .addImm(AArch64::sub_32);
2939       ResultReg = Reg64;
2940     }
2941     return ResultReg;
2942   } else {
2943     if (DestVT == MVT::i64) {
2944       // FIXME: We're SExt i1 to i64.
2945       return 0;
2946     }
2947     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
2948                             /*TODO:IsKill=*/false, 0, 0);
2949   }
2950 }
2951
2952 unsigned AArch64FastISel::Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2953                                       unsigned Op1, bool Op1IsKill) {
2954   unsigned Opc, ZReg;
2955   switch (RetVT.SimpleTy) {
2956   default: return 0;
2957   case MVT::i8:
2958   case MVT::i16:
2959   case MVT::i32:
2960     RetVT = MVT::i32;
2961     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
2962   case MVT::i64:
2963     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
2964   }
2965
2966   const TargetRegisterClass *RC =
2967       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
2968   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
2969                           /*IsKill=*/ZReg, true);
2970 }
2971
2972 unsigned AArch64FastISel::Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2973                                         unsigned Op1, bool Op1IsKill) {
2974   if (RetVT != MVT::i64)
2975     return 0;
2976
2977   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
2978                           Op0, Op0IsKill, Op1, Op1IsKill,
2979                           AArch64::XZR, /*IsKill=*/true);
2980 }
2981
2982 unsigned AArch64FastISel::Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2983                                         unsigned Op1, bool Op1IsKill) {
2984   if (RetVT != MVT::i64)
2985     return 0;
2986
2987   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
2988                           Op0, Op0IsKill, Op1, Op1IsKill,
2989                           AArch64::XZR, /*IsKill=*/true);
2990 }
2991
2992 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
2993                                      unsigned Op1Reg, bool Op1IsKill) {
2994   unsigned Opc = 0;
2995   bool NeedTrunc = false;
2996   uint64_t Mask = 0;
2997   switch (RetVT.SimpleTy) {
2998   default: return 0;
2999   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
3000   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
3001   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
3002   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
3003   }
3004
3005   const TargetRegisterClass *RC =
3006       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3007   if (NeedTrunc) {
3008     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3009     Op1IsKill = true;
3010   }
3011   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3012                                        Op1IsKill);
3013   if (NeedTrunc)
3014     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3015   return ResultReg;
3016 }
3017
3018 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3019                                      bool Op0IsKill, uint64_t Shift,
3020                                      bool IsZext) {
3021   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3022          "Unexpected source/return type pair.");
3023   assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3024           SrcVT == MVT::i64) && "Unexpected source value type.");
3025   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3026           RetVT == MVT::i64) && "Unexpected return value type.");
3027
3028   bool Is64Bit = (RetVT == MVT::i64);
3029   unsigned RegSize = Is64Bit ? 64 : 32;
3030   unsigned DstBits = RetVT.getSizeInBits();
3031   unsigned SrcBits = SrcVT.getSizeInBits();
3032
3033   // Don't deal with undefined shifts.
3034   if (Shift >= DstBits)
3035     return 0;
3036
3037   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3038   // {S|U}BFM Wd, Wn, #r, #s
3039   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
3040
3041   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3042   // %2 = shl i16 %1, 4
3043   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
3044   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
3045   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
3046   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
3047
3048   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3049   // %2 = shl i16 %1, 8
3050   // Wd<32+7-24,32-24> = Wn<7:0>
3051   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
3052   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
3053   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
3054
3055   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3056   // %2 = shl i16 %1, 12
3057   // Wd<32+3-20,32-20> = Wn<3:0>
3058   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
3059   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
3060   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
3061
3062   unsigned ImmR = RegSize - Shift;
3063   // Limit the width to the length of the source type.
3064   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
3065   static const unsigned OpcTable[2][2] = {
3066     {AArch64::SBFMWri, AArch64::SBFMXri},
3067     {AArch64::UBFMWri, AArch64::UBFMXri}
3068   };
3069   unsigned Opc = OpcTable[IsZext][Is64Bit];
3070   const TargetRegisterClass *RC =
3071       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3072   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3073     unsigned TmpReg = MRI.createVirtualRegister(RC);
3074     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3075             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3076         .addImm(0)
3077         .addReg(Op0, getKillRegState(Op0IsKill))
3078         .addImm(AArch64::sub_32);
3079     Op0 = TmpReg;
3080     Op0IsKill = true;
3081   }
3082   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3083 }
3084
3085 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3086                                      unsigned Op1Reg, bool Op1IsKill) {
3087   unsigned Opc = 0;
3088   bool NeedTrunc = false;
3089   uint64_t Mask = 0;
3090   switch (RetVT.SimpleTy) {
3091   default: return 0;
3092   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
3093   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
3094   case MVT::i32: Opc = AArch64::LSRVWr; break;
3095   case MVT::i64: Opc = AArch64::LSRVXr; break;
3096   }
3097
3098   const TargetRegisterClass *RC =
3099       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3100   if (NeedTrunc) {
3101     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
3102     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3103     Op0IsKill = Op1IsKill = true;
3104   }
3105   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3106                                        Op1IsKill);
3107   if (NeedTrunc)
3108     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3109   return ResultReg;
3110 }
3111
3112 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3113                                      bool Op0IsKill, uint64_t Shift,
3114                                      bool IsZExt) {
3115   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3116          "Unexpected source/return type pair.");
3117   assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3118           SrcVT == MVT::i64) && "Unexpected source value type.");
3119   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3120           RetVT == MVT::i64) && "Unexpected return value type.");
3121
3122   bool Is64Bit = (RetVT == MVT::i64);
3123   unsigned RegSize = Is64Bit ? 64 : 32;
3124   unsigned DstBits = RetVT.getSizeInBits();
3125   unsigned SrcBits = SrcVT.getSizeInBits();
3126
3127   // Don't deal with undefined shifts.
3128   if (Shift >= DstBits)
3129     return 0;
3130
3131   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3132   // {S|U}BFM Wd, Wn, #r, #s
3133   // Wd<s-r:0> = Wn<s:r> when r <= s
3134
3135   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3136   // %2 = lshr i16 %1, 4
3137   // Wd<7-4:0> = Wn<7:4>
3138   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
3139   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
3140   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
3141
3142   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3143   // %2 = lshr i16 %1, 8
3144   // Wd<7-7,0> = Wn<7:7>
3145   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
3146   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3147   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3148
3149   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3150   // %2 = lshr i16 %1, 12
3151   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
3152   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
3153   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3154   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3155
3156   if (Shift >= SrcBits && IsZExt)
3157     return AArch64MaterializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)),
3158                                  RetVT);
3159
3160   // It is not possible to fold a sign-extend into the LShr instruction. In this
3161   // case emit a sign-extend.
3162   if (!IsZExt) {
3163     Op0 = EmitIntExt(SrcVT, Op0, RetVT, IsZExt);
3164     if (!Op0)
3165       return 0;
3166     Op0IsKill = true;
3167     SrcVT = RetVT;
3168     SrcBits = SrcVT.getSizeInBits();
3169     IsZExt = true;
3170   }
3171
3172   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
3173   unsigned ImmS = SrcBits - 1;
3174   static const unsigned OpcTable[2][2] = {
3175     {AArch64::SBFMWri, AArch64::SBFMXri},
3176     {AArch64::UBFMWri, AArch64::UBFMXri}
3177   };
3178   unsigned Opc = OpcTable[IsZExt][Is64Bit];
3179   const TargetRegisterClass *RC =
3180       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3181   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3182     unsigned TmpReg = MRI.createVirtualRegister(RC);
3183     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3184             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3185         .addImm(0)
3186         .addReg(Op0, getKillRegState(Op0IsKill))
3187         .addImm(AArch64::sub_32);
3188     Op0 = TmpReg;
3189     Op0IsKill = true;
3190   }
3191   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3192 }
3193
3194 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3195                                      unsigned Op1Reg, bool Op1IsKill) {
3196   unsigned Opc = 0;
3197   bool NeedTrunc = false;
3198   uint64_t Mask = 0;
3199   switch (RetVT.SimpleTy) {
3200   default: return 0;
3201   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
3202   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
3203   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
3204   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
3205   }
3206
3207   const TargetRegisterClass *RC =
3208       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3209   if (NeedTrunc) {
3210     Op0Reg = EmitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
3211     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3212     Op0IsKill = Op1IsKill = true;
3213   }
3214   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3215                                        Op1IsKill);
3216   if (NeedTrunc)
3217     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3218   return ResultReg;
3219 }
3220
3221 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3222                                      bool Op0IsKill, uint64_t Shift,
3223                                      bool IsZExt) {
3224   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3225          "Unexpected source/return type pair.");
3226   assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3227           SrcVT == MVT::i64) && "Unexpected source value type.");
3228   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3229           RetVT == MVT::i64) && "Unexpected return value type.");
3230
3231   bool Is64Bit = (RetVT == MVT::i64);
3232   unsigned RegSize = Is64Bit ? 64 : 32;
3233   unsigned DstBits = RetVT.getSizeInBits();
3234   unsigned SrcBits = SrcVT.getSizeInBits();
3235
3236   // Don't deal with undefined shifts.
3237   if (Shift >= DstBits)
3238     return 0;
3239
3240   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3241   // {S|U}BFM Wd, Wn, #r, #s
3242   // Wd<s-r:0> = Wn<s:r> when r <= s
3243
3244   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3245   // %2 = ashr i16 %1, 4
3246   // Wd<7-4:0> = Wn<7:4>
3247   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
3248   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
3249   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
3250
3251   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3252   // %2 = ashr i16 %1, 8
3253   // Wd<7-7,0> = Wn<7:7>
3254   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
3255   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3256   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3257
3258   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3259   // %2 = ashr i16 %1, 12
3260   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
3261   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
3262   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3263   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3264
3265   if (Shift >= SrcBits && IsZExt)
3266     return AArch64MaterializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)),
3267                                  RetVT);
3268
3269   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
3270   unsigned ImmS = SrcBits - 1;
3271   static const unsigned OpcTable[2][2] = {
3272     {AArch64::SBFMWri, AArch64::SBFMXri},
3273     {AArch64::UBFMWri, AArch64::UBFMXri}
3274   };
3275   unsigned Opc = OpcTable[IsZExt][Is64Bit];
3276   const TargetRegisterClass *RC =
3277       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3278   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3279     unsigned TmpReg = MRI.createVirtualRegister(RC);
3280     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3281             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3282         .addImm(0)
3283         .addReg(Op0, getKillRegState(Op0IsKill))
3284         .addImm(AArch64::sub_32);
3285     Op0 = TmpReg;
3286     Op0IsKill = true;
3287   }
3288   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3289 }
3290
3291 unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
3292                                      bool isZExt) {
3293   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
3294
3295   // FastISel does not have plumbing to deal with extensions where the SrcVT or
3296   // DestVT are odd things, so test to make sure that they are both types we can
3297   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
3298   // bail out to SelectionDAG.
3299   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
3300        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
3301       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
3302        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
3303     return 0;
3304
3305   unsigned Opc;
3306   unsigned Imm = 0;
3307
3308   switch (SrcVT.SimpleTy) {
3309   default:
3310     return 0;
3311   case MVT::i1:
3312     return Emiti1Ext(SrcReg, DestVT, isZExt);
3313   case MVT::i8:
3314     if (DestVT == MVT::i64)
3315       Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3316     else
3317       Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
3318     Imm = 7;
3319     break;
3320   case MVT::i16:
3321     if (DestVT == MVT::i64)
3322       Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3323     else
3324       Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
3325     Imm = 15;
3326     break;
3327   case MVT::i32:
3328     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
3329     Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3330     Imm = 31;
3331     break;
3332   }
3333
3334   // Handle i8 and i16 as i32.
3335   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3336     DestVT = MVT::i32;
3337   else if (DestVT == MVT::i64) {
3338     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3339     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3340             TII.get(AArch64::SUBREG_TO_REG), Src64)
3341         .addImm(0)
3342         .addReg(SrcReg)
3343         .addImm(AArch64::sub_32);
3344     SrcReg = Src64;
3345   }
3346
3347   const TargetRegisterClass *RC =
3348       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3349   return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
3350 }
3351
3352 bool AArch64FastISel::SelectIntExt(const Instruction *I) {
3353   // On ARM, in general, integer casts don't involve legal types; this code
3354   // handles promotable integers.  The high bits for a type smaller than
3355   // the register size are assumed to be undefined.
3356   Type *DestTy = I->getType();
3357   Value *Src = I->getOperand(0);
3358   Type *SrcTy = Src->getType();
3359
3360   bool isZExt = isa<ZExtInst>(I);
3361   unsigned SrcReg = getRegForValue(Src);
3362   if (!SrcReg)
3363     return false;
3364
3365   EVT SrcEVT = TLI.getValueType(SrcTy, true);
3366   EVT DestEVT = TLI.getValueType(DestTy, true);
3367   if (!SrcEVT.isSimple())
3368     return false;
3369   if (!DestEVT.isSimple())
3370     return false;
3371
3372   MVT SrcVT = SrcEVT.getSimpleVT();
3373   MVT DestVT = DestEVT.getSimpleVT();
3374   unsigned ResultReg = 0;
3375
3376   // Check if it is an argument and if it is already zero/sign-extended.
3377   if (const auto *Arg = dyn_cast<Argument>(Src)) {
3378     if ((isZExt && Arg->hasZExtAttr()) || (!isZExt && Arg->hasSExtAttr())) {
3379       if (DestVT == MVT::i64) {
3380         ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
3381         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3382                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
3383           .addImm(0)
3384           .addReg(SrcReg)
3385           .addImm(AArch64::sub_32);
3386       } else
3387         ResultReg = SrcReg;
3388     }
3389   }
3390
3391   if (!ResultReg)
3392     ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
3393
3394   if (!ResultReg)
3395     return false;
3396
3397   updateValueMap(I, ResultReg);
3398   return true;
3399 }
3400
3401 bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
3402   EVT DestEVT = TLI.getValueType(I->getType(), true);
3403   if (!DestEVT.isSimple())
3404     return false;
3405
3406   MVT DestVT = DestEVT.getSimpleVT();
3407   if (DestVT != MVT::i64 && DestVT != MVT::i32)
3408     return false;
3409
3410   unsigned DivOpc;
3411   bool is64bit = (DestVT == MVT::i64);
3412   switch (ISDOpcode) {
3413   default:
3414     return false;
3415   case ISD::SREM:
3416     DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
3417     break;
3418   case ISD::UREM:
3419     DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
3420     break;
3421   }
3422   unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
3423   unsigned Src0Reg = getRegForValue(I->getOperand(0));
3424   if (!Src0Reg)
3425     return false;
3426   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
3427
3428   unsigned Src1Reg = getRegForValue(I->getOperand(1));
3429   if (!Src1Reg)
3430     return false;
3431   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
3432
3433   const TargetRegisterClass *RC =
3434       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3435   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
3436                                      Src1Reg, /*IsKill=*/false);
3437   assert(QuotReg && "Unexpected DIV instruction emission failure.");
3438   // The remainder is computed as numerator - (quotient * denominator) using the
3439   // MSUB instruction.
3440   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
3441                                         Src1Reg, Src1IsKill, Src0Reg,
3442                                         Src0IsKill);
3443   updateValueMap(I, ResultReg);
3444   return true;
3445 }
3446
3447 bool AArch64FastISel::SelectMul(const Instruction *I) {
3448   EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
3449   if (!SrcEVT.isSimple())
3450     return false;
3451   MVT SrcVT = SrcEVT.getSimpleVT();
3452
3453   // Must be simple value type.  Don't handle vectors.
3454   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3455       SrcVT != MVT::i8)
3456     return false;
3457
3458   unsigned Src0Reg = getRegForValue(I->getOperand(0));
3459   if (!Src0Reg)
3460     return false;
3461   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
3462
3463   unsigned Src1Reg = getRegForValue(I->getOperand(1));
3464   if (!Src1Reg)
3465     return false;
3466   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
3467
3468   unsigned ResultReg =
3469     Emit_MUL_rr(SrcVT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
3470
3471   if (!ResultReg)
3472     return false;
3473
3474   updateValueMap(I, ResultReg);
3475   return true;
3476 }
3477
3478 bool AArch64FastISel::SelectShift(const Instruction *I) {
3479   MVT RetVT;
3480   if (!isTypeSupported(I->getType(), RetVT))
3481     return false;
3482
3483   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
3484     unsigned ResultReg = 0;
3485     uint64_t ShiftVal = C->getZExtValue();
3486     MVT SrcVT = RetVT;
3487     bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true;
3488     const Value *Op0 = I->getOperand(0);
3489     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
3490       MVT TmpVT;
3491       if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
3492         SrcVT = TmpVT;
3493         IsZExt = true;
3494         Op0 = ZExt->getOperand(0);
3495       }
3496     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
3497       MVT TmpVT;
3498       if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
3499         SrcVT = TmpVT;
3500         IsZExt = false;
3501         Op0 = SExt->getOperand(0);
3502       }
3503     }
3504
3505     unsigned Op0Reg = getRegForValue(Op0);
3506     if (!Op0Reg)
3507       return false;
3508     bool Op0IsKill = hasTrivialKill(Op0);
3509
3510     switch (I->getOpcode()) {
3511     default: llvm_unreachable("Unexpected instruction.");
3512     case Instruction::Shl:
3513       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
3514       break;
3515     case Instruction::AShr:
3516       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
3517       break;
3518     case Instruction::LShr:
3519       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
3520       break;
3521     }
3522     if (!ResultReg)
3523       return false;
3524
3525     updateValueMap(I, ResultReg);
3526     return true;
3527   }
3528
3529   unsigned Op0Reg = getRegForValue(I->getOperand(0));
3530   if (!Op0Reg)
3531     return false;
3532   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
3533
3534   unsigned Op1Reg = getRegForValue(I->getOperand(1));
3535   if (!Op1Reg)
3536     return false;
3537   bool Op1IsKill = hasTrivialKill(I->getOperand(1));
3538
3539   unsigned ResultReg = 0;
3540   switch (I->getOpcode()) {
3541   default: llvm_unreachable("Unexpected instruction.");
3542   case Instruction::Shl:
3543     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
3544     break;
3545   case Instruction::AShr:
3546     ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
3547     break;
3548   case Instruction::LShr:
3549     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
3550     break;
3551   }
3552
3553   if (!ResultReg)
3554     return false;
3555
3556   updateValueMap(I, ResultReg);
3557   return true;
3558 }
3559
3560 bool AArch64FastISel::SelectBitCast(const Instruction *I) {
3561   MVT RetVT, SrcVT;
3562
3563   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
3564     return false;
3565   if (!isTypeLegal(I->getType(), RetVT))
3566     return false;
3567
3568   unsigned Opc;
3569   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
3570     Opc = AArch64::FMOVWSr;
3571   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
3572     Opc = AArch64::FMOVXDr;
3573   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
3574     Opc = AArch64::FMOVSWr;
3575   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
3576     Opc = AArch64::FMOVDXr;
3577   else
3578     return false;
3579
3580   const TargetRegisterClass *RC = nullptr;
3581   switch (RetVT.SimpleTy) {
3582   default: llvm_unreachable("Unexpected value type.");
3583   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
3584   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
3585   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
3586   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
3587   }
3588   unsigned Op0Reg = getRegForValue(I->getOperand(0));
3589   if (!Op0Reg)
3590     return false;
3591   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
3592   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
3593
3594   if (!ResultReg)
3595     return false;
3596
3597   updateValueMap(I, ResultReg);
3598   return true;
3599 }
3600
3601 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
3602   switch (I->getOpcode()) {
3603   default:
3604     break;
3605   case Instruction::Add:
3606   case Instruction::Sub:
3607     if (selectAddSub(I))
3608       return true;
3609     break;
3610   case Instruction::Mul:
3611     if (!selectBinaryOp(I, ISD::MUL))
3612       return SelectMul(I);
3613     return true;
3614   case Instruction::SRem:
3615     if (!selectBinaryOp(I, ISD::SREM))
3616       return SelectRem(I, ISD::SREM);
3617     return true;
3618   case Instruction::URem:
3619     if (!selectBinaryOp(I, ISD::UREM))
3620       return SelectRem(I, ISD::UREM);
3621     return true;
3622   case Instruction::Shl:
3623   case Instruction::LShr:
3624   case Instruction::AShr:
3625     if (SelectShift(I))
3626       return true;
3627     break;
3628   case Instruction::And:
3629     if (selectLogicalOp(I, ISD::AND))
3630       return true;
3631     break;
3632   case Instruction::Or:
3633     if (selectLogicalOp(I, ISD::OR))
3634       return true;
3635     break;
3636   case Instruction::Xor:
3637     if (selectLogicalOp(I, ISD::XOR))
3638       return true;
3639     break;
3640   case Instruction::Br:
3641     return SelectBranch(I);
3642   case Instruction::IndirectBr:
3643     return SelectIndirectBr(I);
3644   case Instruction::BitCast:
3645     if (!FastISel::selectBitCast(I))
3646       return SelectBitCast(I);
3647     return true;
3648   case Instruction::FPToSI:
3649     if (!selectCast(I, ISD::FP_TO_SINT))
3650       return SelectFPToInt(I, /*Signed=*/true);
3651     return true;
3652   case Instruction::FPToUI:
3653     return SelectFPToInt(I, /*Signed=*/false);
3654   case Instruction::ZExt:
3655     if (!selectCast(I, ISD::ZERO_EXTEND))
3656       return SelectIntExt(I);
3657     return true;
3658   case Instruction::SExt:
3659     if (!selectCast(I, ISD::SIGN_EXTEND))
3660       return SelectIntExt(I);
3661     return true;
3662   case Instruction::Trunc:
3663     if (!selectCast(I, ISD::TRUNCATE))
3664       return SelectTrunc(I);
3665     return true;
3666   case Instruction::FPExt:
3667     return SelectFPExt(I);
3668   case Instruction::FPTrunc:
3669     return SelectFPTrunc(I);
3670   case Instruction::SIToFP:
3671     if (!selectCast(I, ISD::SINT_TO_FP))
3672       return SelectIntToFP(I, /*Signed=*/true);
3673     return true;
3674   case Instruction::UIToFP:
3675     return SelectIntToFP(I, /*Signed=*/false);
3676   case Instruction::Load:
3677     return SelectLoad(I);
3678   case Instruction::Store:
3679     return SelectStore(I);
3680   case Instruction::FCmp:
3681   case Instruction::ICmp:
3682     return SelectCmp(I);
3683   case Instruction::Select:
3684     return SelectSelect(I);
3685   case Instruction::Ret:
3686     return SelectRet(I);
3687   }
3688
3689   // fall-back to target-independent instruction selection.
3690   return selectOperator(I, I->getOpcode());
3691   // Silence warnings.
3692   (void)&CC_AArch64_DarwinPCS_VarArg;
3693 }
3694
3695 namespace llvm {
3696 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo,
3697                                         const TargetLibraryInfo *libInfo) {
3698   return new AArch64FastISel(funcInfo, libInfo);
3699 }
3700 }