lib/Target/AArch64/AArch64FastISel.cpp

   1 //===-- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the AArch64-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // AArch64GenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AArch64.h"
  17 #include "AArch64Subtarget.h"
  18 #include "AArch64TargetMachine.h"
  19 #include "MCTargetDesc/AArch64AddressingModes.h"
  20 #include "llvm/Analysis/BranchProbabilityInfo.h"
  21 #include "llvm/CodeGen/CallingConvLower.h"
  22 #include "llvm/CodeGen/FastISel.h"
  23 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  24 #include "llvm/CodeGen/MachineConstantPool.h"
  25 #include "llvm/CodeGen/MachineFrameInfo.h"
  26 #include "llvm/CodeGen/MachineInstrBuilder.h"
  27 #include "llvm/CodeGen/MachineRegisterInfo.h"
  28 #include "llvm/IR/CallingConv.h"
  29 #include "llvm/IR/DataLayout.h"
  30 #include "llvm/IR/DerivedTypes.h"
  31 #include "llvm/IR/Function.h"
  32 #include "llvm/IR/GetElementPtrTypeIterator.h"
  33 #include "llvm/IR/GlobalAlias.h"
  34 #include "llvm/IR/GlobalVariable.h"
  35 #include "llvm/IR/Instructions.h"
  36 #include "llvm/IR/IntrinsicInst.h"
  37 #include "llvm/IR/Operator.h"
  38 #include "llvm/Support/CommandLine.h"
  39 using namespace llvm;
  40
  41 namespace {
  42
  43 class AArch64FastISel : public FastISel {
  44   class Address {
  45   public:
  46     typedef enum {
  47       RegBase,
  48       FrameIndexBase
  49     } BaseKind;
  50
  51   private:
  52     BaseKind Kind;
  53     AArch64_AM::ShiftExtendType ExtType;
  54     union {
  55       unsigned Reg;
  56       int FI;
  57     } Base;
  58     unsigned OffsetReg;
  59     unsigned Shift;
  60     int64_t Offset;
  61     const GlobalValue *GV;
  62
  63   public:
  64     Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
  65       OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
  66     void setKind(BaseKind K) { Kind = K; }
  67     BaseKind getKind() const { return Kind; }
  68     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
  69     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
  70     bool isRegBase() const { return Kind == RegBase; }
  71     bool isFIBase() const { return Kind == FrameIndexBase; }
  72     void setReg(unsigned Reg) {
  73       assert(isRegBase() && "Invalid base register access!");
  74       Base.Reg = Reg;
  75     }
  76     unsigned getReg() const {
  77       assert(isRegBase() && "Invalid base register access!");
  78       return Base.Reg;
  79     }
  80     void setOffsetReg(unsigned Reg) {
  81       assert(isRegBase() && "Invalid offset register access!");
  82       OffsetReg = Reg;
  83     }
  84     unsigned getOffsetReg() const {
  85       assert(isRegBase() && "Invalid offset register access!");
  86       return OffsetReg;
  87     }
  88     void setFI(unsigned FI) {
  89       assert(isFIBase() && "Invalid base frame index  access!");
  90       Base.FI = FI;
  91     }
  92     unsigned getFI() const {
  93       assert(isFIBase() && "Invalid base frame index access!");
  94       return Base.FI;
  95     }
  96     void setOffset(int64_t O) { Offset = O; }
  97     int64_t getOffset() { return Offset; }
  98     void setShift(unsigned S) { Shift = S; }
  99     unsigned getShift() { return Shift; }
 100
 101     void setGlobalValue(const GlobalValue *G) { GV = G; }
 102     const GlobalValue *getGlobalValue() { return GV; }
 103   };
 104
 105   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
 106   /// make the right decision when generating code for different targets.
 107   const AArch64Subtarget *Subtarget;
 108   LLVMContext *Context;
 109
 110   bool fastLowerArguments() override;
 111   bool fastLowerCall(CallLoweringInfo &CLI) override;
 112   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
 113
 114 private:
 115   // Selection routines.
 116   bool selectAddSub(const Instruction *I);
 117   bool selectLogicalOp(const Instruction *I, unsigned ISDOpcode);
 118   bool SelectLoad(const Instruction *I);
 119   bool SelectStore(const Instruction *I);
 120   bool SelectBranch(const Instruction *I);
 121   bool SelectIndirectBr(const Instruction *I);
 122   bool SelectCmp(const Instruction *I);
 123   bool SelectSelect(const Instruction *I);
 124   bool SelectFPExt(const Instruction *I);
 125   bool SelectFPTrunc(const Instruction *I);
 126   bool SelectFPToInt(const Instruction *I, bool Signed);
 127   bool SelectIntToFP(const Instruction *I, bool Signed);
 128   bool SelectRem(const Instruction *I, unsigned ISDOpcode);
 129   bool SelectRet(const Instruction *I);
 130   bool SelectTrunc(const Instruction *I);
 131   bool SelectIntExt(const Instruction *I);
 132   bool SelectMul(const Instruction *I);
 133   bool SelectShift(const Instruction *I);
 134   bool SelectBitCast(const Instruction *I);
 135
 136   // Utility helper routines.
 137   bool isTypeLegal(Type *Ty, MVT &VT);
 138   bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
 139   bool isTypeSupported(Type *Ty, MVT &VT);
 140   bool isValueAvailable(const Value *V) const;
 141   bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
 142   bool ComputeCallAddress(const Value *V, Address &Addr);
 143   bool SimplifyAddress(Address &Addr, MVT VT);
 144   void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 145                             unsigned Flags, unsigned ScaleFactor,
 146                             MachineMemOperand *MMO);
 147   bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
 148   bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
 149                           unsigned Alignment);
 150   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
 151                          const Value *Cond);
 152
 153   // Emit helper routines.
 154   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
 155                       const Value *RHS, bool SetFlags = false,
 156                       bool WantResult = true,  bool IsZExt = false);
 157   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
 158                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 159                          bool SetFlags = false, bool WantResult = true);
 160   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
 161                          bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
 162                          bool WantResult = true);
 163   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
 164                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 165                          AArch64_AM::ShiftExtendType ShiftType,
 166                          uint64_t ShiftImm, bool SetFlags = false,
 167                          bool WantResult = true);
 168   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
 169                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 170                           AArch64_AM::ShiftExtendType ExtType,
 171                           uint64_t ShiftImm, bool SetFlags = false,
 172                          bool WantResult = true);
 173
 174   // Emit functions.
 175   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
 176   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
 177   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 178   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
 179   bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
 180                 MachineMemOperand *MMO = nullptr);
 181   bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
 182                  MachineMemOperand *MMO = nullptr);
 183   unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
 184   unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
 185   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
 186                    bool SetFlags = false, bool WantResult = true,
 187                    bool IsZExt = false);
 188   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
 189                    bool SetFlags = false, bool WantResult = true,
 190                    bool IsZExt = false);
 191   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 192                        unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
 193   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 194                        unsigned RHSReg, bool RHSIsKill,
 195                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
 196                        bool WantResult = true);
 197   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
 198                          const Value *RHS);
 199   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 200                             bool LHSIsKill, uint64_t Imm);
 201   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 202                             bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 203                             uint64_t ShiftImm);
 204   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 205   unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 206                        unsigned Op1, bool Op1IsKill);
 207   unsigned Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 208                          unsigned Op1, bool Op1IsKill);
 209   unsigned Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 210                          unsigned Op1, bool Op1IsKill);
 211   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 212                       unsigned Op1Reg, bool Op1IsKill);
 213   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 214                       uint64_t Imm, bool IsZExt = true);
 215   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 216                       unsigned Op1Reg, bool Op1IsKill);
 217   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 218                       uint64_t Imm, bool IsZExt = true);
 219   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 220                       unsigned Op1Reg, bool Op1IsKill);
 221   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 222                       uint64_t Imm, bool IsZExt = false);
 223
 224   unsigned AArch64MaterializeInt(const ConstantInt *CI, MVT VT);
 225   unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
 226   unsigned AArch64MaterializeGV(const GlobalValue *GV);
 227
 228   // Call handling routines.
 229 private:
 230   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
 231   bool ProcessCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
 232                        unsigned &NumBytes);
 233   bool FinishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
 234
 235 public:
 236   // Backend specific FastISel code.
 237   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
 238   unsigned fastMaterializeConstant(const Constant *C) override;
 239   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
 240
 241   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
 242                          const TargetLibraryInfo *LibInfo)
 243       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
 244     Subtarget = &TM.getSubtarget<AArch64Subtarget>();
 245     Context = &FuncInfo.Fn->getContext();
 246   }
 247
 248   bool fastSelectInstruction(const Instruction *I) override;
 249
 250 #include "AArch64GenFastISel.inc"
 251 };
 252
 253 } // end anonymous namespace
 254
 255 #include "AArch64GenCallingConv.inc"
 256
 257 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
 258   if (CC == CallingConv::WebKit_JS)
 259     return CC_AArch64_WebKit_JS;
 260   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 261 }
 262
 263 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
 264   assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
 265          "Alloca should always return a pointer.");
 266
 267   // Don't handle dynamic allocas.
 268   if (!FuncInfo.StaticAllocaMap.count(AI))
 269     return 0;
 270
 271   DenseMap<const AllocaInst *, int>::iterator SI =
 272       FuncInfo.StaticAllocaMap.find(AI);
 273
 274   if (SI != FuncInfo.StaticAllocaMap.end()) {
 275     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 276     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 277             ResultReg)
 278         .addFrameIndex(SI->second)
 279         .addImm(0)
 280         .addImm(0);
 281     return ResultReg;
 282   }
 283
 284   return 0;
 285 }
 286
 287 unsigned AArch64FastISel::AArch64MaterializeInt(const ConstantInt *CI, MVT VT) {
 288   if (VT > MVT::i64)
 289     return 0;
 290
 291   if (!CI->isZero())
 292     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
 293
 294   // Create a copy from the zero register to materialize a "0" value.
 295   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
 296                                                    : &AArch64::GPR32RegClass;
 297   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
 298   unsigned ResultReg = createResultReg(RC);
 299   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
 300           ResultReg).addReg(ZeroReg, getKillRegState(true));
 301   return ResultReg;
 302 }
 303
 304 unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
 305   // Positive zero (+0.0) has to be materialized with a fmov from the zero
 306   // register, because the immediate version of fmov cannot encode zero.
 307   if (CFP->isNullValue())
 308     return fastMaterializeFloatZero(CFP);
 309
 310   if (VT != MVT::f32 && VT != MVT::f64)
 311     return 0;
 312
 313   const APFloat Val = CFP->getValueAPF();
 314   bool Is64Bit = (VT == MVT::f64);
 315   // This checks to see if we can use FMOV instructions to materialize
 316   // a constant, otherwise we have to materialize via the constant pool.
 317   if (TLI.isFPImmLegal(Val, VT)) {
 318     int Imm =
 319         Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
 320     assert((Imm != -1) && "Cannot encode floating-point constant.");
 321     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
 322     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
 323   }
 324
 325   // Materialize via constant pool.  MachineConstantPool wants an explicit
 326   // alignment.
 327   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
 328   if (Align == 0)
 329     Align = DL.getTypeAllocSize(CFP->getType());
 330
 331   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 332   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 333   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 334           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
 335
 336   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
 337   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 338   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 339       .addReg(ADRPReg)
 340       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 341   return ResultReg;
 342 }
 343
 344 unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
 345   // We can't handle thread-local variables quickly yet.
 346   if (GV->isThreadLocal())
 347     return 0;
 348
 349   // MachO still uses GOT for large code-model accesses, but ELF requires
 350   // movz/movk sequences, which FastISel doesn't handle yet.
 351   if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
 352     return 0;
 353
 354   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
 355
 356   EVT DestEVT = TLI.getValueType(GV->getType(), true);
 357   if (!DestEVT.isSimple())
 358     return 0;
 359
 360   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 361   unsigned ResultReg;
 362
 363   if (OpFlags & AArch64II::MO_GOT) {
 364     // ADRP + LDRX
 365     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 366             ADRPReg)
 367       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
 368
 369     ResultReg = createResultReg(&AArch64::GPR64RegClass);
 370     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
 371             ResultReg)
 372       .addReg(ADRPReg)
 373       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
 374                         AArch64II::MO_NC);
 375   } else if (OpFlags & AArch64II::MO_CONSTPOOL) {
 376     // We can't handle addresses loaded from a constant pool quickly yet.
 377     return 0;
 378   } else {
 379     // ADRP + ADDX
 380     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 381             ADRPReg)
 382       .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
 383
 384     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 385     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 386             ResultReg)
 387       .addReg(ADRPReg)
 388       .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
 389       .addImm(0);
 390   }
 391   return ResultReg;
 392 }
 393
 394 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
 395   EVT CEVT = TLI.getValueType(C->getType(), true);
 396
 397   // Only handle simple types.
 398   if (!CEVT.isSimple())
 399     return 0;
 400   MVT VT = CEVT.getSimpleVT();
 401
 402   if (const auto *CI = dyn_cast<ConstantInt>(C))
 403     return AArch64MaterializeInt(CI, VT);
 404   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 405     return AArch64MaterializeFP(CFP, VT);
 406   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
 407     return AArch64MaterializeGV(GV);
 408
 409   return 0;
 410 }
 411
 412 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
 413   assert(CFP->isNullValue() &&
 414          "Floating-point constant is not a positive zero.");
 415   MVT VT;
 416   if (!isTypeLegal(CFP->getType(), VT))
 417     return 0;
 418
 419   if (VT != MVT::f32 && VT != MVT::f64)
 420     return 0;
 421
 422   bool Is64Bit = (VT == MVT::f64);
 423   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
 424   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
 425   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
 426 }
 427
 428 // Computes the address to get to an object.
 429 bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr, Type *Ty)
 430 {
 431   const User *U = nullptr;
 432   unsigned Opcode = Instruction::UserOp1;
 433   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
 434     // Don't walk into other basic blocks unless the object is an alloca from
 435     // another block, otherwise it may not have a virtual register assigned.
 436     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
 437         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 438       Opcode = I->getOpcode();
 439       U = I;
 440     }
 441   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
 442     Opcode = C->getOpcode();
 443     U = C;
 444   }
 445
 446   if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
 447     if (Ty->getAddressSpace() > 255)
 448       // Fast instruction selection doesn't support the special
 449       // address spaces.
 450       return false;
 451
 452   switch (Opcode) {
 453   default:
 454     break;
 455   case Instruction::BitCast: {
 456     // Look through bitcasts.
 457     return ComputeAddress(U->getOperand(0), Addr, Ty);
 458   }
 459   case Instruction::IntToPtr: {
 460     // Look past no-op inttoptrs.
 461     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 462       return ComputeAddress(U->getOperand(0), Addr, Ty);
 463     break;
 464   }
 465   case Instruction::PtrToInt: {
 466     // Look past no-op ptrtoints.
 467     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
 468       return ComputeAddress(U->getOperand(0), Addr, Ty);
 469     break;
 470   }
 471   case Instruction::GetElementPtr: {
 472     Address SavedAddr = Addr;
 473     uint64_t TmpOffset = Addr.getOffset();
 474
 475     // Iterate through the GEP folding the constants into offsets where
 476     // we can.
 477     gep_type_iterator GTI = gep_type_begin(U);
 478     for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
 479          ++i, ++GTI) {
 480       const Value *Op = *i;
 481       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
 482         const StructLayout *SL = DL.getStructLayout(STy);
 483         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
 484         TmpOffset += SL->getElementOffset(Idx);
 485       } else {
 486         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
 487         for (;;) {
 488           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
 489             // Constant-offset addressing.
 490             TmpOffset += CI->getSExtValue() * S;
 491             break;
 492           }
 493           if (canFoldAddIntoGEP(U, Op)) {
 494             // A compatible add with a constant operand. Fold the constant.
 495             ConstantInt *CI =
 496                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
 497             TmpOffset += CI->getSExtValue() * S;
 498             // Iterate on the other operand.
 499             Op = cast<AddOperator>(Op)->getOperand(0);
 500             continue;
 501           }
 502           // Unsupported
 503           goto unsupported_gep;
 504         }
 505       }
 506     }
 507
 508     // Try to grab the base operand now.
 509     Addr.setOffset(TmpOffset);
 510     if (ComputeAddress(U->getOperand(0), Addr, Ty))
 511       return true;
 512
 513     // We failed, restore everything and try the other options.
 514     Addr = SavedAddr;
 515
 516   unsupported_gep:
 517     break;
 518   }
 519   case Instruction::Alloca: {
 520     const AllocaInst *AI = cast<AllocaInst>(Obj);
 521     DenseMap<const AllocaInst *, int>::iterator SI =
 522         FuncInfo.StaticAllocaMap.find(AI);
 523     if (SI != FuncInfo.StaticAllocaMap.end()) {
 524       Addr.setKind(Address::FrameIndexBase);
 525       Addr.setFI(SI->second);
 526       return true;
 527     }
 528     break;
 529   }
 530   case Instruction::Add: {
 531     // Adds of constants are common and easy enough.
 532     const Value *LHS = U->getOperand(0);
 533     const Value *RHS = U->getOperand(1);
 534
 535     if (isa<ConstantInt>(LHS))
 536       std::swap(LHS, RHS);
 537
 538     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 539       Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
 540       return ComputeAddress(LHS, Addr, Ty);
 541     }
 542
 543     Address Backup = Addr;
 544     if (ComputeAddress(LHS, Addr, Ty) && ComputeAddress(RHS, Addr, Ty))
 545       return true;
 546     Addr = Backup;
 547
 548     break;
 549   }
 550   case Instruction::Shl:
 551     if (Addr.getOffsetReg())
 552       break;
 553
 554     if (const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
 555       unsigned Val = CI->getZExtValue();
 556       if (Val < 1 || Val > 3)
 557         break;
 558
 559       uint64_t NumBytes = 0;
 560       if (Ty && Ty->isSized()) {
 561         uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 562         NumBytes = NumBits / 8;
 563         if (!isPowerOf2_64(NumBits))
 564           NumBytes = 0;
 565       }
 566
 567       if (NumBytes != (1ULL << Val))
 568         break;
 569
 570       Addr.setShift(Val);
 571       Addr.setExtendType(AArch64_AM::LSL);
 572
 573       if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
 574         if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 575           U = I;
 576
 577       if (const auto *ZE = dyn_cast<ZExtInst>(U))
 578         if (ZE->getOperand(0)->getType()->isIntegerTy(32))
 579           Addr.setExtendType(AArch64_AM::UXTW);
 580
 581       if (const auto *SE = dyn_cast<SExtInst>(U))
 582         if (SE->getOperand(0)->getType()->isIntegerTy(32))
 583           Addr.setExtendType(AArch64_AM::SXTW);
 584
 585       unsigned Reg = getRegForValue(U->getOperand(0));
 586       if (!Reg)
 587         return false;
 588       Addr.setOffsetReg(Reg);
 589       return true;
 590     }
 591     break;
 592   }
 593
 594   if (Addr.getReg()) {
 595     if (!Addr.getOffsetReg()) {
 596       unsigned Reg = getRegForValue(Obj);
 597       if (!Reg)
 598         return false;
 599       Addr.setOffsetReg(Reg);
 600       return true;
 601     }
 602     return false;
 603   }
 604
 605   unsigned Reg = getRegForValue(Obj);
 606   if (!Reg)
 607     return false;
 608   Addr.setReg(Reg);
 609   return true;
 610 }
 611
 612 bool AArch64FastISel::ComputeCallAddress(const Value *V, Address &Addr) {
 613   const User *U = nullptr;
 614   unsigned Opcode = Instruction::UserOp1;
 615   bool InMBB = true;
 616
 617   if (const auto *I = dyn_cast<Instruction>(V)) {
 618     Opcode = I->getOpcode();
 619     U = I;
 620     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
 621   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
 622     Opcode = C->getOpcode();
 623     U = C;
 624   }
 625
 626   switch (Opcode) {
 627   default: break;
 628   case Instruction::BitCast:
 629     // Look past bitcasts if its operand is in the same BB.
 630     if (InMBB)
 631       return ComputeCallAddress(U->getOperand(0), Addr);
 632     break;
 633   case Instruction::IntToPtr:
 634     // Look past no-op inttoptrs if its operand is in the same BB.
 635     if (InMBB &&
 636         TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 637       return ComputeCallAddress(U->getOperand(0), Addr);
 638     break;
 639   case Instruction::PtrToInt:
 640     // Look past no-op ptrtoints if its operand is in the same BB.
 641     if (InMBB &&
 642         TLI.getValueType(U->getType()) == TLI.getPointerTy())
 643       return ComputeCallAddress(U->getOperand(0), Addr);
 644     break;
 645   }
 646
 647   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
 648     Addr.setGlobalValue(GV);
 649     return true;
 650   }
 651
 652   // If all else fails, try to materialize the value in a register.
 653   if (!Addr.getGlobalValue()) {
 654     Addr.setReg(getRegForValue(V));
 655     return Addr.getReg() != 0;
 656   }
 657
 658   return false;
 659 }
 660
 661
 662 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
 663   EVT evt = TLI.getValueType(Ty, true);
 664
 665   // Only handle simple types.
 666   if (evt == MVT::Other || !evt.isSimple())
 667     return false;
 668   VT = evt.getSimpleVT();
 669
 670   // This is a legal type, but it's not something we handle in fast-isel.
 671   if (VT == MVT::f128)
 672     return false;
 673
 674   // Handle all other legal types, i.e. a register that will directly hold this
 675   // value.
 676   return TLI.isTypeLegal(VT);
 677 }
 678
 679 bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
 680   if (isTypeLegal(Ty, VT))
 681     return true;
 682
 683   // If this is a type than can be sign or zero-extended to a basic operation
 684   // go ahead and accept it now. For stores, this reflects truncation.
 685   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
 686     return true;
 687
 688   return false;
 689 }
 690
 691 /// \brief Determine if the value type is supported by FastISel.
 692 ///
 693 /// FastISel for AArch64 can handle more value types than are legal. This adds
 694 /// simple value type such as i1, i8, and i16.
 695 /// Vectors on the other side are not supported yet.
 696 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT) {
 697   if (Ty->isVectorTy())
 698     return false;
 699
 700   if (isTypeLegal(Ty, VT))
 701     return true;
 702
 703   // If this is a type than can be sign or zero-extended to a basic operation
 704   // go ahead and accept it now.
 705   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
 706     return true;
 707
 708   return false;
 709 }
 710
 711 bool AArch64FastISel::isValueAvailable(const Value *V) const {
 712   if (!isa<Instruction>(V))
 713     return true;
 714
 715   const auto *I = cast<Instruction>(V);
 716   if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 717     return true;
 718
 719   return false;
 720 }
 721
 722 bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT) {
 723   unsigned ScaleFactor;
 724   switch (VT.SimpleTy) {
 725   default: return false;
 726   case MVT::i1:  // fall-through
 727   case MVT::i8:  ScaleFactor = 1; break;
 728   case MVT::i16: ScaleFactor = 2; break;
 729   case MVT::i32: // fall-through
 730   case MVT::f32: ScaleFactor = 4; break;
 731   case MVT::i64: // fall-through
 732   case MVT::f64: ScaleFactor = 8; break;
 733   }
 734
 735   bool ImmediateOffsetNeedsLowering = false;
 736   bool RegisterOffsetNeedsLowering = false;
 737   int64_t Offset = Addr.getOffset();
 738   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
 739     ImmediateOffsetNeedsLowering = true;
 740   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
 741            !isUInt<12>(Offset / ScaleFactor))
 742     ImmediateOffsetNeedsLowering = true;
 743
 744   // Cannot encode an offset register and an immediate offset in the same
 745   // instruction. Fold the immediate offset into the load/store instruction and
 746   // emit an additonal add to take care of the offset register.
 747   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() &&
 748       Addr.getOffsetReg())
 749     RegisterOffsetNeedsLowering = true;
 750
 751   // Cannot encode zero register as base.
 752   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
 753     RegisterOffsetNeedsLowering = true;
 754
 755   // If this is a stack pointer and the offset needs to be simplified then put
 756   // the alloca address into a register, set the base type back to register and
 757   // continue. This should almost never happen.
 758   if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) {
 759     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 760     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 761             ResultReg)
 762       .addFrameIndex(Addr.getFI())
 763       .addImm(0)
 764       .addImm(0);
 765     Addr.setKind(Address::RegBase);
 766     Addr.setReg(ResultReg);
 767   }
 768
 769   if (RegisterOffsetNeedsLowering) {
 770     unsigned ResultReg = 0;
 771     if (Addr.getReg()) {
 772       if (Addr.getExtendType() == AArch64_AM::SXTW ||
 773           Addr.getExtendType() == AArch64_AM::UXTW   )
 774         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
 775                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
 776                                   /*TODO:IsKill=*/false, Addr.getExtendType(),
 777                                   Addr.getShift());
 778       else
 779         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
 780                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
 781                                   /*TODO:IsKill=*/false, AArch64_AM::LSL,
 782                                   Addr.getShift());
 783     } else {
 784       if (Addr.getExtendType() == AArch64_AM::UXTW)
 785         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
 786                                /*Op0IsKill=*/false, Addr.getShift(),
 787                                /*IsZExt=*/true);
 788       else if (Addr.getExtendType() == AArch64_AM::SXTW)
 789         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
 790                                /*Op0IsKill=*/false, Addr.getShift(),
 791                                /*IsZExt=*/false);
 792       else
 793         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
 794                                /*Op0IsKill=*/false, Addr.getShift());
 795     }
 796     if (!ResultReg)
 797       return false;
 798
 799     Addr.setReg(ResultReg);
 800     Addr.setOffsetReg(0);
 801     Addr.setShift(0);
 802     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
 803   }
 804
 805   // Since the offset is too large for the load/store instruction get the
 806   // reg+offset into a register.
 807   if (ImmediateOffsetNeedsLowering) {
 808     unsigned ResultReg = 0;
 809     if (Addr.getReg())
 810       ResultReg = fastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(),
 811                                /*IsKill=*/false, Offset, MVT::i64);
 812     else
 813       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
 814
 815     if (!ResultReg)
 816       return false;
 817     Addr.setReg(ResultReg);
 818     Addr.setOffset(0);
 819   }
 820   return true;
 821 }
 822
 823 void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
 824                                            const MachineInstrBuilder &MIB,
 825                                            unsigned Flags,
 826                                            unsigned ScaleFactor,
 827                                            MachineMemOperand *MMO) {
 828   int64_t Offset = Addr.getOffset() / ScaleFactor;
 829   // Frame base works a bit differently. Handle it separately.
 830   if (Addr.isFIBase()) {
 831     int FI = Addr.getFI();
 832     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
 833     // and alignment should be based on the VT.
 834     MMO = FuncInfo.MF->getMachineMemOperand(
 835       MachinePointerInfo::getFixedStack(FI, Offset), Flags,
 836       MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
 837     // Now add the rest of the operands.
 838     MIB.addFrameIndex(FI).addImm(Offset);
 839   } else {
 840     assert(Addr.isRegBase() && "Unexpected address kind.");
 841     const MCInstrDesc &II = MIB->getDesc();
 842     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
 843     Addr.setReg(
 844       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
 845     Addr.setOffsetReg(
 846       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
 847     if (Addr.getOffsetReg()) {
 848       assert(Addr.getOffset() == 0 && "Unexpected offset");
 849       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
 850                       Addr.getExtendType() == AArch64_AM::SXTX;
 851       MIB.addReg(Addr.getReg());
 852       MIB.addReg(Addr.getOffsetReg());
 853       MIB.addImm(IsSigned);
 854       MIB.addImm(Addr.getShift() != 0);
 855     } else {
 856       MIB.addReg(Addr.getReg());
 857       MIB.addImm(Offset);
 858     }
 859   }
 860
 861   if (MMO)
 862     MIB.addMemOperand(MMO);
 863 }
 864
 865 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
 866                                      const Value *RHS, bool SetFlags,
 867                                      bool WantResult,  bool IsZExt) {
 868   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
 869   bool NeedExtend = false;
 870   switch (RetVT.SimpleTy) {
 871   default:
 872     return 0;
 873   case MVT::i1:
 874     NeedExtend = true;
 875     break;
 876   case MVT::i8:
 877     NeedExtend = true;
 878     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
 879     break;
 880   case MVT::i16:
 881     NeedExtend = true;
 882     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
 883     break;
 884   case MVT::i32:  // fall-through
 885   case MVT::i64:
 886     break;
 887   }
 888   MVT SrcVT = RetVT;
 889   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
 890
 891   // Canonicalize immediates to the RHS first.
 892   if (UseAdd && isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
 893     std::swap(LHS, RHS);
 894
 895   // Canonicalize shift immediate to the RHS.
 896   if (UseAdd && isValueAvailable(LHS))
 897     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
 898       if (isa<ConstantInt>(SI->getOperand(1)))
 899         if (SI->getOpcode() == Instruction::Shl  ||
 900             SI->getOpcode() == Instruction::LShr ||
 901             SI->getOpcode() == Instruction::AShr   )
 902           std::swap(LHS, RHS);
 903
 904   unsigned LHSReg = getRegForValue(LHS);
 905   if (!LHSReg)
 906     return 0;
 907   bool LHSIsKill = hasTrivialKill(LHS);
 908
 909   if (NeedExtend)
 910     LHSReg = EmitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
 911
 912   unsigned ResultReg = 0;
 913   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
 914     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
 915     if (C->isNegative())
 916       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
 917                                 SetFlags, WantResult);
 918     else
 919       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
 920                                 WantResult);
 921   }
 922   if (ResultReg)
 923     return ResultReg;
 924
 925   // Only extend the RHS within the instruction if there is a valid extend type.
 926   if (ExtendType != AArch64_AM::InvalidShiftExtend && isValueAvailable(RHS)) {
 927     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
 928       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
 929         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
 930           unsigned RHSReg = getRegForValue(SI->getOperand(0));
 931           if (!RHSReg)
 932             return 0;
 933           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
 934           return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
 935                                RHSIsKill, ExtendType, C->getZExtValue(),
 936                                SetFlags, WantResult);
 937         }
 938     unsigned RHSReg = getRegForValue(RHS);
 939     if (!RHSReg)
 940       return 0;
 941     bool RHSIsKill = hasTrivialKill(RHS);
 942     return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
 943                          ExtendType, 0, SetFlags, WantResult);
 944   }
 945
 946   // Check if the shift can be folded into the instruction.
 947   if (isValueAvailable(RHS))
 948     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
 949       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
 950         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
 951         switch (SI->getOpcode()) {
 952         default: break;
 953         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
 954         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
 955         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
 956         }
 957         uint64_t ShiftVal = C->getZExtValue();
 958         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
 959           unsigned RHSReg = getRegForValue(SI->getOperand(0));
 960           if (!RHSReg)
 961             return 0;
 962           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
 963           return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
 964                                RHSIsKill, ShiftType, ShiftVal, SetFlags,
 965                                WantResult);
 966         }
 967       }
 968     }
 969
 970   unsigned RHSReg = getRegForValue(RHS);
 971   if (!RHSReg)
 972     return 0;
 973   bool RHSIsKill = hasTrivialKill(RHS);
 974
 975   if (NeedExtend)
 976     RHSReg = EmitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
 977
 978   return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
 979                        SetFlags, WantResult);
 980 }
 981
 982 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
 983                                         bool LHSIsKill, unsigned RHSReg,
 984                                         bool RHSIsKill, bool SetFlags,
 985                                         bool WantResult) {
 986   assert(LHSReg && RHSReg && "Invalid register number.");
 987
 988   if (RetVT != MVT::i32 && RetVT != MVT::i64)
 989     return 0;
 990
 991   static const unsigned OpcTable[2][2][2] = {
 992     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
 993       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
 994     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
 995       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
 996   };
 997   bool Is64Bit = RetVT == MVT::i64;
 998   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
 999   const TargetRegisterClass *RC =
1000       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1001   unsigned ResultReg;
1002   if (WantResult)
1003     ResultReg = createResultReg(RC);
1004   else
1005     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1006
1007   const MCInstrDesc &II = TII.get(Opc);
1008   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1009   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1010   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1011       .addReg(LHSReg, getKillRegState(LHSIsKill))
1012       .addReg(RHSReg, getKillRegState(RHSIsKill));
1013   return ResultReg;
1014 }
1015
1016 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1017                                         bool LHSIsKill, uint64_t Imm,
1018                                         bool SetFlags, bool WantResult) {
1019   assert(LHSReg && "Invalid register number.");
1020
1021   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1022     return 0;
1023
1024   unsigned ShiftImm;
1025   if (isUInt<12>(Imm))
1026     ShiftImm = 0;
1027   else if ((Imm & 0xfff000) == Imm) {
1028     ShiftImm = 12;
1029     Imm >>= 12;
1030   } else
1031     return 0;
1032
1033   static const unsigned OpcTable[2][2][2] = {
1034     { { AArch64::SUBWri,  AArch64::SUBXri  },
1035       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1036     { { AArch64::SUBSWri, AArch64::SUBSXri },
1037       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1038   };
1039   bool Is64Bit = RetVT == MVT::i64;
1040   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1041   const TargetRegisterClass *RC;
1042   if (SetFlags)
1043     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1044   else
1045     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1046   unsigned ResultReg;
1047   if (WantResult)
1048     ResultReg = createResultReg(RC);
1049   else
1050     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1051
1052   const MCInstrDesc &II = TII.get(Opc);
1053   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1054   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1055       .addReg(LHSReg, getKillRegState(LHSIsKill))
1056       .addImm(Imm)
1057       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1058   return ResultReg;
1059 }
1060
1061 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1062                                         bool LHSIsKill, unsigned RHSReg,
1063                                         bool RHSIsKill,
1064                                         AArch64_AM::ShiftExtendType ShiftType,
1065                                         uint64_t ShiftImm, bool SetFlags,
1066                                         bool WantResult) {
1067   assert(LHSReg && RHSReg && "Invalid register number.");
1068
1069   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1070     return 0;
1071
1072   static const unsigned OpcTable[2][2][2] = {
1073     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1074       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1075     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1076       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1077   };
1078   bool Is64Bit = RetVT == MVT::i64;
1079   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1080   const TargetRegisterClass *RC =
1081       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1082   unsigned ResultReg;
1083   if (WantResult)
1084     ResultReg = createResultReg(RC);
1085   else
1086     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1087
1088   const MCInstrDesc &II = TII.get(Opc);
1089   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1090   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1091   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1092       .addReg(LHSReg, getKillRegState(LHSIsKill))
1093       .addReg(RHSReg, getKillRegState(RHSIsKill))
1094       .addImm(getShifterImm(ShiftType, ShiftImm));
1095   return ResultReg;
1096 }
1097
1098 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1099                                         bool LHSIsKill, unsigned RHSReg,
1100                                         bool RHSIsKill,
1101                                         AArch64_AM::ShiftExtendType ExtType,
1102                                         uint64_t ShiftImm, bool SetFlags,
1103                                         bool WantResult) {
1104   assert(LHSReg && RHSReg && "Invalid register number.");
1105
1106   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1107     return 0;
1108
1109   static const unsigned OpcTable[2][2][2] = {
1110     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1111       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1112     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1113       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1114   };
1115   bool Is64Bit = RetVT == MVT::i64;
1116   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1117   const TargetRegisterClass *RC = nullptr;
1118   if (SetFlags)
1119     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1120   else
1121     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1122   unsigned ResultReg;
1123   if (WantResult)
1124     ResultReg = createResultReg(RC);
1125   else
1126     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1127
1128   const MCInstrDesc &II = TII.get(Opc);
1129   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1130   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1131   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1132       .addReg(LHSReg, getKillRegState(LHSIsKill))
1133       .addReg(RHSReg, getKillRegState(RHSIsKill))
1134       .addImm(getArithExtendImm(ExtType, ShiftImm));
1135   return ResultReg;
1136 }
1137
1138 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1139   Type *Ty = LHS->getType();
1140   EVT EVT = TLI.getValueType(Ty, true);
1141   if (!EVT.isSimple())
1142     return false;
1143   MVT VT = EVT.getSimpleVT();
1144
1145   switch (VT.SimpleTy) {
1146   default:
1147     return false;
1148   case MVT::i1:
1149   case MVT::i8:
1150   case MVT::i16:
1151   case MVT::i32:
1152   case MVT::i64:
1153     return emitICmp(VT, LHS, RHS, IsZExt);
1154   case MVT::f32:
1155   case MVT::f64:
1156     return emitFCmp(VT, LHS, RHS);
1157   }
1158 }
1159
1160 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1161                                bool IsZExt) {
1162   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1163                  IsZExt) != 0;
1164 }
1165
1166 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1167                                   uint64_t Imm) {
1168   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1169                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1170 }
1171
1172 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1173   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1174     return false;
1175
1176   // Check to see if the 2nd operand is a constant that we can encode directly
1177   // in the compare.
1178   bool UseImm = false;
1179   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1180     if (CFP->isZero() && !CFP->isNegative())
1181       UseImm = true;
1182
1183   unsigned LHSReg = getRegForValue(LHS);
1184   if (!LHSReg)
1185     return false;
1186   bool LHSIsKill = hasTrivialKill(LHS);
1187
1188   if (UseImm) {
1189     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1190     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1191         .addReg(LHSReg, getKillRegState(LHSIsKill));
1192     return true;
1193   }
1194
1195   unsigned RHSReg = getRegForValue(RHS);
1196   if (!RHSReg)
1197     return false;
1198   bool RHSIsKill = hasTrivialKill(RHS);
1199
1200   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1201   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1202       .addReg(LHSReg, getKillRegState(LHSIsKill))
1203       .addReg(RHSReg, getKillRegState(RHSIsKill));
1204   return true;
1205 }
1206
1207 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1208                                   bool SetFlags, bool WantResult, bool IsZExt) {
1209   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1210                     IsZExt);
1211 }
1212
1213 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1214                                   bool SetFlags, bool WantResult, bool IsZExt) {
1215   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1216                     IsZExt);
1217 }
1218
1219 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1220                                       bool LHSIsKill, unsigned RHSReg,
1221                                       bool RHSIsKill, bool WantResult) {
1222   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1223                        RHSIsKill, /*SetFlags=*/true, WantResult);
1224 }
1225
1226 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1227                                       bool LHSIsKill, unsigned RHSReg,
1228                                       bool RHSIsKill,
1229                                       AArch64_AM::ShiftExtendType ShiftType,
1230                                       uint64_t ShiftImm, bool WantResult) {
1231   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1232                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1233                        WantResult);
1234 }
1235
1236 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1237                                         const Value *LHS, const Value *RHS) {
1238   // Canonicalize immediates to the RHS first.
1239   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1240     std::swap(LHS, RHS);
1241
1242   // Canonicalize shift immediate to the RHS.
1243   if (isValueAvailable(LHS))
1244     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1245       if (isa<ConstantInt>(SI->getOperand(1)))
1246         if (SI->getOpcode() == Instruction::Shl)
1247           std::swap(LHS, RHS);
1248
1249   unsigned LHSReg = getRegForValue(LHS);
1250   if (!LHSReg)
1251     return 0;
1252   bool LHSIsKill = hasTrivialKill(LHS);
1253
1254   unsigned ResultReg = 0;
1255   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1256     uint64_t Imm = C->getZExtValue();
1257     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1258   }
1259   if (ResultReg)
1260     return ResultReg;
1261
1262   // Check if the shift can be folded into the instruction.
1263   if (isValueAvailable(RHS))
1264     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1265       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1266         if (SI->getOpcode() == Instruction::Shl) {
1267           uint64_t ShiftVal = C->getZExtValue();
1268           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1269           if (!RHSReg)
1270             return 0;
1271           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1272           return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1273                                   RHSIsKill, ShiftVal);
1274         }
1275
1276   unsigned RHSReg = getRegForValue(RHS);
1277   if (!RHSReg)
1278     return 0;
1279   bool RHSIsKill = hasTrivialKill(RHS);
1280
1281   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1282   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1283   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1284     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1285     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1286   }
1287   return ResultReg;
1288 }
1289
1290 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1291                                            unsigned LHSReg, bool LHSIsKill,
1292                                            uint64_t Imm) {
1293   assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1294          "ISD nodes are not consecutive!");
1295   static const unsigned OpcTable[3][2] = {
1296     { AArch64::ANDWri, AArch64::ANDXri },
1297     { AArch64::ORRWri, AArch64::ORRXri },
1298     { AArch64::EORWri, AArch64::EORXri }
1299   };
1300   const TargetRegisterClass *RC;
1301   unsigned Opc;
1302   unsigned RegSize;
1303   switch (RetVT.SimpleTy) {
1304   default:
1305     return 0;
1306   case MVT::i1:
1307   case MVT::i8:
1308   case MVT::i16:
1309   case MVT::i32: {
1310     unsigned Idx = ISDOpc - ISD::AND;
1311     Opc = OpcTable[Idx][0];
1312     RC = &AArch64::GPR32spRegClass;
1313     RegSize = 32;
1314     break;
1315   }
1316   case MVT::i64:
1317     Opc = OpcTable[ISDOpc - ISD::AND][1];
1318     RC = &AArch64::GPR64spRegClass;
1319     RegSize = 64;
1320     break;
1321   }
1322
1323   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1324     return 0;
1325
1326   unsigned ResultReg =
1327       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1328                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1329   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1330     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1331     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1332   }
1333   return ResultReg;
1334 }
1335
1336 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1337                                            unsigned LHSReg, bool LHSIsKill,
1338                                            unsigned RHSReg, bool RHSIsKill,
1339                                            uint64_t ShiftImm) {
1340   assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1341          "ISD nodes are not consecutive!");
1342   static const unsigned OpcTable[3][2] = {
1343     { AArch64::ANDWrs, AArch64::ANDXrs },
1344     { AArch64::ORRWrs, AArch64::ORRXrs },
1345     { AArch64::EORWrs, AArch64::EORXrs }
1346   };
1347   const TargetRegisterClass *RC;
1348   unsigned Opc;
1349   switch (RetVT.SimpleTy) {
1350   default:
1351     return 0;
1352   case MVT::i1:
1353   case MVT::i8:
1354   case MVT::i16:
1355   case MVT::i32:
1356     Opc = OpcTable[ISDOpc - ISD::AND][0];
1357     RC = &AArch64::GPR32RegClass;
1358     break;
1359   case MVT::i64:
1360     Opc = OpcTable[ISDOpc - ISD::AND][1];
1361     RC = &AArch64::GPR64RegClass;
1362     break;
1363   }
1364   unsigned ResultReg =
1365       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1366                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1367   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1368     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1369     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1370   }
1371   return ResultReg;
1372 }
1373
1374 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1375                                      uint64_t Imm) {
1376   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1377 }
1378
1379 bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
1380                                MachineMemOperand *MMO) {
1381   // Simplify this down to something we can handle.
1382   if (!SimplifyAddress(Addr, VT))
1383     return false;
1384
1385   unsigned ScaleFactor;
1386   switch (VT.SimpleTy) {
1387   default: llvm_unreachable("Unexpected value type.");
1388   case MVT::i1:  // fall-through
1389   case MVT::i8:  ScaleFactor = 1; break;
1390   case MVT::i16: ScaleFactor = 2; break;
1391   case MVT::i32: // fall-through
1392   case MVT::f32: ScaleFactor = 4; break;
1393   case MVT::i64: // fall-through
1394   case MVT::f64: ScaleFactor = 8; break;
1395   }
1396
1397   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1398   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1399   bool UseScaled = true;
1400   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1401     UseScaled = false;
1402     ScaleFactor = 1;
1403   }
1404
1405   static const unsigned OpcTable[4][6] = {
1406     { AArch64::LDURBBi,  AArch64::LDURHHi,  AArch64::LDURWi,  AArch64::LDURXi,
1407       AArch64::LDURSi,   AArch64::LDURDi },
1408     { AArch64::LDRBBui,  AArch64::LDRHHui,  AArch64::LDRWui,  AArch64::LDRXui,
1409       AArch64::LDRSui,   AArch64::LDRDui },
1410     { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX,
1411       AArch64::LDRSroX,  AArch64::LDRDroX },
1412     { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW,
1413       AArch64::LDRSroW,  AArch64::LDRDroW }
1414   };
1415
1416   unsigned Opc;
1417   const TargetRegisterClass *RC;
1418   bool VTIsi1 = false;
1419   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1420                       Addr.getOffsetReg();
1421   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1422   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1423       Addr.getExtendType() == AArch64_AM::SXTW)
1424     Idx++;
1425
1426   switch (VT.SimpleTy) {
1427   default: llvm_unreachable("Unexpected value type.");
1428   case MVT::i1:  VTIsi1 = true; // Intentional fall-through.
1429   case MVT::i8:  Opc = OpcTable[Idx][0]; RC = &AArch64::GPR32RegClass; break;
1430   case MVT::i16: Opc = OpcTable[Idx][1]; RC = &AArch64::GPR32RegClass; break;
1431   case MVT::i32: Opc = OpcTable[Idx][2]; RC = &AArch64::GPR32RegClass; break;
1432   case MVT::i64: Opc = OpcTable[Idx][3]; RC = &AArch64::GPR64RegClass; break;
1433   case MVT::f32: Opc = OpcTable[Idx][4]; RC = &AArch64::FPR32RegClass; break;
1434   case MVT::f64: Opc = OpcTable[Idx][5]; RC = &AArch64::FPR64RegClass; break;
1435   }
1436
1437   // Create the base instruction, then add the operands.
1438   ResultReg = createResultReg(RC);
1439   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1440                                     TII.get(Opc), ResultReg);
1441   AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1442
1443   // Loading an i1 requires special handling.
1444   if (VTIsi1) {
1445     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1446     assert(ANDReg && "Unexpected AND instruction emission failure.");
1447     ResultReg = ANDReg;
1448   }
1449   return true;
1450 }
1451
1452 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1453   MVT VT;
1454   if (!isTypeSupported(I->getType(), VT))
1455     return false;
1456
1457   unsigned ResultReg;
1458   if (I->getOpcode() == Instruction::Add)
1459     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1460   else if (I->getOpcode() == Instruction::Sub)
1461     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1462   else
1463     llvm_unreachable("Unexpected instruction.");
1464
1465   assert(ResultReg && "Couldn't select Add/Sub instruction.");
1466   updateValueMap(I, ResultReg);
1467   return true;
1468 }
1469
1470 bool AArch64FastISel::selectLogicalOp(const Instruction *I, unsigned ISDOpc) {
1471   MVT VT;
1472   if (!isTypeSupported(I->getType(), VT))
1473     return false;
1474
1475   unsigned ResultReg =
1476       emitLogicalOp(ISDOpc, VT, I->getOperand(0), I->getOperand(1));
1477   if (!ResultReg)
1478     return false;
1479
1480   updateValueMap(I, ResultReg);
1481   return true;
1482 }
1483
1484 bool AArch64FastISel::SelectLoad(const Instruction *I) {
1485   MVT VT;
1486   // Verify we have a legal type before going any further.  Currently, we handle
1487   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1488   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1489   if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic())
1490     return false;
1491
1492   // See if we can handle this address.
1493   Address Addr;
1494   if (!ComputeAddress(I->getOperand(0), Addr, I->getType()))
1495     return false;
1496
1497   unsigned ResultReg;
1498   if (!EmitLoad(VT, ResultReg, Addr, createMachineMemOperandFor(I)))
1499     return false;
1500
1501   updateValueMap(I, ResultReg);
1502   return true;
1503 }
1504
1505 bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
1506                                 MachineMemOperand *MMO) {
1507   // Simplify this down to something we can handle.
1508   if (!SimplifyAddress(Addr, VT))
1509     return false;
1510
1511   unsigned ScaleFactor;
1512   switch (VT.SimpleTy) {
1513   default: llvm_unreachable("Unexpected value type.");
1514   case MVT::i1:  // fall-through
1515   case MVT::i8:  ScaleFactor = 1; break;
1516   case MVT::i16: ScaleFactor = 2; break;
1517   case MVT::i32: // fall-through
1518   case MVT::f32: ScaleFactor = 4; break;
1519   case MVT::i64: // fall-through
1520   case MVT::f64: ScaleFactor = 8; break;
1521   }
1522
1523   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1524   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1525   bool UseScaled = true;
1526   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1527     UseScaled = false;
1528     ScaleFactor = 1;
1529   }
1530
1531
1532   static const unsigned OpcTable[4][6] = {
1533     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
1534       AArch64::STURSi,   AArch64::STURDi },
1535     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
1536       AArch64::STRSui,   AArch64::STRDui },
1537     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
1538       AArch64::STRSroX,  AArch64::STRDroX },
1539     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
1540       AArch64::STRSroW,  AArch64::STRDroW }
1541
1542   };
1543
1544   unsigned Opc;
1545   bool VTIsi1 = false;
1546   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1547                       Addr.getOffsetReg();
1548   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1549   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1550       Addr.getExtendType() == AArch64_AM::SXTW)
1551     Idx++;
1552
1553   switch (VT.SimpleTy) {
1554   default: llvm_unreachable("Unexpected value type.");
1555   case MVT::i1:  VTIsi1 = true;
1556   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
1557   case MVT::i16: Opc = OpcTable[Idx][1]; break;
1558   case MVT::i32: Opc = OpcTable[Idx][2]; break;
1559   case MVT::i64: Opc = OpcTable[Idx][3]; break;
1560   case MVT::f32: Opc = OpcTable[Idx][4]; break;
1561   case MVT::f64: Opc = OpcTable[Idx][5]; break;
1562   }
1563
1564   // Storing an i1 requires special handling.
1565   if (VTIsi1 && SrcReg != AArch64::WZR) {
1566     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
1567     assert(ANDReg && "Unexpected AND instruction emission failure.");
1568     SrcReg = ANDReg;
1569   }
1570   // Create the base instruction, then add the operands.
1571   const MCInstrDesc &II = TII.get(Opc);
1572   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
1573   MachineInstrBuilder MIB =
1574       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
1575   AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
1576
1577   return true;
1578 }
1579
1580 bool AArch64FastISel::SelectStore(const Instruction *I) {
1581   MVT VT;
1582   const Value *Op0 = I->getOperand(0);
1583   // Verify we have a legal type before going any further.  Currently, we handle
1584   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1585   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1586   if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
1587       cast<StoreInst>(I)->isAtomic())
1588     return false;
1589
1590   // Get the value to be stored into a register. Use the zero register directly
1591   // when possible to avoid an unnecessary copy and a wasted register.
1592   unsigned SrcReg = 0;
1593   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
1594     if (CI->isZero())
1595       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1596   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
1597     if (CF->isZero() && !CF->isNegative()) {
1598       VT = MVT::getIntegerVT(VT.getSizeInBits());
1599       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1600     }
1601   }
1602
1603   if (!SrcReg)
1604     SrcReg = getRegForValue(Op0);
1605
1606   if (!SrcReg)
1607     return false;
1608
1609   // See if we can handle this address.
1610   Address Addr;
1611   if (!ComputeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
1612     return false;
1613
1614   if (!EmitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
1615     return false;
1616   return true;
1617 }
1618
1619 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
1620   switch (Pred) {
1621   case CmpInst::FCMP_ONE:
1622   case CmpInst::FCMP_UEQ:
1623   default:
1624     // AL is our "false" for now. The other two need more compares.
1625     return AArch64CC::AL;
1626   case CmpInst::ICMP_EQ:
1627   case CmpInst::FCMP_OEQ:
1628     return AArch64CC::EQ;
1629   case CmpInst::ICMP_SGT:
1630   case CmpInst::FCMP_OGT:
1631     return AArch64CC::GT;
1632   case CmpInst::ICMP_SGE:
1633   case CmpInst::FCMP_OGE:
1634     return AArch64CC::GE;
1635   case CmpInst::ICMP_UGT:
1636   case CmpInst::FCMP_UGT:
1637     return AArch64CC::HI;
1638   case CmpInst::FCMP_OLT:
1639     return AArch64CC::MI;
1640   case CmpInst::ICMP_ULE:
1641   case CmpInst::FCMP_OLE:
1642     return AArch64CC::LS;
1643   case CmpInst::FCMP_ORD:
1644     return AArch64CC::VC;
1645   case CmpInst::FCMP_UNO:
1646     return AArch64CC::VS;
1647   case CmpInst::FCMP_UGE:
1648     return AArch64CC::PL;
1649   case CmpInst::ICMP_SLT:
1650   case CmpInst::FCMP_ULT:
1651     return AArch64CC::LT;
1652   case CmpInst::ICMP_SLE:
1653   case CmpInst::FCMP_ULE:
1654     return AArch64CC::LE;
1655   case CmpInst::FCMP_UNE:
1656   case CmpInst::ICMP_NE:
1657     return AArch64CC::NE;
1658   case CmpInst::ICMP_UGE:
1659     return AArch64CC::HS;
1660   case CmpInst::ICMP_ULT:
1661     return AArch64CC::LO;
1662   }
1663 }
1664
1665 bool AArch64FastISel::SelectBranch(const Instruction *I) {
1666   const BranchInst *BI = cast<BranchInst>(I);
1667   if (BI->isUnconditional()) {
1668     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
1669     fastEmitBranch(MSucc, BI->getDebugLoc());
1670     return true;
1671   }
1672
1673   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1674   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1675
1676   AArch64CC::CondCode CC = AArch64CC::NE;
1677   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1678     if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
1679       // We may not handle every CC for now.
1680       CC = getCompareCC(CI->getPredicate());
1681       if (CC == AArch64CC::AL)
1682         return false;
1683
1684       // Emit the cmp.
1685       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1686         return false;
1687
1688       // Emit the branch.
1689       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1690           .addImm(CC)
1691           .addMBB(TBB);
1692
1693       // Obtain the branch weight and add the TrueBB to the successor list.
1694       uint32_t BranchWeight = 0;
1695       if (FuncInfo.BPI)
1696         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1697                                                   TBB->getBasicBlock());
1698       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1699
1700       fastEmitBranch(FBB, DbgLoc);
1701       return true;
1702     }
1703   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1704     MVT SrcVT;
1705     if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1706         (isTypeSupported(TI->getOperand(0)->getType(), SrcVT))) {
1707       unsigned CondReg = getRegForValue(TI->getOperand(0));
1708       if (!CondReg)
1709         return false;
1710       bool CondIsKill = hasTrivialKill(TI->getOperand(0));
1711
1712       // Issue an extract_subreg to get the lower 32-bits.
1713       if (SrcVT == MVT::i64) {
1714         CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
1715                                              AArch64::sub_32);
1716         CondIsKill = true;
1717       }
1718
1719       unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
1720       assert(ANDReg && "Unexpected AND instruction emission failure.");
1721       emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
1722
1723       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1724         std::swap(TBB, FBB);
1725         CC = AArch64CC::EQ;
1726       }
1727       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1728           .addImm(CC)
1729           .addMBB(TBB);
1730
1731       // Obtain the branch weight and add the TrueBB to the successor list.
1732       uint32_t BranchWeight = 0;
1733       if (FuncInfo.BPI)
1734         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1735                                                   TBB->getBasicBlock());
1736       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1737
1738       fastEmitBranch(FBB, DbgLoc);
1739       return true;
1740     }
1741   } else if (const ConstantInt *CI =
1742                  dyn_cast<ConstantInt>(BI->getCondition())) {
1743     uint64_t Imm = CI->getZExtValue();
1744     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
1745     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
1746         .addMBB(Target);
1747
1748     // Obtain the branch weight and add the target to the successor list.
1749     uint32_t BranchWeight = 0;
1750     if (FuncInfo.BPI)
1751       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1752                                                  Target->getBasicBlock());
1753     FuncInfo.MBB->addSuccessor(Target, BranchWeight);
1754     return true;
1755   } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
1756     // Fake request the condition, otherwise the intrinsic might be completely
1757     // optimized away.
1758     unsigned CondReg = getRegForValue(BI->getCondition());
1759     if (!CondReg)
1760       return false;
1761
1762     // Emit the branch.
1763     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1764       .addImm(CC)
1765       .addMBB(TBB);
1766
1767     // Obtain the branch weight and add the TrueBB to the successor list.
1768     uint32_t BranchWeight = 0;
1769     if (FuncInfo.BPI)
1770       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1771                                                  TBB->getBasicBlock());
1772     FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1773
1774     fastEmitBranch(FBB, DbgLoc);
1775     return true;
1776   }
1777
1778   unsigned CondReg = getRegForValue(BI->getCondition());
1779   if (CondReg == 0)
1780     return false;
1781   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
1782
1783   // We've been divorced from our compare!  Our block was split, and
1784   // now our compare lives in a predecessor block.  We musn't
1785   // re-compare here, as the children of the compare aren't guaranteed
1786   // live across the block boundary (we *could* check for this).
1787   // Regardless, the compare has been done in the predecessor block,
1788   // and it left a value for us in a virtual register.  Ergo, we test
1789   // the one-bit value left in the virtual register.
1790   emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0);
1791
1792   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1793     std::swap(TBB, FBB);
1794     CC = AArch64CC::EQ;
1795   }
1796
1797   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1798       .addImm(CC)
1799       .addMBB(TBB);
1800
1801   // Obtain the branch weight and add the TrueBB to the successor list.
1802   uint32_t BranchWeight = 0;
1803   if (FuncInfo.BPI)
1804     BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1805                                                TBB->getBasicBlock());
1806   FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1807
1808   fastEmitBranch(FBB, DbgLoc);
1809   return true;
1810 }
1811
1812 bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
1813   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
1814   unsigned AddrReg = getRegForValue(BI->getOperand(0));
1815   if (AddrReg == 0)
1816     return false;
1817
1818   // Emit the indirect branch.
1819   const MCInstrDesc &II = TII.get(AArch64::BR);
1820   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
1821   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
1822
1823   // Make sure the CFG is up-to-date.
1824   for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
1825     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
1826
1827   return true;
1828 }
1829
1830 bool AArch64FastISel::SelectCmp(const Instruction *I) {
1831   const CmpInst *CI = cast<CmpInst>(I);
1832
1833   // Try to optimize or fold the cmp.
1834   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1835   unsigned ResultReg = 0;
1836   switch (Predicate) {
1837   default:
1838     break;
1839   case CmpInst::FCMP_FALSE:
1840     ResultReg = createResultReg(&AArch64::GPR32RegClass);
1841     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1842             TII.get(TargetOpcode::COPY), ResultReg)
1843         .addReg(AArch64::WZR, getKillRegState(true));
1844     break;
1845   case CmpInst::FCMP_TRUE:
1846     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
1847     break;
1848   }
1849
1850   if (ResultReg) {
1851     updateValueMap(I, ResultReg);
1852     return true;
1853   }
1854
1855   // Emit the cmp.
1856   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1857     return false;
1858
1859   ResultReg = createResultReg(&AArch64::GPR32RegClass);
1860
1861   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
1862   // condition codes are inverted, because they are used by CSINC.
1863   static unsigned CondCodeTable[2][2] = {
1864     { AArch64CC::NE, AArch64CC::VC },
1865     { AArch64CC::PL, AArch64CC::LE }
1866   };
1867   unsigned *CondCodes = nullptr;
1868   switch (Predicate) {
1869   default:
1870     break;
1871   case CmpInst::FCMP_UEQ:
1872     CondCodes = &CondCodeTable[0][0];
1873     break;
1874   case CmpInst::FCMP_ONE:
1875     CondCodes = &CondCodeTable[1][0];
1876     break;
1877   }
1878
1879   if (CondCodes) {
1880     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
1881     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
1882             TmpReg1)
1883         .addReg(AArch64::WZR, getKillRegState(true))
1884         .addReg(AArch64::WZR, getKillRegState(true))
1885         .addImm(CondCodes[0]);
1886     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
1887             ResultReg)
1888         .addReg(TmpReg1, getKillRegState(true))
1889         .addReg(AArch64::WZR, getKillRegState(true))
1890         .addImm(CondCodes[1]);
1891
1892     updateValueMap(I, ResultReg);
1893     return true;
1894   }
1895
1896   // Now set a register based on the comparison.
1897   AArch64CC::CondCode CC = getCompareCC(Predicate);
1898   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
1899   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
1900   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
1901           ResultReg)
1902       .addReg(AArch64::WZR, getKillRegState(true))
1903       .addReg(AArch64::WZR, getKillRegState(true))
1904       .addImm(invertedCC);
1905
1906   updateValueMap(I, ResultReg);
1907   return true;
1908 }
1909
1910 bool AArch64FastISel::SelectSelect(const Instruction *I) {
1911   const SelectInst *SI = cast<SelectInst>(I);
1912
1913   EVT DestEVT = TLI.getValueType(SI->getType(), true);
1914   if (!DestEVT.isSimple())
1915     return false;
1916
1917   MVT DestVT = DestEVT.getSimpleVT();
1918   if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
1919       DestVT != MVT::f64)
1920     return false;
1921
1922   unsigned SelectOpc;
1923   const TargetRegisterClass *RC = nullptr;
1924   switch (DestVT.SimpleTy) {
1925   default: return false;
1926   case MVT::i32:
1927     SelectOpc = AArch64::CSELWr;    RC = &AArch64::GPR32RegClass; break;
1928   case MVT::i64:
1929     SelectOpc = AArch64::CSELXr;    RC = &AArch64::GPR64RegClass; break;
1930   case MVT::f32:
1931     SelectOpc = AArch64::FCSELSrrr; RC = &AArch64::FPR32RegClass; break;
1932   case MVT::f64:
1933     SelectOpc = AArch64::FCSELDrrr; RC = &AArch64::FPR64RegClass; break;
1934   }
1935
1936   const Value *Cond = SI->getCondition();
1937   bool NeedTest = true;
1938   AArch64CC::CondCode CC = AArch64CC::NE;
1939   if (foldXALUIntrinsic(CC, I, Cond))
1940     NeedTest = false;
1941
1942   unsigned CondReg = getRegForValue(Cond);
1943   if (!CondReg)
1944     return false;
1945   bool CondIsKill = hasTrivialKill(Cond);
1946
1947   if (NeedTest) {
1948     unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
1949     assert(ANDReg && "Unexpected AND instruction emission failure.");
1950     emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
1951   }
1952
1953   unsigned TrueReg = getRegForValue(SI->getTrueValue());
1954   bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
1955
1956   unsigned FalseReg = getRegForValue(SI->getFalseValue());
1957   bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
1958
1959   if (!TrueReg || !FalseReg)
1960     return false;
1961
1962   unsigned ResultReg = fastEmitInst_rri(SelectOpc, RC, TrueReg, TrueIsKill,
1963                                         FalseReg, FalseIsKill, CC);
1964   updateValueMap(I, ResultReg);
1965   return true;
1966 }
1967
1968 bool AArch64FastISel::SelectFPExt(const Instruction *I) {
1969   Value *V = I->getOperand(0);
1970   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
1971     return false;
1972
1973   unsigned Op = getRegForValue(V);
1974   if (Op == 0)
1975     return false;
1976
1977   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
1978   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
1979           ResultReg).addReg(Op);
1980   updateValueMap(I, ResultReg);
1981   return true;
1982 }
1983
1984 bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
1985   Value *V = I->getOperand(0);
1986   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
1987     return false;
1988
1989   unsigned Op = getRegForValue(V);
1990   if (Op == 0)
1991     return false;
1992
1993   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
1994   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
1995           ResultReg).addReg(Op);
1996   updateValueMap(I, ResultReg);
1997   return true;
1998 }
1999
2000 // FPToUI and FPToSI
2001 bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
2002   MVT DestVT;
2003   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2004     return false;
2005
2006   unsigned SrcReg = getRegForValue(I->getOperand(0));
2007   if (SrcReg == 0)
2008     return false;
2009
2010   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2011   if (SrcVT == MVT::f128)
2012     return false;
2013
2014   unsigned Opc;
2015   if (SrcVT == MVT::f64) {
2016     if (Signed)
2017       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2018     else
2019       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2020   } else {
2021     if (Signed)
2022       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2023     else
2024       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2025   }
2026   unsigned ResultReg = createResultReg(
2027       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2028   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2029       .addReg(SrcReg);
2030   updateValueMap(I, ResultReg);
2031   return true;
2032 }
2033
2034 bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
2035   MVT DestVT;
2036   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2037     return false;
2038   assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2039           "Unexpected value type.");
2040
2041   unsigned SrcReg = getRegForValue(I->getOperand(0));
2042   if (!SrcReg)
2043     return false;
2044   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2045
2046   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2047
2048   // Handle sign-extension.
2049   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2050     SrcReg =
2051         EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2052     if (!SrcReg)
2053       return false;
2054     SrcIsKill = true;
2055   }
2056
2057   unsigned Opc;
2058   if (SrcVT == MVT::i64) {
2059     if (Signed)
2060       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2061     else
2062       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2063   } else {
2064     if (Signed)
2065       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2066     else
2067       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2068   }
2069
2070   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2071                                       SrcIsKill);
2072   updateValueMap(I, ResultReg);
2073   return true;
2074 }
2075
2076 bool AArch64FastISel::fastLowerArguments() {
2077   if (!FuncInfo.CanLowerReturn)
2078     return false;
2079
2080   const Function *F = FuncInfo.Fn;
2081   if (F->isVarArg())
2082     return false;
2083
2084   CallingConv::ID CC = F->getCallingConv();
2085   if (CC != CallingConv::C)
2086     return false;
2087
2088   // Only handle simple cases like i1/i8/i16/i32/i64/f32/f64 of up to 8 GPR and
2089   // FPR each.
2090   unsigned GPRCnt = 0;
2091   unsigned FPRCnt = 0;
2092   unsigned Idx = 0;
2093   for (auto const &Arg : F->args()) {
2094     // The first argument is at index 1.
2095     ++Idx;
2096     if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
2097         F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
2098         F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
2099         F->getAttributes().hasAttribute(Idx, Attribute::Nest))
2100       return false;
2101
2102     Type *ArgTy = Arg.getType();
2103     if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
2104       return false;
2105
2106     EVT ArgVT = TLI.getValueType(ArgTy);
2107     if (!ArgVT.isSimple()) return false;
2108     switch (ArgVT.getSimpleVT().SimpleTy) {
2109     default: return false;
2110     case MVT::i1:
2111     case MVT::i8:
2112     case MVT::i16:
2113     case MVT::i32:
2114     case MVT::i64:
2115       ++GPRCnt;
2116       break;
2117     case MVT::f16:
2118     case MVT::f32:
2119     case MVT::f64:
2120       ++FPRCnt;
2121       break;
2122     }
2123
2124     if (GPRCnt > 8 || FPRCnt > 8)
2125       return false;
2126   }
2127
2128   static const MCPhysReg Registers[5][8] = {
2129     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2130       AArch64::W5, AArch64::W6, AArch64::W7 },
2131     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2132       AArch64::X5, AArch64::X6, AArch64::X7 },
2133     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2134       AArch64::H5, AArch64::H6, AArch64::H7 },
2135     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2136       AArch64::S5, AArch64::S6, AArch64::S7 },
2137     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2138       AArch64::D5, AArch64::D6, AArch64::D7 }
2139   };
2140
2141   unsigned GPRIdx = 0;
2142   unsigned FPRIdx = 0;
2143   for (auto const &Arg : F->args()) {
2144     MVT VT = TLI.getSimpleValueType(Arg.getType());
2145     unsigned SrcReg;
2146     const TargetRegisterClass *RC = nullptr;
2147     switch (VT.SimpleTy) {
2148     default: llvm_unreachable("Unexpected value type.");
2149     case MVT::i1:
2150     case MVT::i8:
2151     case MVT::i16: VT = MVT::i32; // fall-through
2152     case MVT::i32:
2153       SrcReg = Registers[0][GPRIdx++]; RC = &AArch64::GPR32RegClass; break;
2154     case MVT::i64:
2155       SrcReg = Registers[1][GPRIdx++]; RC = &AArch64::GPR64RegClass; break;
2156     case MVT::f16:
2157       SrcReg = Registers[2][FPRIdx++]; RC = &AArch64::FPR16RegClass; break;
2158     case MVT::f32:
2159       SrcReg = Registers[3][FPRIdx++]; RC = &AArch64::FPR32RegClass; break;
2160     case MVT::f64:
2161       SrcReg = Registers[4][FPRIdx++]; RC = &AArch64::FPR64RegClass; break;
2162     }
2163
2164     // Skip unused arguments.
2165     if (Arg.use_empty()) {
2166       updateValueMap(&Arg, 0);
2167       continue;
2168     }
2169
2170     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2171     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2172     // Without this, EmitLiveInCopies may eliminate the livein if its only
2173     // use is a bitcast (which isn't turned into an instruction).
2174     unsigned ResultReg = createResultReg(RC);
2175     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2176             TII.get(TargetOpcode::COPY), ResultReg)
2177         .addReg(DstReg, getKillRegState(true));
2178     updateValueMap(&Arg, ResultReg);
2179   }
2180   return true;
2181 }
2182
2183 bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI,
2184                                       SmallVectorImpl<MVT> &OutVTs,
2185                                       unsigned &NumBytes) {
2186   CallingConv::ID CC = CLI.CallConv;
2187   SmallVector<CCValAssign, 16> ArgLocs;
2188   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2189   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2190
2191   // Get a count of how many bytes are to be pushed on the stack.
2192   NumBytes = CCInfo.getNextStackOffset();
2193
2194   // Issue CALLSEQ_START
2195   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
2196   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
2197     .addImm(NumBytes);
2198
2199   // Process the args.
2200   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2201     CCValAssign &VA = ArgLocs[i];
2202     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
2203     MVT ArgVT = OutVTs[VA.getValNo()];
2204
2205     unsigned ArgReg = getRegForValue(ArgVal);
2206     if (!ArgReg)
2207       return false;
2208
2209     // Handle arg promotion: SExt, ZExt, AExt.
2210     switch (VA.getLocInfo()) {
2211     case CCValAssign::Full:
2212       break;
2213     case CCValAssign::SExt: {
2214       MVT DestVT = VA.getLocVT();
2215       MVT SrcVT = ArgVT;
2216       ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
2217       if (!ArgReg)
2218         return false;
2219       break;
2220     }
2221     case CCValAssign::AExt:
2222     // Intentional fall-through.
2223     case CCValAssign::ZExt: {
2224       MVT DestVT = VA.getLocVT();
2225       MVT SrcVT = ArgVT;
2226       ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
2227       if (!ArgReg)
2228         return false;
2229       break;
2230     }
2231     default:
2232       llvm_unreachable("Unknown arg promotion!");
2233     }
2234
2235     // Now copy/store arg to correct locations.
2236     if (VA.isRegLoc() && !VA.needsCustom()) {
2237       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2238               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
2239       CLI.OutRegs.push_back(VA.getLocReg());
2240     } else if (VA.needsCustom()) {
2241       // FIXME: Handle custom args.
2242       return false;
2243     } else {
2244       assert(VA.isMemLoc() && "Assuming store on stack.");
2245
2246       // Don't emit stores for undef values.
2247       if (isa<UndefValue>(ArgVal))
2248         continue;
2249
2250       // Need to store on the stack.
2251       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
2252
2253       unsigned BEAlign = 0;
2254       if (ArgSize < 8 && !Subtarget->isLittleEndian())
2255         BEAlign = 8 - ArgSize;
2256
2257       Address Addr;
2258       Addr.setKind(Address::RegBase);
2259       Addr.setReg(AArch64::SP);
2260       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
2261
2262       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
2263       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
2264         MachinePointerInfo::getStack(Addr.getOffset()),
2265         MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
2266
2267       if (!EmitStore(ArgVT, ArgReg, Addr, MMO))
2268         return false;
2269     }
2270   }
2271   return true;
2272 }
2273
2274 bool AArch64FastISel::FinishCall(CallLoweringInfo &CLI, MVT RetVT,
2275                                  unsigned NumBytes) {
2276   CallingConv::ID CC = CLI.CallConv;
2277
2278   // Issue CALLSEQ_END
2279   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
2280   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
2281     .addImm(NumBytes).addImm(0);
2282
2283   // Now the return value.
2284   if (RetVT != MVT::isVoid) {
2285     SmallVector<CCValAssign, 16> RVLocs;
2286     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
2287     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
2288
2289     // Only handle a single return value.
2290     if (RVLocs.size() != 1)
2291       return false;
2292
2293     // Copy all of the result registers out of their specified physreg.
2294     MVT CopyVT = RVLocs[0].getValVT();
2295     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
2296     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2297             TII.get(TargetOpcode::COPY), ResultReg)
2298         .addReg(RVLocs[0].getLocReg());
2299     CLI.InRegs.push_back(RVLocs[0].getLocReg());
2300
2301     CLI.ResultReg = ResultReg;
2302     CLI.NumResultRegs = 1;
2303   }
2304
2305   return true;
2306 }
2307
2308 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
2309   CallingConv::ID CC  = CLI.CallConv;
2310   bool IsTailCall     = CLI.IsTailCall;
2311   bool IsVarArg       = CLI.IsVarArg;
2312   const Value *Callee = CLI.Callee;
2313   const char *SymName = CLI.SymName;
2314
2315   // Allow SelectionDAG isel to handle tail calls.
2316   if (IsTailCall)
2317     return false;
2318
2319   CodeModel::Model CM = TM.getCodeModel();
2320   // Only support the small and large code model.
2321   if (CM != CodeModel::Small && CM != CodeModel::Large)
2322     return false;
2323
2324   // FIXME: Add large code model support for ELF.
2325   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
2326     return false;
2327
2328   // Let SDISel handle vararg functions.
2329   if (IsVarArg)
2330     return false;
2331
2332   // FIXME: Only handle *simple* calls for now.
2333   MVT RetVT;
2334   if (CLI.RetTy->isVoidTy())
2335     RetVT = MVT::isVoid;
2336   else if (!isTypeLegal(CLI.RetTy, RetVT))
2337     return false;
2338
2339   for (auto Flag : CLI.OutFlags)
2340     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
2341       return false;
2342
2343   // Set up the argument vectors.
2344   SmallVector<MVT, 16> OutVTs;
2345   OutVTs.reserve(CLI.OutVals.size());
2346
2347   for (auto *Val : CLI.OutVals) {
2348     MVT VT;
2349     if (!isTypeLegal(Val->getType(), VT) &&
2350         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
2351       return false;
2352
2353     // We don't handle vector parameters yet.
2354     if (VT.isVector() || VT.getSizeInBits() > 64)
2355       return false;
2356
2357     OutVTs.push_back(VT);
2358   }
2359
2360   Address Addr;
2361   if (!ComputeCallAddress(Callee, Addr))
2362     return false;
2363
2364   // Handle the arguments now that we've gotten them.
2365   unsigned NumBytes;
2366   if (!ProcessCallArgs(CLI, OutVTs, NumBytes))
2367     return false;
2368
2369   // Issue the call.
2370   MachineInstrBuilder MIB;
2371   if (CM == CodeModel::Small) {
2372     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
2373     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
2374     if (SymName)
2375       MIB.addExternalSymbol(SymName, 0);
2376     else if (Addr.getGlobalValue())
2377       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
2378     else if (Addr.getReg()) {
2379       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
2380       MIB.addReg(Reg);
2381     } else
2382       return false;
2383   } else {
2384     unsigned CallReg = 0;
2385     if (SymName) {
2386       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
2387       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
2388               ADRPReg)
2389         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
2390
2391       CallReg = createResultReg(&AArch64::GPR64RegClass);
2392       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
2393               CallReg)
2394         .addReg(ADRPReg)
2395         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
2396                            AArch64II::MO_NC);
2397     } else if (Addr.getGlobalValue()) {
2398       CallReg = AArch64MaterializeGV(Addr.getGlobalValue());
2399     } else if (Addr.getReg())
2400       CallReg = Addr.getReg();
2401
2402     if (!CallReg)
2403       return false;
2404
2405     const MCInstrDesc &II = TII.get(AArch64::BLR);
2406     CallReg = constrainOperandRegClass(II, CallReg, 0);
2407     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
2408   }
2409
2410   // Add implicit physical register uses to the call.
2411   for (auto Reg : CLI.OutRegs)
2412     MIB.addReg(Reg, RegState::Implicit);
2413
2414   // Add a register mask with the call-preserved registers.
2415   // Proper defs for return values will be added by setPhysRegsDeadExcept().
2416   MIB.addRegMask(TRI.getCallPreservedMask(CC));
2417
2418   CLI.Call = MIB;
2419
2420   // Finish off the call including any return values.
2421   return FinishCall(CLI, RetVT, NumBytes);
2422 }
2423
2424 bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
2425   if (Alignment)
2426     return Len / Alignment <= 4;
2427   else
2428     return Len < 32;
2429 }
2430
2431 bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
2432                                          uint64_t Len, unsigned Alignment) {
2433   // Make sure we don't bloat code by inlining very large memcpy's.
2434   if (!IsMemCpySmall(Len, Alignment))
2435     return false;
2436
2437   int64_t UnscaledOffset = 0;
2438   Address OrigDest = Dest;
2439   Address OrigSrc = Src;
2440
2441   while (Len) {
2442     MVT VT;
2443     if (!Alignment || Alignment >= 8) {
2444       if (Len >= 8)
2445         VT = MVT::i64;
2446       else if (Len >= 4)
2447         VT = MVT::i32;
2448       else if (Len >= 2)
2449         VT = MVT::i16;
2450       else {
2451         VT = MVT::i8;
2452       }
2453     } else {
2454       // Bound based on alignment.
2455       if (Len >= 4 && Alignment == 4)
2456         VT = MVT::i32;
2457       else if (Len >= 2 && Alignment == 2)
2458         VT = MVT::i16;
2459       else {
2460         VT = MVT::i8;
2461       }
2462     }
2463
2464     bool RV;
2465     unsigned ResultReg;
2466     RV = EmitLoad(VT, ResultReg, Src);
2467     if (!RV)
2468       return false;
2469
2470     RV = EmitStore(VT, ResultReg, Dest);
2471     if (!RV)
2472       return false;
2473
2474     int64_t Size = VT.getSizeInBits() / 8;
2475     Len -= Size;
2476     UnscaledOffset += Size;
2477
2478     // We need to recompute the unscaled offset for each iteration.
2479     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
2480     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
2481   }
2482
2483   return true;
2484 }
2485
2486 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
2487 /// into the user. The condition code will only be updated on success.
2488 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
2489                                         const Instruction *I,
2490                                         const Value *Cond) {
2491   if (!isa<ExtractValueInst>(Cond))
2492     return false;
2493
2494   const auto *EV = cast<ExtractValueInst>(Cond);
2495   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
2496     return false;
2497
2498   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
2499   MVT RetVT;
2500   const Function *Callee = II->getCalledFunction();
2501   Type *RetTy =
2502   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
2503   if (!isTypeLegal(RetTy, RetVT))
2504     return false;
2505
2506   if (RetVT != MVT::i32 && RetVT != MVT::i64)
2507     return false;
2508
2509   AArch64CC::CondCode TmpCC;
2510   switch (II->getIntrinsicID()) {
2511     default: return false;
2512     case Intrinsic::sadd_with_overflow:
2513     case Intrinsic::ssub_with_overflow: TmpCC = AArch64CC::VS; break;
2514     case Intrinsic::uadd_with_overflow: TmpCC = AArch64CC::HS; break;
2515     case Intrinsic::usub_with_overflow: TmpCC = AArch64CC::LO; break;
2516     case Intrinsic::smul_with_overflow:
2517     case Intrinsic::umul_with_overflow: TmpCC = AArch64CC::NE; break;
2518   }
2519
2520   // Check if both instructions are in the same basic block.
2521   if (II->getParent() != I->getParent())
2522     return false;
2523
2524   // Make sure nothing is in the way
2525   BasicBlock::const_iterator Start = I;
2526   BasicBlock::const_iterator End = II;
2527   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
2528     // We only expect extractvalue instructions between the intrinsic and the
2529     // instruction to be selected.
2530     if (!isa<ExtractValueInst>(Itr))
2531       return false;
2532
2533     // Check that the extractvalue operand comes from the intrinsic.
2534     const auto *EVI = cast<ExtractValueInst>(Itr);
2535     if (EVI->getAggregateOperand() != II)
2536       return false;
2537   }
2538
2539   CC = TmpCC;
2540   return true;
2541 }
2542
2543 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2544   // FIXME: Handle more intrinsics.
2545   switch (II->getIntrinsicID()) {
2546   default: return false;
2547   case Intrinsic::frameaddress: {
2548     MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
2549     MFI->setFrameAddressIsTaken(true);
2550
2551     const AArch64RegisterInfo *RegInfo =
2552         static_cast<const AArch64RegisterInfo *>(
2553             TM.getSubtargetImpl()->getRegisterInfo());
2554     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
2555     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2556     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2557             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
2558     // Recursively load frame address
2559     // ldr x0, [fp]
2560     // ldr x0, [x0]
2561     // ldr x0, [x0]
2562     // ...
2563     unsigned DestReg;
2564     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2565     while (Depth--) {
2566       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
2567                                 SrcReg, /*IsKill=*/true, 0);
2568       assert(DestReg && "Unexpected LDR instruction emission failure.");
2569       SrcReg = DestReg;
2570     }
2571
2572     updateValueMap(II, SrcReg);
2573     return true;
2574   }
2575   case Intrinsic::memcpy:
2576   case Intrinsic::memmove: {
2577     const auto *MTI = cast<MemTransferInst>(II);
2578     // Don't handle volatile.
2579     if (MTI->isVolatile())
2580       return false;
2581
2582     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
2583     // we would emit dead code because we don't currently handle memmoves.
2584     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
2585     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
2586       // Small memcpy's are common enough that we want to do them without a call
2587       // if possible.
2588       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
2589       unsigned Alignment = MTI->getAlignment();
2590       if (IsMemCpySmall(Len, Alignment)) {
2591         Address Dest, Src;
2592         if (!ComputeAddress(MTI->getRawDest(), Dest) ||
2593             !ComputeAddress(MTI->getRawSource(), Src))
2594           return false;
2595         if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
2596           return true;
2597       }
2598     }
2599
2600     if (!MTI->getLength()->getType()->isIntegerTy(64))
2601       return false;
2602
2603     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
2604       // Fast instruction selection doesn't support the special
2605       // address spaces.
2606       return false;
2607
2608     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
2609     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
2610   }
2611   case Intrinsic::memset: {
2612     const MemSetInst *MSI = cast<MemSetInst>(II);
2613     // Don't handle volatile.
2614     if (MSI->isVolatile())
2615       return false;
2616
2617     if (!MSI->getLength()->getType()->isIntegerTy(64))
2618       return false;
2619
2620     if (MSI->getDestAddressSpace() > 255)
2621       // Fast instruction selection doesn't support the special
2622       // address spaces.
2623       return false;
2624
2625     return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
2626   }
2627   case Intrinsic::trap: {
2628     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
2629         .addImm(1);
2630     return true;
2631   }
2632   case Intrinsic::sqrt: {
2633     Type *RetTy = II->getCalledFunction()->getReturnType();
2634
2635     MVT VT;
2636     if (!isTypeLegal(RetTy, VT))
2637       return false;
2638
2639     unsigned Op0Reg = getRegForValue(II->getOperand(0));
2640     if (!Op0Reg)
2641       return false;
2642     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
2643
2644     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
2645     if (!ResultReg)
2646       return false;
2647
2648     updateValueMap(II, ResultReg);
2649     return true;
2650   }
2651   case Intrinsic::sadd_with_overflow:
2652   case Intrinsic::uadd_with_overflow:
2653   case Intrinsic::ssub_with_overflow:
2654   case Intrinsic::usub_with_overflow:
2655   case Intrinsic::smul_with_overflow:
2656   case Intrinsic::umul_with_overflow: {
2657     // This implements the basic lowering of the xalu with overflow intrinsics.
2658     const Function *Callee = II->getCalledFunction();
2659     auto *Ty = cast<StructType>(Callee->getReturnType());
2660     Type *RetTy = Ty->getTypeAtIndex(0U);
2661
2662     MVT VT;
2663     if (!isTypeLegal(RetTy, VT))
2664       return false;
2665
2666     if (VT != MVT::i32 && VT != MVT::i64)
2667       return false;
2668
2669     const Value *LHS = II->getArgOperand(0);
2670     const Value *RHS = II->getArgOperand(1);
2671     // Canonicalize immediate to the RHS.
2672     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2673         isCommutativeIntrinsic(II))
2674       std::swap(LHS, RHS);
2675
2676     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
2677     AArch64CC::CondCode CC = AArch64CC::Invalid;
2678     switch (II->getIntrinsicID()) {
2679     default: llvm_unreachable("Unexpected intrinsic!");
2680     case Intrinsic::sadd_with_overflow:
2681       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
2682       CC = AArch64CC::VS;
2683       break;
2684     case Intrinsic::uadd_with_overflow:
2685       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
2686       CC = AArch64CC::HS;
2687       break;
2688     case Intrinsic::ssub_with_overflow:
2689       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
2690       CC = AArch64CC::VS;
2691       break;
2692     case Intrinsic::usub_with_overflow:
2693       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
2694       CC = AArch64CC::LO;
2695       break;
2696     case Intrinsic::smul_with_overflow: {
2697       CC = AArch64CC::NE;
2698       unsigned LHSReg = getRegForValue(LHS);
2699       if (!LHSReg)
2700         return false;
2701       bool LHSIsKill = hasTrivialKill(LHS);
2702
2703       unsigned RHSReg = getRegForValue(RHS);
2704       if (!RHSReg)
2705         return false;
2706       bool RHSIsKill = hasTrivialKill(RHS);
2707
2708       if (VT == MVT::i32) {
2709         MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2710         unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
2711                                        /*IsKill=*/false, 32);
2712         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
2713                                             AArch64::sub_32);
2714         ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
2715                                               AArch64::sub_32);
2716         emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
2717                     AArch64_AM::ASR, 31, /*WantResult=*/false);
2718       } else {
2719         assert(VT == MVT::i64 && "Unexpected value type.");
2720         MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2721         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
2722                                         RHSReg, RHSIsKill);
2723         emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
2724                     AArch64_AM::ASR, 63, /*WantResult=*/false);
2725       }
2726       break;
2727     }
2728     case Intrinsic::umul_with_overflow: {
2729       CC = AArch64CC::NE;
2730       unsigned LHSReg = getRegForValue(LHS);
2731       if (!LHSReg)
2732         return false;
2733       bool LHSIsKill = hasTrivialKill(LHS);
2734
2735       unsigned RHSReg = getRegForValue(RHS);
2736       if (!RHSReg)
2737         return false;
2738       bool RHSIsKill = hasTrivialKill(RHS);
2739
2740       if (VT == MVT::i32) {
2741         MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2742         emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
2743                     /*IsKill=*/false, AArch64_AM::LSR, 32,
2744                     /*WantResult=*/false);
2745         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
2746                                             AArch64::sub_32);
2747       } else {
2748         assert(VT == MVT::i64 && "Unexpected value type.");
2749         MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2750         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
2751                                         RHSReg, RHSIsKill);
2752         emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
2753                     /*IsKill=*/false, /*WantResult=*/false);
2754       }
2755       break;
2756     }
2757     }
2758
2759     if (MulReg) {
2760       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
2761       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2762               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
2763     }
2764
2765     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
2766                                   AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
2767                                   /*IsKill=*/true, getInvertedCondCode(CC));
2768     assert((ResultReg1 + 1) == ResultReg2 &&
2769            "Nonconsecutive result registers.");
2770     updateValueMap(II, ResultReg1, 2);
2771     return true;
2772   }
2773   }
2774   return false;
2775 }
2776
2777 bool AArch64FastISel::SelectRet(const Instruction *I) {
2778   const ReturnInst *Ret = cast<ReturnInst>(I);
2779   const Function &F = *I->getParent()->getParent();
2780
2781   if (!FuncInfo.CanLowerReturn)
2782     return false;
2783
2784   if (F.isVarArg())
2785     return false;
2786
2787   // Build a list of return value registers.
2788   SmallVector<unsigned, 4> RetRegs;
2789
2790   if (Ret->getNumOperands() > 0) {
2791     CallingConv::ID CC = F.getCallingConv();
2792     SmallVector<ISD::OutputArg, 4> Outs;
2793     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
2794
2795     // Analyze operands of the call, assigning locations to each operand.
2796     SmallVector<CCValAssign, 16> ValLocs;
2797     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
2798     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
2799                                                      : RetCC_AArch64_AAPCS;
2800     CCInfo.AnalyzeReturn(Outs, RetCC);
2801
2802     // Only handle a single return value for now.
2803     if (ValLocs.size() != 1)
2804       return false;
2805
2806     CCValAssign &VA = ValLocs[0];
2807     const Value *RV = Ret->getOperand(0);
2808
2809     // Don't bother handling odd stuff for now.
2810     if (VA.getLocInfo() != CCValAssign::Full)
2811       return false;
2812     // Only handle register returns for now.
2813     if (!VA.isRegLoc())
2814       return false;
2815     unsigned Reg = getRegForValue(RV);
2816     if (Reg == 0)
2817       return false;
2818
2819     unsigned SrcReg = Reg + VA.getValNo();
2820     unsigned DestReg = VA.getLocReg();
2821     // Avoid a cross-class copy. This is very unlikely.
2822     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
2823       return false;
2824
2825     EVT RVEVT = TLI.getValueType(RV->getType());
2826     if (!RVEVT.isSimple())
2827       return false;
2828
2829     // Vectors (of > 1 lane) in big endian need tricky handling.
2830     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1)
2831       return false;
2832
2833     MVT RVVT = RVEVT.getSimpleVT();
2834     if (RVVT == MVT::f128)
2835       return false;
2836     MVT DestVT = VA.getValVT();
2837     // Special handling for extended integers.
2838     if (RVVT != DestVT) {
2839       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
2840         return false;
2841
2842       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
2843         return false;
2844
2845       bool isZExt = Outs[0].Flags.isZExt();
2846       SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
2847       if (SrcReg == 0)
2848         return false;
2849     }
2850
2851     // Make the copy.
2852     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2853             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
2854
2855     // Add register to return instruction.
2856     RetRegs.push_back(VA.getLocReg());
2857   }
2858
2859   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2860                                     TII.get(AArch64::RET_ReallyLR));
2861   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
2862     MIB.addReg(RetRegs[i], RegState::Implicit);
2863   return true;
2864 }
2865
2866 bool AArch64FastISel::SelectTrunc(const Instruction *I) {
2867   Type *DestTy = I->getType();
2868   Value *Op = I->getOperand(0);
2869   Type *SrcTy = Op->getType();
2870
2871   EVT SrcEVT = TLI.getValueType(SrcTy, true);
2872   EVT DestEVT = TLI.getValueType(DestTy, true);
2873   if (!SrcEVT.isSimple())
2874     return false;
2875   if (!DestEVT.isSimple())
2876     return false;
2877
2878   MVT SrcVT = SrcEVT.getSimpleVT();
2879   MVT DestVT = DestEVT.getSimpleVT();
2880
2881   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
2882       SrcVT != MVT::i8)
2883     return false;
2884   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
2885       DestVT != MVT::i1)
2886     return false;
2887
2888   unsigned SrcReg = getRegForValue(Op);
2889   if (!SrcReg)
2890     return false;
2891   bool SrcIsKill = hasTrivialKill(Op);
2892
2893   // If we're truncating from i64 to a smaller non-legal type then generate an
2894   // AND. Otherwise, we know the high bits are undefined and a truncate only
2895   // generate a COPY. We cannot mark the source register also as result
2896   // register, because this can incorrectly transfer the kill flag onto the
2897   // source register.
2898   unsigned ResultReg;
2899   if (SrcVT == MVT::i64) {
2900     uint64_t Mask = 0;
2901     switch (DestVT.SimpleTy) {
2902     default:
2903       // Trunc i64 to i32 is handled by the target-independent fast-isel.
2904       return false;
2905     case MVT::i1:
2906       Mask = 0x1;
2907       break;
2908     case MVT::i8:
2909       Mask = 0xff;
2910       break;
2911     case MVT::i16:
2912       Mask = 0xffff;
2913       break;
2914     }
2915     // Issue an extract_subreg to get the lower 32-bits.
2916     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2917                                                 AArch64::sub_32);
2918     // Create the AND instruction which performs the actual truncation.
2919     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
2920     assert(ResultReg && "Unexpected AND instruction emission failure.");
2921   } else {
2922     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2923     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2924             TII.get(TargetOpcode::COPY), ResultReg)
2925         .addReg(SrcReg, getKillRegState(SrcIsKill));
2926   }
2927
2928   updateValueMap(I, ResultReg);
2929   return true;
2930 }
2931
2932 unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
2933   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
2934           DestVT == MVT::i64) &&
2935          "Unexpected value type.");
2936   // Handle i8 and i16 as i32.
2937   if (DestVT == MVT::i8 || DestVT == MVT::i16)
2938     DestVT = MVT::i32;
2939
2940   if (isZExt) {
2941     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2942     assert(ResultReg && "Unexpected AND instruction emission failure.");
2943     if (DestVT == MVT::i64) {
2944       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
2945       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
2946       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2947       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2948               TII.get(AArch64::SUBREG_TO_REG), Reg64)
2949           .addImm(0)
2950           .addReg(ResultReg)
2951           .addImm(AArch64::sub_32);
2952       ResultReg = Reg64;
2953     }
2954     return ResultReg;
2955   } else {
2956     if (DestVT == MVT::i64) {
2957       // FIXME: We're SExt i1 to i64.
2958       return 0;
2959     }
2960     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
2961                             /*TODO:IsKill=*/false, 0, 0);
2962   }
2963 }
2964
2965 unsigned AArch64FastISel::Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2966                                       unsigned Op1, bool Op1IsKill) {
2967   unsigned Opc, ZReg;
2968   switch (RetVT.SimpleTy) {
2969   default: return 0;
2970   case MVT::i8:
2971   case MVT::i16:
2972   case MVT::i32:
2973     RetVT = MVT::i32;
2974     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
2975   case MVT::i64:
2976     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
2977   }
2978
2979   const TargetRegisterClass *RC =
2980       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
2981   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
2982                           /*IsKill=*/ZReg, true);
2983 }
2984
2985 unsigned AArch64FastISel::Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2986                                         unsigned Op1, bool Op1IsKill) {
2987   if (RetVT != MVT::i64)
2988     return 0;
2989
2990   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
2991                           Op0, Op0IsKill, Op1, Op1IsKill,
2992                           AArch64::XZR, /*IsKill=*/true);
2993 }
2994
2995 unsigned AArch64FastISel::Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2996                                         unsigned Op1, bool Op1IsKill) {
2997   if (RetVT != MVT::i64)
2998     return 0;
2999
3000   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3001                           Op0, Op0IsKill, Op1, Op1IsKill,
3002                           AArch64::XZR, /*IsKill=*/true);
3003 }
3004
3005 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3006                                      unsigned Op1Reg, bool Op1IsKill) {
3007   unsigned Opc = 0;
3008   bool NeedTrunc = false;
3009   uint64_t Mask = 0;
3010   switch (RetVT.SimpleTy) {
3011   default: return 0;
3012   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
3013   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
3014   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
3015   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
3016   }
3017
3018   const TargetRegisterClass *RC =
3019       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3020   if (NeedTrunc) {
3021     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3022     Op1IsKill = true;
3023   }
3024   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3025                                        Op1IsKill);
3026   if (NeedTrunc)
3027     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3028   return ResultReg;
3029 }
3030
3031 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3032                                      bool Op0IsKill, uint64_t Shift,
3033                                      bool IsZext) {
3034   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3035          "Unexpected source/return type pair.");
3036   assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3037           SrcVT == MVT::i64) && "Unexpected source value type.");
3038   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3039           RetVT == MVT::i64) && "Unexpected return value type.");
3040
3041   bool Is64Bit = (RetVT == MVT::i64);
3042   unsigned RegSize = Is64Bit ? 64 : 32;
3043   unsigned DstBits = RetVT.getSizeInBits();
3044   unsigned SrcBits = SrcVT.getSizeInBits();
3045
3046   // Don't deal with undefined shifts.
3047   if (Shift >= DstBits)
3048     return 0;
3049
3050   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3051   // {S|U}BFM Wd, Wn, #r, #s
3052   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
3053
3054   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3055   // %2 = shl i16 %1, 4
3056   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
3057   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
3058   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
3059   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
3060
3061   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3062   // %2 = shl i16 %1, 8
3063   // Wd<32+7-24,32-24> = Wn<7:0>
3064   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
3065   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
3066   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
3067
3068   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3069   // %2 = shl i16 %1, 12
3070   // Wd<32+3-20,32-20> = Wn<3:0>
3071   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
3072   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
3073   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
3074
3075   unsigned ImmR = RegSize - Shift;
3076   // Limit the width to the length of the source type.
3077   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
3078   static const unsigned OpcTable[2][2] = {
3079     {AArch64::SBFMWri, AArch64::SBFMXri},
3080     {AArch64::UBFMWri, AArch64::UBFMXri}
3081   };
3082   unsigned Opc = OpcTable[IsZext][Is64Bit];
3083   const TargetRegisterClass *RC =
3084       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3085   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3086     unsigned TmpReg = MRI.createVirtualRegister(RC);
3087     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3088             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3089         .addImm(0)
3090         .addReg(Op0, getKillRegState(Op0IsKill))
3091         .addImm(AArch64::sub_32);
3092     Op0 = TmpReg;
3093     Op0IsKill = true;
3094   }
3095   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3096 }
3097
3098 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3099                                      unsigned Op1Reg, bool Op1IsKill) {
3100   unsigned Opc = 0;
3101   bool NeedTrunc = false;
3102   uint64_t Mask = 0;
3103   switch (RetVT.SimpleTy) {
3104   default: return 0;
3105   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
3106   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
3107   case MVT::i32: Opc = AArch64::LSRVWr; break;
3108   case MVT::i64: Opc = AArch64::LSRVXr; break;
3109   }
3110
3111   const TargetRegisterClass *RC =
3112       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3113   if (NeedTrunc) {
3114     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
3115     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3116     Op0IsKill = Op1IsKill = true;
3117   }
3118   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3119                                        Op1IsKill);
3120   if (NeedTrunc)
3121     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3122   return ResultReg;
3123 }
3124
3125 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3126                                      bool Op0IsKill, uint64_t Shift,
3127                                      bool IsZExt) {
3128   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3129          "Unexpected source/return type pair.");
3130   assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3131           SrcVT == MVT::i64) && "Unexpected source value type.");
3132   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3133           RetVT == MVT::i64) && "Unexpected return value type.");
3134
3135   bool Is64Bit = (RetVT == MVT::i64);
3136   unsigned RegSize = Is64Bit ? 64 : 32;
3137   unsigned DstBits = RetVT.getSizeInBits();
3138   unsigned SrcBits = SrcVT.getSizeInBits();
3139
3140   // Don't deal with undefined shifts.
3141   if (Shift >= DstBits)
3142     return 0;
3143
3144   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3145   // {S|U}BFM Wd, Wn, #r, #s
3146   // Wd<s-r:0> = Wn<s:r> when r <= s
3147
3148   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3149   // %2 = lshr i16 %1, 4
3150   // Wd<7-4:0> = Wn<7:4>
3151   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
3152   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
3153   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
3154
3155   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3156   // %2 = lshr i16 %1, 8
3157   // Wd<7-7,0> = Wn<7:7>
3158   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
3159   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3160   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3161
3162   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3163   // %2 = lshr i16 %1, 12
3164   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
3165   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
3166   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3167   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3168
3169   if (Shift >= SrcBits && IsZExt)
3170     return AArch64MaterializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)),
3171                                  RetVT);
3172
3173   // It is not possible to fold a sign-extend into the LShr instruction. In this
3174   // case emit a sign-extend.
3175   if (!IsZExt) {
3176     Op0 = EmitIntExt(SrcVT, Op0, RetVT, IsZExt);
3177     if (!Op0)
3178       return 0;
3179     Op0IsKill = true;
3180     SrcVT = RetVT;
3181     SrcBits = SrcVT.getSizeInBits();
3182     IsZExt = true;
3183   }
3184
3185   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
3186   unsigned ImmS = SrcBits - 1;
3187   static const unsigned OpcTable[2][2] = {
3188     {AArch64::SBFMWri, AArch64::SBFMXri},
3189     {AArch64::UBFMWri, AArch64::UBFMXri}
3190   };
3191   unsigned Opc = OpcTable[IsZExt][Is64Bit];
3192   const TargetRegisterClass *RC =
3193       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3194   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3195     unsigned TmpReg = MRI.createVirtualRegister(RC);
3196     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3197             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3198         .addImm(0)
3199         .addReg(Op0, getKillRegState(Op0IsKill))
3200         .addImm(AArch64::sub_32);
3201     Op0 = TmpReg;
3202     Op0IsKill = true;
3203   }
3204   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3205 }
3206
3207 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3208                                      unsigned Op1Reg, bool Op1IsKill) {
3209   unsigned Opc = 0;
3210   bool NeedTrunc = false;
3211   uint64_t Mask = 0;
3212   switch (RetVT.SimpleTy) {
3213   default: return 0;
3214   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
3215   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
3216   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
3217   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
3218   }
3219
3220   const TargetRegisterClass *RC =
3221       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3222   if (NeedTrunc) {
3223     Op0Reg = EmitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
3224     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3225     Op0IsKill = Op1IsKill = true;
3226   }
3227   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3228                                        Op1IsKill);
3229   if (NeedTrunc)
3230     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3231   return ResultReg;
3232 }
3233
3234 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3235                                      bool Op0IsKill, uint64_t Shift,
3236                                      bool IsZExt) {
3237   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3238          "Unexpected source/return type pair.");
3239   assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3240           SrcVT == MVT::i64) && "Unexpected source value type.");
3241   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3242           RetVT == MVT::i64) && "Unexpected return value type.");
3243
3244   bool Is64Bit = (RetVT == MVT::i64);
3245   unsigned RegSize = Is64Bit ? 64 : 32;
3246   unsigned DstBits = RetVT.getSizeInBits();
3247   unsigned SrcBits = SrcVT.getSizeInBits();
3248
3249   // Don't deal with undefined shifts.
3250   if (Shift >= DstBits)
3251     return 0;
3252
3253   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3254   // {S|U}BFM Wd, Wn, #r, #s
3255   // Wd<s-r:0> = Wn<s:r> when r <= s
3256
3257   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3258   // %2 = ashr i16 %1, 4
3259   // Wd<7-4:0> = Wn<7:4>
3260   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
3261   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
3262   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
3263
3264   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3265   // %2 = ashr i16 %1, 8
3266   // Wd<7-7,0> = Wn<7:7>
3267   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
3268   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3269   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3270
3271   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3272   // %2 = ashr i16 %1, 12
3273   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
3274   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
3275   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3276   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3277
3278   if (Shift >= SrcBits && IsZExt)
3279     return AArch64MaterializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)),
3280                                  RetVT);
3281
3282   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
3283   unsigned ImmS = SrcBits - 1;
3284   static const unsigned OpcTable[2][2] = {
3285     {AArch64::SBFMWri, AArch64::SBFMXri},
3286     {AArch64::UBFMWri, AArch64::UBFMXri}
3287   };
3288   unsigned Opc = OpcTable[IsZExt][Is64Bit];
3289   const TargetRegisterClass *RC =
3290       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3291   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3292     unsigned TmpReg = MRI.createVirtualRegister(RC);
3293     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3294             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3295         .addImm(0)
3296         .addReg(Op0, getKillRegState(Op0IsKill))
3297         .addImm(AArch64::sub_32);
3298     Op0 = TmpReg;
3299     Op0IsKill = true;
3300   }
3301   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3302 }
3303
3304 unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
3305                                      bool isZExt) {
3306   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
3307
3308   // FastISel does not have plumbing to deal with extensions where the SrcVT or
3309   // DestVT are odd things, so test to make sure that they are both types we can
3310   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
3311   // bail out to SelectionDAG.
3312   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
3313        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
3314       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
3315        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
3316     return 0;
3317
3318   unsigned Opc;
3319   unsigned Imm = 0;
3320
3321   switch (SrcVT.SimpleTy) {
3322   default:
3323     return 0;
3324   case MVT::i1:
3325     return Emiti1Ext(SrcReg, DestVT, isZExt);
3326   case MVT::i8:
3327     if (DestVT == MVT::i64)
3328       Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3329     else
3330       Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
3331     Imm = 7;
3332     break;
3333   case MVT::i16:
3334     if (DestVT == MVT::i64)
3335       Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3336     else
3337       Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
3338     Imm = 15;
3339     break;
3340   case MVT::i32:
3341     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
3342     Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3343     Imm = 31;
3344     break;
3345   }
3346
3347   // Handle i8 and i16 as i32.
3348   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3349     DestVT = MVT::i32;
3350   else if (DestVT == MVT::i64) {
3351     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3352     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3353             TII.get(AArch64::SUBREG_TO_REG), Src64)
3354         .addImm(0)
3355         .addReg(SrcReg)
3356         .addImm(AArch64::sub_32);
3357     SrcReg = Src64;
3358   }
3359
3360   const TargetRegisterClass *RC =
3361       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3362   return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
3363 }
3364
3365 bool AArch64FastISel::SelectIntExt(const Instruction *I) {
3366   // On ARM, in general, integer casts don't involve legal types; this code
3367   // handles promotable integers.  The high bits for a type smaller than
3368   // the register size are assumed to be undefined.
3369   Type *DestTy = I->getType();
3370   Value *Src = I->getOperand(0);
3371   Type *SrcTy = Src->getType();
3372
3373   bool isZExt = isa<ZExtInst>(I);
3374   unsigned SrcReg = getRegForValue(Src);
3375   if (!SrcReg)
3376     return false;
3377
3378   EVT SrcEVT = TLI.getValueType(SrcTy, true);
3379   EVT DestEVT = TLI.getValueType(DestTy, true);
3380   if (!SrcEVT.isSimple())
3381     return false;
3382   if (!DestEVT.isSimple())
3383     return false;
3384
3385   MVT SrcVT = SrcEVT.getSimpleVT();
3386   MVT DestVT = DestEVT.getSimpleVT();
3387   unsigned ResultReg = 0;
3388
3389   // Check if it is an argument and if it is already zero/sign-extended.
3390   if (const auto *Arg = dyn_cast<Argument>(Src)) {
3391     if ((isZExt && Arg->hasZExtAttr()) || (!isZExt && Arg->hasSExtAttr())) {
3392       if (DestVT == MVT::i64) {
3393         ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
3394         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3395                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
3396           .addImm(0)
3397           .addReg(SrcReg)
3398           .addImm(AArch64::sub_32);
3399       } else
3400         ResultReg = SrcReg;
3401     }
3402   }
3403
3404   if (!ResultReg)
3405     ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
3406
3407   if (!ResultReg)
3408     return false;
3409
3410   updateValueMap(I, ResultReg);
3411   return true;
3412 }
3413
3414 bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
3415   EVT DestEVT = TLI.getValueType(I->getType(), true);
3416   if (!DestEVT.isSimple())
3417     return false;
3418
3419   MVT DestVT = DestEVT.getSimpleVT();
3420   if (DestVT != MVT::i64 && DestVT != MVT::i32)
3421     return false;
3422
3423   unsigned DivOpc;
3424   bool is64bit = (DestVT == MVT::i64);
3425   switch (ISDOpcode) {
3426   default:
3427     return false;
3428   case ISD::SREM:
3429     DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
3430     break;
3431   case ISD::UREM:
3432     DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
3433     break;
3434   }
3435   unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
3436   unsigned Src0Reg = getRegForValue(I->getOperand(0));
3437   if (!Src0Reg)
3438     return false;
3439   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
3440
3441   unsigned Src1Reg = getRegForValue(I->getOperand(1));
3442   if (!Src1Reg)
3443     return false;
3444   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
3445
3446   const TargetRegisterClass *RC =
3447       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3448   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
3449                                      Src1Reg, /*IsKill=*/false);
3450   assert(QuotReg && "Unexpected DIV instruction emission failure.");
3451   // The remainder is computed as numerator - (quotient * denominator) using the
3452   // MSUB instruction.
3453   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
3454                                         Src1Reg, Src1IsKill, Src0Reg,
3455                                         Src0IsKill);
3456   updateValueMap(I, ResultReg);
3457   return true;
3458 }
3459
3460 bool AArch64FastISel::SelectMul(const Instruction *I) {
3461   EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
3462   if (!SrcEVT.isSimple())
3463     return false;
3464   MVT SrcVT = SrcEVT.getSimpleVT();
3465
3466   // Must be simple value type.  Don't handle vectors.
3467   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3468       SrcVT != MVT::i8)
3469     return false;
3470
3471   unsigned Src0Reg = getRegForValue(I->getOperand(0));
3472   if (!Src0Reg)
3473     return false;
3474   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
3475
3476   unsigned Src1Reg = getRegForValue(I->getOperand(1));
3477   if (!Src1Reg)
3478     return false;
3479   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
3480
3481   unsigned ResultReg =
3482     Emit_MUL_rr(SrcVT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
3483
3484   if (!ResultReg)
3485     return false;
3486
3487   updateValueMap(I, ResultReg);
3488   return true;
3489 }
3490
3491 bool AArch64FastISel::SelectShift(const Instruction *I) {
3492   MVT RetVT;
3493   if (!isTypeSupported(I->getType(), RetVT))
3494     return false;
3495
3496   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
3497     unsigned ResultReg = 0;
3498     uint64_t ShiftVal = C->getZExtValue();
3499     MVT SrcVT = RetVT;
3500     bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true;
3501     const Value *Op0 = I->getOperand(0);
3502     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
3503       MVT TmpVT;
3504       if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
3505         SrcVT = TmpVT;
3506         IsZExt = true;
3507         Op0 = ZExt->getOperand(0);
3508       }
3509     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
3510       MVT TmpVT;
3511       if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
3512         SrcVT = TmpVT;
3513         IsZExt = false;
3514         Op0 = SExt->getOperand(0);
3515       }
3516     }
3517
3518     unsigned Op0Reg = getRegForValue(Op0);
3519     if (!Op0Reg)
3520       return false;
3521     bool Op0IsKill = hasTrivialKill(Op0);
3522
3523     switch (I->getOpcode()) {
3524     default: llvm_unreachable("Unexpected instruction.");
3525     case Instruction::Shl:
3526       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
3527       break;
3528     case Instruction::AShr:
3529       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
3530       break;
3531     case Instruction::LShr:
3532       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
3533       break;
3534     }
3535     if (!ResultReg)
3536       return false;
3537
3538     updateValueMap(I, ResultReg);
3539     return true;
3540   }
3541
3542   unsigned Op0Reg = getRegForValue(I->getOperand(0));
3543   if (!Op0Reg)
3544     return false;
3545   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
3546
3547   unsigned Op1Reg = getRegForValue(I->getOperand(1));
3548   if (!Op1Reg)
3549     return false;
3550   bool Op1IsKill = hasTrivialKill(I->getOperand(1));
3551
3552   unsigned ResultReg = 0;
3553   switch (I->getOpcode()) {
3554   default: llvm_unreachable("Unexpected instruction.");
3555   case Instruction::Shl:
3556     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
3557     break;
3558   case Instruction::AShr:
3559     ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
3560     break;
3561   case Instruction::LShr:
3562     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
3563     break;
3564   }
3565
3566   if (!ResultReg)
3567     return false;
3568
3569   updateValueMap(I, ResultReg);
3570   return true;
3571 }
3572
3573 bool AArch64FastISel::SelectBitCast(const Instruction *I) {
3574   MVT RetVT, SrcVT;
3575
3576   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
3577     return false;
3578   if (!isTypeLegal(I->getType(), RetVT))
3579     return false;
3580
3581   unsigned Opc;
3582   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
3583     Opc = AArch64::FMOVWSr;
3584   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
3585     Opc = AArch64::FMOVXDr;
3586   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
3587     Opc = AArch64::FMOVSWr;
3588   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
3589     Opc = AArch64::FMOVDXr;
3590   else
3591     return false;
3592
3593   const TargetRegisterClass *RC = nullptr;
3594   switch (RetVT.SimpleTy) {
3595   default: llvm_unreachable("Unexpected value type.");
3596   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
3597   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
3598   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
3599   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
3600   }
3601   unsigned Op0Reg = getRegForValue(I->getOperand(0));
3602   if (!Op0Reg)
3603     return false;
3604   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
3605   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
3606
3607   if (!ResultReg)
3608     return false;
3609
3610   updateValueMap(I, ResultReg);
3611   return true;
3612 }
3613
3614 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
3615   switch (I->getOpcode()) {
3616   default:
3617     break;
3618   case Instruction::Add:
3619   case Instruction::Sub:
3620     if (selectAddSub(I))
3621       return true;
3622     break;
3623   case Instruction::Mul:
3624     if (!selectBinaryOp(I, ISD::MUL))
3625       return SelectMul(I);
3626     return true;
3627   case Instruction::SRem:
3628     if (!selectBinaryOp(I, ISD::SREM))
3629       return SelectRem(I, ISD::SREM);
3630     return true;
3631   case Instruction::URem:
3632     if (!selectBinaryOp(I, ISD::UREM))
3633       return SelectRem(I, ISD::UREM);
3634     return true;
3635   case Instruction::Shl:
3636   case Instruction::LShr:
3637   case Instruction::AShr:
3638     if (SelectShift(I))
3639       return true;
3640     break;
3641   case Instruction::And:
3642     if (selectLogicalOp(I, ISD::AND))
3643       return true;
3644     break;
3645   case Instruction::Or:
3646     if (selectLogicalOp(I, ISD::OR))
3647       return true;
3648     break;
3649   case Instruction::Xor:
3650     if (selectLogicalOp(I, ISD::XOR))
3651       return true;
3652     break;
3653   case Instruction::Br:
3654     return SelectBranch(I);
3655   case Instruction::IndirectBr:
3656     return SelectIndirectBr(I);
3657   case Instruction::BitCast:
3658     if (!FastISel::selectBitCast(I))
3659       return SelectBitCast(I);
3660     return true;
3661   case Instruction::FPToSI:
3662     if (!selectCast(I, ISD::FP_TO_SINT))
3663       return SelectFPToInt(I, /*Signed=*/true);
3664     return true;
3665   case Instruction::FPToUI:
3666     return SelectFPToInt(I, /*Signed=*/false);
3667   case Instruction::ZExt:
3668     if (!selectCast(I, ISD::ZERO_EXTEND))
3669       return SelectIntExt(I);
3670     return true;
3671   case Instruction::SExt:
3672     if (!selectCast(I, ISD::SIGN_EXTEND))
3673       return SelectIntExt(I);
3674     return true;
3675   case Instruction::Trunc:
3676     if (!selectCast(I, ISD::TRUNCATE))
3677       return SelectTrunc(I);
3678     return true;
3679   case Instruction::FPExt:
3680     return SelectFPExt(I);
3681   case Instruction::FPTrunc:
3682     return SelectFPTrunc(I);
3683   case Instruction::SIToFP:
3684     if (!selectCast(I, ISD::SINT_TO_FP))
3685       return SelectIntToFP(I, /*Signed=*/true);
3686     return true;
3687   case Instruction::UIToFP:
3688     return SelectIntToFP(I, /*Signed=*/false);
3689   case Instruction::Load:
3690     return SelectLoad(I);
3691   case Instruction::Store:
3692     return SelectStore(I);
3693   case Instruction::FCmp:
3694   case Instruction::ICmp:
3695     return SelectCmp(I);
3696   case Instruction::Select:
3697     return SelectSelect(I);
3698   case Instruction::Ret:
3699     return SelectRet(I);
3700   }
3701
3702   // fall-back to target-independent instruction selection.
3703   return selectOperator(I, I->getOpcode());
3704   // Silence warnings.
3705   (void)&CC_AArch64_DarwinPCS_VarArg;
3706 }
3707
3708 namespace llvm {
3709 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo,
3710                                         const TargetLibraryInfo *libInfo) {
3711   return new AArch64FastISel(funcInfo, libInfo);
3712 }
3713 }