1 //===-- AArch6464FastISel.cpp - AArch64 FastISel implementation -----------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the AArch64-specific support for the FastISel class. Some
11 // of the target-specific code is generated by tablegen in the file
12 // AArch64GenFastISel.inc, which is #included here.
14 //===----------------------------------------------------------------------===//
17 #include "AArch64Subtarget.h"
18 #include "AArch64TargetMachine.h"
19 #include "MCTargetDesc/AArch64AddressingModes.h"
20 #include "llvm/Analysis/BranchProbabilityInfo.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/FastISel.h"
23 #include "llvm/CodeGen/FunctionLoweringInfo.h"
24 #include "llvm/CodeGen/MachineConstantPool.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/IR/CallingConv.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/DerivedTypes.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/GetElementPtrTypeIterator.h"
33 #include "llvm/IR/GlobalAlias.h"
34 #include "llvm/IR/GlobalVariable.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Operator.h"
38 #include "llvm/Support/CommandLine.h"
43 class AArch64FastISel : public FastISel {
53 AArch64_AM::ShiftExtendType ExtType;
61 const GlobalValue *GV;
64 Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
65 OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
66 void setKind(BaseKind K) { Kind = K; }
67 BaseKind getKind() const { return Kind; }
68 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
69 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
70 bool isRegBase() const { return Kind == RegBase; }
71 bool isFIBase() const { return Kind == FrameIndexBase; }
72 void setReg(unsigned Reg) {
73 assert(isRegBase() && "Invalid base register access!");
76 unsigned getReg() const {
77 assert(isRegBase() && "Invalid base register access!");
80 void setOffsetReg(unsigned Reg) {
81 assert(isRegBase() && "Invalid offset register access!");
84 unsigned getOffsetReg() const {
85 assert(isRegBase() && "Invalid offset register access!");
88 void setFI(unsigned FI) {
89 assert(isFIBase() && "Invalid base frame index access!");
92 unsigned getFI() const {
93 assert(isFIBase() && "Invalid base frame index access!");
96 void setOffset(int64_t O) { Offset = O; }
97 int64_t getOffset() { return Offset; }
98 void setShift(unsigned S) { Shift = S; }
99 unsigned getShift() { return Shift; }
101 void setGlobalValue(const GlobalValue *G) { GV = G; }
102 const GlobalValue *getGlobalValue() { return GV; }
105 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
106 /// make the right decision when generating code for different targets.
107 const AArch64Subtarget *Subtarget;
108 LLVMContext *Context;
110 bool fastLowerArguments() override;
111 bool fastLowerCall(CallLoweringInfo &CLI) override;
112 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
115 // Selection routines.
116 bool selectAddSub(const Instruction *I);
117 bool selectLogicalOp(const Instruction *I, unsigned ISDOpcode);
118 bool SelectLoad(const Instruction *I);
119 bool SelectStore(const Instruction *I);
120 bool SelectBranch(const Instruction *I);
121 bool SelectIndirectBr(const Instruction *I);
122 bool SelectCmp(const Instruction *I);
123 bool SelectSelect(const Instruction *I);
124 bool SelectFPExt(const Instruction *I);
125 bool SelectFPTrunc(const Instruction *I);
126 bool SelectFPToInt(const Instruction *I, bool Signed);
127 bool SelectIntToFP(const Instruction *I, bool Signed);
128 bool SelectRem(const Instruction *I, unsigned ISDOpcode);
129 bool SelectRet(const Instruction *I);
130 bool SelectTrunc(const Instruction *I);
131 bool SelectIntExt(const Instruction *I);
132 bool SelectMul(const Instruction *I);
133 bool SelectShift(const Instruction *I);
134 bool SelectBitCast(const Instruction *I);
136 // Utility helper routines.
137 bool isTypeLegal(Type *Ty, MVT &VT);
138 bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
139 bool isTypeSupported(Type *Ty, MVT &VT);
140 bool isValueAvailable(const Value *V) const;
141 bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
142 bool ComputeCallAddress(const Value *V, Address &Addr);
143 bool SimplifyAddress(Address &Addr, MVT VT);
144 void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
145 unsigned Flags, unsigned ScaleFactor,
146 MachineMemOperand *MMO);
147 bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
148 bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
150 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
153 // Emit helper routines.
154 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
155 const Value *RHS, bool SetFlags = false,
156 bool WantResult = true, bool IsZExt = false);
157 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
158 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
159 bool SetFlags = false, bool WantResult = true);
160 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
161 bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
162 bool WantResult = true);
163 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
164 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
165 AArch64_AM::ShiftExtendType ShiftType,
166 uint64_t ShiftImm, bool SetFlags = false,
167 bool WantResult = true);
168 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
169 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
170 AArch64_AM::ShiftExtendType ExtType,
171 uint64_t ShiftImm, bool SetFlags = false,
172 bool WantResult = true);
175 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
176 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
177 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
178 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
179 bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
180 MachineMemOperand *MMO = nullptr);
181 bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
182 MachineMemOperand *MMO = nullptr);
183 unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
184 unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
185 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
186 bool SetFlags = false, bool WantResult = true,
187 bool IsZExt = false);
188 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
189 bool SetFlags = false, bool WantResult = true,
190 bool IsZExt = false);
191 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
192 unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
193 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
194 unsigned RHSReg, bool RHSIsKill,
195 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
196 bool WantResult = true);
197 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
199 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
200 bool LHSIsKill, uint64_t Imm);
201 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
202 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
204 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
205 unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
206 unsigned Op1, bool Op1IsKill);
207 unsigned Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
208 unsigned Op1, bool Op1IsKill);
209 unsigned Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
210 unsigned Op1, bool Op1IsKill);
211 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
212 unsigned Op1Reg, bool Op1IsKill);
213 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
214 uint64_t Imm, bool IsZExt = true);
215 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
216 unsigned Op1Reg, bool Op1IsKill);
217 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
218 uint64_t Imm, bool IsZExt = true);
219 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
220 unsigned Op1Reg, bool Op1IsKill);
221 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
222 uint64_t Imm, bool IsZExt = false);
224 unsigned AArch64MaterializeInt(const ConstantInt *CI, MVT VT);
225 unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
226 unsigned AArch64MaterializeGV(const GlobalValue *GV);
228 // Call handling routines.
230 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
231 bool ProcessCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
233 bool FinishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
236 // Backend specific FastISel code.
237 unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
238 unsigned fastMaterializeConstant(const Constant *C) override;
239 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
241 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
242 const TargetLibraryInfo *LibInfo)
243 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
244 Subtarget = &TM.getSubtarget<AArch64Subtarget>();
245 Context = &FuncInfo.Fn->getContext();
248 bool fastSelectInstruction(const Instruction *I) override;
250 #include "AArch64GenFastISel.inc"
253 } // end anonymous namespace
255 #include "AArch64GenCallingConv.inc"
257 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
258 if (CC == CallingConv::WebKit_JS)
259 return CC_AArch64_WebKit_JS;
260 return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
263 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
264 assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
265 "Alloca should always return a pointer.");
267 // Don't handle dynamic allocas.
268 if (!FuncInfo.StaticAllocaMap.count(AI))
271 DenseMap<const AllocaInst *, int>::iterator SI =
272 FuncInfo.StaticAllocaMap.find(AI);
274 if (SI != FuncInfo.StaticAllocaMap.end()) {
275 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
276 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
278 .addFrameIndex(SI->second)
287 unsigned AArch64FastISel::AArch64MaterializeInt(const ConstantInt *CI, MVT VT) {
292 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
294 // Create a copy from the zero register to materialize a "0" value.
295 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
296 : &AArch64::GPR32RegClass;
297 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
298 unsigned ResultReg = createResultReg(RC);
299 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
300 ResultReg).addReg(ZeroReg, getKillRegState(true));
304 unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
305 // Positive zero (+0.0) has to be materialized with a fmov from the zero
306 // register, because the immediate version of fmov cannot encode zero.
307 if (CFP->isNullValue())
308 return fastMaterializeFloatZero(CFP);
310 if (VT != MVT::f32 && VT != MVT::f64)
313 const APFloat Val = CFP->getValueAPF();
314 bool Is64Bit = (VT == MVT::f64);
315 // This checks to see if we can use FMOV instructions to materialize
316 // a constant, otherwise we have to materialize via the constant pool.
317 if (TLI.isFPImmLegal(Val, VT)) {
319 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
320 assert((Imm != -1) && "Cannot encode floating-point constant.");
321 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
322 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
325 // Materialize via constant pool. MachineConstantPool wants an explicit
327 unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
329 Align = DL.getTypeAllocSize(CFP->getType());
331 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
332 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
333 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
334 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
336 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
337 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
338 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
340 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
344 unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
345 // We can't handle thread-local variables quickly yet.
346 if (GV->isThreadLocal())
349 // MachO still uses GOT for large code-model accesses, but ELF requires
350 // movz/movk sequences, which FastISel doesn't handle yet.
351 if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
354 unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
356 EVT DestEVT = TLI.getValueType(GV->getType(), true);
357 if (!DestEVT.isSimple())
360 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
363 if (OpFlags & AArch64II::MO_GOT) {
365 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
367 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
369 ResultReg = createResultReg(&AArch64::GPR64RegClass);
370 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
373 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
375 } else if (OpFlags & AArch64II::MO_CONSTPOOL) {
376 // We can't handle addresses loaded from a constant pool quickly yet.
380 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
382 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
384 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
385 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
388 .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
394 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
395 EVT CEVT = TLI.getValueType(C->getType(), true);
397 // Only handle simple types.
398 if (!CEVT.isSimple())
400 MVT VT = CEVT.getSimpleVT();
402 if (const auto *CI = dyn_cast<ConstantInt>(C))
403 return AArch64MaterializeInt(CI, VT);
404 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
405 return AArch64MaterializeFP(CFP, VT);
406 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
407 return AArch64MaterializeGV(GV);
412 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
413 assert(CFP->isNullValue() &&
414 "Floating-point constant is not a positive zero.");
416 if (!isTypeLegal(CFP->getType(), VT))
419 if (VT != MVT::f32 && VT != MVT::f64)
422 bool Is64Bit = (VT == MVT::f64);
423 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
424 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
425 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
428 // Computes the address to get to an object.
429 bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr, Type *Ty)
431 const User *U = nullptr;
432 unsigned Opcode = Instruction::UserOp1;
433 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
434 // Don't walk into other basic blocks unless the object is an alloca from
435 // another block, otherwise it may not have a virtual register assigned.
436 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
437 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
438 Opcode = I->getOpcode();
441 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
442 Opcode = C->getOpcode();
446 if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
447 if (Ty->getAddressSpace() > 255)
448 // Fast instruction selection doesn't support the special
455 case Instruction::BitCast: {
456 // Look through bitcasts.
457 return ComputeAddress(U->getOperand(0), Addr, Ty);
459 case Instruction::IntToPtr: {
460 // Look past no-op inttoptrs.
461 if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
462 return ComputeAddress(U->getOperand(0), Addr, Ty);
465 case Instruction::PtrToInt: {
466 // Look past no-op ptrtoints.
467 if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
468 return ComputeAddress(U->getOperand(0), Addr, Ty);
471 case Instruction::GetElementPtr: {
472 Address SavedAddr = Addr;
473 uint64_t TmpOffset = Addr.getOffset();
475 // Iterate through the GEP folding the constants into offsets where
477 gep_type_iterator GTI = gep_type_begin(U);
478 for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
480 const Value *Op = *i;
481 if (StructType *STy = dyn_cast<StructType>(*GTI)) {
482 const StructLayout *SL = DL.getStructLayout(STy);
483 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
484 TmpOffset += SL->getElementOffset(Idx);
486 uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
488 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
489 // Constant-offset addressing.
490 TmpOffset += CI->getSExtValue() * S;
493 if (canFoldAddIntoGEP(U, Op)) {
494 // A compatible add with a constant operand. Fold the constant.
496 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
497 TmpOffset += CI->getSExtValue() * S;
498 // Iterate on the other operand.
499 Op = cast<AddOperator>(Op)->getOperand(0);
503 goto unsupported_gep;
508 // Try to grab the base operand now.
509 Addr.setOffset(TmpOffset);
510 if (ComputeAddress(U->getOperand(0), Addr, Ty))
513 // We failed, restore everything and try the other options.
519 case Instruction::Alloca: {
520 const AllocaInst *AI = cast<AllocaInst>(Obj);
521 DenseMap<const AllocaInst *, int>::iterator SI =
522 FuncInfo.StaticAllocaMap.find(AI);
523 if (SI != FuncInfo.StaticAllocaMap.end()) {
524 Addr.setKind(Address::FrameIndexBase);
525 Addr.setFI(SI->second);
530 case Instruction::Add: {
531 // Adds of constants are common and easy enough.
532 const Value *LHS = U->getOperand(0);
533 const Value *RHS = U->getOperand(1);
535 if (isa<ConstantInt>(LHS))
538 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
539 Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
540 return ComputeAddress(LHS, Addr, Ty);
543 Address Backup = Addr;
544 if (ComputeAddress(LHS, Addr, Ty) && ComputeAddress(RHS, Addr, Ty))
550 case Instruction::Shl:
551 if (Addr.getOffsetReg())
554 if (const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
555 unsigned Val = CI->getZExtValue();
556 if (Val < 1 || Val > 3)
559 uint64_t NumBytes = 0;
560 if (Ty && Ty->isSized()) {
561 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
562 NumBytes = NumBits / 8;
563 if (!isPowerOf2_64(NumBits))
567 if (NumBytes != (1ULL << Val))
571 Addr.setExtendType(AArch64_AM::LSL);
573 if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
574 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
577 if (const auto *ZE = dyn_cast<ZExtInst>(U))
578 if (ZE->getOperand(0)->getType()->isIntegerTy(32))
579 Addr.setExtendType(AArch64_AM::UXTW);
581 if (const auto *SE = dyn_cast<SExtInst>(U))
582 if (SE->getOperand(0)->getType()->isIntegerTy(32))
583 Addr.setExtendType(AArch64_AM::SXTW);
585 unsigned Reg = getRegForValue(U->getOperand(0));
588 Addr.setOffsetReg(Reg);
595 if (!Addr.getOffsetReg()) {
596 unsigned Reg = getRegForValue(Obj);
599 Addr.setOffsetReg(Reg);
605 unsigned Reg = getRegForValue(Obj);
612 bool AArch64FastISel::ComputeCallAddress(const Value *V, Address &Addr) {
613 const User *U = nullptr;
614 unsigned Opcode = Instruction::UserOp1;
617 if (const auto *I = dyn_cast<Instruction>(V)) {
618 Opcode = I->getOpcode();
620 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
621 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
622 Opcode = C->getOpcode();
628 case Instruction::BitCast:
629 // Look past bitcasts if its operand is in the same BB.
631 return ComputeCallAddress(U->getOperand(0), Addr);
633 case Instruction::IntToPtr:
634 // Look past no-op inttoptrs if its operand is in the same BB.
636 TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
637 return ComputeCallAddress(U->getOperand(0), Addr);
639 case Instruction::PtrToInt:
640 // Look past no-op ptrtoints if its operand is in the same BB.
642 TLI.getValueType(U->getType()) == TLI.getPointerTy())
643 return ComputeCallAddress(U->getOperand(0), Addr);
647 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
648 Addr.setGlobalValue(GV);
652 // If all else fails, try to materialize the value in a register.
653 if (!Addr.getGlobalValue()) {
654 Addr.setReg(getRegForValue(V));
655 return Addr.getReg() != 0;
662 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
663 EVT evt = TLI.getValueType(Ty, true);
665 // Only handle simple types.
666 if (evt == MVT::Other || !evt.isSimple())
668 VT = evt.getSimpleVT();
670 // This is a legal type, but it's not something we handle in fast-isel.
674 // Handle all other legal types, i.e. a register that will directly hold this
676 return TLI.isTypeLegal(VT);
679 bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
680 if (isTypeLegal(Ty, VT))
683 // If this is a type than can be sign or zero-extended to a basic operation
684 // go ahead and accept it now. For stores, this reflects truncation.
685 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
691 /// \brief Determine if the value type is supported by FastISel.
693 /// FastISel for AArch64 can handle more value types than are legal. This adds
694 /// simple value type such as i1, i8, and i16.
695 /// Vectors on the other side are not supported yet.
696 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT) {
697 if (Ty->isVectorTy())
700 if (isTypeLegal(Ty, VT))
703 // If this is a type than can be sign or zero-extended to a basic operation
704 // go ahead and accept it now.
705 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
711 bool AArch64FastISel::isValueAvailable(const Value *V) const {
712 if (!isa<Instruction>(V))
715 const auto *I = cast<Instruction>(V);
716 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
722 bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT) {
723 unsigned ScaleFactor;
724 switch (VT.SimpleTy) {
725 default: return false;
726 case MVT::i1: // fall-through
727 case MVT::i8: ScaleFactor = 1; break;
728 case MVT::i16: ScaleFactor = 2; break;
729 case MVT::i32: // fall-through
730 case MVT::f32: ScaleFactor = 4; break;
731 case MVT::i64: // fall-through
732 case MVT::f64: ScaleFactor = 8; break;
735 bool ImmediateOffsetNeedsLowering = false;
736 bool RegisterOffsetNeedsLowering = false;
737 int64_t Offset = Addr.getOffset();
738 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
739 ImmediateOffsetNeedsLowering = true;
740 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
741 !isUInt<12>(Offset / ScaleFactor))
742 ImmediateOffsetNeedsLowering = true;
744 // Cannot encode an offset register and an immediate offset in the same
745 // instruction. Fold the immediate offset into the load/store instruction and
746 // emit an additonal add to take care of the offset register.
747 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() &&
749 RegisterOffsetNeedsLowering = true;
751 // Cannot encode zero register as base.
752 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
753 RegisterOffsetNeedsLowering = true;
755 // If this is a stack pointer and the offset needs to be simplified then put
756 // the alloca address into a register, set the base type back to register and
757 // continue. This should almost never happen.
758 if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) {
759 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
760 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
762 .addFrameIndex(Addr.getFI())
765 Addr.setKind(Address::RegBase);
766 Addr.setReg(ResultReg);
769 if (RegisterOffsetNeedsLowering) {
770 unsigned ResultReg = 0;
772 if (Addr.getExtendType() == AArch64_AM::SXTW ||
773 Addr.getExtendType() == AArch64_AM::UXTW )
774 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
775 /*TODO:IsKill=*/false, Addr.getOffsetReg(),
776 /*TODO:IsKill=*/false, Addr.getExtendType(),
779 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
780 /*TODO:IsKill=*/false, Addr.getOffsetReg(),
781 /*TODO:IsKill=*/false, AArch64_AM::LSL,
784 if (Addr.getExtendType() == AArch64_AM::UXTW)
785 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
786 /*Op0IsKill=*/false, Addr.getShift(),
788 else if (Addr.getExtendType() == AArch64_AM::SXTW)
789 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
790 /*Op0IsKill=*/false, Addr.getShift(),
793 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
794 /*Op0IsKill=*/false, Addr.getShift());
799 Addr.setReg(ResultReg);
800 Addr.setOffsetReg(0);
802 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
805 // Since the offset is too large for the load/store instruction get the
806 // reg+offset into a register.
807 if (ImmediateOffsetNeedsLowering) {
808 unsigned ResultReg = 0;
810 ResultReg = fastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(),
811 /*IsKill=*/false, Offset, MVT::i64);
813 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
817 Addr.setReg(ResultReg);
823 void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
824 const MachineInstrBuilder &MIB,
826 unsigned ScaleFactor,
827 MachineMemOperand *MMO) {
828 int64_t Offset = Addr.getOffset() / ScaleFactor;
829 // Frame base works a bit differently. Handle it separately.
830 if (Addr.isFIBase()) {
831 int FI = Addr.getFI();
832 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
833 // and alignment should be based on the VT.
834 MMO = FuncInfo.MF->getMachineMemOperand(
835 MachinePointerInfo::getFixedStack(FI, Offset), Flags,
836 MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
837 // Now add the rest of the operands.
838 MIB.addFrameIndex(FI).addImm(Offset);
840 assert(Addr.isRegBase() && "Unexpected address kind.");
841 const MCInstrDesc &II = MIB->getDesc();
842 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
844 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
846 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
847 if (Addr.getOffsetReg()) {
848 assert(Addr.getOffset() == 0 && "Unexpected offset");
849 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
850 Addr.getExtendType() == AArch64_AM::SXTX;
851 MIB.addReg(Addr.getReg());
852 MIB.addReg(Addr.getOffsetReg());
853 MIB.addImm(IsSigned);
854 MIB.addImm(Addr.getShift() != 0);
856 MIB.addReg(Addr.getReg());
862 MIB.addMemOperand(MMO);
865 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
866 const Value *RHS, bool SetFlags,
867 bool WantResult, bool IsZExt) {
868 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
869 bool NeedExtend = false;
870 switch (RetVT.SimpleTy) {
878 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
882 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
884 case MVT::i32: // fall-through
889 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
891 // Canonicalize immediates to the RHS first.
892 if (UseAdd && isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
895 // Canonicalize shift immediate to the RHS.
896 if (UseAdd && isValueAvailable(LHS))
897 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
898 if (isa<ConstantInt>(SI->getOperand(1)))
899 if (SI->getOpcode() == Instruction::Shl ||
900 SI->getOpcode() == Instruction::LShr ||
901 SI->getOpcode() == Instruction::AShr )
904 unsigned LHSReg = getRegForValue(LHS);
907 bool LHSIsKill = hasTrivialKill(LHS);
910 LHSReg = EmitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
912 unsigned ResultReg = 0;
913 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
914 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
916 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
917 SetFlags, WantResult);
919 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
925 // Only extend the RHS within the instruction if there is a valid extend type.
926 if (ExtendType != AArch64_AM::InvalidShiftExtend && isValueAvailable(RHS)) {
927 if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
928 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
929 if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
930 unsigned RHSReg = getRegForValue(SI->getOperand(0));
933 bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
934 return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
935 RHSIsKill, ExtendType, C->getZExtValue(),
936 SetFlags, WantResult);
938 unsigned RHSReg = getRegForValue(RHS);
941 bool RHSIsKill = hasTrivialKill(RHS);
942 return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
943 ExtendType, 0, SetFlags, WantResult);
946 // Check if the shift can be folded into the instruction.
947 if (isValueAvailable(RHS))
948 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
949 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
950 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
951 switch (SI->getOpcode()) {
953 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
954 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
955 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
957 uint64_t ShiftVal = C->getZExtValue();
958 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
959 unsigned RHSReg = getRegForValue(SI->getOperand(0));
962 bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
963 return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
964 RHSIsKill, ShiftType, ShiftVal, SetFlags,
970 unsigned RHSReg = getRegForValue(RHS);
973 bool RHSIsKill = hasTrivialKill(RHS);
976 RHSReg = EmitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
978 return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
979 SetFlags, WantResult);
982 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
983 bool LHSIsKill, unsigned RHSReg,
984 bool RHSIsKill, bool SetFlags,
986 assert(LHSReg && RHSReg && "Invalid register number.");
988 if (RetVT != MVT::i32 && RetVT != MVT::i64)
991 static const unsigned OpcTable[2][2][2] = {
992 { { AArch64::SUBWrr, AArch64::SUBXrr },
993 { AArch64::ADDWrr, AArch64::ADDXrr } },
994 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
995 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
997 bool Is64Bit = RetVT == MVT::i64;
998 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
999 const TargetRegisterClass *RC =
1000 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1003 ResultReg = createResultReg(RC);
1005 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1007 const MCInstrDesc &II = TII.get(Opc);
1008 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1009 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1010 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1011 .addReg(LHSReg, getKillRegState(LHSIsKill))
1012 .addReg(RHSReg, getKillRegState(RHSIsKill));
1016 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1017 bool LHSIsKill, uint64_t Imm,
1018 bool SetFlags, bool WantResult) {
1019 assert(LHSReg && "Invalid register number.");
1021 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1025 if (isUInt<12>(Imm))
1027 else if ((Imm & 0xfff000) == Imm) {
1033 static const unsigned OpcTable[2][2][2] = {
1034 { { AArch64::SUBWri, AArch64::SUBXri },
1035 { AArch64::ADDWri, AArch64::ADDXri } },
1036 { { AArch64::SUBSWri, AArch64::SUBSXri },
1037 { AArch64::ADDSWri, AArch64::ADDSXri } }
1039 bool Is64Bit = RetVT == MVT::i64;
1040 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1041 const TargetRegisterClass *RC;
1043 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1045 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1048 ResultReg = createResultReg(RC);
1050 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1052 const MCInstrDesc &II = TII.get(Opc);
1053 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1054 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1055 .addReg(LHSReg, getKillRegState(LHSIsKill))
1057 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1061 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1062 bool LHSIsKill, unsigned RHSReg,
1064 AArch64_AM::ShiftExtendType ShiftType,
1065 uint64_t ShiftImm, bool SetFlags,
1067 assert(LHSReg && RHSReg && "Invalid register number.");
1069 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1072 static const unsigned OpcTable[2][2][2] = {
1073 { { AArch64::SUBWrs, AArch64::SUBXrs },
1074 { AArch64::ADDWrs, AArch64::ADDXrs } },
1075 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1076 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1078 bool Is64Bit = RetVT == MVT::i64;
1079 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1080 const TargetRegisterClass *RC =
1081 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1084 ResultReg = createResultReg(RC);
1086 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1088 const MCInstrDesc &II = TII.get(Opc);
1089 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1090 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1091 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1092 .addReg(LHSReg, getKillRegState(LHSIsKill))
1093 .addReg(RHSReg, getKillRegState(RHSIsKill))
1094 .addImm(getShifterImm(ShiftType, ShiftImm));
1098 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1099 bool LHSIsKill, unsigned RHSReg,
1101 AArch64_AM::ShiftExtendType ExtType,
1102 uint64_t ShiftImm, bool SetFlags,
1104 assert(LHSReg && RHSReg && "Invalid register number.");
1106 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1109 static const unsigned OpcTable[2][2][2] = {
1110 { { AArch64::SUBWrx, AArch64::SUBXrx },
1111 { AArch64::ADDWrx, AArch64::ADDXrx } },
1112 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1113 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1115 bool Is64Bit = RetVT == MVT::i64;
1116 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1117 const TargetRegisterClass *RC = nullptr;
1119 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1121 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1124 ResultReg = createResultReg(RC);
1126 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1128 const MCInstrDesc &II = TII.get(Opc);
1129 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1130 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1131 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1132 .addReg(LHSReg, getKillRegState(LHSIsKill))
1133 .addReg(RHSReg, getKillRegState(RHSIsKill))
1134 .addImm(getArithExtendImm(ExtType, ShiftImm));
1138 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1139 Type *Ty = LHS->getType();
1140 EVT EVT = TLI.getValueType(Ty, true);
1141 if (!EVT.isSimple())
1143 MVT VT = EVT.getSimpleVT();
1145 switch (VT.SimpleTy) {
1153 return emitICmp(VT, LHS, RHS, IsZExt);
1156 return emitFCmp(VT, LHS, RHS);
1160 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1162 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1166 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1168 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1169 /*SetFlags=*/true, /*WantResult=*/false) != 0;
1172 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1173 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1176 // Check to see if the 2nd operand is a constant that we can encode directly
1178 bool UseImm = false;
1179 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1180 if (CFP->isZero() && !CFP->isNegative())
1183 unsigned LHSReg = getRegForValue(LHS);
1186 bool LHSIsKill = hasTrivialKill(LHS);
1189 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1190 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1191 .addReg(LHSReg, getKillRegState(LHSIsKill));
1195 unsigned RHSReg = getRegForValue(RHS);
1198 bool RHSIsKill = hasTrivialKill(RHS);
1200 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1201 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1202 .addReg(LHSReg, getKillRegState(LHSIsKill))
1203 .addReg(RHSReg, getKillRegState(RHSIsKill));
1207 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1208 bool SetFlags, bool WantResult, bool IsZExt) {
1209 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1213 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1214 bool SetFlags, bool WantResult, bool IsZExt) {
1215 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1219 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1220 bool LHSIsKill, unsigned RHSReg,
1221 bool RHSIsKill, bool WantResult) {
1222 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1223 RHSIsKill, /*SetFlags=*/true, WantResult);
1226 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1227 bool LHSIsKill, unsigned RHSReg,
1229 AArch64_AM::ShiftExtendType ShiftType,
1230 uint64_t ShiftImm, bool WantResult) {
1231 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1232 RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1236 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1237 const Value *LHS, const Value *RHS) {
1238 // Canonicalize immediates to the RHS first.
1239 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1240 std::swap(LHS, RHS);
1242 // Canonicalize shift immediate to the RHS.
1243 if (isValueAvailable(LHS))
1244 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1245 if (isa<ConstantInt>(SI->getOperand(1)))
1246 if (SI->getOpcode() == Instruction::Shl)
1247 std::swap(LHS, RHS);
1249 unsigned LHSReg = getRegForValue(LHS);
1252 bool LHSIsKill = hasTrivialKill(LHS);
1254 unsigned ResultReg = 0;
1255 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1256 uint64_t Imm = C->getZExtValue();
1257 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1262 // Check if the shift can be folded into the instruction.
1263 if (isValueAvailable(RHS))
1264 if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1265 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1266 if (SI->getOpcode() == Instruction::Shl) {
1267 uint64_t ShiftVal = C->getZExtValue();
1268 unsigned RHSReg = getRegForValue(SI->getOperand(0));
1271 bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1272 return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1273 RHSIsKill, ShiftVal);
1276 unsigned RHSReg = getRegForValue(RHS);
1279 bool RHSIsKill = hasTrivialKill(RHS);
1281 MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1282 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1283 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1284 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1285 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1290 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1291 unsigned LHSReg, bool LHSIsKill,
1293 assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1294 "ISD nodes are not consecutive!");
1295 static const unsigned OpcTable[3][2] = {
1296 { AArch64::ANDWri, AArch64::ANDXri },
1297 { AArch64::ORRWri, AArch64::ORRXri },
1298 { AArch64::EORWri, AArch64::EORXri }
1300 const TargetRegisterClass *RC;
1303 switch (RetVT.SimpleTy) {
1310 unsigned Idx = ISDOpc - ISD::AND;
1311 Opc = OpcTable[Idx][0];
1312 RC = &AArch64::GPR32spRegClass;
1317 Opc = OpcTable[ISDOpc - ISD::AND][1];
1318 RC = &AArch64::GPR64spRegClass;
1323 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1326 unsigned ResultReg =
1327 fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1328 AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1329 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1330 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1331 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1336 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1337 unsigned LHSReg, bool LHSIsKill,
1338 unsigned RHSReg, bool RHSIsKill,
1339 uint64_t ShiftImm) {
1340 assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1341 "ISD nodes are not consecutive!");
1342 static const unsigned OpcTable[3][2] = {
1343 { AArch64::ANDWrs, AArch64::ANDXrs },
1344 { AArch64::ORRWrs, AArch64::ORRXrs },
1345 { AArch64::EORWrs, AArch64::EORXrs }
1347 const TargetRegisterClass *RC;
1349 switch (RetVT.SimpleTy) {
1356 Opc = OpcTable[ISDOpc - ISD::AND][0];
1357 RC = &AArch64::GPR32RegClass;
1360 Opc = OpcTable[ISDOpc - ISD::AND][1];
1361 RC = &AArch64::GPR64RegClass;
1364 unsigned ResultReg =
1365 fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1366 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1367 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1368 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1369 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1374 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1376 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1379 bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
1380 MachineMemOperand *MMO) {
1381 // Simplify this down to something we can handle.
1382 if (!SimplifyAddress(Addr, VT))
1385 unsigned ScaleFactor;
1386 switch (VT.SimpleTy) {
1387 default: llvm_unreachable("Unexpected value type.");
1388 case MVT::i1: // fall-through
1389 case MVT::i8: ScaleFactor = 1; break;
1390 case MVT::i16: ScaleFactor = 2; break;
1391 case MVT::i32: // fall-through
1392 case MVT::f32: ScaleFactor = 4; break;
1393 case MVT::i64: // fall-through
1394 case MVT::f64: ScaleFactor = 8; break;
1397 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1398 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1399 bool UseScaled = true;
1400 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1405 static const unsigned OpcTable[4][6] = {
1406 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, AArch64::LDURXi,
1407 AArch64::LDURSi, AArch64::LDURDi },
1408 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, AArch64::LDRXui,
1409 AArch64::LDRSui, AArch64::LDRDui },
1410 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX,
1411 AArch64::LDRSroX, AArch64::LDRDroX },
1412 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW,
1413 AArch64::LDRSroW, AArch64::LDRDroW }
1417 const TargetRegisterClass *RC;
1418 bool VTIsi1 = false;
1419 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1420 Addr.getOffsetReg();
1421 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1422 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1423 Addr.getExtendType() == AArch64_AM::SXTW)
1426 switch (VT.SimpleTy) {
1427 default: llvm_unreachable("Unexpected value type.");
1428 case MVT::i1: VTIsi1 = true; // Intentional fall-through.
1429 case MVT::i8: Opc = OpcTable[Idx][0]; RC = &AArch64::GPR32RegClass; break;
1430 case MVT::i16: Opc = OpcTable[Idx][1]; RC = &AArch64::GPR32RegClass; break;
1431 case MVT::i32: Opc = OpcTable[Idx][2]; RC = &AArch64::GPR32RegClass; break;
1432 case MVT::i64: Opc = OpcTable[Idx][3]; RC = &AArch64::GPR64RegClass; break;
1433 case MVT::f32: Opc = OpcTable[Idx][4]; RC = &AArch64::FPR32RegClass; break;
1434 case MVT::f64: Opc = OpcTable[Idx][5]; RC = &AArch64::FPR64RegClass; break;
1437 // Create the base instruction, then add the operands.
1438 ResultReg = createResultReg(RC);
1439 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1440 TII.get(Opc), ResultReg);
1441 AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1443 // Loading an i1 requires special handling.
1445 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1446 assert(ANDReg && "Unexpected AND instruction emission failure.");
1452 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1454 if (!isTypeSupported(I->getType(), VT))
1458 if (I->getOpcode() == Instruction::Add)
1459 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1460 else if (I->getOpcode() == Instruction::Sub)
1461 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1463 llvm_unreachable("Unexpected instruction.");
1465 assert(ResultReg && "Couldn't select Add/Sub instruction.");
1466 updateValueMap(I, ResultReg);
1470 bool AArch64FastISel::selectLogicalOp(const Instruction *I, unsigned ISDOpc) {
1472 if (!isTypeSupported(I->getType(), VT))
1475 unsigned ResultReg =
1476 emitLogicalOp(ISDOpc, VT, I->getOperand(0), I->getOperand(1));
1480 updateValueMap(I, ResultReg);
1484 bool AArch64FastISel::SelectLoad(const Instruction *I) {
1486 // Verify we have a legal type before going any further. Currently, we handle
1487 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1488 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1489 if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic())
1492 // See if we can handle this address.
1494 if (!ComputeAddress(I->getOperand(0), Addr, I->getType()))
1498 if (!EmitLoad(VT, ResultReg, Addr, createMachineMemOperandFor(I)))
1501 updateValueMap(I, ResultReg);
1505 bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
1506 MachineMemOperand *MMO) {
1507 // Simplify this down to something we can handle.
1508 if (!SimplifyAddress(Addr, VT))
1511 unsigned ScaleFactor;
1512 switch (VT.SimpleTy) {
1513 default: llvm_unreachable("Unexpected value type.");
1514 case MVT::i1: // fall-through
1515 case MVT::i8: ScaleFactor = 1; break;
1516 case MVT::i16: ScaleFactor = 2; break;
1517 case MVT::i32: // fall-through
1518 case MVT::f32: ScaleFactor = 4; break;
1519 case MVT::i64: // fall-through
1520 case MVT::f64: ScaleFactor = 8; break;
1523 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1524 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1525 bool UseScaled = true;
1526 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1532 static const unsigned OpcTable[4][6] = {
1533 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
1534 AArch64::STURSi, AArch64::STURDi },
1535 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
1536 AArch64::STRSui, AArch64::STRDui },
1537 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
1538 AArch64::STRSroX, AArch64::STRDroX },
1539 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
1540 AArch64::STRSroW, AArch64::STRDroW }
1545 bool VTIsi1 = false;
1546 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1547 Addr.getOffsetReg();
1548 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1549 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1550 Addr.getExtendType() == AArch64_AM::SXTW)
1553 switch (VT.SimpleTy) {
1554 default: llvm_unreachable("Unexpected value type.");
1555 case MVT::i1: VTIsi1 = true;
1556 case MVT::i8: Opc = OpcTable[Idx][0]; break;
1557 case MVT::i16: Opc = OpcTable[Idx][1]; break;
1558 case MVT::i32: Opc = OpcTable[Idx][2]; break;
1559 case MVT::i64: Opc = OpcTable[Idx][3]; break;
1560 case MVT::f32: Opc = OpcTable[Idx][4]; break;
1561 case MVT::f64: Opc = OpcTable[Idx][5]; break;
1564 // Storing an i1 requires special handling.
1565 if (VTIsi1 && SrcReg != AArch64::WZR) {
1566 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
1567 assert(ANDReg && "Unexpected AND instruction emission failure.");
1570 // Create the base instruction, then add the operands.
1571 const MCInstrDesc &II = TII.get(Opc);
1572 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
1573 MachineInstrBuilder MIB =
1574 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
1575 AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
1580 bool AArch64FastISel::SelectStore(const Instruction *I) {
1582 const Value *Op0 = I->getOperand(0);
1583 // Verify we have a legal type before going any further. Currently, we handle
1584 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1585 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1586 if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
1587 cast<StoreInst>(I)->isAtomic())
1590 // Get the value to be stored into a register. Use the zero register directly
1591 // when possible to avoid an unnecessary copy and a wasted register.
1592 unsigned SrcReg = 0;
1593 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
1595 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1596 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
1597 if (CF->isZero() && !CF->isNegative()) {
1598 VT = MVT::getIntegerVT(VT.getSizeInBits());
1599 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1604 SrcReg = getRegForValue(Op0);
1609 // See if we can handle this address.
1611 if (!ComputeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
1614 if (!EmitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
1619 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
1621 case CmpInst::FCMP_ONE:
1622 case CmpInst::FCMP_UEQ:
1624 // AL is our "false" for now. The other two need more compares.
1625 return AArch64CC::AL;
1626 case CmpInst::ICMP_EQ:
1627 case CmpInst::FCMP_OEQ:
1628 return AArch64CC::EQ;
1629 case CmpInst::ICMP_SGT:
1630 case CmpInst::FCMP_OGT:
1631 return AArch64CC::GT;
1632 case CmpInst::ICMP_SGE:
1633 case CmpInst::FCMP_OGE:
1634 return AArch64CC::GE;
1635 case CmpInst::ICMP_UGT:
1636 case CmpInst::FCMP_UGT:
1637 return AArch64CC::HI;
1638 case CmpInst::FCMP_OLT:
1639 return AArch64CC::MI;
1640 case CmpInst::ICMP_ULE:
1641 case CmpInst::FCMP_OLE:
1642 return AArch64CC::LS;
1643 case CmpInst::FCMP_ORD:
1644 return AArch64CC::VC;
1645 case CmpInst::FCMP_UNO:
1646 return AArch64CC::VS;
1647 case CmpInst::FCMP_UGE:
1648 return AArch64CC::PL;
1649 case CmpInst::ICMP_SLT:
1650 case CmpInst::FCMP_ULT:
1651 return AArch64CC::LT;
1652 case CmpInst::ICMP_SLE:
1653 case CmpInst::FCMP_ULE:
1654 return AArch64CC::LE;
1655 case CmpInst::FCMP_UNE:
1656 case CmpInst::ICMP_NE:
1657 return AArch64CC::NE;
1658 case CmpInst::ICMP_UGE:
1659 return AArch64CC::HS;
1660 case CmpInst::ICMP_ULT:
1661 return AArch64CC::LO;
1665 bool AArch64FastISel::SelectBranch(const Instruction *I) {
1666 const BranchInst *BI = cast<BranchInst>(I);
1667 if (BI->isUnconditional()) {
1668 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
1669 fastEmitBranch(MSucc, BI->getDebugLoc());
1673 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1674 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1676 AArch64CC::CondCode CC = AArch64CC::NE;
1677 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1678 if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
1679 // We may not handle every CC for now.
1680 CC = getCompareCC(CI->getPredicate());
1681 if (CC == AArch64CC::AL)
1685 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1689 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1693 // Obtain the branch weight and add the TrueBB to the successor list.
1694 uint32_t BranchWeight = 0;
1696 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1697 TBB->getBasicBlock());
1698 FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1700 fastEmitBranch(FBB, DbgLoc);
1703 } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1705 if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1706 (isTypeSupported(TI->getOperand(0)->getType(), SrcVT))) {
1707 unsigned CondReg = getRegForValue(TI->getOperand(0));
1710 bool CondIsKill = hasTrivialKill(TI->getOperand(0));
1712 // Issue an extract_subreg to get the lower 32-bits.
1713 if (SrcVT == MVT::i64) {
1714 CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
1719 unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
1720 assert(ANDReg && "Unexpected AND instruction emission failure.");
1721 emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
1723 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1724 std::swap(TBB, FBB);
1727 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1731 // Obtain the branch weight and add the TrueBB to the successor list.
1732 uint32_t BranchWeight = 0;
1734 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1735 TBB->getBasicBlock());
1736 FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1738 fastEmitBranch(FBB, DbgLoc);
1741 } else if (const ConstantInt *CI =
1742 dyn_cast<ConstantInt>(BI->getCondition())) {
1743 uint64_t Imm = CI->getZExtValue();
1744 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
1745 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
1748 // Obtain the branch weight and add the target to the successor list.
1749 uint32_t BranchWeight = 0;
1751 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1752 Target->getBasicBlock());
1753 FuncInfo.MBB->addSuccessor(Target, BranchWeight);
1755 } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
1756 // Fake request the condition, otherwise the intrinsic might be completely
1758 unsigned CondReg = getRegForValue(BI->getCondition());
1763 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1767 // Obtain the branch weight and add the TrueBB to the successor list.
1768 uint32_t BranchWeight = 0;
1770 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1771 TBB->getBasicBlock());
1772 FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1774 fastEmitBranch(FBB, DbgLoc);
1778 unsigned CondReg = getRegForValue(BI->getCondition());
1781 bool CondRegIsKill = hasTrivialKill(BI->getCondition());
1783 // We've been divorced from our compare! Our block was split, and
1784 // now our compare lives in a predecessor block. We musn't
1785 // re-compare here, as the children of the compare aren't guaranteed
1786 // live across the block boundary (we *could* check for this).
1787 // Regardless, the compare has been done in the predecessor block,
1788 // and it left a value for us in a virtual register. Ergo, we test
1789 // the one-bit value left in the virtual register.
1790 emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0);
1792 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1793 std::swap(TBB, FBB);
1797 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1801 // Obtain the branch weight and add the TrueBB to the successor list.
1802 uint32_t BranchWeight = 0;
1804 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1805 TBB->getBasicBlock());
1806 FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1808 fastEmitBranch(FBB, DbgLoc);
1812 bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
1813 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
1814 unsigned AddrReg = getRegForValue(BI->getOperand(0));
1818 // Emit the indirect branch.
1819 const MCInstrDesc &II = TII.get(AArch64::BR);
1820 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
1821 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
1823 // Make sure the CFG is up-to-date.
1824 for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
1825 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
1830 bool AArch64FastISel::SelectCmp(const Instruction *I) {
1831 const CmpInst *CI = cast<CmpInst>(I);
1833 // We may not handle every CC for now.
1834 AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
1835 if (CC == AArch64CC::AL)
1839 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1842 // Now set a register based on the comparison.
1843 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
1844 unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
1845 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
1847 .addReg(AArch64::WZR)
1848 .addReg(AArch64::WZR)
1849 .addImm(invertedCC);
1851 updateValueMap(I, ResultReg);
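// Illustrative sketch (register numbers are arbitrary): for
//   %r = icmp slt i32 %a, %b
// this roughly emits
//   cmp   w0, w1
//   csinc w8, wzr, wzr, ge   ; equivalent to "cset w8, lt"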
1855 bool AArch64FastISel::SelectSelect(const Instruction *I) {
1856 const SelectInst *SI = cast<SelectInst>(I);
1858 EVT DestEVT = TLI.getValueType(SI->getType(), true);
1859 if (!DestEVT.isSimple())
1862 MVT DestVT = DestEVT.getSimpleVT();
1863 if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
1868 const TargetRegisterClass *RC = nullptr;
1869 switch (DestVT.SimpleTy) {
1870 default: return false;
1872 SelectOpc = AArch64::CSELWr; RC = &AArch64::GPR32RegClass; break;
1874 SelectOpc = AArch64::CSELXr; RC = &AArch64::GPR64RegClass; break;
1876 SelectOpc = AArch64::FCSELSrrr; RC = &AArch64::FPR32RegClass; break;
1878 SelectOpc = AArch64::FCSELDrrr; RC = &AArch64::FPR64RegClass; break;
1881 const Value *Cond = SI->getCondition();
1882 bool NeedTest = true;
1883 AArch64CC::CondCode CC = AArch64CC::NE;
1884 if (foldXALUIntrinsic(CC, I, Cond))
1887 unsigned CondReg = getRegForValue(Cond);
1890 bool CondIsKill = hasTrivialKill(Cond);
1893 unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
1894 assert(ANDReg && "Unexpected AND instruction emission failure.");
1895 emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
1898 unsigned TrueReg = getRegForValue(SI->getTrueValue());
1899 bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
1901 unsigned FalseReg = getRegForValue(SI->getFalseValue());
1902 bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
1904 if (!TrueReg || !FalseReg)
1907 unsigned ResultReg = fastEmitInst_rri(SelectOpc, RC, TrueReg, TrueIsKill,
1908 FalseReg, FalseIsKill, CC);
1909 updateValueMap(I, ResultReg);
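// Illustrative sketch (register numbers are arbitrary): when the condition is
// not folded from an overflow intrinsic,
//   %r = select i1 %c, i32 %t, i32 %f
// roughly becomes
//   and  w8, wC, #0x1
//   cmp  w8, #0
//   csel w9, wT, wF, ne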
1913 bool AArch64FastISel::SelectFPExt(const Instruction *I) {
1914 Value *V = I->getOperand(0);
1915 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
1918 unsigned Op = getRegForValue(V);
1922 unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
1923 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
1924 ResultReg).addReg(Op);
1925 updateValueMap(I, ResultReg);
1929 bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
1930 Value *V = I->getOperand(0);
1931 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
1934 unsigned Op = getRegForValue(V);
1938 unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
1939 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
1940 ResultReg).addReg(Op);
1941 updateValueMap(I, ResultReg);
1945 // FPToUI and FPToSI
1946 bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
1948 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1951 unsigned SrcReg = getRegForValue(I->getOperand(0));
1955 EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1956 if (SrcVT == MVT::f128)
1960 if (SrcVT == MVT::f64) {
1962 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
1964 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
1967 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
1969 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
1971 unsigned ResultReg = createResultReg(
1972 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
1973 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1975 updateValueMap(I, ResultReg);
1979 bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
1981 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1983 assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
1984 "Unexpected value type.");
1986 unsigned SrcReg = getRegForValue(I->getOperand(0));
1989 bool SrcIsKill = hasTrivialKill(I->getOperand(0));
1991 EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1993 // Handle sign-extension.
1994 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
1996 EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2003 if (SrcVT == MVT::i64) {
2005 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2007 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2010 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2012 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2015 unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2017 updateValueMap(I, ResultReg);
2021 bool AArch64FastISel::fastLowerArguments() {
2022 if (!FuncInfo.CanLowerReturn)
2025 const Function *F = FuncInfo.Fn;
2029 CallingConv::ID CC = F->getCallingConv();
2030 if (CC != CallingConv::C)
2033 // Only handle simple cases like i1/i8/i16/i32/i64/f32/f64 of up to 8 GPR and
2035 unsigned GPRCnt = 0;
2036 unsigned FPRCnt = 0;
2038 for (auto const &Arg : F->args()) {
2039 // The first argument is at index 1.
2041 if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
2042 F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
2043 F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
2044 F->getAttributes().hasAttribute(Idx, Attribute::Nest))
2047 Type *ArgTy = Arg.getType();
2048 if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
2051 EVT ArgVT = TLI.getValueType(ArgTy);
2052 if (!ArgVT.isSimple()) return false;
2053 switch (ArgVT.getSimpleVT().SimpleTy) {
2054 default: return false;
2069 if (GPRCnt > 8 || FPRCnt > 8)
2073 static const MCPhysReg Registers[5][8] = {
2074 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2075 AArch64::W5, AArch64::W6, AArch64::W7 },
2076 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2077 AArch64::X5, AArch64::X6, AArch64::X7 },
2078 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2079 AArch64::H5, AArch64::H6, AArch64::H7 },
2080 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2081 AArch64::S5, AArch64::S6, AArch64::S7 },
2082 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2083 AArch64::D5, AArch64::D6, AArch64::D7 }
2086 unsigned GPRIdx = 0;
2087 unsigned FPRIdx = 0;
2088 for (auto const &Arg : F->args()) {
2089 MVT VT = TLI.getSimpleValueType(Arg.getType());
2091 const TargetRegisterClass *RC = nullptr;
2092 switch (VT.SimpleTy) {
2093 default: llvm_unreachable("Unexpected value type.");
2096 case MVT::i16: VT = MVT::i32; // fall-through
2098 SrcReg = Registers[0][GPRIdx++]; RC = &AArch64::GPR32RegClass; break;
2100 SrcReg = Registers[1][GPRIdx++]; RC = &AArch64::GPR64RegClass; break;
2102 SrcReg = Registers[2][FPRIdx++]; RC = &AArch64::FPR16RegClass; break;
2104 SrcReg = Registers[3][FPRIdx++]; RC = &AArch64::FPR32RegClass; break;
2106 SrcReg = Registers[4][FPRIdx++]; RC = &AArch64::FPR64RegClass; break;
2109 // Skip unused arguments.
2110 if (Arg.use_empty()) {
2111 updateValueMap(&Arg, 0);
2115 unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2116 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2117 // Without this, EmitLiveInCopies may eliminate the livein if its only
2118 // use is a bitcast (which isn't turned into an instruction).
2119 unsigned ResultReg = createResultReg(RC);
2120 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2121 TII.get(TargetOpcode::COPY), ResultReg)
2122 .addReg(DstReg, getKillRegState(true));
2123 updateValueMap(&Arg, ResultReg);
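// Illustrative sketch of the assignment above, for a hypothetical signature:
//   define i32 @f(i32 %a, i64 %b, float %c)
// gives %a -> W0 (GPR32), %b -> X1 (GPR64), %c -> S0 (FPR32); each formal is
// added as a live-in and then copied into a fresh virtual register.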
2128 bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI,
2129 SmallVectorImpl<MVT> &OutVTs,
2130 unsigned &NumBytes) {
2131 CallingConv::ID CC = CLI.CallConv;
2132 SmallVector<CCValAssign, 16> ArgLocs;
2133 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2134 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2136 // Get a count of how many bytes are to be pushed on the stack.
2137 NumBytes = CCInfo.getNextStackOffset();
2139 // Issue CALLSEQ_START
2140 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
2141 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
2144 // Process the args.
2145 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2146 CCValAssign &VA = ArgLocs[i];
2147 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
2148 MVT ArgVT = OutVTs[VA.getValNo()];
2150 unsigned ArgReg = getRegForValue(ArgVal);
2154 // Handle arg promotion: SExt, ZExt, AExt.
2155 switch (VA.getLocInfo()) {
2156 case CCValAssign::Full:
2158 case CCValAssign::SExt: {
2159 MVT DestVT = VA.getLocVT();
2161 ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
2166 case CCValAssign::AExt:
2167 // Intentional fall-through.
2168 case CCValAssign::ZExt: {
2169 MVT DestVT = VA.getLocVT();
2171 ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
2177 llvm_unreachable("Unknown arg promotion!");
2180 // Now copy/store arg to correct locations.
2181 if (VA.isRegLoc() && !VA.needsCustom()) {
2182 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2183 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
2184 CLI.OutRegs.push_back(VA.getLocReg());
2185 } else if (VA.needsCustom()) {
2186 // FIXME: Handle custom args.
2189 assert(VA.isMemLoc() && "Assuming store on stack.");
2191 // Don't emit stores for undef values.
2192 if (isa<UndefValue>(ArgVal))
2195 // Need to store on the stack.
2196 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
2198 unsigned BEAlign = 0;
2199 if (ArgSize < 8 && !Subtarget->isLittleEndian())
2200 BEAlign = 8 - ArgSize;
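// Illustrative sketch: on a big-endian target a 9th i32 argument has
// ArgSize = 4, so BEAlign = 4 and the value is stored at
// [sp, #(LocMemOffset + 4)], i.e. the half of the 8-byte slot that a
// big-endian callee reads as the low 32 bits.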
2203 Addr.setKind(Address::RegBase);
2204 Addr.setReg(AArch64::SP);
2205 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
2207 unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
2208 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
2209 MachinePointerInfo::getStack(Addr.getOffset()),
2210 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
2212 if (!EmitStore(ArgVT, ArgReg, Addr, MMO))
2219 bool AArch64FastISel::FinishCall(CallLoweringInfo &CLI, MVT RetVT,
2220 unsigned NumBytes) {
2221 CallingConv::ID CC = CLI.CallConv;
2223 // Issue CALLSEQ_END
2224 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
2225 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
2226 .addImm(NumBytes).addImm(0);
2228 // Now the return value.
2229 if (RetVT != MVT::isVoid) {
2230 SmallVector<CCValAssign, 16> RVLocs;
2231 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
2232 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
2234 // Only handle a single return value.
2235 if (RVLocs.size() != 1)
2238 // Copy all of the result registers out of their specified physreg.
2239 MVT CopyVT = RVLocs[0].getValVT();
2240 unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
2241 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2242 TII.get(TargetOpcode::COPY), ResultReg)
2243 .addReg(RVLocs[0].getLocReg());
2244 CLI.InRegs.push_back(RVLocs[0].getLocReg());
2246 CLI.ResultReg = ResultReg;
2247 CLI.NumResultRegs = 1;
2253 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
2254 CallingConv::ID CC = CLI.CallConv;
2255 bool IsTailCall = CLI.IsTailCall;
2256 bool IsVarArg = CLI.IsVarArg;
2257 const Value *Callee = CLI.Callee;
2258 const char *SymName = CLI.SymName;
2260 // Allow SelectionDAG isel to handle tail calls.
2264 CodeModel::Model CM = TM.getCodeModel();
2265 // Only support the small and large code model.
2266 if (CM != CodeModel::Small && CM != CodeModel::Large)
2269 // FIXME: Add large code model support for ELF.
2270 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
2273 // Let SDISel handle vararg functions.
2277 // FIXME: Only handle *simple* calls for now.
2279 if (CLI.RetTy->isVoidTy())
2280 RetVT = MVT::isVoid;
2281 else if (!isTypeLegal(CLI.RetTy, RetVT))
2284 for (auto Flag : CLI.OutFlags)
2285 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
2288 // Set up the argument vectors.
2289 SmallVector<MVT, 16> OutVTs;
2290 OutVTs.reserve(CLI.OutVals.size());
2292 for (auto *Val : CLI.OutVals) {
2294 if (!isTypeLegal(Val->getType(), VT) &&
2295 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
2298 // We don't handle vector parameters yet.
2299 if (VT.isVector() || VT.getSizeInBits() > 64)
2302 OutVTs.push_back(VT);
2306 if (!ComputeCallAddress(Callee, Addr))
2309 // Handle the arguments now that we've gotten them.
2311 if (!ProcessCallArgs(CLI, OutVTs, NumBytes))
2315 MachineInstrBuilder MIB;
2316 if (CM == CodeModel::Small) {
2317 const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
2318 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
2320 MIB.addExternalSymbol(SymName, 0);
2321 else if (Addr.getGlobalValue())
2322 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
2323 else if (Addr.getReg()) {
2324 unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
2329 unsigned CallReg = 0;
2331 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
2332 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
2334 .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
2336 CallReg = createResultReg(&AArch64::GPR64RegClass);
2337 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
2340 .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
2342 } else if (Addr.getGlobalValue()) {
2343 CallReg = AArch64MaterializeGV(Addr.getGlobalValue());
2344 } else if (Addr.getReg())
2345 CallReg = Addr.getReg();
2350 const MCInstrDesc &II = TII.get(AArch64::BLR);
2351 CallReg = constrainOperandRegClass(II, CallReg, 0);
2352 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
2355 // Add implicit physical register uses to the call.
2356 for (auto Reg : CLI.OutRegs)
2357 MIB.addReg(Reg, RegState::Implicit);
2359 // Add a register mask with the call-preserved registers.
2360 // Proper defs for return values will be added by setPhysRegsDeadExcept().
2361 MIB.addRegMask(TRI.getCallPreservedMask(CC));
2365 // Finish off the call including any return values.
2366 return FinishCall(CLI, RetVT, NumBytes);
2369 bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
2371 return Len / Alignment <= 4;
2376 bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
2377 uint64_t Len, unsigned Alignment) {
2378 // Make sure we don't bloat code by inlining very large memcpy's.
2379 if (!IsMemCpySmall(Len, Alignment))
2382 int64_t UnscaledOffset = 0;
2383 Address OrigDest = Dest;
2384 Address OrigSrc = Src;
2388 if (!Alignment || Alignment >= 8) {
2399 // Bound based on alignment.
2400 if (Len >= 4 && Alignment == 4)
2402 else if (Len >= 2 && Alignment == 2)
2411 RV = EmitLoad(VT, ResultReg, Src);
2415 RV = EmitStore(VT, ResultReg, Dest);
2419 int64_t Size = VT.getSizeInBits() / 8;
2421 UnscaledOffset += Size;
2423 // We need to recompute the unscaled offset for each iteration.
2424 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
2425 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
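// Illustrative sketch (register numbers are arbitrary): an 8-byte copy with
// 4-byte alignment is split into two i32 load/store pairs, roughly
//   ldr w8, [xSrc]        str w8, [xDst]
//   ldr w9, [xSrc, #4]    str w9, [xDst, #4]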
2431 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
2432 /// into the user. The condition code will only be updated on success.
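/// A minimal sketch of the pattern this looks for (names are illustrative):
///   %res  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
///   %val  = extractvalue { i32, i1 } %res, 0
///   %obit = extractvalue { i32, i1 } %res, 1
///   br i1 %obit, label %overflow, label %cont
/// so the user of %obit can branch or select directly on the flags (VS for
/// the signed-add case) instead of re-materializing the overflow bit.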
2433 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
2434 const Instruction *I,
2435 const Value *Cond) {
2436 if (!isa<ExtractValueInst>(Cond))
2439 const auto *EV = cast<ExtractValueInst>(Cond);
2440 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
2443 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
2445 const Function *Callee = II->getCalledFunction();
2447 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
2448 if (!isTypeLegal(RetTy, RetVT))
2451 if (RetVT != MVT::i32 && RetVT != MVT::i64)
2454 AArch64CC::CondCode TmpCC;
2455 switch (II->getIntrinsicID()) {
2456 default: return false;
2457 case Intrinsic::sadd_with_overflow:
2458 case Intrinsic::ssub_with_overflow: TmpCC = AArch64CC::VS; break;
2459 case Intrinsic::uadd_with_overflow: TmpCC = AArch64CC::HS; break;
2460 case Intrinsic::usub_with_overflow: TmpCC = AArch64CC::LO; break;
2461 case Intrinsic::smul_with_overflow:
2462 case Intrinsic::umul_with_overflow: TmpCC = AArch64CC::NE; break;
2465 // Check if both instructions are in the same basic block.
2466 if (II->getParent() != I->getParent())
2469 // Make sure nothing is in the way.
2470 BasicBlock::const_iterator Start = I;
2471 BasicBlock::const_iterator End = II;
2472 for (auto Itr = std::prev(Start); Itr != End; --Itr) {
2473 // We only expect extractvalue instructions between the intrinsic and the
2474 // instruction to be selected.
2475 if (!isa<ExtractValueInst>(Itr))
2478 // Check that the extractvalue operand comes from the intrinsic.
2479 const auto *EVI = cast<ExtractValueInst>(Itr);
2480 if (EVI->getAggregateOperand() != II)
2488 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2489 // FIXME: Handle more intrinsics.
2490 switch (II->getIntrinsicID()) {
2491 default: return false;
2492 case Intrinsic::frameaddress: {
2493 MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
2494 MFI->setFrameAddressIsTaken(true);
2496 const AArch64RegisterInfo *RegInfo =
2497 static_cast<const AArch64RegisterInfo *>(
2498 TM.getSubtargetImpl()->getRegisterInfo());
2499 unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
2500 unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2501 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2502 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
2503 // Recursively load frame address
2509 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2511 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
2512 SrcReg, /*IsKill=*/true, 0);
2513 assert(DestReg && "Unexpected LDR instruction emission failure.");
2517 updateValueMap(II, SrcReg);
2520 case Intrinsic::memcpy:
2521 case Intrinsic::memmove: {
2522 const auto *MTI = cast<MemTransferInst>(II);
2523 // Don't handle volatile.
2524 if (MTI->isVolatile())
2527 // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
2528 // we would emit dead code because we don't currently handle memmoves.
2529 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
2530 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
2531 // Small memcpy's are common enough that we want to do them without a call if possible.
2533 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
2534 unsigned Alignment = MTI->getAlignment();
2535 if (IsMemCpySmall(Len, Alignment)) {
2537 if (!ComputeAddress(MTI->getRawDest(), Dest) ||
2538 !ComputeAddress(MTI->getRawSource(), Src))
2540 if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
2545 if (!MTI->getLength()->getType()->isIntegerTy(64))
2548 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
2549 // Fast instruction selection doesn't support the special address spaces.
2553 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
2554 return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
2556 case Intrinsic::memset: {
2557 const MemSetInst *MSI = cast<MemSetInst>(II);
2558 // Don't handle volatile.
2559 if (MSI->isVolatile())
2562 if (!MSI->getLength()->getType()->isIntegerTy(64))
2565 if (MSI->getDestAddressSpace() > 255)
2566 // Fast instruction selection doesn't support the special address spaces.
2570 return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
2572 case Intrinsic::trap: {
2573 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
2577 case Intrinsic::sqrt: {
2578 Type *RetTy = II->getCalledFunction()->getReturnType();
2581 if (!isTypeLegal(RetTy, VT))
2584 unsigned Op0Reg = getRegForValue(II->getOperand(0));
2587 bool Op0IsKill = hasTrivialKill(II->getOperand(0));
2589 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
2593 updateValueMap(II, ResultReg);
2596 case Intrinsic::sadd_with_overflow:
2597 case Intrinsic::uadd_with_overflow:
2598 case Intrinsic::ssub_with_overflow:
2599 case Intrinsic::usub_with_overflow:
2600 case Intrinsic::smul_with_overflow:
2601 case Intrinsic::umul_with_overflow: {
2602 // This implements the basic lowering of the xalu with overflow intrinsics.
2603 const Function *Callee = II->getCalledFunction();
2604 auto *Ty = cast<StructType>(Callee->getReturnType());
2605 Type *RetTy = Ty->getTypeAtIndex(0U);
2608 if (!isTypeLegal(RetTy, VT))
2611 if (VT != MVT::i32 && VT != MVT::i64)
2614 const Value *LHS = II->getArgOperand(0);
2615 const Value *RHS = II->getArgOperand(1);
2616 // Canonicalize immediate to the RHS.
2617 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2618 isCommutativeIntrinsic(II))
2619 std::swap(LHS, RHS);
2621 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
2622 AArch64CC::CondCode CC = AArch64CC::Invalid;
2623 switch (II->getIntrinsicID()) {
2624 default: llvm_unreachable("Unexpected intrinsic!");
2625 case Intrinsic::sadd_with_overflow:
2626 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
2629 case Intrinsic::uadd_with_overflow:
2630 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
2633 case Intrinsic::ssub_with_overflow:
2634 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
2637 case Intrinsic::usub_with_overflow:
2638 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
2641 case Intrinsic::smul_with_overflow: {
2643 unsigned LHSReg = getRegForValue(LHS);
2646 bool LHSIsKill = hasTrivialKill(LHS);
2648 unsigned RHSReg = getRegForValue(RHS);
2651 bool RHSIsKill = hasTrivialKill(RHS);
2653 if (VT == MVT::i32) {
2654 MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2655 unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
2656 /*IsKill=*/false, 32);
2657 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
2659 ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
2661 emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
2662 AArch64_AM::ASR, 31, /*WantResult=*/false);
2664 assert(VT == MVT::i64 && "Unexpected value type.");
2665 MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2666 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
2668 emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
2669 AArch64_AM::ASR, 63, /*WantResult=*/false);
2673 case Intrinsic::umul_with_overflow: {
2675 unsigned LHSReg = getRegForValue(LHS);
2678 bool LHSIsKill = hasTrivialKill(LHS);
2680 unsigned RHSReg = getRegForValue(RHS);
2683 bool RHSIsKill = hasTrivialKill(RHS);
2685 if (VT == MVT::i32) {
2686 MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2687 emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
2688 /*IsKill=*/false, AArch64_AM::LSR, 32,
2689 /*WantResult=*/false);
2690 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
2693 assert(VT == MVT::i64 && "Unexpected value type.");
2694 MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2695 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
2697 emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
2698 /*IsKill=*/false, /*WantResult=*/false);
2705 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
2706 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2707 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
2710 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
2711 AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
2712 /*IsKill=*/true, getInvertedCondCode(CC));
2713 assert((ResultReg1 + 1) == ResultReg2 &&
2714 "Nonconsecutive result registers.");
2715 updateValueMap(II, ResultReg1, 2);
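// Illustrative sketch (register numbers are arbitrary): for
// @llvm.smul.with.overflow.i32 the code above roughly emits
//   smull x8, w0, w1
//   lsr   x9, x8, #32
//   cmp   w9, w8, asr #31    ; overflow iff high half != sign of low half
//   cset  w10, ne            ; CSINC wzr, wzr, eq
// with the low half (w8) and the overflow bit (w10) forming the two results.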
2722 bool AArch64FastISel::SelectRet(const Instruction *I) {
2723 const ReturnInst *Ret = cast<ReturnInst>(I);
2724 const Function &F = *I->getParent()->getParent();
2726 if (!FuncInfo.CanLowerReturn)
2732 // Build a list of return value registers.
2733 SmallVector<unsigned, 4> RetRegs;
2735 if (Ret->getNumOperands() > 0) {
2736 CallingConv::ID CC = F.getCallingConv();
2737 SmallVector<ISD::OutputArg, 4> Outs;
2738 GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
2740 // Analyze operands of the call, assigning locations to each operand.
2741 SmallVector<CCValAssign, 16> ValLocs;
2742 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
2743 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
2744 : RetCC_AArch64_AAPCS;
2745 CCInfo.AnalyzeReturn(Outs, RetCC);
2747 // Only handle a single return value for now.
2748 if (ValLocs.size() != 1)
2751 CCValAssign &VA = ValLocs[0];
2752 const Value *RV = Ret->getOperand(0);
2754 // Don't bother handling odd stuff for now.
2755 if (VA.getLocInfo() != CCValAssign::Full)
2757 // Only handle register returns for now.
2760 unsigned Reg = getRegForValue(RV);
2764 unsigned SrcReg = Reg + VA.getValNo();
2765 unsigned DestReg = VA.getLocReg();
2766 // Avoid a cross-class copy. This is very unlikely.
2767 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
2770 EVT RVEVT = TLI.getValueType(RV->getType());
2771 if (!RVEVT.isSimple())
2774 // Vectors (of > 1 lane) in big endian need tricky handling.
2775 if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1)
2778 MVT RVVT = RVEVT.getSimpleVT();
2779 if (RVVT == MVT::f128)
2781 MVT DestVT = VA.getValVT();
2782 // Special handling for extended integers.
2783 if (RVVT != DestVT) {
2784 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
2787 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
2790 bool isZExt = Outs[0].Flags.isZExt();
2791 SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
2797 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2798 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
2800 // Add register to return instruction.
2801 RetRegs.push_back(VA.getLocReg());
2804 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2805 TII.get(AArch64::RET_ReallyLR));
2806 for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
2807 MIB.addReg(RetRegs[i], RegState::Implicit);
2811 bool AArch64FastISel::SelectTrunc(const Instruction *I) {
2812 Type *DestTy = I->getType();
2813 Value *Op = I->getOperand(0);
2814 Type *SrcTy = Op->getType();
2816 EVT SrcEVT = TLI.getValueType(SrcTy, true);
2817 EVT DestEVT = TLI.getValueType(DestTy, true);
2818 if (!SrcEVT.isSimple())
2820 if (!DestEVT.isSimple())
2823 MVT SrcVT = SrcEVT.getSimpleVT();
2824 MVT DestVT = DestEVT.getSimpleVT();
2826 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
2829 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
2833 unsigned SrcReg = getRegForValue(Op);
2836 bool SrcIsKill = hasTrivialKill(Op);
2838 // If we're truncating from i64 to a smaller non-legal type then generate an
2839 // AND. Otherwise, we know the high bits are undefined and a truncate only
2840 // generates a COPY. We cannot mark the source register also as the result
2841 // register, because this can incorrectly transfer the kill flag onto the source register.
2844 if (SrcVT == MVT::i64) {
2846 switch (DestVT.SimpleTy) {
2848 // Trunc i64 to i32 is handled by the target-independent fast-isel.
2860 // Issue an extract_subreg to get the lower 32 bits.
2861 unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2863 // Create the AND instruction which performs the actual truncation.
2864 ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
2865 assert(ResultReg && "Unexpected AND instruction emission failure.");
2867 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2868 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2869 TII.get(TargetOpcode::COPY), ResultReg)
2870 .addReg(SrcReg, getKillRegState(SrcIsKill));
2873 updateValueMap(I, ResultReg);
2877 unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
2878 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
2879 DestVT == MVT::i64) &&
2880 "Unexpected value type.");
2881 // Handle i8 and i16 as i32.
2882 if (DestVT == MVT::i8 || DestVT == MVT::i16)
2886 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2887 assert(ResultReg && "Unexpected AND instruction emission failure.");
2888 if (DestVT == MVT::i64) {
2889 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
2890 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
2891 unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2892 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2893 TII.get(AArch64::SUBREG_TO_REG), Reg64)
2896 .addImm(AArch64::sub_32);
2901 if (DestVT == MVT::i64) {
2902 // FIXME: We're SExt i1 to i64.
2905 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
2906 /*TODO:IsKill=*/false, 0, 0);
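// Illustrative sketch: zero-extending an i1 to i64 roughly becomes
//   and w8, wSrc, #0x1
// followed by a SUBREG_TO_REG that re-interprets w8 as the low 32 bits of an
// x register; the sign-extending case uses SBFMWri with ImmR = ImmS = 0, i.e.
//   sbfx w8, wSrc, #0, #1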
2910 unsigned AArch64FastISel::Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2911 unsigned Op1, bool Op1IsKill) {
2913 switch (RetVT.SimpleTy) {
2919 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
2921 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
2924 const TargetRegisterClass *RC =
2925 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
2926 return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
2927 ZReg, /*IsKill=*/true);
2930 unsigned AArch64FastISel::Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2931 unsigned Op1, bool Op1IsKill) {
2932 if (RetVT != MVT::i64)
2935 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
2936 Op0, Op0IsKill, Op1, Op1IsKill,
2937 AArch64::XZR, /*IsKill=*/true);
2940 unsigned AArch64FastISel::Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2941 unsigned Op1, bool Op1IsKill) {
2942 if (RetVT != MVT::i64)
2945 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
2946 Op0, Op0IsKill, Op1, Op1IsKill,
2947 AArch64::XZR, /*IsKill=*/true);
2950 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
2951 unsigned Op1Reg, bool Op1IsKill) {
2953 bool NeedTrunc = false;
2955 switch (RetVT.SimpleTy) {
2957 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
2958 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
2959 case MVT::i32: Opc = AArch64::LSLVWr; break;
2960 case MVT::i64: Opc = AArch64::LSLVXr; break;
2963 const TargetRegisterClass *RC =
2964 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
2966 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
2969 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
2972 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
2976 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
2977 bool Op0IsKill, uint64_t Shift,
2979 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
2980 "Unexpected source/return type pair.");
2981 assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
2982 SrcVT == MVT::i64) && "Unexpected source value type.");
2983 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
2984 RetVT == MVT::i64) && "Unexpected return value type.");
2986 bool Is64Bit = (RetVT == MVT::i64);
2987 unsigned RegSize = Is64Bit ? 64 : 32;
2988 unsigned DstBits = RetVT.getSizeInBits();
2989 unsigned SrcBits = SrcVT.getSizeInBits();
2991 // Don't deal with undefined shifts.
2992 if (Shift >= DstBits)
2995 // For immediate shifts we can fold the zero-/sign-extension into the shift.
2996 // {S|U}BFM Wd, Wn, #r, #s
2997 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
2999 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3000 // %2 = shl i16 %1, 4
3001 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
3002 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
3003 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
3004 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
3006 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3007 // %2 = shl i16 %1, 8
3008 // Wd<32+7-24,32-24> = Wn<7:0>
3009 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
3010 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
3011 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
3013 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3014 // %2 = shl i16 %1, 12
3015 // Wd<32+3-20,32-20> = Wn<3:0>
3016 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
3017 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
3018 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
3020 unsigned ImmR = RegSize - Shift;
3021 // Limit the width to the length of the source type.
3022 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
3023 static const unsigned OpcTable[2][2] = {
3024 {AArch64::SBFMWri, AArch64::SBFMXri},
3025 {AArch64::UBFMWri, AArch64::UBFMXri}
3027 unsigned Opc = OpcTable[IsZext][Is64Bit];
3028 const TargetRegisterClass *RC =
3029 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3030 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3031 unsigned TmpReg = MRI.createVirtualRegister(RC);
3032 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3033 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3035 .addReg(Op0, getKillRegState(Op0IsKill))
3036 .addImm(AArch64::sub_32);
3040 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
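// Worked example of the computation above (assuming a folded zero-extend):
//   %1 = zext i8 %x to i32
//   %2 = shl i32 %1, 4
// gives RegSize = 32, Shift = 4, SrcBits = 8, so ImmR = 28 and ImmS = 7,
// i.e. UBFM Wd, Wn, #28, #7, which is the alias "ubfiz wd, wn, #4, #8".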
3043 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3044 unsigned Op1Reg, bool Op1IsKill) {
3046 bool NeedTrunc = false;
3048 switch (RetVT.SimpleTy) {
3050 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
3051 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
3052 case MVT::i32: Opc = AArch64::LSRVWr; break;
3053 case MVT::i64: Opc = AArch64::LSRVXr; break;
3056 const TargetRegisterClass *RC =
3057 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3059 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
3060 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3061 Op0IsKill = Op1IsKill = true;
3063 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3066 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3070 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3071 bool Op0IsKill, uint64_t Shift,
3073 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3074 "Unexpected source/return type pair.");
3075 assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3076 SrcVT == MVT::i64) && "Unexpected source value type.");
3077 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3078 RetVT == MVT::i64) && "Unexpected return value type.");
3080 bool Is64Bit = (RetVT == MVT::i64);
3081 unsigned RegSize = Is64Bit ? 64 : 32;
3082 unsigned DstBits = RetVT.getSizeInBits();
3083 unsigned SrcBits = SrcVT.getSizeInBits();
3085 // Don't deal with undefined shifts.
3086 if (Shift >= DstBits)
3089 // For immediate shifts we can fold the zero-/sign-extension into the shift.
3090 // {S|U}BFM Wd, Wn, #r, #s
3091 // Wd<s-r:0> = Wn<s:r> when r <= s
3093 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3094 // %2 = lshr i16 %1, 4
3095 // Wd<7-4:0> = Wn<7:4>
3096 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
3097 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
3098 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
3100 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3101 // %2 = lshr i16 %1, 8
3102 // Wd<7-7,0> = Wn<7:7>
3103 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
3104 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3105 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3107 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3108 // %2 = lshr i16 %1, 12
3109 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
3110 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
3111 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3112 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3114 if (Shift >= SrcBits && IsZExt)
3115 return AArch64MaterializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)),
3118 // It is not possible to fold a sign-extend into the LShr instruction. In this
3119 // case emit a sign-extend.
3121 Op0 = EmitIntExt(SrcVT, Op0, RetVT, IsZExt);
3126 SrcBits = SrcVT.getSizeInBits();
3130 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
3131 unsigned ImmS = SrcBits - 1;
3132 static const unsigned OpcTable[2][2] = {
3133 {AArch64::SBFMWri, AArch64::SBFMXri},
3134 {AArch64::UBFMWri, AArch64::UBFMXri}
3136 unsigned Opc = OpcTable[IsZExt][Is64Bit];
3137 const TargetRegisterClass *RC =
3138 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3139 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3140 unsigned TmpReg = MRI.createVirtualRegister(RC);
3141 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3142 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3144 .addReg(Op0, getKillRegState(Op0IsKill))
3145 .addImm(AArch64::sub_32);
3149 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
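// Worked example of the computation above (assuming a folded zero-extend):
//   %1 = zext i8 %x to i32
//   %2 = lshr i32 %1, 4
// gives SrcBits = 8, Shift = 4, so ImmR = 4 and ImmS = 7, i.e.
// UBFM Wd, Wn, #4, #7, which is the alias "ubfx wd, wn, #4, #4".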
3152 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3153 unsigned Op1Reg, bool Op1IsKill) {
3155 bool NeedTrunc = false;
3157 switch (RetVT.SimpleTy) {
3159 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
3160 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
3161 case MVT::i32: Opc = AArch64::ASRVWr; break;
3162 case MVT::i64: Opc = AArch64::ASRVXr; break;
3165 const TargetRegisterClass *RC =
3166 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3168 Op0Reg = EmitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
3169 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3170 Op0IsKill = Op1IsKill = true;
3172 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3175 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3179 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3180 bool Op0IsKill, uint64_t Shift,
3182 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3183 "Unexpected source/return type pair.");
3184 assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3185 SrcVT == MVT::i64) && "Unexpected source value type.");
3186 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3187 RetVT == MVT::i64) && "Unexpected return value type.");
3189 bool Is64Bit = (RetVT == MVT::i64);
3190 unsigned RegSize = Is64Bit ? 64 : 32;
3191 unsigned DstBits = RetVT.getSizeInBits();
3192 unsigned SrcBits = SrcVT.getSizeInBits();
3194 // Don't deal with undefined shifts.
3195 if (Shift >= DstBits)
3198 // For immediate shifts we can fold the zero-/sign-extension into the shift.
3199 // {S|U}BFM Wd, Wn, #r, #s
3200 // Wd<s-r:0> = Wn<s:r> when r <= s
3202 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3203 // %2 = ashr i16 %1, 4
3204 // Wd<7-4:0> = Wn<7:4>
3205 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
3206 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
3207 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
3209 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3210 // %2 = ashr i16 %1, 8
3211 // Wd<7-7,0> = Wn<7:7>
3212 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
3213 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3214 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3216 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3217 // %2 = ashr i16 %1, 12
3218 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
3219 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
3220 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3221 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3223 if (Shift >= SrcBits && IsZExt)
3224 return AArch64MaterializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)),
3227 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
3228 unsigned ImmS = SrcBits - 1;
3229 static const unsigned OpcTable[2][2] = {
3230 {AArch64::SBFMWri, AArch64::SBFMXri},
3231 {AArch64::UBFMWri, AArch64::UBFMXri}
3233 unsigned Opc = OpcTable[IsZExt][Is64Bit];
3234 const TargetRegisterClass *RC =
3235 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3236 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3237 unsigned TmpReg = MRI.createVirtualRegister(RC);
3238 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3239 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3241 .addReg(Op0, getKillRegState(Op0IsKill))
3242 .addImm(AArch64::sub_32);
3246 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3249 unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
3251 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
3253 // FastISel does not have plumbing to deal with extensions where the SrcVT or
3254 // DestVT are odd things, so test to make sure that they are both types we can
3255 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
3256 // bail out to SelectionDAG.
3257 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
3258 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
3259 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
3260 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
3266 switch (SrcVT.SimpleTy) {
3270 return Emiti1Ext(SrcReg, DestVT, isZExt);
3272 if (DestVT == MVT::i64)
3273 Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3275 Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
3279 if (DestVT == MVT::i64)
3280 Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3282 Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
3286 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
3287 Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3292 // Handle i8 and i16 as i32.
3293 if (DestVT == MVT::i8 || DestVT == MVT::i16)
3295 else if (DestVT == MVT::i64) {
3296 unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3297 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3298 TII.get(AArch64::SUBREG_TO_REG), Src64)
3301 .addImm(AArch64::sub_32);
3305 const TargetRegisterClass *RC =
3306 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3307 return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
3310 bool AArch64FastISel::SelectIntExt(const Instruction *I) {
3311 // On AArch64, in general, integer casts don't involve legal types; this code
3312 // handles promotable integers. The high bits for a type smaller than
3313 // the register size are assumed to be undefined.
3314 Type *DestTy = I->getType();
3315 Value *Src = I->getOperand(0);
3316 Type *SrcTy = Src->getType();
3318 bool isZExt = isa<ZExtInst>(I);
3319 unsigned SrcReg = getRegForValue(Src);
3323 EVT SrcEVT = TLI.getValueType(SrcTy, true);
3324 EVT DestEVT = TLI.getValueType(DestTy, true);
3325 if (!SrcEVT.isSimple())
3327 if (!DestEVT.isSimple())
3330 MVT SrcVT = SrcEVT.getSimpleVT();
3331 MVT DestVT = DestEVT.getSimpleVT();
3332 unsigned ResultReg = 0;
3334 // Check if it is an argument and if it is already zero/sign-extended.
3335 if (const auto *Arg = dyn_cast<Argument>(Src)) {
3336 if ((isZExt && Arg->hasZExtAttr()) || (!isZExt && Arg->hasSExtAttr())) {
3337 if (DestVT == MVT::i64) {
3338 ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
3339 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3340 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
3343 .addImm(AArch64::sub_32);
3350 ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
3355 updateValueMap(I, ResultReg);
3359 bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
3360 EVT DestEVT = TLI.getValueType(I->getType(), true);
3361 if (!DestEVT.isSimple())
3364 MVT DestVT = DestEVT.getSimpleVT();
3365 if (DestVT != MVT::i64 && DestVT != MVT::i32)
3369 bool is64bit = (DestVT == MVT::i64);
3370 switch (ISDOpcode) {
3374 DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
3377 DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
3380 unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
3381 unsigned Src0Reg = getRegForValue(I->getOperand(0));
3384 bool Src0IsKill = hasTrivialKill(I->getOperand(0));
3386 unsigned Src1Reg = getRegForValue(I->getOperand(1));
3389 bool Src1IsKill = hasTrivialKill(I->getOperand(1));
3391 const TargetRegisterClass *RC =
3392 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3393 unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
3394 Src1Reg, /*IsKill=*/false);
3395 assert(QuotReg && "Unexpected DIV instruction emission failure.");
3396 // The remainder is computed as numerator - (quotient * denominator) using the
3397 // MSUB instruction.
3398 unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
3399 Src1Reg, Src1IsKill, Src0Reg,
3401 updateValueMap(I, ResultReg);
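// Illustrative sketch (register numbers are arbitrary):
//   %r = srem i32 %a, %b
// roughly becomes
//   sdiv w8, w0, w1
//   msub w9, w8, w1, w0      ; w9 = w0 - w8 * w1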
3405 bool AArch64FastISel::SelectMul(const Instruction *I) {
3406 EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
3407 if (!SrcEVT.isSimple())
3409 MVT SrcVT = SrcEVT.getSimpleVT();
3411 // Must be simple value type. Don't handle vectors.
3412 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3416 unsigned Src0Reg = getRegForValue(I->getOperand(0));
3419 bool Src0IsKill = hasTrivialKill(I->getOperand(0));
3421 unsigned Src1Reg = getRegForValue(I->getOperand(1));
3424 bool Src1IsKill = hasTrivialKill(I->getOperand(1));
3426 unsigned ResultReg =
3427 Emit_MUL_rr(SrcVT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
3432 updateValueMap(I, ResultReg);
3436 bool AArch64FastISel::SelectShift(const Instruction *I) {
3438 if (!isTypeSupported(I->getType(), RetVT))
3441 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
3442 unsigned ResultReg = 0;
3443 uint64_t ShiftVal = C->getZExtValue();
3445 bool IsZExt = I->getOpcode() != Instruction::AShr;
3446 const Value *Op0 = I->getOperand(0);
3447 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
3449 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
3452 Op0 = ZExt->getOperand(0);
3454 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
3456 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
3459 Op0 = SExt->getOperand(0);
3463 unsigned Op0Reg = getRegForValue(Op0);
3466 bool Op0IsKill = hasTrivialKill(Op0);
3468 switch (I->getOpcode()) {
3469 default: llvm_unreachable("Unexpected instruction.");
3470 case Instruction::Shl:
3471 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
3473 case Instruction::AShr:
3474 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
3476 case Instruction::LShr:
3477 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
3483 updateValueMap(I, ResultReg);
3487 unsigned Op0Reg = getRegForValue(I->getOperand(0));
3490 bool Op0IsKill = hasTrivialKill(I->getOperand(0));
3492 unsigned Op1Reg = getRegForValue(I->getOperand(1));
3495 bool Op1IsKill = hasTrivialKill(I->getOperand(1));
3497 unsigned ResultReg = 0;
3498 switch (I->getOpcode()) {
3499 default: llvm_unreachable("Unexpected instruction.");
3500 case Instruction::Shl:
3501 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
3503 case Instruction::AShr:
3504 ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
3506 case Instruction::LShr:
3507 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
3514 updateValueMap(I, ResultReg);
3518 bool AArch64FastISel::SelectBitCast(const Instruction *I) {
3521 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
3523 if (!isTypeLegal(I->getType(), RetVT))
3527 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
3528 Opc = AArch64::FMOVWSr;
3529 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
3530 Opc = AArch64::FMOVXDr;
3531 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
3532 Opc = AArch64::FMOVSWr;
3533 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
3534 Opc = AArch64::FMOVDXr;
3538 const TargetRegisterClass *RC = nullptr;
3539 switch (RetVT.SimpleTy) {
3540 default: llvm_unreachable("Unexpected value type.");
3541 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
3542 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
3543 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
3544 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
3546 unsigned Op0Reg = getRegForValue(I->getOperand(0));
3549 bool Op0IsKill = hasTrivialKill(I->getOperand(0));
3550 unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
3555 updateValueMap(I, ResultReg);
3559 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
3560 switch (I->getOpcode()) {
3563 case Instruction::Add:
3564 case Instruction::Sub:
3565 if (selectAddSub(I))
3568 case Instruction::Mul:
3569 if (!selectBinaryOp(I, ISD::MUL))
3570 return SelectMul(I);
3572 case Instruction::SRem:
3573 if (!selectBinaryOp(I, ISD::SREM))
3574 return SelectRem(I, ISD::SREM);
3576 case Instruction::URem:
3577 if (!selectBinaryOp(I, ISD::UREM))
3578 return SelectRem(I, ISD::UREM);
3580 case Instruction::Shl:
3581 case Instruction::LShr:
3582 case Instruction::AShr:
3586 case Instruction::And:
3587 if (selectLogicalOp(I, ISD::AND))
3590 case Instruction::Or:
3591 if (selectLogicalOp(I, ISD::OR))
3594 case Instruction::Xor:
3595 if (selectLogicalOp(I, ISD::XOR))
3598 case Instruction::Br:
3599 return SelectBranch(I);
3600 case Instruction::IndirectBr:
3601 return SelectIndirectBr(I);
3602 case Instruction::BitCast:
3603 if (!FastISel::selectBitCast(I))
3604 return SelectBitCast(I);
3606 case Instruction::FPToSI:
3607 if (!selectCast(I, ISD::FP_TO_SINT))
3608 return SelectFPToInt(I, /*Signed=*/true);
3610 case Instruction::FPToUI:
3611 return SelectFPToInt(I, /*Signed=*/false);
3612 case Instruction::ZExt:
3613 if (!selectCast(I, ISD::ZERO_EXTEND))
3614 return SelectIntExt(I);
3616 case Instruction::SExt:
3617 if (!selectCast(I, ISD::SIGN_EXTEND))
3618 return SelectIntExt(I);
3620 case Instruction::Trunc:
3621 if (!selectCast(I, ISD::TRUNCATE))
3622 return SelectTrunc(I);
3624 case Instruction::FPExt:
3625 return SelectFPExt(I);
3626 case Instruction::FPTrunc:
3627 return SelectFPTrunc(I);
3628 case Instruction::SIToFP:
3629 if (!selectCast(I, ISD::SINT_TO_FP))
3630 return SelectIntToFP(I, /*Signed=*/true);
3632 case Instruction::UIToFP:
3633 return SelectIntToFP(I, /*Signed=*/false);
3634 case Instruction::Load:
3635 return SelectLoad(I);
3636 case Instruction::Store:
3637 return SelectStore(I);
3638 case Instruction::FCmp:
3639 case Instruction::ICmp:
3640 return SelectCmp(I);
3641 case Instruction::Select:
3642 return SelectSelect(I);
3643 case Instruction::Ret:
3644 return SelectRet(I);
3647 // Fall back to target-independent instruction selection.
3648 return selectOperator(I, I->getOpcode());
3649 // Silence warnings.
3650 (void)&CC_AArch64_DarwinPCS_VarArg;
3654 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo,
3655 const TargetLibraryInfo *libInfo) {
3656 return new AArch64FastISel(funcInfo, libInfo);