lib/Target/AArch64/AArch64FastISel.cpp

   1 //===-- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the AArch64-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // AArch64GenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AArch64.h"
  17 #include "AArch64Subtarget.h"
  18 #include "AArch64TargetMachine.h"
  19 #include "MCTargetDesc/AArch64AddressingModes.h"
  20 #include "llvm/Analysis/BranchProbabilityInfo.h"
  21 #include "llvm/CodeGen/CallingConvLower.h"
  22 #include "llvm/CodeGen/FastISel.h"
  23 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  24 #include "llvm/CodeGen/MachineConstantPool.h"
  25 #include "llvm/CodeGen/MachineFrameInfo.h"
  26 #include "llvm/CodeGen/MachineInstrBuilder.h"
  27 #include "llvm/CodeGen/MachineRegisterInfo.h"
  28 #include "llvm/IR/CallingConv.h"
  29 #include "llvm/IR/DataLayout.h"
  30 #include "llvm/IR/DerivedTypes.h"
  31 #include "llvm/IR/Function.h"
  32 #include "llvm/IR/GetElementPtrTypeIterator.h"
  33 #include "llvm/IR/GlobalAlias.h"
  34 #include "llvm/IR/GlobalVariable.h"
  35 #include "llvm/IR/Instructions.h"
  36 #include "llvm/IR/IntrinsicInst.h"
  37 #include "llvm/IR/Operator.h"
  38 #include "llvm/Support/CommandLine.h"
  39 using namespace llvm;
  40
  41 namespace {
  42
  43 class AArch64FastISel final : public FastISel {
  44   class Address {
  45   public:
  46     typedef enum {
  47       RegBase,
  48       FrameIndexBase
  49     } BaseKind;
  50
  51   private:
  52     BaseKind Kind;
  53     AArch64_AM::ShiftExtendType ExtType;
  54     union {
  55       unsigned Reg;
  56       int FI;
  57     } Base;
  58     unsigned OffsetReg;
  59     unsigned Shift;
  60     int64_t Offset;
  61     const GlobalValue *GV;
  62
  63   public:
  64     Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
  65       OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
  66     void setKind(BaseKind K) { Kind = K; }
  67     BaseKind getKind() const { return Kind; }
  68     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
  69     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
  70     bool isRegBase() const { return Kind == RegBase; }
  71     bool isFIBase() const { return Kind == FrameIndexBase; }
  72     void setReg(unsigned Reg) {
  73       assert(isRegBase() && "Invalid base register access!");
  74       Base.Reg = Reg;
  75     }
  76     unsigned getReg() const {
  77       assert(isRegBase() && "Invalid base register access!");
  78       return Base.Reg;
  79     }
  80     void setOffsetReg(unsigned Reg) {
  81       assert(isRegBase() && "Invalid offset register access!");
  82       OffsetReg = Reg;
  83     }
  84     unsigned getOffsetReg() const {
  85       assert(isRegBase() && "Invalid offset register access!");
  86       return OffsetReg;
  87     }
  88     void setFI(unsigned FI) {
  89       assert(isFIBase() && "Invalid base frame index  access!");
  90       Base.FI = FI;
  91     }
  92     unsigned getFI() const {
  93       assert(isFIBase() && "Invalid base frame index access!");
  94       return Base.FI;
  95     }
  96     void setOffset(int64_t O) { Offset = O; }
  97     int64_t getOffset() { return Offset; }
  98     void setShift(unsigned S) { Shift = S; }
  99     unsigned getShift() { return Shift; }
 100
 101     void setGlobalValue(const GlobalValue *G) { GV = G; }
 102     const GlobalValue *getGlobalValue() { return GV; }
 103   };
 104
 105   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
 106   /// make the right decision when generating code for different targets.
 107   const AArch64Subtarget *Subtarget;
 108   LLVMContext *Context;
 109
 110   bool fastLowerArguments() override;
 111   bool fastLowerCall(CallLoweringInfo &CLI) override;
 112   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
 113
 114 private:
 115   // Selection routines.
 116   bool selectAddSub(const Instruction *I);
 117   bool selectLogicalOp(const Instruction *I);
 118   bool selectLoad(const Instruction *I);
 119   bool selectStore(const Instruction *I);
 120   bool selectBranch(const Instruction *I);
 121   bool selectIndirectBr(const Instruction *I);
 122   bool selectCmp(const Instruction *I);
 123   bool selectSelect(const Instruction *I);
 124   bool selectFPExt(const Instruction *I);
 125   bool selectFPTrunc(const Instruction *I);
 126   bool selectFPToInt(const Instruction *I, bool Signed);
 127   bool selectIntToFP(const Instruction *I, bool Signed);
 128   bool selectRem(const Instruction *I, unsigned ISDOpcode);
 129   bool selectRet(const Instruction *I);
 130   bool selectTrunc(const Instruction *I);
 131   bool selectIntExt(const Instruction *I);
 132   bool selectMul(const Instruction *I);
 133   bool selectShift(const Instruction *I);
 134   bool selectBitCast(const Instruction *I);
 135   bool selectFRem(const Instruction *I);
 136   bool selectSDiv(const Instruction *I);
 137
 138   // Utility helper routines.
 139   bool isTypeLegal(Type *Ty, MVT &VT);
 140   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
 141   bool isValueAvailable(const Value *V) const;
 142   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
 143   bool computeCallAddress(const Value *V, Address &Addr);
 144   bool simplifyAddress(Address &Addr, MVT VT);
 145   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 146                             unsigned Flags, unsigned ScaleFactor,
 147                             MachineMemOperand *MMO);
 148   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
 149   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
 150                           unsigned Alignment);
 151   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
 152                          const Value *Cond);
 153
 154   // Emit helper routines.
 155   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
 156                       const Value *RHS, bool SetFlags = false,
 157                       bool WantResult = true,  bool IsZExt = false);
 158   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
 159                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 160                          bool SetFlags = false, bool WantResult = true);
 161   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
 162                          bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
 163                          bool WantResult = true);
 164   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
 165                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 166                          AArch64_AM::ShiftExtendType ShiftType,
 167                          uint64_t ShiftImm, bool SetFlags = false,
 168                          bool WantResult = true);
 169   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
 170                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 171                           AArch64_AM::ShiftExtendType ExtType,
 172                           uint64_t ShiftImm, bool SetFlags = false,
 173                          bool WantResult = true);
 174
 175   // Emit functions.
 176   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
 177   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
 178   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 179   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
 180   bool emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
 181                 MachineMemOperand *MMO = nullptr);
 182   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
 183                  MachineMemOperand *MMO = nullptr);
 184   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
 185   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
 186   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
 187                    bool SetFlags = false, bool WantResult = true,
 188                    bool IsZExt = false);
 189   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
 190                    bool SetFlags = false, bool WantResult = true,
 191                    bool IsZExt = false);
 192   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 193                        unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
 194   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 195                        unsigned RHSReg, bool RHSIsKill,
 196                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
 197                        bool WantResult = true);
 198   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
 199                          const Value *RHS);
 200   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 201                             bool LHSIsKill, uint64_t Imm);
 202   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 203                             bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 204                             uint64_t ShiftImm);
 205   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 206   unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 207                       unsigned Op1, bool Op1IsKill);
 208   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 209                         unsigned Op1, bool Op1IsKill);
 210   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 211                         unsigned Op1, bool Op1IsKill);
 212   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 213                       unsigned Op1Reg, bool Op1IsKill);
 214   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 215                       uint64_t Imm, bool IsZExt = true);
 216   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 217                       unsigned Op1Reg, bool Op1IsKill);
 218   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 219                       uint64_t Imm, bool IsZExt = true);
 220   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 221                       unsigned Op1Reg, bool Op1IsKill);
 222   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 223                       uint64_t Imm, bool IsZExt = false);
 224
 225   unsigned materializeInt(const ConstantInt *CI, MVT VT);
 226   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
 227   unsigned materializeGV(const GlobalValue *GV);
 228
 229   // Call handling routines.
 230 private:
 231   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
 232   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
 233                        unsigned &NumBytes);
 234   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
 235
 236 public:
 237   // Backend specific FastISel code.
 238   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
 239   unsigned fastMaterializeConstant(const Constant *C) override;
 240   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
 241
 242   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
 243                          const TargetLibraryInfo *LibInfo)
 244       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
 245     Subtarget = &TM.getSubtarget<AArch64Subtarget>();
 246     Context = &FuncInfo.Fn->getContext();
 247   }
 248
 249   bool fastSelectInstruction(const Instruction *I) override;
 250
 251 #include "AArch64GenFastISel.inc"
 252 };
 253
 254 } // end anonymous namespace
 255
 256 #include "AArch64GenCallingConv.inc"
 257
 258 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
 259   if (CC == CallingConv::WebKit_JS)
 260     return CC_AArch64_WebKit_JS;
 261   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 262 }
 263
 264 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
 265   assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
 266          "Alloca should always return a pointer.");
 267
 268   // Don't handle dynamic allocas.
 269   if (!FuncInfo.StaticAllocaMap.count(AI))
 270     return 0;
 271
 272   DenseMap<const AllocaInst *, int>::iterator SI =
 273       FuncInfo.StaticAllocaMap.find(AI);
 274
 275   if (SI != FuncInfo.StaticAllocaMap.end()) {
 276     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 277     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 278             ResultReg)
 279         .addFrameIndex(SI->second)
 280         .addImm(0)
 281         .addImm(0);
 282     return ResultReg;
 283   }
 284
 285   return 0;
 286 }
 287
 288 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
 289   if (VT > MVT::i64)
 290     return 0;
 291
 292   if (!CI->isZero())
 293     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
 294
 295   // Create a copy from the zero register to materialize a "0" value.
 296   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
 297                                                    : &AArch64::GPR32RegClass;
 298   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
 299   unsigned ResultReg = createResultReg(RC);
 300   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
 301           ResultReg).addReg(ZeroReg, getKillRegState(true));
 302   return ResultReg;
 303 }
 304
 305 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
 306   // Positive zero (+0.0) has to be materialized with a fmov from the zero
 307   // register, because the immediate version of fmov cannot encode zero.
 308   if (CFP->isNullValue())
 309     return fastMaterializeFloatZero(CFP);
 310
 311   if (VT != MVT::f32 && VT != MVT::f64)
 312     return 0;
 313
 314   const APFloat Val = CFP->getValueAPF();
 315   bool Is64Bit = (VT == MVT::f64);
 316   // This checks to see if we can use FMOV instructions to materialize
 317   // a constant, otherwise we have to materialize via the constant pool.
 318   if (TLI.isFPImmLegal(Val, VT)) {
 319     int Imm =
 320         Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
 321     assert((Imm != -1) && "Cannot encode floating-point constant.");
 322     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
 323     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
 324   }
 325
 326   // Materialize via constant pool.  MachineConstantPool wants an explicit
 327   // alignment.
 328   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
 329   if (Align == 0)
 330     Align = DL.getTypeAllocSize(CFP->getType());
 331
 332   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 333   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 334   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 335           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
 336
 337   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
 338   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 339   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 340       .addReg(ADRPReg)
 341       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 342   return ResultReg;
 343 }
 344
 345 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
 346   // We can't handle thread-local variables quickly yet.
 347   if (GV->isThreadLocal())
 348     return 0;
 349
 350   // MachO still uses GOT for large code-model accesses, but ELF requires
 351   // movz/movk sequences, which FastISel doesn't handle yet.
 352   if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
 353     return 0;
 354
 355   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
 356
 357   EVT DestEVT = TLI.getValueType(GV->getType(), true);
 358   if (!DestEVT.isSimple())
 359     return 0;
 360
 361   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 362   unsigned ResultReg;
 363
 364   if (OpFlags & AArch64II::MO_GOT) {
 365     // ADRP + LDRX
 366     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 367             ADRPReg)
 368       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
 369
 370     ResultReg = createResultReg(&AArch64::GPR64RegClass);
 371     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
 372             ResultReg)
 373       .addReg(ADRPReg)
 374       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
 375                         AArch64II::MO_NC);
 376   } else if (OpFlags & AArch64II::MO_CONSTPOOL) {
 377     // We can't handle addresses loaded from a constant pool quickly yet.
 378     return 0;
 379   } else {
 380     // ADRP + ADDX
 381     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 382             ADRPReg)
 383       .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
 384
 385     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 386     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 387             ResultReg)
 388       .addReg(ADRPReg)
 389       .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
 390       .addImm(0);
 391   }
 392   return ResultReg;
 393 }
 394
 395 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
 396   EVT CEVT = TLI.getValueType(C->getType(), true);
 397
 398   // Only handle simple types.
 399   if (!CEVT.isSimple())
 400     return 0;
 401   MVT VT = CEVT.getSimpleVT();
 402
 403   if (const auto *CI = dyn_cast<ConstantInt>(C))
 404     return materializeInt(CI, VT);
 405   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 406     return materializeFP(CFP, VT);
 407   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
 408     return materializeGV(GV);
 409
 410   return 0;
 411 }
 412
 413 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
 414   assert(CFP->isNullValue() &&
 415          "Floating-point constant is not a positive zero.");
 416   MVT VT;
 417   if (!isTypeLegal(CFP->getType(), VT))
 418     return 0;
 419
 420   if (VT != MVT::f32 && VT != MVT::f64)
 421     return 0;
 422
 423   bool Is64Bit = (VT == MVT::f64);
 424   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
 425   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
 426   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
 427 }
 428
 429 /// \brief Check if the multiply is by a power-of-2 constant.
 430 static bool isMulPowOf2(const Value *I) {
 431   if (const auto *MI = dyn_cast<MulOperator>(I)) {
 432     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
 433       if (C->getValue().isPowerOf2())
 434         return true;
 435     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
 436       if (C->getValue().isPowerOf2())
 437         return true;
 438   }
 439   return false;
 440 }
 441
 442 // Computes the address to get to an object.
 443 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
 444 {
 445   const User *U = nullptr;
 446   unsigned Opcode = Instruction::UserOp1;
 447   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
 448     // Don't walk into other basic blocks unless the object is an alloca from
 449     // another block, otherwise it may not have a virtual register assigned.
 450     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
 451         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 452       Opcode = I->getOpcode();
 453       U = I;
 454     }
 455   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
 456     Opcode = C->getOpcode();
 457     U = C;
 458   }
 459
 460   if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
 461     if (Ty->getAddressSpace() > 255)
 462       // Fast instruction selection doesn't support the special
 463       // address spaces.
 464       return false;
 465
 466   switch (Opcode) {
 467   default:
 468     break;
 469   case Instruction::BitCast: {
 470     // Look through bitcasts.
 471     return computeAddress(U->getOperand(0), Addr, Ty);
 472   }
 473   case Instruction::IntToPtr: {
 474     // Look past no-op inttoptrs.
 475     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 476       return computeAddress(U->getOperand(0), Addr, Ty);
 477     break;
 478   }
 479   case Instruction::PtrToInt: {
 480     // Look past no-op ptrtoints.
 481     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
 482       return computeAddress(U->getOperand(0), Addr, Ty);
 483     break;
 484   }
 485   case Instruction::GetElementPtr: {
 486     Address SavedAddr = Addr;
 487     uint64_t TmpOffset = Addr.getOffset();
 488
 489     // Iterate through the GEP folding the constants into offsets where
 490     // we can.
 491     gep_type_iterator GTI = gep_type_begin(U);
 492     for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
 493          ++i, ++GTI) {
 494       const Value *Op = *i;
 495       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
 496         const StructLayout *SL = DL.getStructLayout(STy);
 497         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
 498         TmpOffset += SL->getElementOffset(Idx);
 499       } else {
 500         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
 501         for (;;) {
 502           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
 503             // Constant-offset addressing.
 504             TmpOffset += CI->getSExtValue() * S;
 505             break;
 506           }
 507           if (canFoldAddIntoGEP(U, Op)) {
 508             // A compatible add with a constant operand. Fold the constant.
 509             ConstantInt *CI =
 510                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
 511             TmpOffset += CI->getSExtValue() * S;
 512             // Iterate on the other operand.
 513             Op = cast<AddOperator>(Op)->getOperand(0);
 514             continue;
 515           }
 516           // Unsupported
 517           goto unsupported_gep;
 518         }
 519       }
 520     }
 521
 522     // Try to grab the base operand now.
 523     Addr.setOffset(TmpOffset);
 524     if (computeAddress(U->getOperand(0), Addr, Ty))
 525       return true;
 526
 527     // We failed, restore everything and try the other options.
 528     Addr = SavedAddr;
 529
 530   unsupported_gep:
 531     break;
 532   }
 533   case Instruction::Alloca: {
 534     const AllocaInst *AI = cast<AllocaInst>(Obj);
 535     DenseMap<const AllocaInst *, int>::iterator SI =
 536         FuncInfo.StaticAllocaMap.find(AI);
 537     if (SI != FuncInfo.StaticAllocaMap.end()) {
 538       Addr.setKind(Address::FrameIndexBase);
 539       Addr.setFI(SI->second);
 540       return true;
 541     }
 542     break;
 543   }
 544   case Instruction::Add: {
 545     // Adds of constants are common and easy enough.
 546     const Value *LHS = U->getOperand(0);
 547     const Value *RHS = U->getOperand(1);
 548
 549     if (isa<ConstantInt>(LHS))
 550       std::swap(LHS, RHS);
 551
 552     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 553       Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
 554       return computeAddress(LHS, Addr, Ty);
 555     }
 556
 557     Address Backup = Addr;
 558     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
 559       return true;
 560     Addr = Backup;
 561
 562     break;
 563   }
 564   case Instruction::Shl:
 565     if (Addr.getOffsetReg())
 566       break;
 567
 568     if (const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
 569       unsigned Val = CI->getZExtValue();
 570       if (Val < 1 || Val > 3)
 571         break;
 572
 573       uint64_t NumBytes = 0;
 574       if (Ty && Ty->isSized()) {
 575         uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 576         NumBytes = NumBits / 8;
 577         if (!isPowerOf2_64(NumBits))
 578           NumBytes = 0;
 579       }
 580
 581       if (NumBytes != (1ULL << Val))
 582         break;
 583
 584       Addr.setShift(Val);
 585       Addr.setExtendType(AArch64_AM::LSL);
 586
 587       if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
 588         if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 589           U = I;
 590
 591       if (const auto *ZE = dyn_cast<ZExtInst>(U))
 592         if (ZE->getOperand(0)->getType()->isIntegerTy(32))
 593           Addr.setExtendType(AArch64_AM::UXTW);
 594
 595       if (const auto *SE = dyn_cast<SExtInst>(U))
 596         if (SE->getOperand(0)->getType()->isIntegerTy(32))
 597           Addr.setExtendType(AArch64_AM::SXTW);
 598
 599       if (const auto *AI = dyn_cast<BinaryOperator>(U))
 600         if (AI->getOpcode() == Instruction::And) {
 601           const Value *LHS = AI->getOperand(0);
 602           const Value *RHS = AI->getOperand(1);
 603
 604           if (const auto *C = dyn_cast<ConstantInt>(LHS))
 605             if (C->getValue() == 0xffffffff)
 606               std::swap(LHS, RHS);
 607
 608           if (const auto *C = cast<ConstantInt>(RHS))
 609             if (C->getValue() == 0xffffffff) {
 610               Addr.setExtendType(AArch64_AM::UXTW);
 611               unsigned Reg = getRegForValue(LHS);
 612               if (!Reg)
 613                 return false;
 614               bool RegIsKill = hasTrivialKill(LHS);
 615               Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
 616                                                AArch64::sub_32);
 617               Addr.setOffsetReg(Reg);
 618               return true;
 619             }
 620         }
 621
 622       unsigned Reg = getRegForValue(U->getOperand(0));
 623       if (!Reg)
 624         return false;
 625       Addr.setOffsetReg(Reg);
 626       return true;
 627     }
 628     break;
 629   case Instruction::Mul: {
 630     if (Addr.getOffsetReg())
 631       break;
 632
 633     if (!isMulPowOf2(U))
 634       break;
 635
 636     const Value *LHS = U->getOperand(0);
 637     const Value *RHS = U->getOperand(1);
 638
 639     // Canonicalize power-of-2 value to the RHS.
 640     if (const auto *C = dyn_cast<ConstantInt>(LHS))
 641       if (C->getValue().isPowerOf2())
 642         std::swap(LHS, RHS);
 643
 644     assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
 645     const auto *C = cast<ConstantInt>(RHS);
 646     unsigned Val = C->getValue().logBase2();
 647     if (Val < 1 || Val > 3)
 648       break;
 649
 650     uint64_t NumBytes = 0;
 651     if (Ty && Ty->isSized()) {
 652       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 653       NumBytes = NumBits / 8;
 654       if (!isPowerOf2_64(NumBits))
 655         NumBytes = 0;
 656     }
 657
 658     if (NumBytes != (1ULL << Val))
 659       break;
 660
 661     Addr.setShift(Val);
 662     Addr.setExtendType(AArch64_AM::LSL);
 663
 664     if (const auto *I = dyn_cast<Instruction>(LHS))
 665       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 666         U = I;
 667
 668     if (const auto *ZE = dyn_cast<ZExtInst>(U))
 669       if (ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 670         Addr.setExtendType(AArch64_AM::UXTW);
 671         LHS = U->getOperand(0);
 672       }
 673
 674     if (const auto *SE = dyn_cast<SExtInst>(U))
 675       if (SE->getOperand(0)->getType()->isIntegerTy(32)) {
 676         Addr.setExtendType(AArch64_AM::SXTW);
 677         LHS = U->getOperand(0);
 678       }
 679
 680     unsigned Reg = getRegForValue(LHS);
 681     if (!Reg)
 682       return false;
 683     Addr.setOffsetReg(Reg);
 684     return true;
 685   }
 686   case Instruction::And: {
 687     if (Addr.getOffsetReg())
 688       break;
 689
 690     if (DL.getTypeSizeInBits(Ty) != 8)
 691       break;
 692
 693     const Value *LHS = U->getOperand(0);
 694     const Value *RHS = U->getOperand(1);
 695
 696     if (const auto *C = dyn_cast<ConstantInt>(LHS))
 697       if (C->getValue() == 0xffffffff)
 698         std::swap(LHS, RHS);
 699
 700     if (const auto *C = cast<ConstantInt>(RHS))
 701       if (C->getValue() == 0xffffffff) {
 702         Addr.setShift(0);
 703         Addr.setExtendType(AArch64_AM::LSL);
 704         Addr.setExtendType(AArch64_AM::UXTW);
 705
 706         unsigned Reg = getRegForValue(LHS);
 707         if (!Reg)
 708           return false;
 709         bool RegIsKill = hasTrivialKill(LHS);
 710         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
 711                                          AArch64::sub_32);
 712         Addr.setOffsetReg(Reg);
 713         return true;
 714       }
 715     break;
 716   }
 717   } // end switch
 718
 719   if (Addr.getReg()) {
 720     if (!Addr.getOffsetReg()) {
 721       unsigned Reg = getRegForValue(Obj);
 722       if (!Reg)
 723         return false;
 724       Addr.setOffsetReg(Reg);
 725       return true;
 726     }
 727     return false;
 728   }
 729
 730   unsigned Reg = getRegForValue(Obj);
 731   if (!Reg)
 732     return false;
 733   Addr.setReg(Reg);
 734   return true;
 735 }
 736
 737 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
 738   const User *U = nullptr;
 739   unsigned Opcode = Instruction::UserOp1;
 740   bool InMBB = true;
 741
 742   if (const auto *I = dyn_cast<Instruction>(V)) {
 743     Opcode = I->getOpcode();
 744     U = I;
 745     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
 746   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
 747     Opcode = C->getOpcode();
 748     U = C;
 749   }
 750
 751   switch (Opcode) {
 752   default: break;
 753   case Instruction::BitCast:
 754     // Look past bitcasts if its operand is in the same BB.
 755     if (InMBB)
 756       return computeCallAddress(U->getOperand(0), Addr);
 757     break;
 758   case Instruction::IntToPtr:
 759     // Look past no-op inttoptrs if its operand is in the same BB.
 760     if (InMBB &&
 761         TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 762       return computeCallAddress(U->getOperand(0), Addr);
 763     break;
 764   case Instruction::PtrToInt:
 765     // Look past no-op ptrtoints if its operand is in the same BB.
 766     if (InMBB &&
 767         TLI.getValueType(U->getType()) == TLI.getPointerTy())
 768       return computeCallAddress(U->getOperand(0), Addr);
 769     break;
 770   }
 771
 772   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
 773     Addr.setGlobalValue(GV);
 774     return true;
 775   }
 776
 777   // If all else fails, try to materialize the value in a register.
 778   if (!Addr.getGlobalValue()) {
 779     Addr.setReg(getRegForValue(V));
 780     return Addr.getReg() != 0;
 781   }
 782
 783   return false;
 784 }
 785
 786
 787 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
 788   EVT evt = TLI.getValueType(Ty, true);
 789
 790   // Only handle simple types.
 791   if (evt == MVT::Other || !evt.isSimple())
 792     return false;
 793   VT = evt.getSimpleVT();
 794
 795   // This is a legal type, but it's not something we handle in fast-isel.
 796   if (VT == MVT::f128)
 797     return false;
 798
 799   // Handle all other legal types, i.e. a register that will directly hold this
 800   // value.
 801   return TLI.isTypeLegal(VT);
 802 }
 803
 804 /// \brief Determine if the value type is supported by FastISel.
 805 ///
 806 /// FastISel for AArch64 can handle more value types than are legal. This adds
 807 /// simple value type such as i1, i8, and i16.
 808 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
 809   if (Ty->isVectorTy() && !IsVectorAllowed)
 810     return false;
 811
 812   if (isTypeLegal(Ty, VT))
 813     return true;
 814
 815   // If this is a type than can be sign or zero-extended to a basic operation
 816   // go ahead and accept it now.
 817   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
 818     return true;
 819
 820   return false;
 821 }
 822
 823 bool AArch64FastISel::isValueAvailable(const Value *V) const {
 824   if (!isa<Instruction>(V))
 825     return true;
 826
 827   const auto *I = cast<Instruction>(V);
 828   if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 829     return true;
 830
 831   return false;
 832 }
 833
/// \brief Rewrite \p Addr in place so that it can be used by a single
/// load/store of type \p VT, emitting helper instructions when necessary.
///
/// Returns false if \p VT is not one of i1/i8/i16/i32/i64/f32/f64, or if a
/// helper instruction could not be emitted. Returns true otherwise.
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  // Access size in bytes; used to decide whether the offset is aligned and to
  // scale it.
  unsigned ScaleFactor;
  switch (VT.SimpleTy) {
  default: return false;
  case MVT::i1:  // fall-through
  case MVT::i8:  ScaleFactor = 1; break;
  case MVT::i16: ScaleFactor = 2; break;
  case MVT::i32: // fall-through
  case MVT::f32: ScaleFactor = 4; break;
  case MVT::i64: // fall-through
  case MVT::f64: ScaleFactor = 8; break;
  }

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  // A negative or unaligned offset has to fit into a signed 9-bit immediate;
  // a positive, aligned offset has to fit into an unsigned 12-bit immediate
  // after scaling. Anything else is computed into a register below.
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() &&
      Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
  if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) {
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
      .addFrameIndex(Addr.getFI())
      .addImm(0)
      .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  // Combine the base register (if any) with the extended/shifted offset
  // register into a single new base register.
  if (RegisterOffsetNeedsLowering) {
    unsigned ResultReg = 0;
    if (Addr.getReg()) {
      // Base + offset register: use an add with an extended or shifted
      // register operand, depending on the extend type.
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
          Addr.getExtendType() == AArch64_AM::UXTW   )
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                  /*TODO:IsKill=*/false, Addr.getExtendType(),
                                  Addr.getShift());
      else
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                  /*TODO:IsKill=*/false, AArch64_AM::LSL,
                                  Addr.getShift());
    } else {
      // No base register: the shifted (and possibly extended) offset register
      // becomes the base.
      if (Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
                               /*IsZExt=*/true);
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
                               /*IsZExt=*/false);
      else
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift());
    }
    if (!ResultReg)
      return false;

    // The offset register has been folded into the base; clear it.
    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);
    Addr.setShift(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (ImmediateOffsetNeedsLowering) {
    unsigned ResultReg;
    if (Addr.getReg()) {
      // Try to fold the immediate into the add instruction.
      if (Offset < 0)
        ResultReg = emitAddSub_ri(/*UseAdd=*/false, MVT::i64, Addr.getReg(),
                                  /*IsKill=*/false, -Offset);
      else
        ResultReg = emitAddSub_ri(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*IsKill=*/false, Offset);
      // The immediate did not fit; materialize it and use a register add.
      if (!ResultReg) {
        unsigned ImmReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
        ResultReg = emitAddSub_rr(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*IsKill=*/false, ImmReg, /*IsKill=*/true);
      }
    } else
      // No base register at all: the offset itself is the address.
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);

    if (!ResultReg)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}
 944
 945 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
 946                                            const MachineInstrBuilder &MIB,
 947                                            unsigned Flags,
 948                                            unsigned ScaleFactor,
 949                                            MachineMemOperand *MMO) {
 950   int64_t Offset = Addr.getOffset() / ScaleFactor;
 951   // Frame base works a bit differently. Handle it separately.
 952   if (Addr.isFIBase()) {
 953     int FI = Addr.getFI();
 954     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
 955     // and alignment should be based on the VT.
 956     MMO = FuncInfo.MF->getMachineMemOperand(
 957       MachinePointerInfo::getFixedStack(FI, Offset), Flags,
 958       MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
 959     // Now add the rest of the operands.
 960     MIB.addFrameIndex(FI).addImm(Offset);
 961   } else {
 962     assert(Addr.isRegBase() && "Unexpected address kind.");
 963     const MCInstrDesc &II = MIB->getDesc();
 964     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
 965     Addr.setReg(
 966       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
 967     Addr.setOffsetReg(
 968       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
 969     if (Addr.getOffsetReg()) {
 970       assert(Addr.getOffset() == 0 && "Unexpected offset");
 971       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
 972                       Addr.getExtendType() == AArch64_AM::SXTX;
 973       MIB.addReg(Addr.getReg());
 974       MIB.addReg(Addr.getOffsetReg());
 975       MIB.addImm(IsSigned);
 976       MIB.addImm(Addr.getShift() != 0);
 977     } else {
 978       MIB.addReg(Addr.getReg());
 979       MIB.addImm(Offset);
 980     }
 981   }
 982
 983   if (MMO)
 984     MIB.addMemOperand(MMO);
 985 }
 986
/// \brief Emit an integer add (\p UseAdd) or subtract of \p LHS and \p RHS.
///
/// i1, i8 and i16 operations are carried out as i32; the LHS is extended
/// explicitly (zero-extended if \p IsZExt, sign-extended otherwise). The
/// following forms are tried in order and the first that applies is used:
/// immediate, extended register (i8/i16 only), shifted register for a multiply
/// by a power of two, shifted register for a shift by a constant, and finally
/// plain register-register.
///
/// \p SetFlags and \p WantResult are forwarded to the emitAddSub_* helpers.
/// Returns the register produced by the helper, or 0 if \p RetVT is not an
/// integer type from i1 to i64 or an operand could not be materialized.
unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult,  bool IsZExt) {
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
  bool NeedExtend = false;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    // i1 needs extending but has no in-instruction extend type.
    NeedExtend = true;
    break;
  case MVT::i8:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    break;
  case MVT::i16:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    break;
  case MVT::i32:  // fall-through
  case MVT::i64:
    break;
  }
  // Remember the original type for the explicit extends; the operation itself
  // is performed on at least i32.
  MVT SrcVT = RetVT;
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);

  // The canonicalizations below swap the operands and are therefore only
  // applied to adds.

  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl  ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr   )
          std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  // Try the immediate form first. A negative constant is handled by negating
  // it and flipping add <-> sub.
  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
                                SetFlags, WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
                                WantResult);
  }
  if (ResultReg)
    return ResultReg;

  // Only extend the RHS within the instruction if there is a valid extend type.
  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(RHS)) {
    // A left shift by less than 4 can be folded into the extended-register
    // operand as well.
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                               RHSIsKill, ExtendType, C->getZExtValue(),
                               SetFlags, WantResult);
        }
    unsigned RHSReg = getRegForValue(RHS);
    if (!RHSReg)
      return 0;
    bool RHSIsKill = hasTrivialKill(RHS);
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                         ExtendType, 0, SetFlags, WantResult);
  }

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      // Make sure the power-of-two constant ends up in MulRHS.
      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      // Multiplying by 2^n is a left shift by n.
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                           AArch64_AM::LSL, ShiftVal, SetFlags, WantResult);
    }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
        switch (SI->getOpcode()) {
        default: break;
        case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
        }
        uint64_t ShiftVal = C->getZExtValue();
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                               RHSIsKill, ShiftType, ShiftVal, SetFlags,
                               WantResult);
        }
      }
    }

  // Nothing could be folded; fall back to the plain register-register form.
  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  if (NeedExtend)
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                       SetFlags, WantResult);
}
1129
1130 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1131                                         bool LHSIsKill, unsigned RHSReg,
1132                                         bool RHSIsKill, bool SetFlags,
1133                                         bool WantResult) {
1134   assert(LHSReg && RHSReg && "Invalid register number.");
1135
1136   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1137     return 0;
1138
1139   static const unsigned OpcTable[2][2][2] = {
1140     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1141       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1142     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1143       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1144   };
1145   bool Is64Bit = RetVT == MVT::i64;
1146   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1147   const TargetRegisterClass *RC =
1148       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1149   unsigned ResultReg;
1150   if (WantResult)
1151     ResultReg = createResultReg(RC);
1152   else
1153     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1154
1155   const MCInstrDesc &II = TII.get(Opc);
1156   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1157   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1158   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1159       .addReg(LHSReg, getKillRegState(LHSIsKill))
1160       .addReg(RHSReg, getKillRegState(RHSIsKill));
1161   return ResultReg;
1162 }
1163
1164 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1165                                         bool LHSIsKill, uint64_t Imm,
1166                                         bool SetFlags, bool WantResult) {
1167   assert(LHSReg && "Invalid register number.");
1168
1169   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1170     return 0;
1171
1172   unsigned ShiftImm;
1173   if (isUInt<12>(Imm))
1174     ShiftImm = 0;
1175   else if ((Imm & 0xfff000) == Imm) {
1176     ShiftImm = 12;
1177     Imm >>= 12;
1178   } else
1179     return 0;
1180
1181   static const unsigned OpcTable[2][2][2] = {
1182     { { AArch64::SUBWri,  AArch64::SUBXri  },
1183       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1184     { { AArch64::SUBSWri, AArch64::SUBSXri },
1185       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1186   };
1187   bool Is64Bit = RetVT == MVT::i64;
1188   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1189   const TargetRegisterClass *RC;
1190   if (SetFlags)
1191     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1192   else
1193     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1194   unsigned ResultReg;
1195   if (WantResult)
1196     ResultReg = createResultReg(RC);
1197   else
1198     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1199
1200   const MCInstrDesc &II = TII.get(Opc);
1201   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1202   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1203       .addReg(LHSReg, getKillRegState(LHSIsKill))
1204       .addImm(Imm)
1205       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1206   return ResultReg;
1207 }
1208
1209 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1210                                         bool LHSIsKill, unsigned RHSReg,
1211                                         bool RHSIsKill,
1212                                         AArch64_AM::ShiftExtendType ShiftType,
1213                                         uint64_t ShiftImm, bool SetFlags,
1214                                         bool WantResult) {
1215   assert(LHSReg && RHSReg && "Invalid register number.");
1216
1217   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1218     return 0;
1219
1220   static const unsigned OpcTable[2][2][2] = {
1221     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1222       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1223     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1224       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1225   };
1226   bool Is64Bit = RetVT == MVT::i64;
1227   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1228   const TargetRegisterClass *RC =
1229       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1230   unsigned ResultReg;
1231   if (WantResult)
1232     ResultReg = createResultReg(RC);
1233   else
1234     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1235
1236   const MCInstrDesc &II = TII.get(Opc);
1237   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1238   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1239   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1240       .addReg(LHSReg, getKillRegState(LHSIsKill))
1241       .addReg(RHSReg, getKillRegState(RHSIsKill))
1242       .addImm(getShifterImm(ShiftType, ShiftImm));
1243   return ResultReg;
1244 }
1245
1246 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1247                                         bool LHSIsKill, unsigned RHSReg,
1248                                         bool RHSIsKill,
1249                                         AArch64_AM::ShiftExtendType ExtType,
1250                                         uint64_t ShiftImm, bool SetFlags,
1251                                         bool WantResult) {
1252   assert(LHSReg && RHSReg && "Invalid register number.");
1253
1254   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1255     return 0;
1256
1257   static const unsigned OpcTable[2][2][2] = {
1258     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1259       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1260     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1261       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1262   };
1263   bool Is64Bit = RetVT == MVT::i64;
1264   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1265   const TargetRegisterClass *RC = nullptr;
1266   if (SetFlags)
1267     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1268   else
1269     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1270   unsigned ResultReg;
1271   if (WantResult)
1272     ResultReg = createResultReg(RC);
1273   else
1274     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1275
1276   const MCInstrDesc &II = TII.get(Opc);
1277   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1278   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1279   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1280       .addReg(LHSReg, getKillRegState(LHSIsKill))
1281       .addReg(RHSReg, getKillRegState(RHSIsKill))
1282       .addImm(getArithExtendImm(ExtType, ShiftImm));
1283   return ResultReg;
1284 }
1285
1286 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1287   Type *Ty = LHS->getType();
1288   EVT EVT = TLI.getValueType(Ty, true);
1289   if (!EVT.isSimple())
1290     return false;
1291   MVT VT = EVT.getSimpleVT();
1292
1293   switch (VT.SimpleTy) {
1294   default:
1295     return false;
1296   case MVT::i1:
1297   case MVT::i8:
1298   case MVT::i16:
1299   case MVT::i32:
1300   case MVT::i64:
1301     return emitICmp(VT, LHS, RHS, IsZExt);
1302   case MVT::f32:
1303   case MVT::f64:
1304     return emitFCmp(VT, LHS, RHS);
1305   }
1306 }
1307
1308 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1309                                bool IsZExt) {
1310   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1311                  IsZExt) != 0;
1312 }
1313
1314 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1315                                   uint64_t Imm) {
1316   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1317                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1318 }
1319
1320 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1321   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1322     return false;
1323
1324   // Check to see if the 2nd operand is a constant that we can encode directly
1325   // in the compare.
1326   bool UseImm = false;
1327   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1328     if (CFP->isZero() && !CFP->isNegative())
1329       UseImm = true;
1330
1331   unsigned LHSReg = getRegForValue(LHS);
1332   if (!LHSReg)
1333     return false;
1334   bool LHSIsKill = hasTrivialKill(LHS);
1335
1336   if (UseImm) {
1337     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1338     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1339         .addReg(LHSReg, getKillRegState(LHSIsKill));
1340     return true;
1341   }
1342
1343   unsigned RHSReg = getRegForValue(RHS);
1344   if (!RHSReg)
1345     return false;
1346   bool RHSIsKill = hasTrivialKill(RHS);
1347
1348   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1349   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1350       .addReg(LHSReg, getKillRegState(LHSIsKill))
1351       .addReg(RHSReg, getKillRegState(RHSIsKill));
1352   return true;
1353 }
1354
1355 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1356                                   bool SetFlags, bool WantResult, bool IsZExt) {
1357   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1358                     IsZExt);
1359 }
1360
1361 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1362                                   bool SetFlags, bool WantResult, bool IsZExt) {
1363   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1364                     IsZExt);
1365 }
1366
1367 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1368                                       bool LHSIsKill, unsigned RHSReg,
1369                                       bool RHSIsKill, bool WantResult) {
1370   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1371                        RHSIsKill, /*SetFlags=*/true, WantResult);
1372 }
1373
1374 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1375                                       bool LHSIsKill, unsigned RHSReg,
1376                                       bool RHSIsKill,
1377                                       AArch64_AM::ShiftExtendType ShiftType,
1378                                       uint64_t ShiftImm, bool WantResult) {
1379   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1380                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1381                        WantResult);
1382 }
1383
/// \brief Emit the logical operation \p ISDOpc on \p LHS and \p RHS.
///
/// The operands are first canonicalized so that an immediate, a multiply by a
/// power of two, or a left shift by a constant ends up on the RHS. The
/// following forms are then tried in order: immediate, shifted register for a
/// multiply by a power of two, shifted register for a left shift by a
/// constant, and finally plain register-register. On the register-register
/// path an i8/i16 result is masked to its width afterwards.
///
/// Returns the result register, or 0 if an operand could not be materialized
/// or the chosen helper failed.
unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  // Try the immediate form; if it fails (returns 0) fall through to the
  // register forms.
  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = C->getZExtValue();
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
  }
  if (ResultReg)
    return ResultReg;

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      // Make sure the power-of-two constant ends up in MulRHS.
      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      // Multiplying by 2^n is a left shift by n.
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();

      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                              RHSIsKill, ShiftVal);
    }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        uint64_t ShiftVal = C->getZExtValue();
        unsigned RHSReg = getRegForValue(SI->getOperand(0));
        if (!RHSReg)
          return 0;
        bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
        return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                                RHSIsKill, ShiftVal);
      }

  // Nothing could be folded; use the plain register-register form.
  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  // The operation is performed on at least i32.
  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
  // Mask an i8/i16 result back down to its width.
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  }
  return ResultReg;
}
1461
1462 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1463                                            unsigned LHSReg, bool LHSIsKill,
1464                                            uint64_t Imm) {
1465   assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1466          "ISD nodes are not consecutive!");
1467   static const unsigned OpcTable[3][2] = {
1468     { AArch64::ANDWri, AArch64::ANDXri },
1469     { AArch64::ORRWri, AArch64::ORRXri },
1470     { AArch64::EORWri, AArch64::EORXri }
1471   };
1472   const TargetRegisterClass *RC;
1473   unsigned Opc;
1474   unsigned RegSize;
1475   switch (RetVT.SimpleTy) {
1476   default:
1477     return 0;
1478   case MVT::i1:
1479   case MVT::i8:
1480   case MVT::i16:
1481   case MVT::i32: {
1482     unsigned Idx = ISDOpc - ISD::AND;
1483     Opc = OpcTable[Idx][0];
1484     RC = &AArch64::GPR32spRegClass;
1485     RegSize = 32;
1486     break;
1487   }
1488   case MVT::i64:
1489     Opc = OpcTable[ISDOpc - ISD::AND][1];
1490     RC = &AArch64::GPR64spRegClass;
1491     RegSize = 64;
1492     break;
1493   }
1494
1495   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1496     return 0;
1497
1498   unsigned ResultReg =
1499       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1500                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1501   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1502     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1503     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1504   }
1505   return ResultReg;
1506 }
1507
1508 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1509                                            unsigned LHSReg, bool LHSIsKill,
1510                                            unsigned RHSReg, bool RHSIsKill,
1511                                            uint64_t ShiftImm) {
1512   assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1513          "ISD nodes are not consecutive!");
1514   static const unsigned OpcTable[3][2] = {
1515     { AArch64::ANDWrs, AArch64::ANDXrs },
1516     { AArch64::ORRWrs, AArch64::ORRXrs },
1517     { AArch64::EORWrs, AArch64::EORXrs }
1518   };
1519   const TargetRegisterClass *RC;
1520   unsigned Opc;
1521   switch (RetVT.SimpleTy) {
1522   default:
1523     return 0;
1524   case MVT::i1:
1525   case MVT::i8:
1526   case MVT::i16:
1527   case MVT::i32:
1528     Opc = OpcTable[ISDOpc - ISD::AND][0];
1529     RC = &AArch64::GPR32RegClass;
1530     break;
1531   case MVT::i64:
1532     Opc = OpcTable[ISDOpc - ISD::AND][1];
1533     RC = &AArch64::GPR64RegClass;
1534     break;
1535   }
1536   unsigned ResultReg =
1537       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1538                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1539   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1540     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1541     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1542   }
1543   return ResultReg;
1544 }
1545
1546 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1547                                      uint64_t Imm) {
1548   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1549 }
1550
1551 bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
1552                                MachineMemOperand *MMO) {
1553   // Simplify this down to something we can handle.
1554   if (!simplifyAddress(Addr, VT))
1555     return false;
1556
1557   unsigned ScaleFactor;
1558   switch (VT.SimpleTy) {
1559   default: llvm_unreachable("Unexpected value type.");
1560   case MVT::i1:  // fall-through
1561   case MVT::i8:  ScaleFactor = 1; break;
1562   case MVT::i16: ScaleFactor = 2; break;
1563   case MVT::i32: // fall-through
1564   case MVT::f32: ScaleFactor = 4; break;
1565   case MVT::i64: // fall-through
1566   case MVT::f64: ScaleFactor = 8; break;
1567   }
1568
1569   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1570   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1571   bool UseScaled = true;
1572   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1573     UseScaled = false;
1574     ScaleFactor = 1;
1575   }
1576
1577   static const unsigned OpcTable[4][6] = {
1578     { AArch64::LDURBBi,  AArch64::LDURHHi,  AArch64::LDURWi,  AArch64::LDURXi,
1579       AArch64::LDURSi,   AArch64::LDURDi },
1580     { AArch64::LDRBBui,  AArch64::LDRHHui,  AArch64::LDRWui,  AArch64::LDRXui,
1581       AArch64::LDRSui,   AArch64::LDRDui },
1582     { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX,
1583       AArch64::LDRSroX,  AArch64::LDRDroX },
1584     { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW,
1585       AArch64::LDRSroW,  AArch64::LDRDroW }
1586   };
1587
1588   unsigned Opc;
1589   const TargetRegisterClass *RC;
1590   bool VTIsi1 = false;
1591   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1592                       Addr.getOffsetReg();
1593   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1594   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1595       Addr.getExtendType() == AArch64_AM::SXTW)
1596     Idx++;
1597
1598   switch (VT.SimpleTy) {
1599   default: llvm_unreachable("Unexpected value type.");
1600   case MVT::i1:  VTIsi1 = true; // Intentional fall-through.
1601   case MVT::i8:  Opc = OpcTable[Idx][0]; RC = &AArch64::GPR32RegClass; break;
1602   case MVT::i16: Opc = OpcTable[Idx][1]; RC = &AArch64::GPR32RegClass; break;
1603   case MVT::i32: Opc = OpcTable[Idx][2]; RC = &AArch64::GPR32RegClass; break;
1604   case MVT::i64: Opc = OpcTable[Idx][3]; RC = &AArch64::GPR64RegClass; break;
1605   case MVT::f32: Opc = OpcTable[Idx][4]; RC = &AArch64::FPR32RegClass; break;
1606   case MVT::f64: Opc = OpcTable[Idx][5]; RC = &AArch64::FPR64RegClass; break;
1607   }
1608
1609   // Create the base instruction, then add the operands.
1610   ResultReg = createResultReg(RC);
1611   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1612                                     TII.get(Opc), ResultReg);
1613   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1614
1615   // Loading an i1 requires special handling.
1616   if (VTIsi1) {
1617     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1618     assert(ANDReg && "Unexpected AND instruction emission failure.");
1619     ResultReg = ANDReg;
1620   }
1621   return true;
1622 }
1623
1624 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1625   MVT VT;
1626   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1627     return false;
1628
1629   if (VT.isVector())
1630     return selectOperator(I, I->getOpcode());
1631
1632   unsigned ResultReg;
1633   switch (I->getOpcode()) {
1634   default:
1635     llvm_unreachable("Unexpected instruction.");
1636   case Instruction::Add:
1637     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1638     break;
1639   case Instruction::Sub:
1640     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1641     break;
1642   }
1643   if (!ResultReg)
1644     return false;
1645
1646   updateValueMap(I, ResultReg);
1647   return true;
1648 }
1649
1650 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1651   MVT VT;
1652   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1653     return false;
1654
1655   if (VT.isVector())
1656     return selectOperator(I, I->getOpcode());
1657
1658   unsigned ResultReg;
1659   switch (I->getOpcode()) {
1660   default:
1661     llvm_unreachable("Unexpected instruction.");
1662   case Instruction::And:
1663     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1664     break;
1665   case Instruction::Or:
1666     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1667     break;
1668   case Instruction::Xor:
1669     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1670     break;
1671   }
1672   if (!ResultReg)
1673     return false;
1674
1675   updateValueMap(I, ResultReg);
1676   return true;
1677 }
1678
1679 bool AArch64FastISel::selectLoad(const Instruction *I) {
1680   MVT VT;
1681   // Verify we have a legal type before going any further.  Currently, we handle
1682   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1683   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1684   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1685       cast<LoadInst>(I)->isAtomic())
1686     return false;
1687
1688   // See if we can handle this address.
1689   Address Addr;
1690   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1691     return false;
1692
1693   unsigned ResultReg;
1694   if (!emitLoad(VT, ResultReg, Addr, createMachineMemOperandFor(I)))
1695     return false;
1696
1697   updateValueMap(I, ResultReg);
1698   return true;
1699 }
1700
1701 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
1702                                 MachineMemOperand *MMO) {
1703   // Simplify this down to something we can handle.
1704   if (!simplifyAddress(Addr, VT))
1705     return false;
1706
1707   unsigned ScaleFactor;
1708   switch (VT.SimpleTy) {
1709   default: llvm_unreachable("Unexpected value type.");
1710   case MVT::i1:  // fall-through
1711   case MVT::i8:  ScaleFactor = 1; break;
1712   case MVT::i16: ScaleFactor = 2; break;
1713   case MVT::i32: // fall-through
1714   case MVT::f32: ScaleFactor = 4; break;
1715   case MVT::i64: // fall-through
1716   case MVT::f64: ScaleFactor = 8; break;
1717   }
1718
1719   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1720   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1721   bool UseScaled = true;
1722   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1723     UseScaled = false;
1724     ScaleFactor = 1;
1725   }
1726
1727
1728   static const unsigned OpcTable[4][6] = {
1729     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
1730       AArch64::STURSi,   AArch64::STURDi },
1731     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
1732       AArch64::STRSui,   AArch64::STRDui },
1733     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
1734       AArch64::STRSroX,  AArch64::STRDroX },
1735     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
1736       AArch64::STRSroW,  AArch64::STRDroW }
1737
1738   };
1739
1740   unsigned Opc;
1741   bool VTIsi1 = false;
1742   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1743                       Addr.getOffsetReg();
1744   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1745   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1746       Addr.getExtendType() == AArch64_AM::SXTW)
1747     Idx++;
1748
1749   switch (VT.SimpleTy) {
1750   default: llvm_unreachable("Unexpected value type.");
1751   case MVT::i1:  VTIsi1 = true;
1752   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
1753   case MVT::i16: Opc = OpcTable[Idx][1]; break;
1754   case MVT::i32: Opc = OpcTable[Idx][2]; break;
1755   case MVT::i64: Opc = OpcTable[Idx][3]; break;
1756   case MVT::f32: Opc = OpcTable[Idx][4]; break;
1757   case MVT::f64: Opc = OpcTable[Idx][5]; break;
1758   }
1759
1760   // Storing an i1 requires special handling.
1761   if (VTIsi1 && SrcReg != AArch64::WZR) {
1762     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
1763     assert(ANDReg && "Unexpected AND instruction emission failure.");
1764     SrcReg = ANDReg;
1765   }
1766   // Create the base instruction, then add the operands.
1767   const MCInstrDesc &II = TII.get(Opc);
1768   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
1769   MachineInstrBuilder MIB =
1770       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
1771   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
1772
1773   return true;
1774 }
1775
1776 bool AArch64FastISel::selectStore(const Instruction *I) {
1777   MVT VT;
1778   const Value *Op0 = I->getOperand(0);
1779   // Verify we have a legal type before going any further.  Currently, we handle
1780   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1781   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1782   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) ||
1783       cast<StoreInst>(I)->isAtomic())
1784     return false;
1785
1786   // Get the value to be stored into a register. Use the zero register directly
1787   // when possible to avoid an unnecessary copy and a wasted register.
1788   unsigned SrcReg = 0;
1789   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
1790     if (CI->isZero())
1791       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1792   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
1793     if (CF->isZero() && !CF->isNegative()) {
1794       VT = MVT::getIntegerVT(VT.getSizeInBits());
1795       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1796     }
1797   }
1798
1799   if (!SrcReg)
1800     SrcReg = getRegForValue(Op0);
1801
1802   if (!SrcReg)
1803     return false;
1804
1805   // See if we can handle this address.
1806   Address Addr;
1807   if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
1808     return false;
1809
1810   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
1811     return false;
1812   return true;
1813 }
1814
1815 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
1816   switch (Pred) {
1817   case CmpInst::FCMP_ONE:
1818   case CmpInst::FCMP_UEQ:
1819   default:
1820     // AL is our "false" for now. The other two need more compares.
1821     return AArch64CC::AL;
1822   case CmpInst::ICMP_EQ:
1823   case CmpInst::FCMP_OEQ:
1824     return AArch64CC::EQ;
1825   case CmpInst::ICMP_SGT:
1826   case CmpInst::FCMP_OGT:
1827     return AArch64CC::GT;
1828   case CmpInst::ICMP_SGE:
1829   case CmpInst::FCMP_OGE:
1830     return AArch64CC::GE;
1831   case CmpInst::ICMP_UGT:
1832   case CmpInst::FCMP_UGT:
1833     return AArch64CC::HI;
1834   case CmpInst::FCMP_OLT:
1835     return AArch64CC::MI;
1836   case CmpInst::ICMP_ULE:
1837   case CmpInst::FCMP_OLE:
1838     return AArch64CC::LS;
1839   case CmpInst::FCMP_ORD:
1840     return AArch64CC::VC;
1841   case CmpInst::FCMP_UNO:
1842     return AArch64CC::VS;
1843   case CmpInst::FCMP_UGE:
1844     return AArch64CC::PL;
1845   case CmpInst::ICMP_SLT:
1846   case CmpInst::FCMP_ULT:
1847     return AArch64CC::LT;
1848   case CmpInst::ICMP_SLE:
1849   case CmpInst::FCMP_ULE:
1850     return AArch64CC::LE;
1851   case CmpInst::FCMP_UNE:
1852   case CmpInst::ICMP_NE:
1853     return AArch64CC::NE;
1854   case CmpInst::ICMP_UGE:
1855     return AArch64CC::HS;
1856   case CmpInst::ICMP_ULT:
1857     return AArch64CC::LO;
1858   }
1859 }
1860
1861 /// \brief Check if the comparison against zero and the following branch can be
1862 /// folded into a single instruction (CBZ or CBNZ).
1863 static bool canFoldZeroCheckIntoBranch(const CmpInst *CI) {
1864   CmpInst::Predicate Predicate = CI->getPredicate();
1865   if ((Predicate != CmpInst::ICMP_EQ) && (Predicate != CmpInst::ICMP_NE))
1866     return false;
1867
1868   Type *Ty = CI->getOperand(0)->getType();
1869   if (!Ty->isIntegerTy())
1870     return false;
1871
1872   unsigned BW = cast<IntegerType>(Ty)->getBitWidth();
1873   if (BW != 1 && BW != 8 && BW != 16 && BW != 32 && BW != 64)
1874     return false;
1875
1876   if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(0)))
1877     if (C->isNullValue())
1878       return true;
1879
1880   if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(1)))
1881     if (C->isNullValue())
1882       return true;
1883
1884   return false;
1885 }
1886
1887 bool AArch64FastISel::selectBranch(const Instruction *I) {
1888   const BranchInst *BI = cast<BranchInst>(I);
1889   if (BI->isUnconditional()) {
1890     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
1891     fastEmitBranch(MSucc, BI->getDebugLoc());
1892     return true;
1893   }
1894
1895   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1896   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1897
1898   AArch64CC::CondCode CC = AArch64CC::NE;
1899   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1900     if (CI->hasOneUse() && isValueAvailable(CI)) {
1901       // Try to optimize or fold the cmp.
1902       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1903       switch (Predicate) {
1904       default:
1905         break;
1906       case CmpInst::FCMP_FALSE:
1907         fastEmitBranch(FBB, DbgLoc);
1908         return true;
1909       case CmpInst::FCMP_TRUE:
1910         fastEmitBranch(TBB, DbgLoc);
1911         return true;
1912       }
1913
1914       // Try to take advantage of fallthrough opportunities.
1915       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1916         std::swap(TBB, FBB);
1917         Predicate = CmpInst::getInversePredicate(Predicate);
1918       }
1919
1920       // Try to optimize comparisons against zero.
1921       if (canFoldZeroCheckIntoBranch(CI)) {
1922         const Value *LHS = CI->getOperand(0);
1923         const Value *RHS = CI->getOperand(1);
1924
1925         // Canonicalize zero values to the RHS.
1926         if (const auto *C = dyn_cast<ConstantInt>(LHS))
1927           if (C->isNullValue())
1928             std::swap(LHS, RHS);
1929
1930         int TestBit = -1;
1931         if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
1932           if (AI->getOpcode() == Instruction::And) {
1933             const Value *AndLHS = AI->getOperand(0);
1934             const Value *AndRHS = AI->getOperand(1);
1935
1936             if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
1937               if (C->getValue().isPowerOf2())
1938                 std::swap(AndLHS, AndRHS);
1939
1940             if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
1941               if (C->getValue().isPowerOf2()) {
1942                 TestBit = C->getValue().logBase2();
1943                 LHS = AndLHS;
1944               }
1945           }
1946
1947         static const unsigned OpcTable[2][2][2] = {
1948           { {AArch64::CBZW,  AArch64::CBZX },
1949             {AArch64::CBNZW, AArch64::CBNZX} },
1950           { {AArch64::TBZW,  AArch64::TBZX },
1951             {AArch64::TBNZW, AArch64::TBNZX} }
1952         };
1953         bool IsBitTest = TestBit != -1;
1954         bool IsCmpNE = Predicate == CmpInst::ICMP_NE;
1955         bool Is64Bit = LHS->getType()->isIntegerTy(64);
1956         unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
1957
1958         unsigned SrcReg = getRegForValue(LHS);
1959         if (!SrcReg)
1960           return false;
1961         bool SrcIsKill = hasTrivialKill(LHS);
1962
1963         // Emit the combined compare and branch instruction.
1964         MachineInstrBuilder MIB =
1965             BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1966                 .addReg(SrcReg, getKillRegState(SrcIsKill));
1967         if (IsBitTest)
1968           MIB.addImm(TestBit);
1969         MIB.addMBB(TBB);
1970
1971         // Obtain the branch weight and add the TrueBB to the successor list.
1972         uint32_t BranchWeight = 0;
1973         if (FuncInfo.BPI)
1974           BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1975                                                      TBB->getBasicBlock());
1976         FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1977
1978         fastEmitBranch(FBB, DbgLoc);
1979         return true;
1980       }
1981
1982       // Emit the cmp.
1983       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1984         return false;
1985
1986       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
1987       // instruction.
1988       CC = getCompareCC(Predicate);
1989       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
1990       switch (Predicate) {
1991       default:
1992         break;
1993       case CmpInst::FCMP_UEQ:
1994         ExtraCC = AArch64CC::EQ;
1995         CC = AArch64CC::VS;
1996         break;
1997       case CmpInst::FCMP_ONE:
1998         ExtraCC = AArch64CC::MI;
1999         CC = AArch64CC::GT;
2000         break;
2001       }
2002       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2003
2004       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2005       if (ExtraCC != AArch64CC::AL) {
2006         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2007             .addImm(ExtraCC)
2008             .addMBB(TBB);
2009       }
2010
2011       // Emit the branch.
2012       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2013           .addImm(CC)
2014           .addMBB(TBB);
2015
2016       // Obtain the branch weight and add the TrueBB to the successor list.
2017       uint32_t BranchWeight = 0;
2018       if (FuncInfo.BPI)
2019         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2020                                                   TBB->getBasicBlock());
2021       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2022
2023       fastEmitBranch(FBB, DbgLoc);
2024       return true;
2025     }
2026   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
2027     MVT SrcVT;
2028     if (TI->hasOneUse() && isValueAvailable(TI) &&
2029         isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) {
2030       unsigned CondReg = getRegForValue(TI->getOperand(0));
2031       if (!CondReg)
2032         return false;
2033       bool CondIsKill = hasTrivialKill(TI->getOperand(0));
2034
2035       // Issue an extract_subreg to get the lower 32-bits.
2036       if (SrcVT == MVT::i64) {
2037         CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
2038                                              AArch64::sub_32);
2039         CondIsKill = true;
2040       }
2041
2042       unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
2043       assert(ANDReg && "Unexpected AND instruction emission failure.");
2044       emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
2045
2046       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2047         std::swap(TBB, FBB);
2048         CC = AArch64CC::EQ;
2049       }
2050       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2051           .addImm(CC)
2052           .addMBB(TBB);
2053
2054       // Obtain the branch weight and add the TrueBB to the successor list.
2055       uint32_t BranchWeight = 0;
2056       if (FuncInfo.BPI)
2057         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2058                                                   TBB->getBasicBlock());
2059       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2060
2061       fastEmitBranch(FBB, DbgLoc);
2062       return true;
2063     }
2064   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2065     uint64_t Imm = CI->getZExtValue();
2066     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2067     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2068         .addMBB(Target);
2069
2070     // Obtain the branch weight and add the target to the successor list.
2071     uint32_t BranchWeight = 0;
2072     if (FuncInfo.BPI)
2073       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2074                                                  Target->getBasicBlock());
2075     FuncInfo.MBB->addSuccessor(Target, BranchWeight);
2076     return true;
2077   } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2078     // Fake request the condition, otherwise the intrinsic might be completely
2079     // optimized away.
2080     unsigned CondReg = getRegForValue(BI->getCondition());
2081     if (!CondReg)
2082       return false;
2083
2084     // Emit the branch.
2085     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2086       .addImm(CC)
2087       .addMBB(TBB);
2088
2089     // Obtain the branch weight and add the TrueBB to the successor list.
2090     uint32_t BranchWeight = 0;
2091     if (FuncInfo.BPI)
2092       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2093                                                  TBB->getBasicBlock());
2094     FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2095
2096     fastEmitBranch(FBB, DbgLoc);
2097     return true;
2098   }
2099
2100   unsigned CondReg = getRegForValue(BI->getCondition());
2101   if (CondReg == 0)
2102     return false;
2103   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2104
2105   // We've been divorced from our compare!  Our block was split, and
2106   // now our compare lives in a predecessor block.  We musn't
2107   // re-compare here, as the children of the compare aren't guaranteed
2108   // live across the block boundary (we *could* check for this).
2109   // Regardless, the compare has been done in the predecessor block,
2110   // and it left a value for us in a virtual register.  Ergo, we test
2111   // the one-bit value left in the virtual register.
2112   emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0);
2113
2114   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2115     std::swap(TBB, FBB);
2116     CC = AArch64CC::EQ;
2117   }
2118
2119   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2120       .addImm(CC)
2121       .addMBB(TBB);
2122
2123   // Obtain the branch weight and add the TrueBB to the successor list.
2124   uint32_t BranchWeight = 0;
2125   if (FuncInfo.BPI)
2126     BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2127                                                TBB->getBasicBlock());
2128   FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2129
2130   fastEmitBranch(FBB, DbgLoc);
2131   return true;
2132 }
2133
2134 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2135   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2136   unsigned AddrReg = getRegForValue(BI->getOperand(0));
2137   if (AddrReg == 0)
2138     return false;
2139
2140   // Emit the indirect branch.
2141   const MCInstrDesc &II = TII.get(AArch64::BR);
2142   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2143   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2144
2145   // Make sure the CFG is up-to-date.
2146   for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
2147     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
2148
2149   return true;
2150 }
2151
2152 bool AArch64FastISel::selectCmp(const Instruction *I) {
2153   const CmpInst *CI = cast<CmpInst>(I);
2154
2155   // Try to optimize or fold the cmp.
2156   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2157   unsigned ResultReg = 0;
2158   switch (Predicate) {
2159   default:
2160     break;
2161   case CmpInst::FCMP_FALSE:
2162     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2163     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2164             TII.get(TargetOpcode::COPY), ResultReg)
2165         .addReg(AArch64::WZR, getKillRegState(true));
2166     break;
2167   case CmpInst::FCMP_TRUE:
2168     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2169     break;
2170   }
2171
2172   if (ResultReg) {
2173     updateValueMap(I, ResultReg);
2174     return true;
2175   }
2176
2177   // Emit the cmp.
2178   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2179     return false;
2180
2181   ResultReg = createResultReg(&AArch64::GPR32RegClass);
2182
2183   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2184   // condition codes are inverted, because they are used by CSINC.
2185   static unsigned CondCodeTable[2][2] = {
2186     { AArch64CC::NE, AArch64CC::VC },
2187     { AArch64CC::PL, AArch64CC::LE }
2188   };
2189   unsigned *CondCodes = nullptr;
2190   switch (Predicate) {
2191   default:
2192     break;
2193   case CmpInst::FCMP_UEQ:
2194     CondCodes = &CondCodeTable[0][0];
2195     break;
2196   case CmpInst::FCMP_ONE:
2197     CondCodes = &CondCodeTable[1][0];
2198     break;
2199   }
2200
2201   if (CondCodes) {
2202     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2203     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2204             TmpReg1)
2205         .addReg(AArch64::WZR, getKillRegState(true))
2206         .addReg(AArch64::WZR, getKillRegState(true))
2207         .addImm(CondCodes[0]);
2208     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2209             ResultReg)
2210         .addReg(TmpReg1, getKillRegState(true))
2211         .addReg(AArch64::WZR, getKillRegState(true))
2212         .addImm(CondCodes[1]);
2213
2214     updateValueMap(I, ResultReg);
2215     return true;
2216   }
2217
2218   // Now set a register based on the comparison.
2219   AArch64CC::CondCode CC = getCompareCC(Predicate);
2220   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2221   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2222   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2223           ResultReg)
2224       .addReg(AArch64::WZR, getKillRegState(true))
2225       .addReg(AArch64::WZR, getKillRegState(true))
2226       .addImm(invertedCC);
2227
2228   updateValueMap(I, ResultReg);
2229   return true;
2230 }
2231
2232 bool AArch64FastISel::selectSelect(const Instruction *I) {
2233   const SelectInst *SI = cast<SelectInst>(I);
2234
2235   EVT DestEVT = TLI.getValueType(SI->getType(), true);
2236   if (!DestEVT.isSimple())
2237     return false;
2238
2239   MVT DestVT = DestEVT.getSimpleVT();
2240   if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
2241       DestVT != MVT::f64)
2242     return false;
2243
2244   unsigned SelectOpc;
2245   const TargetRegisterClass *RC = nullptr;
2246   switch (DestVT.SimpleTy) {
2247   default: return false;
2248   case MVT::i32:
2249     SelectOpc = AArch64::CSELWr;    RC = &AArch64::GPR32RegClass; break;
2250   case MVT::i64:
2251     SelectOpc = AArch64::CSELXr;    RC = &AArch64::GPR64RegClass; break;
2252   case MVT::f32:
2253     SelectOpc = AArch64::FCSELSrrr; RC = &AArch64::FPR32RegClass; break;
2254   case MVT::f64:
2255     SelectOpc = AArch64::FCSELDrrr; RC = &AArch64::FPR64RegClass; break;
2256   }
2257
2258   const Value *Cond = SI->getCondition();
2259   bool NeedTest = true;
2260   AArch64CC::CondCode CC = AArch64CC::NE;
2261   if (foldXALUIntrinsic(CC, I, Cond))
2262     NeedTest = false;
2263
2264   unsigned CondReg = getRegForValue(Cond);
2265   if (!CondReg)
2266     return false;
2267   bool CondIsKill = hasTrivialKill(Cond);
2268
2269   if (NeedTest) {
2270     unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
2271     assert(ANDReg && "Unexpected AND instruction emission failure.");
2272     emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
2273   }
2274
2275   unsigned TrueReg = getRegForValue(SI->getTrueValue());
2276   bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
2277
2278   unsigned FalseReg = getRegForValue(SI->getFalseValue());
2279   bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
2280
2281   if (!TrueReg || !FalseReg)
2282     return false;
2283
2284   unsigned ResultReg = fastEmitInst_rri(SelectOpc, RC, TrueReg, TrueIsKill,
2285                                         FalseReg, FalseIsKill, CC);
2286   updateValueMap(I, ResultReg);
2287   return true;
2288 }
2289
2290 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2291   Value *V = I->getOperand(0);
2292   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2293     return false;
2294
2295   unsigned Op = getRegForValue(V);
2296   if (Op == 0)
2297     return false;
2298
2299   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2300   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2301           ResultReg).addReg(Op);
2302   updateValueMap(I, ResultReg);
2303   return true;
2304 }
2305
2306 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2307   Value *V = I->getOperand(0);
2308   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2309     return false;
2310
2311   unsigned Op = getRegForValue(V);
2312   if (Op == 0)
2313     return false;
2314
2315   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2316   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2317           ResultReg).addReg(Op);
2318   updateValueMap(I, ResultReg);
2319   return true;
2320 }
2321
2322 // FPToUI and FPToSI
2323 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2324   MVT DestVT;
2325   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2326     return false;
2327
2328   unsigned SrcReg = getRegForValue(I->getOperand(0));
2329   if (SrcReg == 0)
2330     return false;
2331
2332   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2333   if (SrcVT == MVT::f128)
2334     return false;
2335
2336   unsigned Opc;
2337   if (SrcVT == MVT::f64) {
2338     if (Signed)
2339       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2340     else
2341       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2342   } else {
2343     if (Signed)
2344       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2345     else
2346       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2347   }
2348   unsigned ResultReg = createResultReg(
2349       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2350   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2351       .addReg(SrcReg);
2352   updateValueMap(I, ResultReg);
2353   return true;
2354 }
2355
2356 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2357   MVT DestVT;
2358   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2359     return false;
2360   assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2361           "Unexpected value type.");
2362
2363   unsigned SrcReg = getRegForValue(I->getOperand(0));
2364   if (!SrcReg)
2365     return false;
2366   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2367
2368   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2369
2370   // Handle sign-extension.
2371   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2372     SrcReg =
2373         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2374     if (!SrcReg)
2375       return false;
2376     SrcIsKill = true;
2377   }
2378
2379   unsigned Opc;
2380   if (SrcVT == MVT::i64) {
2381     if (Signed)
2382       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2383     else
2384       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2385   } else {
2386     if (Signed)
2387       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2388     else
2389       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2390   }
2391
2392   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2393                                       SrcIsKill);
2394   updateValueMap(I, ResultReg);
2395   return true;
2396 }
2397
2398 bool AArch64FastISel::fastLowerArguments() {
2399   if (!FuncInfo.CanLowerReturn)
2400     return false;
2401
2402   const Function *F = FuncInfo.Fn;
2403   if (F->isVarArg())
2404     return false;
2405
2406   CallingConv::ID CC = F->getCallingConv();
2407   if (CC != CallingConv::C)
2408     return false;
2409
2410   // Only handle simple cases of up to 8 GPR and FPR each.
2411   unsigned GPRCnt = 0;
2412   unsigned FPRCnt = 0;
2413   unsigned Idx = 0;
2414   for (auto const &Arg : F->args()) {
2415     // The first argument is at index 1.
2416     ++Idx;
2417     if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
2418         F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
2419         F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
2420         F->getAttributes().hasAttribute(Idx, Attribute::Nest))
2421       return false;
2422
2423     Type *ArgTy = Arg.getType();
2424     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2425       return false;
2426
2427     EVT ArgVT = TLI.getValueType(ArgTy);
2428     if (!ArgVT.isSimple())
2429       return false;
2430
2431     MVT VT = ArgVT.getSimpleVT().SimpleTy;
2432     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2433       return false;
2434
2435     if (VT.isVector() &&
2436         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2437       return false;
2438
2439     if (VT >= MVT::i1 && VT <= MVT::i64)
2440       ++GPRCnt;
2441     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2442              VT.is128BitVector())
2443       ++FPRCnt;
2444     else
2445       return false;
2446
2447     if (GPRCnt > 8 || FPRCnt > 8)
2448       return false;
2449   }
2450
2451   static const MCPhysReg Registers[6][8] = {
2452     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2453       AArch64::W5, AArch64::W6, AArch64::W7 },
2454     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2455       AArch64::X5, AArch64::X6, AArch64::X7 },
2456     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2457       AArch64::H5, AArch64::H6, AArch64::H7 },
2458     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2459       AArch64::S5, AArch64::S6, AArch64::S7 },
2460     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2461       AArch64::D5, AArch64::D6, AArch64::D7 },
2462     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2463       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2464   };
2465
2466   unsigned GPRIdx = 0;
2467   unsigned FPRIdx = 0;
2468   for (auto const &Arg : F->args()) {
2469     MVT VT = TLI.getSimpleValueType(Arg.getType());
2470     unsigned SrcReg;
2471     const TargetRegisterClass *RC;
2472     if (VT >= MVT::i1 && VT <= MVT::i32) {
2473       SrcReg = Registers[0][GPRIdx++];
2474       RC = &AArch64::GPR32RegClass;
2475       VT = MVT::i32;
2476     } else if (VT == MVT::i64) {
2477       SrcReg = Registers[1][GPRIdx++];
2478       RC = &AArch64::GPR64RegClass;
2479     } else if (VT == MVT::f16) {
2480       SrcReg = Registers[2][FPRIdx++];
2481       RC = &AArch64::FPR16RegClass;
2482     } else if (VT ==  MVT::f32) {
2483       SrcReg = Registers[3][FPRIdx++];
2484       RC = &AArch64::FPR32RegClass;
2485     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2486       SrcReg = Registers[4][FPRIdx++];
2487       RC = &AArch64::FPR64RegClass;
2488     } else if (VT.is128BitVector()) {
2489       SrcReg = Registers[5][FPRIdx++];
2490       RC = &AArch64::FPR128RegClass;
2491     } else
2492       llvm_unreachable("Unexpected value type.");
2493
2494     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2495     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2496     // Without this, EmitLiveInCopies may eliminate the livein if its only
2497     // use is a bitcast (which isn't turned into an instruction).
2498     unsigned ResultReg = createResultReg(RC);
2499     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2500             TII.get(TargetOpcode::COPY), ResultReg)
2501         .addReg(DstReg, getKillRegState(true));
2502     updateValueMap(&Arg, ResultReg);
2503   }
2504   return true;
2505 }
2506
2507 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
2508                                       SmallVectorImpl<MVT> &OutVTs,
2509                                       unsigned &NumBytes) {
2510   CallingConv::ID CC = CLI.CallConv;
2511   SmallVector<CCValAssign, 16> ArgLocs;
2512   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2513   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2514
2515   // Get a count of how many bytes are to be pushed on the stack.
2516   NumBytes = CCInfo.getNextStackOffset();
2517
2518   // Issue CALLSEQ_START
2519   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
2520   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
2521     .addImm(NumBytes);
2522
2523   // Process the args.
2524   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2525     CCValAssign &VA = ArgLocs[i];
2526     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
2527     MVT ArgVT = OutVTs[VA.getValNo()];
2528
2529     unsigned ArgReg = getRegForValue(ArgVal);
2530     if (!ArgReg)
2531       return false;
2532
2533     // Handle arg promotion: SExt, ZExt, AExt.
2534     switch (VA.getLocInfo()) {
2535     case CCValAssign::Full:
2536       break;
2537     case CCValAssign::SExt: {
2538       MVT DestVT = VA.getLocVT();
2539       MVT SrcVT = ArgVT;
2540       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
2541       if (!ArgReg)
2542         return false;
2543       break;
2544     }
2545     case CCValAssign::AExt:
2546     // Intentional fall-through.
2547     case CCValAssign::ZExt: {
2548       MVT DestVT = VA.getLocVT();
2549       MVT SrcVT = ArgVT;
2550       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
2551       if (!ArgReg)
2552         return false;
2553       break;
2554     }
2555     default:
2556       llvm_unreachable("Unknown arg promotion!");
2557     }
2558
2559     // Now copy/store arg to correct locations.
2560     if (VA.isRegLoc() && !VA.needsCustom()) {
2561       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2562               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
2563       CLI.OutRegs.push_back(VA.getLocReg());
2564     } else if (VA.needsCustom()) {
2565       // FIXME: Handle custom args.
2566       return false;
2567     } else {
2568       assert(VA.isMemLoc() && "Assuming store on stack.");
2569
2570       // Don't emit stores for undef values.
2571       if (isa<UndefValue>(ArgVal))
2572         continue;
2573
2574       // Need to store on the stack.
2575       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
2576
2577       unsigned BEAlign = 0;
2578       if (ArgSize < 8 && !Subtarget->isLittleEndian())
2579         BEAlign = 8 - ArgSize;
2580
2581       Address Addr;
2582       Addr.setKind(Address::RegBase);
2583       Addr.setReg(AArch64::SP);
2584       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
2585
2586       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
2587       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
2588         MachinePointerInfo::getStack(Addr.getOffset()),
2589         MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
2590
2591       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
2592         return false;
2593     }
2594   }
2595   return true;
2596 }
2597
2598 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
2599                                  unsigned NumBytes) {
2600   CallingConv::ID CC = CLI.CallConv;
2601
2602   // Issue CALLSEQ_END
2603   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
2604   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
2605     .addImm(NumBytes).addImm(0);
2606
2607   // Now the return value.
2608   if (RetVT != MVT::isVoid) {
2609     SmallVector<CCValAssign, 16> RVLocs;
2610     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
2611     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
2612
2613     // Only handle a single return value.
2614     if (RVLocs.size() != 1)
2615       return false;
2616
2617     // Copy all of the result registers out of their specified physreg.
2618     MVT CopyVT = RVLocs[0].getValVT();
2619     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
2620     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2621             TII.get(TargetOpcode::COPY), ResultReg)
2622         .addReg(RVLocs[0].getLocReg());
2623     CLI.InRegs.push_back(RVLocs[0].getLocReg());
2624
2625     CLI.ResultReg = ResultReg;
2626     CLI.NumResultRegs = 1;
2627   }
2628
2629   return true;
2630 }
2631
2632 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
2633   CallingConv::ID CC  = CLI.CallConv;
2634   bool IsTailCall     = CLI.IsTailCall;
2635   bool IsVarArg       = CLI.IsVarArg;
2636   const Value *Callee = CLI.Callee;
2637   const char *SymName = CLI.SymName;
2638
2639   if (!Callee && !SymName)
2640     return false;
2641
2642   // Allow SelectionDAG isel to handle tail calls.
2643   if (IsTailCall)
2644     return false;
2645
2646   CodeModel::Model CM = TM.getCodeModel();
2647   // Only support the small and large code model.
2648   if (CM != CodeModel::Small && CM != CodeModel::Large)
2649     return false;
2650
2651   // FIXME: Add large code model support for ELF.
2652   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
2653     return false;
2654
2655   // Let SDISel handle vararg functions.
2656   if (IsVarArg)
2657     return false;
2658
2659   // FIXME: Only handle *simple* calls for now.
2660   MVT RetVT;
2661   if (CLI.RetTy->isVoidTy())
2662     RetVT = MVT::isVoid;
2663   else if (!isTypeLegal(CLI.RetTy, RetVT))
2664     return false;
2665
2666   for (auto Flag : CLI.OutFlags)
2667     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
2668       return false;
2669
2670   // Set up the argument vectors.
2671   SmallVector<MVT, 16> OutVTs;
2672   OutVTs.reserve(CLI.OutVals.size());
2673
2674   for (auto *Val : CLI.OutVals) {
2675     MVT VT;
2676     if (!isTypeLegal(Val->getType(), VT) &&
2677         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
2678       return false;
2679
2680     // We don't handle vector parameters yet.
2681     if (VT.isVector() || VT.getSizeInBits() > 64)
2682       return false;
2683
2684     OutVTs.push_back(VT);
2685   }
2686
2687   Address Addr;
2688   if (Callee && !computeCallAddress(Callee, Addr))
2689     return false;
2690
2691   // Handle the arguments now that we've gotten them.
2692   unsigned NumBytes;
2693   if (!processCallArgs(CLI, OutVTs, NumBytes))
2694     return false;
2695
2696   // Issue the call.
2697   MachineInstrBuilder MIB;
2698   if (CM == CodeModel::Small) {
2699     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
2700     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
2701     if (SymName)
2702       MIB.addExternalSymbol(SymName, 0);
2703     else if (Addr.getGlobalValue())
2704       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
2705     else if (Addr.getReg()) {
2706       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
2707       MIB.addReg(Reg);
2708     } else
2709       return false;
2710   } else {
2711     unsigned CallReg = 0;
2712     if (SymName) {
2713       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
2714       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
2715               ADRPReg)
2716         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
2717
2718       CallReg = createResultReg(&AArch64::GPR64RegClass);
2719       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
2720               CallReg)
2721         .addReg(ADRPReg)
2722         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
2723                            AArch64II::MO_NC);
2724     } else if (Addr.getGlobalValue())
2725       CallReg = materializeGV(Addr.getGlobalValue());
2726     else if (Addr.getReg())
2727       CallReg = Addr.getReg();
2728
2729     if (!CallReg)
2730       return false;
2731
2732     const MCInstrDesc &II = TII.get(AArch64::BLR);
2733     CallReg = constrainOperandRegClass(II, CallReg, 0);
2734     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
2735   }
2736
2737   // Add implicit physical register uses to the call.
2738   for (auto Reg : CLI.OutRegs)
2739     MIB.addReg(Reg, RegState::Implicit);
2740
2741   // Add a register mask with the call-preserved registers.
2742   // Proper defs for return values will be added by setPhysRegsDeadExcept().
2743   MIB.addRegMask(TRI.getCallPreservedMask(CC));
2744
2745   CLI.Call = MIB;
2746
2747   // Finish off the call including any return values.
2748   return finishCall(CLI, RetVT, NumBytes);
2749 }
2750
2751 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
2752   if (Alignment)
2753     return Len / Alignment <= 4;
2754   else
2755     return Len < 32;
2756 }
2757
2758 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
2759                                          uint64_t Len, unsigned Alignment) {
2760   // Make sure we don't bloat code by inlining very large memcpy's.
2761   if (!isMemCpySmall(Len, Alignment))
2762     return false;
2763
2764   int64_t UnscaledOffset = 0;
2765   Address OrigDest = Dest;
2766   Address OrigSrc = Src;
2767
2768   while (Len) {
2769     MVT VT;
2770     if (!Alignment || Alignment >= 8) {
2771       if (Len >= 8)
2772         VT = MVT::i64;
2773       else if (Len >= 4)
2774         VT = MVT::i32;
2775       else if (Len >= 2)
2776         VT = MVT::i16;
2777       else {
2778         VT = MVT::i8;
2779       }
2780     } else {
2781       // Bound based on alignment.
2782       if (Len >= 4 && Alignment == 4)
2783         VT = MVT::i32;
2784       else if (Len >= 2 && Alignment == 2)
2785         VT = MVT::i16;
2786       else {
2787         VT = MVT::i8;
2788       }
2789     }
2790
2791     bool RV;
2792     unsigned ResultReg;
2793     RV = emitLoad(VT, ResultReg, Src);
2794     if (!RV)
2795       return false;
2796
2797     RV = emitStore(VT, ResultReg, Dest);
2798     if (!RV)
2799       return false;
2800
2801     int64_t Size = VT.getSizeInBits() / 8;
2802     Len -= Size;
2803     UnscaledOffset += Size;
2804
2805     // We need to recompute the unscaled offset for each iteration.
2806     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
2807     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
2808   }
2809
2810   return true;
2811 }
2812
2813 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
2814 /// into the user. The condition code will only be updated on success.
2815 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
2816                                         const Instruction *I,
2817                                         const Value *Cond) {
2818   if (!isa<ExtractValueInst>(Cond))
2819     return false;
2820
2821   const auto *EV = cast<ExtractValueInst>(Cond);
2822   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
2823     return false;
2824
2825   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
2826   MVT RetVT;
2827   const Function *Callee = II->getCalledFunction();
2828   Type *RetTy =
2829   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
2830   if (!isTypeLegal(RetTy, RetVT))
2831     return false;
2832
2833   if (RetVT != MVT::i32 && RetVT != MVT::i64)
2834     return false;
2835
2836   const Value *LHS = II->getArgOperand(0);
2837   const Value *RHS = II->getArgOperand(1);
2838
2839   // Canonicalize immediate to the RHS.
2840   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2841       isCommutativeIntrinsic(II))
2842     std::swap(LHS, RHS);
2843
2844   // Simplify multiplies.
2845   unsigned IID = II->getIntrinsicID();
2846   switch (IID) {
2847   default:
2848     break;
2849   case Intrinsic::smul_with_overflow:
2850     if (const auto *C = dyn_cast<ConstantInt>(RHS))
2851       if (C->getValue() == 2)
2852         IID = Intrinsic::sadd_with_overflow;
2853     break;
2854   case Intrinsic::umul_with_overflow:
2855     if (const auto *C = dyn_cast<ConstantInt>(RHS))
2856       if (C->getValue() == 2)
2857         IID = Intrinsic::uadd_with_overflow;
2858     break;
2859   }
2860
2861   AArch64CC::CondCode TmpCC;
2862   switch (IID) {
2863   default:
2864     return false;
2865   case Intrinsic::sadd_with_overflow:
2866   case Intrinsic::ssub_with_overflow:
2867     TmpCC = AArch64CC::VS;
2868     break;
2869   case Intrinsic::uadd_with_overflow:
2870     TmpCC = AArch64CC::HS;
2871     break;
2872   case Intrinsic::usub_with_overflow:
2873     TmpCC = AArch64CC::LO;
2874     break;
2875   case Intrinsic::smul_with_overflow:
2876   case Intrinsic::umul_with_overflow:
2877     TmpCC = AArch64CC::NE;
2878     break;
2879   }
2880
2881   // Check if both instructions are in the same basic block.
2882   if (!isValueAvailable(II))
2883     return false;
2884
2885   // Make sure nothing is in the way
2886   BasicBlock::const_iterator Start = I;
2887   BasicBlock::const_iterator End = II;
2888   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
2889     // We only expect extractvalue instructions between the intrinsic and the
2890     // instruction to be selected.
2891     if (!isa<ExtractValueInst>(Itr))
2892       return false;
2893
2894     // Check that the extractvalue operand comes from the intrinsic.
2895     const auto *EVI = cast<ExtractValueInst>(Itr);
2896     if (EVI->getAggregateOperand() != II)
2897       return false;
2898   }
2899
2900   CC = TmpCC;
2901   return true;
2902 }
2903
2904 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2905   // FIXME: Handle more intrinsics.
2906   switch (II->getIntrinsicID()) {
2907   default: return false;
2908   case Intrinsic::frameaddress: {
2909     MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
2910     MFI->setFrameAddressIsTaken(true);
2911
2912     const AArch64RegisterInfo *RegInfo =
2913         static_cast<const AArch64RegisterInfo *>(
2914             TM.getSubtargetImpl()->getRegisterInfo());
2915     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
2916     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2917     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2918             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
2919     // Recursively load frame address
2920     // ldr x0, [fp]
2921     // ldr x0, [x0]
2922     // ldr x0, [x0]
2923     // ...
2924     unsigned DestReg;
2925     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2926     while (Depth--) {
2927       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
2928                                 SrcReg, /*IsKill=*/true, 0);
2929       assert(DestReg && "Unexpected LDR instruction emission failure.");
2930       SrcReg = DestReg;
2931     }
2932
2933     updateValueMap(II, SrcReg);
2934     return true;
2935   }
2936   case Intrinsic::memcpy:
2937   case Intrinsic::memmove: {
2938     const auto *MTI = cast<MemTransferInst>(II);
2939     // Don't handle volatile.
2940     if (MTI->isVolatile())
2941       return false;
2942
2943     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
2944     // we would emit dead code because we don't currently handle memmoves.
2945     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
2946     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
2947       // Small memcpy's are common enough that we want to do them without a call
2948       // if possible.
2949       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
2950       unsigned Alignment = MTI->getAlignment();
2951       if (isMemCpySmall(Len, Alignment)) {
2952         Address Dest, Src;
2953         if (!computeAddress(MTI->getRawDest(), Dest) ||
2954             !computeAddress(MTI->getRawSource(), Src))
2955           return false;
2956         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
2957           return true;
2958       }
2959     }
2960
2961     if (!MTI->getLength()->getType()->isIntegerTy(64))
2962       return false;
2963
2964     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
2965       // Fast instruction selection doesn't support the special
2966       // address spaces.
2967       return false;
2968
2969     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
2970     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
2971   }
2972   case Intrinsic::memset: {
2973     const MemSetInst *MSI = cast<MemSetInst>(II);
2974     // Don't handle volatile.
2975     if (MSI->isVolatile())
2976       return false;
2977
2978     if (!MSI->getLength()->getType()->isIntegerTy(64))
2979       return false;
2980
2981     if (MSI->getDestAddressSpace() > 255)
2982       // Fast instruction selection doesn't support the special
2983       // address spaces.
2984       return false;
2985
2986     return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
2987   }
2988   case Intrinsic::sin:
2989   case Intrinsic::cos:
2990   case Intrinsic::pow: {
2991     MVT RetVT;
2992     if (!isTypeLegal(II->getType(), RetVT))
2993       return false;
2994
2995     if (RetVT != MVT::f32 && RetVT != MVT::f64)
2996       return false;
2997
2998     static const RTLIB::Libcall LibCallTable[3][2] = {
2999       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3000       { RTLIB::COS_F32, RTLIB::COS_F64 },
3001       { RTLIB::POW_F32, RTLIB::POW_F64 }
3002     };
3003     RTLIB::Libcall LC;
3004     bool Is64Bit = RetVT == MVT::f64;
3005     switch (II->getIntrinsicID()) {
3006     default:
3007       llvm_unreachable("Unexpected intrinsic.");
3008     case Intrinsic::sin:
3009       LC = LibCallTable[0][Is64Bit];
3010       break;
3011     case Intrinsic::cos:
3012       LC = LibCallTable[1][Is64Bit];
3013       break;
3014     case Intrinsic::pow:
3015       LC = LibCallTable[2][Is64Bit];
3016       break;
3017     }
3018
3019     ArgListTy Args;
3020     Args.reserve(II->getNumArgOperands());
3021
3022     // Populate the argument list.
3023     for (auto &Arg : II->arg_operands()) {
3024       ArgListEntry Entry;
3025       Entry.Val = Arg;
3026       Entry.Ty = Arg->getType();
3027       Args.push_back(Entry);
3028     }
3029
3030     CallLoweringInfo CLI;
3031     CLI.setCallee(TLI.getLibcallCallingConv(LC), II->getType(),
3032                   TLI.getLibcallName(LC), std::move(Args));
3033     if (!lowerCallTo(CLI))
3034       return false;
3035     updateValueMap(II, CLI.ResultReg);
3036     return true;
3037   }
3038   case Intrinsic::trap: {
3039     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3040         .addImm(1);
3041     return true;
3042   }
3043   case Intrinsic::sqrt: {
3044     Type *RetTy = II->getCalledFunction()->getReturnType();
3045
3046     MVT VT;
3047     if (!isTypeLegal(RetTy, VT))
3048       return false;
3049
3050     unsigned Op0Reg = getRegForValue(II->getOperand(0));
3051     if (!Op0Reg)
3052       return false;
3053     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3054
3055     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3056     if (!ResultReg)
3057       return false;
3058
3059     updateValueMap(II, ResultReg);
3060     return true;
3061   }
3062   case Intrinsic::sadd_with_overflow:
3063   case Intrinsic::uadd_with_overflow:
3064   case Intrinsic::ssub_with_overflow:
3065   case Intrinsic::usub_with_overflow:
3066   case Intrinsic::smul_with_overflow:
3067   case Intrinsic::umul_with_overflow: {
3068     // This implements the basic lowering of the xalu with overflow intrinsics.
3069     const Function *Callee = II->getCalledFunction();
3070     auto *Ty = cast<StructType>(Callee->getReturnType());
3071     Type *RetTy = Ty->getTypeAtIndex(0U);
3072
3073     MVT VT;
3074     if (!isTypeLegal(RetTy, VT))
3075       return false;
3076
3077     if (VT != MVT::i32 && VT != MVT::i64)
3078       return false;
3079
3080     const Value *LHS = II->getArgOperand(0);
3081     const Value *RHS = II->getArgOperand(1);
3082     // Canonicalize immediate to the RHS.
3083     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3084         isCommutativeIntrinsic(II))
3085       std::swap(LHS, RHS);
3086
3087     // Simplify multiplies.
3088     unsigned IID = II->getIntrinsicID();
3089     switch (IID) {
3090     default:
3091       break;
3092     case Intrinsic::smul_with_overflow:
3093       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3094         if (C->getValue() == 2) {
3095           IID = Intrinsic::sadd_with_overflow;
3096           RHS = LHS;
3097         }
3098       break;
3099     case Intrinsic::umul_with_overflow:
3100       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3101         if (C->getValue() == 2) {
3102           IID = Intrinsic::uadd_with_overflow;
3103           RHS = LHS;
3104         }
3105       break;
3106     }
3107
3108     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3109     AArch64CC::CondCode CC = AArch64CC::Invalid;
3110     switch (IID) {
3111     default: llvm_unreachable("Unexpected intrinsic!");
3112     case Intrinsic::sadd_with_overflow:
3113       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3114       CC = AArch64CC::VS;
3115       break;
3116     case Intrinsic::uadd_with_overflow:
3117       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3118       CC = AArch64CC::HS;
3119       break;
3120     case Intrinsic::ssub_with_overflow:
3121       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3122       CC = AArch64CC::VS;
3123       break;
3124     case Intrinsic::usub_with_overflow:
3125       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3126       CC = AArch64CC::LO;
3127       break;
3128     case Intrinsic::smul_with_overflow: {
3129       CC = AArch64CC::NE;
3130       unsigned LHSReg = getRegForValue(LHS);
3131       if (!LHSReg)
3132         return false;
3133       bool LHSIsKill = hasTrivialKill(LHS);
3134
3135       unsigned RHSReg = getRegForValue(RHS);
3136       if (!RHSReg)
3137         return false;
3138       bool RHSIsKill = hasTrivialKill(RHS);
3139
3140       if (VT == MVT::i32) {
3141         MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3142         unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3143                                        /*IsKill=*/false, 32);
3144         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3145                                             AArch64::sub_32);
3146         ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3147                                               AArch64::sub_32);
3148         emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3149                     AArch64_AM::ASR, 31, /*WantResult=*/false);
3150       } else {
3151         assert(VT == MVT::i64 && "Unexpected value type.");
3152         MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3153         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3154                                         RHSReg, RHSIsKill);
3155         emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3156                     AArch64_AM::ASR, 63, /*WantResult=*/false);
3157       }
3158       break;
3159     }
3160     case Intrinsic::umul_with_overflow: {
3161       CC = AArch64CC::NE;
3162       unsigned LHSReg = getRegForValue(LHS);
3163       if (!LHSReg)
3164         return false;
3165       bool LHSIsKill = hasTrivialKill(LHS);
3166
3167       unsigned RHSReg = getRegForValue(RHS);
3168       if (!RHSReg)
3169         return false;
3170       bool RHSIsKill = hasTrivialKill(RHS);
3171
3172       if (VT == MVT::i32) {
3173         MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3174         emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3175                     /*IsKill=*/false, AArch64_AM::LSR, 32,
3176                     /*WantResult=*/false);
3177         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3178                                             AArch64::sub_32);
3179       } else {
3180         assert(VT == MVT::i64 && "Unexpected value type.");
3181         MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3182         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3183                                         RHSReg, RHSIsKill);
3184         emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3185                     /*IsKill=*/false, /*WantResult=*/false);
3186       }
3187       break;
3188     }
3189     }
3190
3191     if (MulReg) {
3192       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3193       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3194               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3195     }
3196
3197     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3198                                   AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3199                                   /*IsKill=*/true, getInvertedCondCode(CC));
3200     assert((ResultReg1 + 1) == ResultReg2 &&
3201            "Nonconsecutive result registers.");
3202     updateValueMap(II, ResultReg1, 2);
3203     return true;
3204   }
3205   }
3206   return false;
3207 }
3208
3209 bool AArch64FastISel::selectRet(const Instruction *I) {
3210   const ReturnInst *Ret = cast<ReturnInst>(I);
3211   const Function &F = *I->getParent()->getParent();
3212
3213   if (!FuncInfo.CanLowerReturn)
3214     return false;
3215
3216   if (F.isVarArg())
3217     return false;
3218
3219   // Build a list of return value registers.
3220   SmallVector<unsigned, 4> RetRegs;
3221
3222   if (Ret->getNumOperands() > 0) {
3223     CallingConv::ID CC = F.getCallingConv();
3224     SmallVector<ISD::OutputArg, 4> Outs;
3225     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
3226
3227     // Analyze operands of the call, assigning locations to each operand.
3228     SmallVector<CCValAssign, 16> ValLocs;
3229     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3230     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3231                                                      : RetCC_AArch64_AAPCS;
3232     CCInfo.AnalyzeReturn(Outs, RetCC);
3233
3234     // Only handle a single return value for now.
3235     if (ValLocs.size() != 1)
3236       return false;
3237
3238     CCValAssign &VA = ValLocs[0];
3239     const Value *RV = Ret->getOperand(0);
3240
3241     // Don't bother handling odd stuff for now.
3242     if ((VA.getLocInfo() != CCValAssign::Full) &&
3243         (VA.getLocInfo() != CCValAssign::BCvt))
3244       return false;
3245
3246     // Only handle register returns for now.
3247     if (!VA.isRegLoc())
3248       return false;
3249
3250     unsigned Reg = getRegForValue(RV);
3251     if (Reg == 0)
3252       return false;
3253
3254     unsigned SrcReg = Reg + VA.getValNo();
3255     unsigned DestReg = VA.getLocReg();
3256     // Avoid a cross-class copy. This is very unlikely.
3257     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3258       return false;
3259
3260     EVT RVEVT = TLI.getValueType(RV->getType());
3261     if (!RVEVT.isSimple())
3262       return false;
3263
3264     // Vectors (of > 1 lane) in big endian need tricky handling.
3265     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3266         !Subtarget->isLittleEndian())
3267       return false;
3268
3269     MVT RVVT = RVEVT.getSimpleVT();
3270     if (RVVT == MVT::f128)
3271       return false;
3272
3273     MVT DestVT = VA.getValVT();
3274     // Special handling for extended integers.
3275     if (RVVT != DestVT) {
3276       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3277         return false;
3278
3279       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3280         return false;
3281
3282       bool IsZExt = Outs[0].Flags.isZExt();
3283       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3284       if (SrcReg == 0)
3285         return false;
3286     }
3287
3288     // Make the copy.
3289     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3290             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3291
3292     // Add register to return instruction.
3293     RetRegs.push_back(VA.getLocReg());
3294   }
3295
3296   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3297                                     TII.get(AArch64::RET_ReallyLR));
3298   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
3299     MIB.addReg(RetRegs[i], RegState::Implicit);
3300   return true;
3301 }
3302
3303 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3304   Type *DestTy = I->getType();
3305   Value *Op = I->getOperand(0);
3306   Type *SrcTy = Op->getType();
3307
3308   EVT SrcEVT = TLI.getValueType(SrcTy, true);
3309   EVT DestEVT = TLI.getValueType(DestTy, true);
3310   if (!SrcEVT.isSimple())
3311     return false;
3312   if (!DestEVT.isSimple())
3313     return false;
3314
3315   MVT SrcVT = SrcEVT.getSimpleVT();
3316   MVT DestVT = DestEVT.getSimpleVT();
3317
3318   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3319       SrcVT != MVT::i8)
3320     return false;
3321   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3322       DestVT != MVT::i1)
3323     return false;
3324
3325   unsigned SrcReg = getRegForValue(Op);
3326   if (!SrcReg)
3327     return false;
3328   bool SrcIsKill = hasTrivialKill(Op);
3329
3330   // If we're truncating from i64 to a smaller non-legal type then generate an
3331   // AND. Otherwise, we know the high bits are undefined and a truncate only
3332   // generate a COPY. We cannot mark the source register also as result
3333   // register, because this can incorrectly transfer the kill flag onto the
3334   // source register.
3335   unsigned ResultReg;
3336   if (SrcVT == MVT::i64) {
3337     uint64_t Mask = 0;
3338     switch (DestVT.SimpleTy) {
3339     default:
3340       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3341       return false;
3342     case MVT::i1:
3343       Mask = 0x1;
3344       break;
3345     case MVT::i8:
3346       Mask = 0xff;
3347       break;
3348     case MVT::i16:
3349       Mask = 0xffff;
3350       break;
3351     }
3352     // Issue an extract_subreg to get the lower 32-bits.
3353     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3354                                                 AArch64::sub_32);
3355     // Create the AND instruction which performs the actual truncation.
3356     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3357     assert(ResultReg && "Unexpected AND instruction emission failure.");
3358   } else {
3359     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3360     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3361             TII.get(TargetOpcode::COPY), ResultReg)
3362         .addReg(SrcReg, getKillRegState(SrcIsKill));
3363   }
3364
3365   updateValueMap(I, ResultReg);
3366   return true;
3367 }
3368
3369 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3370   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3371           DestVT == MVT::i64) &&
3372          "Unexpected value type.");
3373   // Handle i8 and i16 as i32.
3374   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3375     DestVT = MVT::i32;
3376
3377   if (IsZExt) {
3378     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3379     assert(ResultReg && "Unexpected AND instruction emission failure.");
3380     if (DestVT == MVT::i64) {
3381       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
3382       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
3383       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3384       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3385               TII.get(AArch64::SUBREG_TO_REG), Reg64)
3386           .addImm(0)
3387           .addReg(ResultReg)
3388           .addImm(AArch64::sub_32);
3389       ResultReg = Reg64;
3390     }
3391     return ResultReg;
3392   } else {
3393     if (DestVT == MVT::i64) {
3394       // FIXME: We're SExt i1 to i64.
3395       return 0;
3396     }
3397     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3398                             /*TODO:IsKill=*/false, 0, 0);
3399   }
3400 }
3401
3402 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3403                                       unsigned Op1, bool Op1IsKill) {
3404   unsigned Opc, ZReg;
3405   switch (RetVT.SimpleTy) {
3406   default: return 0;
3407   case MVT::i8:
3408   case MVT::i16:
3409   case MVT::i32:
3410     RetVT = MVT::i32;
3411     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3412   case MVT::i64:
3413     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3414   }
3415
3416   const TargetRegisterClass *RC =
3417       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3418   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3419                           /*IsKill=*/ZReg, true);
3420 }
3421
3422 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3423                                         unsigned Op1, bool Op1IsKill) {
3424   if (RetVT != MVT::i64)
3425     return 0;
3426
3427   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3428                           Op0, Op0IsKill, Op1, Op1IsKill,
3429                           AArch64::XZR, /*IsKill=*/true);
3430 }
3431
3432 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3433                                         unsigned Op1, bool Op1IsKill) {
3434   if (RetVT != MVT::i64)
3435     return 0;
3436
3437   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3438                           Op0, Op0IsKill, Op1, Op1IsKill,
3439                           AArch64::XZR, /*IsKill=*/true);
3440 }
3441
3442 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3443                                      unsigned Op1Reg, bool Op1IsKill) {
3444   unsigned Opc = 0;
3445   bool NeedTrunc = false;
3446   uint64_t Mask = 0;
3447   switch (RetVT.SimpleTy) {
3448   default: return 0;
3449   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
3450   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
3451   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
3452   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
3453   }
3454
3455   const TargetRegisterClass *RC =
3456       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3457   if (NeedTrunc) {
3458     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3459     Op1IsKill = true;
3460   }
3461   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3462                                        Op1IsKill);
3463   if (NeedTrunc)
3464     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3465   return ResultReg;
3466 }
3467
3468 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3469                                      bool Op0IsKill, uint64_t Shift,
3470                                      bool IsZext) {
3471   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3472          "Unexpected source/return type pair.");
3473   assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3474           SrcVT == MVT::i64) && "Unexpected source value type.");
3475   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3476           RetVT == MVT::i64) && "Unexpected return value type.");
3477
3478   bool Is64Bit = (RetVT == MVT::i64);
3479   unsigned RegSize = Is64Bit ? 64 : 32;
3480   unsigned DstBits = RetVT.getSizeInBits();
3481   unsigned SrcBits = SrcVT.getSizeInBits();
3482
3483   // Don't deal with undefined shifts.
3484   if (Shift >= DstBits)
3485     return 0;
3486
3487   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3488   // {S|U}BFM Wd, Wn, #r, #s
3489   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
3490
3491   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3492   // %2 = shl i16 %1, 4
3493   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
3494   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
3495   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
3496   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
3497
3498   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3499   // %2 = shl i16 %1, 8
3500   // Wd<32+7-24,32-24> = Wn<7:0>
3501   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
3502   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
3503   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
3504
3505   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3506   // %2 = shl i16 %1, 12
3507   // Wd<32+3-20,32-20> = Wn<3:0>
3508   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
3509   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
3510   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
3511
3512   unsigned ImmR = RegSize - Shift;
3513   // Limit the width to the length of the source type.
3514   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
3515   static const unsigned OpcTable[2][2] = {
3516     {AArch64::SBFMWri, AArch64::SBFMXri},
3517     {AArch64::UBFMWri, AArch64::UBFMXri}
3518   };
3519   unsigned Opc = OpcTable[IsZext][Is64Bit];
3520   const TargetRegisterClass *RC =
3521       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3522   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3523     unsigned TmpReg = MRI.createVirtualRegister(RC);
3524     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3525             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3526         .addImm(0)
3527         .addReg(Op0, getKillRegState(Op0IsKill))
3528         .addImm(AArch64::sub_32);
3529     Op0 = TmpReg;
3530     Op0IsKill = true;
3531   }
3532   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3533 }
3534
3535 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3536                                      unsigned Op1Reg, bool Op1IsKill) {
3537   unsigned Opc = 0;
3538   bool NeedTrunc = false;
3539   uint64_t Mask = 0;
3540   switch (RetVT.SimpleTy) {
3541   default: return 0;
3542   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
3543   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
3544   case MVT::i32: Opc = AArch64::LSRVWr; break;
3545   case MVT::i64: Opc = AArch64::LSRVXr; break;
3546   }
3547
3548   const TargetRegisterClass *RC =
3549       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3550   if (NeedTrunc) {
3551     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
3552     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3553     Op0IsKill = Op1IsKill = true;
3554   }
3555   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3556                                        Op1IsKill);
3557   if (NeedTrunc)
3558     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3559   return ResultReg;
3560 }
3561
3562 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3563                                      bool Op0IsKill, uint64_t Shift,
3564                                      bool IsZExt) {
3565   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3566          "Unexpected source/return type pair.");
3567   assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3568           SrcVT == MVT::i64) && "Unexpected source value type.");
3569   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3570           RetVT == MVT::i64) && "Unexpected return value type.");
3571
3572   bool Is64Bit = (RetVT == MVT::i64);
3573   unsigned RegSize = Is64Bit ? 64 : 32;
3574   unsigned DstBits = RetVT.getSizeInBits();
3575   unsigned SrcBits = SrcVT.getSizeInBits();
3576
3577   // Don't deal with undefined shifts.
3578   if (Shift >= DstBits)
3579     return 0;
3580
3581   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3582   // {S|U}BFM Wd, Wn, #r, #s
3583   // Wd<s-r:0> = Wn<s:r> when r <= s
3584
3585   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3586   // %2 = lshr i16 %1, 4
3587   // Wd<7-4:0> = Wn<7:4>
3588   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
3589   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
3590   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
3591
3592   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3593   // %2 = lshr i16 %1, 8
3594   // Wd<7-7,0> = Wn<7:7>
3595   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
3596   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3597   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3598
3599   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3600   // %2 = lshr i16 %1, 12
3601   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
3602   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
3603   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3604   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3605
3606   if (Shift >= SrcBits && IsZExt)
3607     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
3608
3609   // It is not possible to fold a sign-extend into the LShr instruction. In this
3610   // case emit a sign-extend.
3611   if (!IsZExt) {
3612     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
3613     if (!Op0)
3614       return 0;
3615     Op0IsKill = true;
3616     SrcVT = RetVT;
3617     SrcBits = SrcVT.getSizeInBits();
3618     IsZExt = true;
3619   }
3620
3621   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
3622   unsigned ImmS = SrcBits - 1;
3623   static const unsigned OpcTable[2][2] = {
3624     {AArch64::SBFMWri, AArch64::SBFMXri},
3625     {AArch64::UBFMWri, AArch64::UBFMXri}
3626   };
3627   unsigned Opc = OpcTable[IsZExt][Is64Bit];
3628   const TargetRegisterClass *RC =
3629       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3630   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3631     unsigned TmpReg = MRI.createVirtualRegister(RC);
3632     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3633             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3634         .addImm(0)
3635         .addReg(Op0, getKillRegState(Op0IsKill))
3636         .addImm(AArch64::sub_32);
3637     Op0 = TmpReg;
3638     Op0IsKill = true;
3639   }
3640   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3641 }
3642
3643 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3644                                      unsigned Op1Reg, bool Op1IsKill) {
3645   unsigned Opc = 0;
3646   bool NeedTrunc = false;
3647   uint64_t Mask = 0;
3648   switch (RetVT.SimpleTy) {
3649   default: return 0;
3650   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
3651   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
3652   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
3653   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
3654   }
3655
3656   const TargetRegisterClass *RC =
3657       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3658   if (NeedTrunc) {
3659     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
3660     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3661     Op0IsKill = Op1IsKill = true;
3662   }
3663   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3664                                        Op1IsKill);
3665   if (NeedTrunc)
3666     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3667   return ResultReg;
3668 }
3669
3670 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3671                                      bool Op0IsKill, uint64_t Shift,
3672                                      bool IsZExt) {
3673   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3674          "Unexpected source/return type pair.");
3675   assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3676           SrcVT == MVT::i64) && "Unexpected source value type.");
3677   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3678           RetVT == MVT::i64) && "Unexpected return value type.");
3679
3680   bool Is64Bit = (RetVT == MVT::i64);
3681   unsigned RegSize = Is64Bit ? 64 : 32;
3682   unsigned DstBits = RetVT.getSizeInBits();
3683   unsigned SrcBits = SrcVT.getSizeInBits();
3684
3685   // Don't deal with undefined shifts.
3686   if (Shift >= DstBits)
3687     return 0;
3688
3689   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3690   // {S|U}BFM Wd, Wn, #r, #s
3691   // Wd<s-r:0> = Wn<s:r> when r <= s
3692
3693   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3694   // %2 = ashr i16 %1, 4
3695   // Wd<7-4:0> = Wn<7:4>
3696   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
3697   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
3698   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
3699
3700   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3701   // %2 = ashr i16 %1, 8
3702   // Wd<7-7,0> = Wn<7:7>
3703   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
3704   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3705   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3706
3707   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3708   // %2 = ashr i16 %1, 12
3709   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
3710   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
3711   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3712   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3713
3714   if (Shift >= SrcBits && IsZExt)
3715     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
3716
3717   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
3718   unsigned ImmS = SrcBits - 1;
3719   static const unsigned OpcTable[2][2] = {
3720     {AArch64::SBFMWri, AArch64::SBFMXri},
3721     {AArch64::UBFMWri, AArch64::UBFMXri}
3722   };
3723   unsigned Opc = OpcTable[IsZExt][Is64Bit];
3724   const TargetRegisterClass *RC =
3725       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3726   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3727     unsigned TmpReg = MRI.createVirtualRegister(RC);
3728     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3729             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3730         .addImm(0)
3731         .addReg(Op0, getKillRegState(Op0IsKill))
3732         .addImm(AArch64::sub_32);
3733     Op0 = TmpReg;
3734     Op0IsKill = true;
3735   }
3736   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3737 }
3738
3739 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
3740                                      bool IsZExt) {
3741   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
3742
3743   // FastISel does not have plumbing to deal with extensions where the SrcVT or
3744   // DestVT are odd things, so test to make sure that they are both types we can
3745   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
3746   // bail out to SelectionDAG.
3747   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
3748        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
3749       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
3750        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
3751     return 0;
3752
3753   unsigned Opc;
3754   unsigned Imm = 0;
3755
3756   switch (SrcVT.SimpleTy) {
3757   default:
3758     return 0;
3759   case MVT::i1:
3760     return emiti1Ext(SrcReg, DestVT, IsZExt);
3761   case MVT::i8:
3762     if (DestVT == MVT::i64)
3763       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3764     else
3765       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
3766     Imm = 7;
3767     break;
3768   case MVT::i16:
3769     if (DestVT == MVT::i64)
3770       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3771     else
3772       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
3773     Imm = 15;
3774     break;
3775   case MVT::i32:
3776     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
3777     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3778     Imm = 31;
3779     break;
3780   }
3781
3782   // Handle i8 and i16 as i32.
3783   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3784     DestVT = MVT::i32;
3785   else if (DestVT == MVT::i64) {
3786     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3787     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3788             TII.get(AArch64::SUBREG_TO_REG), Src64)
3789         .addImm(0)
3790         .addReg(SrcReg)
3791         .addImm(AArch64::sub_32);
3792     SrcReg = Src64;
3793   }
3794
3795   const TargetRegisterClass *RC =
3796       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3797   return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
3798 }
3799
3800 bool AArch64FastISel::selectIntExt(const Instruction *I) {
3801   // On ARM, in general, integer casts don't involve legal types; this code
3802   // handles promotable integers.  The high bits for a type smaller than
3803   // the register size are assumed to be undefined.
3804   Type *DestTy = I->getType();
3805   Value *Src = I->getOperand(0);
3806   Type *SrcTy = Src->getType();
3807
3808   unsigned SrcReg = getRegForValue(Src);
3809   if (!SrcReg)
3810     return false;
3811
3812   EVT SrcEVT = TLI.getValueType(SrcTy, true);
3813   EVT DestEVT = TLI.getValueType(DestTy, true);
3814   if (!SrcEVT.isSimple())
3815     return false;
3816   if (!DestEVT.isSimple())
3817     return false;
3818
3819   MVT SrcVT = SrcEVT.getSimpleVT();
3820   MVT DestVT = DestEVT.getSimpleVT();
3821   unsigned ResultReg = 0;
3822
3823   bool IsZExt = isa<ZExtInst>(I);
3824   // Check if it is an argument and if it is already zero/sign-extended.
3825   if (const auto *Arg = dyn_cast<Argument>(Src)) {
3826     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
3827       if (DestVT == MVT::i64) {
3828         ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
3829         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3830                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
3831           .addImm(0)
3832           .addReg(SrcReg)
3833           .addImm(AArch64::sub_32);
3834       } else
3835         ResultReg = SrcReg;
3836     }
3837   }
3838
3839   if (!ResultReg)
3840     ResultReg = emitIntExt(SrcVT, SrcReg, DestVT, IsZExt);
3841
3842   if (!ResultReg)
3843     return false;
3844
3845   updateValueMap(I, ResultReg);
3846   return true;
3847 }
3848
3849 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
3850   EVT DestEVT = TLI.getValueType(I->getType(), true);
3851   if (!DestEVT.isSimple())
3852     return false;
3853
3854   MVT DestVT = DestEVT.getSimpleVT();
3855   if (DestVT != MVT::i64 && DestVT != MVT::i32)
3856     return false;
3857
3858   unsigned DivOpc;
3859   bool Is64bit = (DestVT == MVT::i64);
3860   switch (ISDOpcode) {
3861   default:
3862     return false;
3863   case ISD::SREM:
3864     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
3865     break;
3866   case ISD::UREM:
3867     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
3868     break;
3869   }
3870   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
3871   unsigned Src0Reg = getRegForValue(I->getOperand(0));
3872   if (!Src0Reg)
3873     return false;
3874   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
3875
3876   unsigned Src1Reg = getRegForValue(I->getOperand(1));
3877   if (!Src1Reg)
3878     return false;
3879   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
3880
3881   const TargetRegisterClass *RC =
3882       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3883   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
3884                                      Src1Reg, /*IsKill=*/false);
3885   assert(QuotReg && "Unexpected DIV instruction emission failure.");
3886   // The remainder is computed as numerator - (quotient * denominator) using the
3887   // MSUB instruction.
3888   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
3889                                         Src1Reg, Src1IsKill, Src0Reg,
3890                                         Src0IsKill);
3891   updateValueMap(I, ResultReg);
3892   return true;
3893 }
3894
3895 bool AArch64FastISel::selectMul(const Instruction *I) {
3896   MVT VT;
3897   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
3898     return false;
3899
3900   if (VT.isVector())
3901     return selectBinaryOp(I, ISD::MUL);
3902
3903   const Value *Src0 = I->getOperand(0);
3904   const Value *Src1 = I->getOperand(1);
3905   if (const auto *C = dyn_cast<ConstantInt>(Src0))
3906     if (C->getValue().isPowerOf2())
3907       std::swap(Src0, Src1);
3908
3909   // Try to simplify to a shift instruction.
3910   if (const auto *C = dyn_cast<ConstantInt>(Src1))
3911     if (C->getValue().isPowerOf2()) {
3912       uint64_t ShiftVal = C->getValue().logBase2();
3913       MVT SrcVT = VT;
3914       bool IsZExt = true;
3915       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
3916         MVT VT;
3917         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
3918           SrcVT = VT;
3919           IsZExt = true;
3920           Src0 = ZExt->getOperand(0);
3921         }
3922       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
3923         MVT VT;
3924         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
3925           SrcVT = VT;
3926           IsZExt = false;
3927           Src0 = SExt->getOperand(0);
3928         }
3929       }
3930
3931       unsigned Src0Reg = getRegForValue(Src0);
3932       if (!Src0Reg)
3933         return false;
3934       bool Src0IsKill = hasTrivialKill(Src0);
3935
3936       unsigned ResultReg =
3937           emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
3938
3939       if (ResultReg) {
3940         updateValueMap(I, ResultReg);
3941         return true;
3942       }
3943     }
3944
3945   unsigned Src0Reg = getRegForValue(I->getOperand(0));
3946   if (!Src0Reg)
3947     return false;
3948   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
3949
3950   unsigned Src1Reg = getRegForValue(I->getOperand(1));
3951   if (!Src1Reg)
3952     return false;
3953   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
3954
3955   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
3956
3957   if (!ResultReg)
3958     return false;
3959
3960   updateValueMap(I, ResultReg);
3961   return true;
3962 }
3963
3964 bool AArch64FastISel::selectShift(const Instruction *I) {
3965   MVT RetVT;
3966   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
3967     return false;
3968
3969   if (RetVT.isVector())
3970     return selectOperator(I, I->getOpcode());
3971
3972   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
3973     unsigned ResultReg = 0;
3974     uint64_t ShiftVal = C->getZExtValue();
3975     MVT SrcVT = RetVT;
3976     bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true;
3977     const Value *Op0 = I->getOperand(0);
3978     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
3979       MVT TmpVT;
3980       if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
3981         SrcVT = TmpVT;
3982         IsZExt = true;
3983         Op0 = ZExt->getOperand(0);
3984       }
3985     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
3986       MVT TmpVT;
3987       if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
3988         SrcVT = TmpVT;
3989         IsZExt = false;
3990         Op0 = SExt->getOperand(0);
3991       }
3992     }
3993
3994     unsigned Op0Reg = getRegForValue(Op0);
3995     if (!Op0Reg)
3996       return false;
3997     bool Op0IsKill = hasTrivialKill(Op0);
3998
3999     switch (I->getOpcode()) {
4000     default: llvm_unreachable("Unexpected instruction.");
4001     case Instruction::Shl:
4002       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4003       break;
4004     case Instruction::AShr:
4005       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4006       break;
4007     case Instruction::LShr:
4008       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4009       break;
4010     }
4011     if (!ResultReg)
4012       return false;
4013
4014     updateValueMap(I, ResultReg);
4015     return true;
4016   }
4017
4018   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4019   if (!Op0Reg)
4020     return false;
4021   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4022
4023   unsigned Op1Reg = getRegForValue(I->getOperand(1));
4024   if (!Op1Reg)
4025     return false;
4026   bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4027
4028   unsigned ResultReg = 0;
4029   switch (I->getOpcode()) {
4030   default: llvm_unreachable("Unexpected instruction.");
4031   case Instruction::Shl:
4032     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4033     break;
4034   case Instruction::AShr:
4035     ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4036     break;
4037   case Instruction::LShr:
4038     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4039     break;
4040   }
4041
4042   if (!ResultReg)
4043     return false;
4044
4045   updateValueMap(I, ResultReg);
4046   return true;
4047 }
4048
4049 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4050   MVT RetVT, SrcVT;
4051
4052   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4053     return false;
4054   if (!isTypeLegal(I->getType(), RetVT))
4055     return false;
4056
4057   unsigned Opc;
4058   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4059     Opc = AArch64::FMOVWSr;
4060   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4061     Opc = AArch64::FMOVXDr;
4062   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4063     Opc = AArch64::FMOVSWr;
4064   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4065     Opc = AArch64::FMOVDXr;
4066   else
4067     return false;
4068
4069   const TargetRegisterClass *RC = nullptr;
4070   switch (RetVT.SimpleTy) {
4071   default: llvm_unreachable("Unexpected value type.");
4072   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4073   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4074   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4075   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4076   }
4077   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4078   if (!Op0Reg)
4079     return false;
4080   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4081   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4082
4083   if (!ResultReg)
4084     return false;
4085
4086   updateValueMap(I, ResultReg);
4087   return true;
4088 }
4089
4090 bool AArch64FastISel::selectFRem(const Instruction *I) {
4091   MVT RetVT;
4092   if (!isTypeLegal(I->getType(), RetVT))
4093     return false;
4094
4095   RTLIB::Libcall LC;
4096   switch (RetVT.SimpleTy) {
4097   default:
4098     return false;
4099   case MVT::f32:
4100     LC = RTLIB::REM_F32;
4101     break;
4102   case MVT::f64:
4103     LC = RTLIB::REM_F64;
4104     break;
4105   }
4106
4107   ArgListTy Args;
4108   Args.reserve(I->getNumOperands());
4109
4110   // Populate the argument list.
4111   for (auto &Arg : I->operands()) {
4112     ArgListEntry Entry;
4113     Entry.Val = Arg;
4114     Entry.Ty = Arg->getType();
4115     Args.push_back(Entry);
4116   }
4117
4118   CallLoweringInfo CLI;
4119   CLI.setCallee(TLI.getLibcallCallingConv(LC), I->getType(),
4120                 TLI.getLibcallName(LC), std::move(Args));
4121   if (!lowerCallTo(CLI))
4122     return false;
4123   updateValueMap(I, CLI.ResultReg);
4124   return true;
4125 }
4126
4127 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4128   MVT VT;
4129   if (!isTypeLegal(I->getType(), VT))
4130     return false;
4131
4132   if (!isa<ConstantInt>(I->getOperand(1)))
4133     return selectBinaryOp(I, ISD::SDIV);
4134
4135   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4136   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4137       !(C.isPowerOf2() || (-C).isPowerOf2()))
4138     return selectBinaryOp(I, ISD::SDIV);
4139
4140   unsigned Lg2 = C.countTrailingZeros();
4141   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4142   if (!Src0Reg)
4143     return false;
4144   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4145
4146   if (cast<BinaryOperator>(I)->isExact()) {
4147     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4148     if (!ResultReg)
4149       return false;
4150     updateValueMap(I, ResultReg);
4151     return true;
4152   }
4153
4154   unsigned Pow2MinusOne = (1 << Lg2) - 1;
4155   unsigned AddReg = emitAddSub_ri(/*UseAdd=*/true, VT, Src0Reg,
4156                                   /*IsKill=*/false, Pow2MinusOne);
4157   if (!AddReg)
4158     return false;
4159
4160   // (Src0 < 0) ? Pow2 - 1 : 0;
4161   if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4162     return false;
4163
4164   unsigned SelectOpc;
4165   const TargetRegisterClass *RC;
4166   if (VT == MVT::i64) {
4167     SelectOpc = AArch64::CSELXr;
4168     RC = &AArch64::GPR64RegClass;
4169   } else {
4170     SelectOpc = AArch64::CSELWr;
4171     RC = &AArch64::GPR32RegClass;
4172   }
4173   unsigned SelectReg =
4174       fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4175                        Src0IsKill, AArch64CC::LT);
4176   if (!SelectReg)
4177     return false;
4178
4179   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4180   // negate the result.
4181   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4182   unsigned ResultReg;
4183   if (C.isNegative())
4184     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4185                               SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4186   else
4187     ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4188
4189   if (!ResultReg)
4190     return false;
4191
4192   updateValueMap(I, ResultReg);
4193   return true;
4194 }
4195
4196 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
4197   switch (I->getOpcode()) {
4198   default:
4199     break;
4200   case Instruction::Add:
4201   case Instruction::Sub:
4202     return selectAddSub(I);
4203   case Instruction::Mul:
4204     return selectMul(I);
4205   case Instruction::SDiv:
4206     return selectSDiv(I);
4207   case Instruction::SRem:
4208     if (!selectBinaryOp(I, ISD::SREM))
4209       return selectRem(I, ISD::SREM);
4210     return true;
4211   case Instruction::URem:
4212     if (!selectBinaryOp(I, ISD::UREM))
4213       return selectRem(I, ISD::UREM);
4214     return true;
4215   case Instruction::Shl:
4216   case Instruction::LShr:
4217   case Instruction::AShr:
4218     return selectShift(I);
4219   case Instruction::And:
4220   case Instruction::Or:
4221   case Instruction::Xor:
4222     return selectLogicalOp(I);
4223   case Instruction::Br:
4224     return selectBranch(I);
4225   case Instruction::IndirectBr:
4226     return selectIndirectBr(I);
4227   case Instruction::BitCast:
4228     if (!FastISel::selectBitCast(I))
4229       return selectBitCast(I);
4230     return true;
4231   case Instruction::FPToSI:
4232     if (!selectCast(I, ISD::FP_TO_SINT))
4233       return selectFPToInt(I, /*Signed=*/true);
4234     return true;
4235   case Instruction::FPToUI:
4236     return selectFPToInt(I, /*Signed=*/false);
4237   case Instruction::ZExt:
4238     if (!selectCast(I, ISD::ZERO_EXTEND))
4239       return selectIntExt(I);
4240     return true;
4241   case Instruction::SExt:
4242     if (!selectCast(I, ISD::SIGN_EXTEND))
4243       return selectIntExt(I);
4244     return true;
4245   case Instruction::Trunc:
4246     if (!selectCast(I, ISD::TRUNCATE))
4247       return selectTrunc(I);
4248     return true;
4249   case Instruction::FPExt:
4250     return selectFPExt(I);
4251   case Instruction::FPTrunc:
4252     return selectFPTrunc(I);
4253   case Instruction::SIToFP:
4254     if (!selectCast(I, ISD::SINT_TO_FP))
4255       return selectIntToFP(I, /*Signed=*/true);
4256     return true;
4257   case Instruction::UIToFP:
4258     return selectIntToFP(I, /*Signed=*/false);
4259   case Instruction::Load:
4260     return selectLoad(I);
4261   case Instruction::Store:
4262     return selectStore(I);
4263   case Instruction::FCmp:
4264   case Instruction::ICmp:
4265     return selectCmp(I);
4266   case Instruction::Select:
4267     return selectSelect(I);
4268   case Instruction::Ret:
4269     return selectRet(I);
4270   case Instruction::FRem:
4271     return selectFRem(I);
4272   }
4273
4274   // fall-back to target-independent instruction selection.
4275   return selectOperator(I, I->getOpcode());
4276   // Silence warnings.
4277   (void)&CC_AArch64_DarwinPCS_VarArg;
4278 }
4279
4280 namespace llvm {
4281 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
4282                                         const TargetLibraryInfo *LibInfo) {
4283   return new AArch64FastISel(FuncInfo, LibInfo);
4284 }
4285 }