lib/Target/AArch64/AArch64FastISel.cpp

   1 //===-- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the AArch64-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // AArch64GenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AArch64.h"
  17 #include "AArch64Subtarget.h"
  18 #include "AArch64TargetMachine.h"
  19 #include "MCTargetDesc/AArch64AddressingModes.h"
  20 #include "llvm/Analysis/BranchProbabilityInfo.h"
  21 #include "llvm/CodeGen/CallingConvLower.h"
  22 #include "llvm/CodeGen/FastISel.h"
  23 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  24 #include "llvm/CodeGen/MachineConstantPool.h"
  25 #include "llvm/CodeGen/MachineFrameInfo.h"
  26 #include "llvm/CodeGen/MachineInstrBuilder.h"
  27 #include "llvm/CodeGen/MachineRegisterInfo.h"
  28 #include "llvm/IR/CallingConv.h"
  29 #include "llvm/IR/DataLayout.h"
  30 #include "llvm/IR/DerivedTypes.h"
  31 #include "llvm/IR/Function.h"
  32 #include "llvm/IR/GetElementPtrTypeIterator.h"
  33 #include "llvm/IR/GlobalAlias.h"
  34 #include "llvm/IR/GlobalVariable.h"
  35 #include "llvm/IR/Instructions.h"
  36 #include "llvm/IR/IntrinsicInst.h"
  37 #include "llvm/IR/Operator.h"
  38 #include "llvm/Support/CommandLine.h"
  39 using namespace llvm;
  40
  41 namespace {
  42
  43 class AArch64FastISel final : public FastISel {
  44   class Address {
  45   public:
  46     typedef enum {
  47       RegBase,
  48       FrameIndexBase
  49     } BaseKind;
  50
  51   private:
  52     BaseKind Kind;
  53     AArch64_AM::ShiftExtendType ExtType;
  54     union {
  55       unsigned Reg;
  56       int FI;
  57     } Base;
  58     unsigned OffsetReg;
  59     unsigned Shift;
  60     int64_t Offset;
  61     const GlobalValue *GV;
  62
  63   public:
  64     Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
  65       OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
  66     void setKind(BaseKind K) { Kind = K; }
  67     BaseKind getKind() const { return Kind; }
  68     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
  69     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
  70     bool isRegBase() const { return Kind == RegBase; }
  71     bool isFIBase() const { return Kind == FrameIndexBase; }
  72     void setReg(unsigned Reg) {
  73       assert(isRegBase() && "Invalid base register access!");
  74       Base.Reg = Reg;
  75     }
  76     unsigned getReg() const {
  77       assert(isRegBase() && "Invalid base register access!");
  78       return Base.Reg;
  79     }
  80     void setOffsetReg(unsigned Reg) {
  81       assert(isRegBase() && "Invalid offset register access!");
  82       OffsetReg = Reg;
  83     }
  84     unsigned getOffsetReg() const {
  85       assert(isRegBase() && "Invalid offset register access!");
  86       return OffsetReg;
  87     }
  88     void setFI(unsigned FI) {
  89       assert(isFIBase() && "Invalid base frame index  access!");
  90       Base.FI = FI;
  91     }
  92     unsigned getFI() const {
  93       assert(isFIBase() && "Invalid base frame index access!");
  94       return Base.FI;
  95     }
  96     void setOffset(int64_t O) { Offset = O; }
  97     int64_t getOffset() { return Offset; }
  98     void setShift(unsigned S) { Shift = S; }
  99     unsigned getShift() { return Shift; }
 100
 101     void setGlobalValue(const GlobalValue *G) { GV = G; }
 102     const GlobalValue *getGlobalValue() { return GV; }
 103   };
 104
 105   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
 106   /// make the right decision when generating code for different targets.
 107   const AArch64Subtarget *Subtarget;
 108   LLVMContext *Context;
 109
 110   bool fastLowerArguments() override;
 111   bool fastLowerCall(CallLoweringInfo &CLI) override;
 112   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
 113
 114 private:
 115   // Selection routines.
 116   bool selectAddSub(const Instruction *I);
 117   bool selectLogicalOp(const Instruction *I);
 118   bool selectLoad(const Instruction *I);
 119   bool selectStore(const Instruction *I);
 120   bool selectBranch(const Instruction *I);
 121   bool selectIndirectBr(const Instruction *I);
 122   bool selectCmp(const Instruction *I);
 123   bool selectSelect(const Instruction *I);
 124   bool selectFPExt(const Instruction *I);
 125   bool selectFPTrunc(const Instruction *I);
 126   bool selectFPToInt(const Instruction *I, bool Signed);
 127   bool selectIntToFP(const Instruction *I, bool Signed);
 128   bool selectRem(const Instruction *I, unsigned ISDOpcode);
 129   bool selectRet(const Instruction *I);
 130   bool selectTrunc(const Instruction *I);
 131   bool selectIntExt(const Instruction *I);
 132   bool selectMul(const Instruction *I);
 133   bool selectShift(const Instruction *I);
 134   bool selectBitCast(const Instruction *I);
 135   bool selectFRem(const Instruction *I);
 136   bool selectSDiv(const Instruction *I);
 137
 138   // Utility helper routines.
 139   bool isTypeLegal(Type *Ty, MVT &VT);
 140   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
 141   bool isValueAvailable(const Value *V) const;
 142   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
 143   bool computeCallAddress(const Value *V, Address &Addr);
 144   bool simplifyAddress(Address &Addr, MVT VT);
 145   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 146                             unsigned Flags, unsigned ScaleFactor,
 147                             MachineMemOperand *MMO);
 148   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
 149   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
 150                           unsigned Alignment);
 151   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
 152                          const Value *Cond);
 153
 154   // Emit helper routines.
 155   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
 156                       const Value *RHS, bool SetFlags = false,
 157                       bool WantResult = true,  bool IsZExt = false);
 158   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
 159                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 160                          bool SetFlags = false, bool WantResult = true);
 161   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
 162                          bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
 163                          bool WantResult = true);
 164   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
 165                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 166                          AArch64_AM::ShiftExtendType ShiftType,
 167                          uint64_t ShiftImm, bool SetFlags = false,
 168                          bool WantResult = true);
 169   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
 170                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 171                           AArch64_AM::ShiftExtendType ExtType,
 172                           uint64_t ShiftImm, bool SetFlags = false,
 173                          bool WantResult = true);
 174
 175   // Emit functions.
 176   bool emitCompareAndBranch(const BranchInst *BI);
 177   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
 178   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
 179   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 180   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
 181   bool emitLoad(MVT VT, MVT ResultVT, unsigned &ResultReg, Address Addr,
 182                 bool WantZExt = true, MachineMemOperand *MMO = nullptr);
 183   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
 184                  MachineMemOperand *MMO = nullptr);
 185   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
 186   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
 187   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
 188                    bool SetFlags = false, bool WantResult = true,
 189                    bool IsZExt = false);
 190   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
 191                    bool SetFlags = false, bool WantResult = true,
 192                    bool IsZExt = false);
 193   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 194                        unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
 195   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 196                        unsigned RHSReg, bool RHSIsKill,
 197                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
 198                        bool WantResult = true);
 199   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
 200                          const Value *RHS);
 201   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 202                             bool LHSIsKill, uint64_t Imm);
 203   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 204                             bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 205                             uint64_t ShiftImm);
 206   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 207   unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 208                       unsigned Op1, bool Op1IsKill);
 209   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 210                         unsigned Op1, bool Op1IsKill);
 211   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 212                         unsigned Op1, bool Op1IsKill);
 213   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 214                       unsigned Op1Reg, bool Op1IsKill);
 215   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 216                       uint64_t Imm, bool IsZExt = true);
 217   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 218                       unsigned Op1Reg, bool Op1IsKill);
 219   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 220                       uint64_t Imm, bool IsZExt = true);
 221   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 222                       unsigned Op1Reg, bool Op1IsKill);
 223   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 224                       uint64_t Imm, bool IsZExt = false);
 225
 226   unsigned materializeInt(const ConstantInt *CI, MVT VT);
 227   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
 228   unsigned materializeGV(const GlobalValue *GV);
 229
 230   // Call handling routines.
 231 private:
 232   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
 233   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
 234                        unsigned &NumBytes);
 235   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
 236
 237 public:
 238   // Backend specific FastISel code.
 239   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
 240   unsigned fastMaterializeConstant(const Constant *C) override;
 241   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
 242
 243   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
 244                          const TargetLibraryInfo *LibInfo)
 245       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
 246     Subtarget = &TM.getSubtarget<AArch64Subtarget>();
 247     Context = &FuncInfo.Fn->getContext();
 248   }
 249
 250   bool fastSelectInstruction(const Instruction *I) override;
 251
 252 #include "AArch64GenFastISel.inc"
 253 };
 254
 255 } // end anonymous namespace
 256
 257 #include "AArch64GenCallingConv.inc"
 258
 259 /// \brief Check if the sign-/zero-extend will be a noop.
 260 static bool isIntExtFree(const Instruction *I) {
 261   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
 262          "Unexpected integer extend instruction.");
 263   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
 264          "Unexpected value type.");
 265   bool IsZExt = isa<ZExtInst>(I);
 266
 267   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
 268     if (LI->hasOneUse())
 269       return true;
 270
 271   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
 272     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
 273       return true;
 274
 275   return false;
 276 }
 277
 278 /// \brief Determine the implicit scale factor that is applied by a memory
 279 /// operation for a given value type.
 280 static unsigned getImplicitScaleFactor(MVT VT) {
 281   switch (VT.SimpleTy) {
 282   default:
 283     return 0;    // invalid
 284   case MVT::i1:  // fall-through
 285   case MVT::i8:
 286     return 1;
 287   case MVT::i16:
 288     return 2;
 289   case MVT::i32: // fall-through
 290   case MVT::f32:
 291     return 4;
 292   case MVT::i64: // fall-through
 293   case MVT::f64:
 294     return 8;
 295   }
 296 }
 297
 298 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
 299   if (CC == CallingConv::WebKit_JS)
 300     return CC_AArch64_WebKit_JS;
 301   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 302 }
 303
 304 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
 305   assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
 306          "Alloca should always return a pointer.");
 307
 308   // Don't handle dynamic allocas.
 309   if (!FuncInfo.StaticAllocaMap.count(AI))
 310     return 0;
 311
 312   DenseMap<const AllocaInst *, int>::iterator SI =
 313       FuncInfo.StaticAllocaMap.find(AI);
 314
 315   if (SI != FuncInfo.StaticAllocaMap.end()) {
 316     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 317     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 318             ResultReg)
 319         .addFrameIndex(SI->second)
 320         .addImm(0)
 321         .addImm(0);
 322     return ResultReg;
 323   }
 324
 325   return 0;
 326 }
 327
 328 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
 329   if (VT > MVT::i64)
 330     return 0;
 331
 332   if (!CI->isZero())
 333     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
 334
 335   // Create a copy from the zero register to materialize a "0" value.
 336   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
 337                                                    : &AArch64::GPR32RegClass;
 338   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
 339   unsigned ResultReg = createResultReg(RC);
 340   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
 341           ResultReg).addReg(ZeroReg, getKillRegState(true));
 342   return ResultReg;
 343 }
 344
 345 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
 346   // Positive zero (+0.0) has to be materialized with a fmov from the zero
 347   // register, because the immediate version of fmov cannot encode zero.
 348   if (CFP->isNullValue())
 349     return fastMaterializeFloatZero(CFP);
 350
 351   if (VT != MVT::f32 && VT != MVT::f64)
 352     return 0;
 353
 354   const APFloat Val = CFP->getValueAPF();
 355   bool Is64Bit = (VT == MVT::f64);
 356   // This checks to see if we can use FMOV instructions to materialize
 357   // a constant, otherwise we have to materialize via the constant pool.
 358   if (TLI.isFPImmLegal(Val, VT)) {
 359     int Imm =
 360         Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
 361     assert((Imm != -1) && "Cannot encode floating-point constant.");
 362     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
 363     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
 364   }
 365
 366   // Materialize via constant pool.  MachineConstantPool wants an explicit
 367   // alignment.
 368   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
 369   if (Align == 0)
 370     Align = DL.getTypeAllocSize(CFP->getType());
 371
 372   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 373   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 374   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 375           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
 376
 377   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
 378   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 379   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 380       .addReg(ADRPReg)
 381       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 382   return ResultReg;
 383 }
 384
 385 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
 386   // We can't handle thread-local variables quickly yet.
 387   if (GV->isThreadLocal())
 388     return 0;
 389
 390   // MachO still uses GOT for large code-model accesses, but ELF requires
 391   // movz/movk sequences, which FastISel doesn't handle yet.
 392   if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
 393     return 0;
 394
 395   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
 396
 397   EVT DestEVT = TLI.getValueType(GV->getType(), true);
 398   if (!DestEVT.isSimple())
 399     return 0;
 400
 401   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 402   unsigned ResultReg;
 403
 404   if (OpFlags & AArch64II::MO_GOT) {
 405     // ADRP + LDRX
 406     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 407             ADRPReg)
 408       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
 409
 410     ResultReg = createResultReg(&AArch64::GPR64RegClass);
 411     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
 412             ResultReg)
 413       .addReg(ADRPReg)
 414       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
 415                         AArch64II::MO_NC);
 416   } else if (OpFlags & AArch64II::MO_CONSTPOOL) {
 417     // We can't handle addresses loaded from a constant pool quickly yet.
 418     return 0;
 419   } else {
 420     // ADRP + ADDX
 421     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 422             ADRPReg)
 423       .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
 424
 425     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 426     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 427             ResultReg)
 428       .addReg(ADRPReg)
 429       .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
 430       .addImm(0);
 431   }
 432   return ResultReg;
 433 }
 434
 435 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
 436   EVT CEVT = TLI.getValueType(C->getType(), true);
 437
 438   // Only handle simple types.
 439   if (!CEVT.isSimple())
 440     return 0;
 441   MVT VT = CEVT.getSimpleVT();
 442
 443   if (const auto *CI = dyn_cast<ConstantInt>(C))
 444     return materializeInt(CI, VT);
 445   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 446     return materializeFP(CFP, VT);
 447   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
 448     return materializeGV(GV);
 449
 450   return 0;
 451 }
 452
 453 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
 454   assert(CFP->isNullValue() &&
 455          "Floating-point constant is not a positive zero.");
 456   MVT VT;
 457   if (!isTypeLegal(CFP->getType(), VT))
 458     return 0;
 459
 460   if (VT != MVT::f32 && VT != MVT::f64)
 461     return 0;
 462
 463   bool Is64Bit = (VT == MVT::f64);
 464   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
 465   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
 466   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
 467 }
 468
 469 /// \brief Check if the multiply is by a power-of-2 constant.
 470 static bool isMulPowOf2(const Value *I) {
 471   if (const auto *MI = dyn_cast<MulOperator>(I)) {
 472     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
 473       if (C->getValue().isPowerOf2())
 474         return true;
 475     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
 476       if (C->getValue().isPowerOf2())
 477         return true;
 478   }
 479   return false;
 480 }
 481
 482 // Computes the address to get to an object.
 483 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
 484 {
 485   const User *U = nullptr;
 486   unsigned Opcode = Instruction::UserOp1;
 487   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
 488     // Don't walk into other basic blocks unless the object is an alloca from
 489     // another block, otherwise it may not have a virtual register assigned.
 490     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
 491         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 492       Opcode = I->getOpcode();
 493       U = I;
 494     }
 495   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
 496     Opcode = C->getOpcode();
 497     U = C;
 498   }
 499
 500   if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
 501     if (Ty->getAddressSpace() > 255)
 502       // Fast instruction selection doesn't support the special
 503       // address spaces.
 504       return false;
 505
 506   switch (Opcode) {
 507   default:
 508     break;
 509   case Instruction::BitCast: {
 510     // Look through bitcasts.
 511     return computeAddress(U->getOperand(0), Addr, Ty);
 512   }
 513   case Instruction::IntToPtr: {
 514     // Look past no-op inttoptrs.
 515     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 516       return computeAddress(U->getOperand(0), Addr, Ty);
 517     break;
 518   }
 519   case Instruction::PtrToInt: {
 520     // Look past no-op ptrtoints.
 521     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
 522       return computeAddress(U->getOperand(0), Addr, Ty);
 523     break;
 524   }
 525   case Instruction::GetElementPtr: {
 526     Address SavedAddr = Addr;
 527     uint64_t TmpOffset = Addr.getOffset();
 528
 529     // Iterate through the GEP folding the constants into offsets where
 530     // we can.
 531     gep_type_iterator GTI = gep_type_begin(U);
 532     for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
 533          ++i, ++GTI) {
 534       const Value *Op = *i;
 535       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
 536         const StructLayout *SL = DL.getStructLayout(STy);
 537         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
 538         TmpOffset += SL->getElementOffset(Idx);
 539       } else {
 540         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
 541         for (;;) {
 542           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
 543             // Constant-offset addressing.
 544             TmpOffset += CI->getSExtValue() * S;
 545             break;
 546           }
 547           if (canFoldAddIntoGEP(U, Op)) {
 548             // A compatible add with a constant operand. Fold the constant.
 549             ConstantInt *CI =
 550                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
 551             TmpOffset += CI->getSExtValue() * S;
 552             // Iterate on the other operand.
 553             Op = cast<AddOperator>(Op)->getOperand(0);
 554             continue;
 555           }
 556           // Unsupported
 557           goto unsupported_gep;
 558         }
 559       }
 560     }
 561
 562     // Try to grab the base operand now.
 563     Addr.setOffset(TmpOffset);
 564     if (computeAddress(U->getOperand(0), Addr, Ty))
 565       return true;
 566
 567     // We failed, restore everything and try the other options.
 568     Addr = SavedAddr;
 569
 570   unsupported_gep:
 571     break;
 572   }
 573   case Instruction::Alloca: {
 574     const AllocaInst *AI = cast<AllocaInst>(Obj);
 575     DenseMap<const AllocaInst *, int>::iterator SI =
 576         FuncInfo.StaticAllocaMap.find(AI);
 577     if (SI != FuncInfo.StaticAllocaMap.end()) {
 578       Addr.setKind(Address::FrameIndexBase);
 579       Addr.setFI(SI->second);
 580       return true;
 581     }
 582     break;
 583   }
 584   case Instruction::Add: {
 585     // Adds of constants are common and easy enough.
 586     const Value *LHS = U->getOperand(0);
 587     const Value *RHS = U->getOperand(1);
 588
 589     if (isa<ConstantInt>(LHS))
 590       std::swap(LHS, RHS);
 591
 592     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 593       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
 594       return computeAddress(LHS, Addr, Ty);
 595     }
 596
 597     Address Backup = Addr;
 598     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
 599       return true;
 600     Addr = Backup;
 601
 602     break;
 603   }
 604   case Instruction::Sub: {
 605     // Subs of constants are common and easy enough.
 606     const Value *LHS = U->getOperand(0);
 607     const Value *RHS = U->getOperand(1);
 608
 609     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 610       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
 611       return computeAddress(LHS, Addr, Ty);
 612     }
 613     break;
 614   }
 615   case Instruction::Shl: {
 616     if (Addr.getOffsetReg())
 617       break;
 618
 619     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
 620     if (!CI)
 621       break;
 622
 623     unsigned Val = CI->getZExtValue();
 624     if (Val < 1 || Val > 3)
 625       break;
 626
 627     uint64_t NumBytes = 0;
 628     if (Ty && Ty->isSized()) {
 629       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 630       NumBytes = NumBits / 8;
 631       if (!isPowerOf2_64(NumBits))
 632         NumBytes = 0;
 633     }
 634
 635     if (NumBytes != (1ULL << Val))
 636       break;
 637
 638     Addr.setShift(Val);
 639     Addr.setExtendType(AArch64_AM::LSL);
 640
 641     const Value *Src = U->getOperand(0);
 642     if (const auto *I = dyn_cast<Instruction>(Src))
 643       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 644         Src = I;
 645
 646     // Fold the zext or sext when it won't become a noop.
 647     if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
 648       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 649           Addr.setExtendType(AArch64_AM::UXTW);
 650           Src = ZE->getOperand(0);
 651       }
 652     } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
 653       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
 654         Addr.setExtendType(AArch64_AM::SXTW);
 655         Src = SE->getOperand(0);
 656       }
 657     }
 658
 659     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
 660       if (AI->getOpcode() == Instruction::And) {
 661         const Value *LHS = AI->getOperand(0);
 662         const Value *RHS = AI->getOperand(1);
 663
 664         if (const auto *C = dyn_cast<ConstantInt>(LHS))
 665           if (C->getValue() == 0xffffffff)
 666             std::swap(LHS, RHS);
 667
 668         if (const auto *C = dyn_cast<ConstantInt>(RHS))
 669           if (C->getValue() == 0xffffffff) {
 670             Addr.setExtendType(AArch64_AM::UXTW);
 671             unsigned Reg = getRegForValue(LHS);
 672             if (!Reg)
 673               return false;
 674             bool RegIsKill = hasTrivialKill(LHS);
 675             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
 676                                              AArch64::sub_32);
 677             Addr.setOffsetReg(Reg);
 678             return true;
 679           }
 680       }
 681
 682     unsigned Reg = getRegForValue(Src);
 683     if (!Reg)
 684       return false;
 685     Addr.setOffsetReg(Reg);
 686     return true;
 687   }
 688   case Instruction::Mul: {
 689     if (Addr.getOffsetReg())
 690       break;
 691
 692     if (!isMulPowOf2(U))
 693       break;
 694
 695     const Value *LHS = U->getOperand(0);
 696     const Value *RHS = U->getOperand(1);
 697
 698     // Canonicalize power-of-2 value to the RHS.
 699     if (const auto *C = dyn_cast<ConstantInt>(LHS))
 700       if (C->getValue().isPowerOf2())
 701         std::swap(LHS, RHS);
 702
 703     assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
 704     const auto *C = cast<ConstantInt>(RHS);
 705     unsigned Val = C->getValue().logBase2();
 706     if (Val < 1 || Val > 3)
 707       break;
 708
 709     uint64_t NumBytes = 0;
 710     if (Ty && Ty->isSized()) {
 711       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 712       NumBytes = NumBits / 8;
 713       if (!isPowerOf2_64(NumBits))
 714         NumBytes = 0;
 715     }
 716
 717     if (NumBytes != (1ULL << Val))
 718       break;
 719
 720     Addr.setShift(Val);
 721     Addr.setExtendType(AArch64_AM::LSL);
 722
 723     const Value *Src = LHS;
 724     if (const auto *I = dyn_cast<Instruction>(Src))
 725       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 726         Src = I;
 727
 728
 729     // Fold the zext or sext when it won't become a noop.
 730     if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
 731       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 732         Addr.setExtendType(AArch64_AM::UXTW);
 733         Src = ZE->getOperand(0);
 734       }
 735     } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
 736       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
 737         Addr.setExtendType(AArch64_AM::SXTW);
 738         Src = SE->getOperand(0);
 739       }
 740     }
 741
 742     unsigned Reg = getRegForValue(Src);
 743     if (!Reg)
 744       return false;
 745     Addr.setOffsetReg(Reg);
 746     return true;
 747   }
 748   case Instruction::And: {
 749     if (Addr.getOffsetReg())
 750       break;
 751
 752     if (DL.getTypeSizeInBits(Ty) != 8)
 753       break;
 754
 755     const Value *LHS = U->getOperand(0);
 756     const Value *RHS = U->getOperand(1);
 757
 758     if (const auto *C = dyn_cast<ConstantInt>(LHS))
 759       if (C->getValue() == 0xffffffff)
 760         std::swap(LHS, RHS);
 761
 762     if (const auto *C = dyn_cast<ConstantInt>(RHS))
 763       if (C->getValue() == 0xffffffff) {
 764         Addr.setShift(0);
 765         Addr.setExtendType(AArch64_AM::LSL);
 766         Addr.setExtendType(AArch64_AM::UXTW);
 767
 768         unsigned Reg = getRegForValue(LHS);
 769         if (!Reg)
 770           return false;
 771         bool RegIsKill = hasTrivialKill(LHS);
 772         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
 773                                          AArch64::sub_32);
 774         Addr.setOffsetReg(Reg);
 775         return true;
 776       }
 777     break;
 778   }
 779   case Instruction::SExt:
 780   case Instruction::ZExt: {
 781     if (!Addr.getReg() || Addr.getOffsetReg())
 782       break;
 783
 784     const Value *Src = nullptr;
 785     // Fold the zext or sext when it won't become a noop.
 786     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
 787       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 788         Addr.setExtendType(AArch64_AM::UXTW);
 789         Src = ZE->getOperand(0);
 790       }
 791     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
 792       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
 793         Addr.setExtendType(AArch64_AM::SXTW);
 794         Src = SE->getOperand(0);
 795       }
 796     }
 797
 798     if (!Src)
 799       break;
 800
 801     Addr.setShift(0);
 802     unsigned Reg = getRegForValue(Src);
 803     if (!Reg)
 804       return false;
 805     Addr.setOffsetReg(Reg);
 806     return true;
 807   }
 808   } // end switch
 809
 810   if (Addr.getReg()) {
 811     if (!Addr.getOffsetReg()) {
 812       unsigned Reg = getRegForValue(Obj);
 813       if (!Reg)
 814         return false;
 815       Addr.setOffsetReg(Reg);
 816       return true;
 817     }
 818     return false;
 819   }
 820
 821   unsigned Reg = getRegForValue(Obj);
 822   if (!Reg)
 823     return false;
 824   Addr.setReg(Reg);
 825   return true;
 826 }
 827
 828 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
 829   const User *U = nullptr;
 830   unsigned Opcode = Instruction::UserOp1;
 831   bool InMBB = true;
 832
 833   if (const auto *I = dyn_cast<Instruction>(V)) {
 834     Opcode = I->getOpcode();
 835     U = I;
 836     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
 837   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
 838     Opcode = C->getOpcode();
 839     U = C;
 840   }
 841
 842   switch (Opcode) {
 843   default: break;
 844   case Instruction::BitCast:
 845     // Look past bitcasts if its operand is in the same BB.
 846     if (InMBB)
 847       return computeCallAddress(U->getOperand(0), Addr);
 848     break;
 849   case Instruction::IntToPtr:
 850     // Look past no-op inttoptrs if its operand is in the same BB.
 851     if (InMBB &&
 852         TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 853       return computeCallAddress(U->getOperand(0), Addr);
 854     break;
 855   case Instruction::PtrToInt:
 856     // Look past no-op ptrtoints if its operand is in the same BB.
 857     if (InMBB &&
 858         TLI.getValueType(U->getType()) == TLI.getPointerTy())
 859       return computeCallAddress(U->getOperand(0), Addr);
 860     break;
 861   }
 862
 863   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
 864     Addr.setGlobalValue(GV);
 865     return true;
 866   }
 867
 868   // If all else fails, try to materialize the value in a register.
 869   if (!Addr.getGlobalValue()) {
 870     Addr.setReg(getRegForValue(V));
 871     return Addr.getReg() != 0;
 872   }
 873
 874   return false;
 875 }
 876
 877
 878 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
 879   EVT evt = TLI.getValueType(Ty, true);
 880
 881   // Only handle simple types.
 882   if (evt == MVT::Other || !evt.isSimple())
 883     return false;
 884   VT = evt.getSimpleVT();
 885
 886   // This is a legal type, but it's not something we handle in fast-isel.
 887   if (VT == MVT::f128)
 888     return false;
 889
 890   // Handle all other legal types, i.e. a register that will directly hold this
 891   // value.
 892   return TLI.isTypeLegal(VT);
 893 }
 894
 895 /// \brief Determine if the value type is supported by FastISel.
 896 ///
 897 /// FastISel for AArch64 can handle more value types than are legal. This adds
 898 /// simple value type such as i1, i8, and i16.
 899 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
 900   if (Ty->isVectorTy() && !IsVectorAllowed)
 901     return false;
 902
 903   if (isTypeLegal(Ty, VT))
 904     return true;
 905
 906   // If this is a type than can be sign or zero-extended to a basic operation
 907   // go ahead and accept it now.
 908   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
 909     return true;
 910
 911   return false;
 912 }
 913
 914 bool AArch64FastISel::isValueAvailable(const Value *V) const {
 915   if (!isa<Instruction>(V))
 916     return true;
 917
 918   const auto *I = cast<Instruction>(V);
 919   if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 920     return true;
 921
 922   return false;
 923 }
 924
 925 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
 926   unsigned ScaleFactor = getImplicitScaleFactor(VT);
 927   if (!ScaleFactor)
 928     return false;
 929
 930   bool ImmediateOffsetNeedsLowering = false;
 931   bool RegisterOffsetNeedsLowering = false;
 932   int64_t Offset = Addr.getOffset();
 933   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
 934     ImmediateOffsetNeedsLowering = true;
 935   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
 936            !isUInt<12>(Offset / ScaleFactor))
 937     ImmediateOffsetNeedsLowering = true;
 938
 939   // Cannot encode an offset register and an immediate offset in the same
 940   // instruction. Fold the immediate offset into the load/store instruction and
 941   // emit an additonal add to take care of the offset register.
 942   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() &&
 943       Addr.getOffsetReg())
 944     RegisterOffsetNeedsLowering = true;
 945
 946   // Cannot encode zero register as base.
 947   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
 948     RegisterOffsetNeedsLowering = true;
 949
 950   // If this is a stack pointer and the offset needs to be simplified then put
 951   // the alloca address into a register, set the base type back to register and
 952   // continue. This should almost never happen.
 953   if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) {
 954     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 955     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 956             ResultReg)
 957       .addFrameIndex(Addr.getFI())
 958       .addImm(0)
 959       .addImm(0);
 960     Addr.setKind(Address::RegBase);
 961     Addr.setReg(ResultReg);
 962   }
 963
 964   if (RegisterOffsetNeedsLowering) {
 965     unsigned ResultReg = 0;
 966     if (Addr.getReg()) {
 967       if (Addr.getExtendType() == AArch64_AM::SXTW ||
 968           Addr.getExtendType() == AArch64_AM::UXTW   )
 969         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
 970                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
 971                                   /*TODO:IsKill=*/false, Addr.getExtendType(),
 972                                   Addr.getShift());
 973       else
 974         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
 975                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
 976                                   /*TODO:IsKill=*/false, AArch64_AM::LSL,
 977                                   Addr.getShift());
 978     } else {
 979       if (Addr.getExtendType() == AArch64_AM::UXTW)
 980         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
 981                                /*Op0IsKill=*/false, Addr.getShift(),
 982                                /*IsZExt=*/true);
 983       else if (Addr.getExtendType() == AArch64_AM::SXTW)
 984         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
 985                                /*Op0IsKill=*/false, Addr.getShift(),
 986                                /*IsZExt=*/false);
 987       else
 988         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
 989                                /*Op0IsKill=*/false, Addr.getShift());
 990     }
 991     if (!ResultReg)
 992       return false;
 993
 994     Addr.setReg(ResultReg);
 995     Addr.setOffsetReg(0);
 996     Addr.setShift(0);
 997     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
 998   }
 999
1000   // Since the offset is too large for the load/store instruction get the
1001   // reg+offset into a register.
1002   if (ImmediateOffsetNeedsLowering) {
1003     unsigned ResultReg;
1004     if (Addr.getReg()) {
1005       // Try to fold the immediate into the add instruction.
1006       if (Offset < 0)
1007         ResultReg = emitAddSub_ri(/*UseAdd=*/false, MVT::i64, Addr.getReg(),
1008                                   /*IsKill=*/false, -Offset);
1009       else
1010         ResultReg = emitAddSub_ri(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1011                                   /*IsKill=*/false, Offset);
1012       if (!ResultReg) {
1013         unsigned ImmReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1014         ResultReg = emitAddSub_rr(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1015                                   /*IsKill=*/false, ImmReg, /*IsKill=*/true);
1016       }
1017     } else
1018       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1019
1020     if (!ResultReg)
1021       return false;
1022     Addr.setReg(ResultReg);
1023     Addr.setOffset(0);
1024   }
1025   return true;
1026 }
1027
1028 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1029                                            const MachineInstrBuilder &MIB,
1030                                            unsigned Flags,
1031                                            unsigned ScaleFactor,
1032                                            MachineMemOperand *MMO) {
1033   int64_t Offset = Addr.getOffset() / ScaleFactor;
1034   // Frame base works a bit differently. Handle it separately.
1035   if (Addr.isFIBase()) {
1036     int FI = Addr.getFI();
1037     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1038     // and alignment should be based on the VT.
1039     MMO = FuncInfo.MF->getMachineMemOperand(
1040       MachinePointerInfo::getFixedStack(FI, Offset), Flags,
1041       MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1042     // Now add the rest of the operands.
1043     MIB.addFrameIndex(FI).addImm(Offset);
1044   } else {
1045     assert(Addr.isRegBase() && "Unexpected address kind.");
1046     const MCInstrDesc &II = MIB->getDesc();
1047     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1048     Addr.setReg(
1049       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1050     Addr.setOffsetReg(
1051       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1052     if (Addr.getOffsetReg()) {
1053       assert(Addr.getOffset() == 0 && "Unexpected offset");
1054       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1055                       Addr.getExtendType() == AArch64_AM::SXTX;
1056       MIB.addReg(Addr.getReg());
1057       MIB.addReg(Addr.getOffsetReg());
1058       MIB.addImm(IsSigned);
1059       MIB.addImm(Addr.getShift() != 0);
1060     } else {
1061       MIB.addReg(Addr.getReg());
1062       MIB.addImm(Offset);
1063     }
1064   }
1065
1066   if (MMO)
1067     MIB.addMemOperand(MMO);
1068 }
1069
/// \brief Emit an add (\p UseAdd) or subtract of two IR values, folding the
/// right-hand side into the instruction whenever possible.
///
/// The forms are tried in this order: immediate, extended register (only for
/// i8/i16), shifted register for a multiply by a power of two, shifted
/// register for a shift by a constant, and finally plain register-register.
/// i1/i8/i16 operands are extended according to \p IsZExt and the operation
/// is performed as i32.
///
/// \returns the register holding the result (the zero register when
/// \p WantResult is false), or 0 if nothing could be emitted.
unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult,  bool IsZExt) {
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
  bool NeedExtend = false;
  // Pick the in-instruction extend for sub-word types; i1 needs an explicit
  // extend but has no matching extend type.
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    NeedExtend = true;
    break;
  case MVT::i8:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    break;
  case MVT::i16:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    break;
  case MVT::i32:  // fall-through
  case MVT::i64:
    break;
  }
  // Remember the original type for the explicit extends, then promote the
  // operation type to at least i32.
  MVT SrcVT = RetVT;
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);

  // Operand swaps below are done for adds only, since subtraction is not
  // commutative.

  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl  ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr   )
          std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  // Immediate form. A negative constant flips add <-> sub and uses the
  // negated value; if the immediate cannot be encoded emitAddSub_ri returns 0
  // and the register forms below are tried instead.
  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
                                SetFlags, WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
                                WantResult);
  }
  if (ResultReg)
    return ResultReg;

  // Only extend the RHS within the instruction if there is a valid extend type.
  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(RHS)) {
    // A left shift by less than 4 is folded into the extended-register form
    // together with the extend.
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                               RHSIsKill, ExtendType, C->getZExtValue(),
                               SetFlags, WantResult);
        }
    unsigned RHSReg = getRegForValue(RHS);
    if (!RHSReg)
      return 0;
    bool RHSIsKill = hasTrivialKill(RHS);
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                         ExtendType, 0, SetFlags, WantResult);
  }

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      // Make sure the power-of-two constant ends up in MulRHS.
      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      // x * 2^n is emitted as x << n.
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                           AArch64_AM::LSL, ShiftVal, SetFlags, WantResult);
    }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
        switch (SI->getOpcode()) {
        default: break;
        case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
        }
        uint64_t ShiftVal = C->getZExtValue();
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                               RHSIsKill, ShiftType, ShiftVal, SetFlags,
                               WantResult);
        }
      }
    }

  // Nothing could be folded: fall back to the register-register form.
  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  if (NeedExtend)
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                       SetFlags, WantResult);
}
1212
1213 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1214                                         bool LHSIsKill, unsigned RHSReg,
1215                                         bool RHSIsKill, bool SetFlags,
1216                                         bool WantResult) {
1217   assert(LHSReg && RHSReg && "Invalid register number.");
1218
1219   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1220     return 0;
1221
1222   static const unsigned OpcTable[2][2][2] = {
1223     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1224       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1225     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1226       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1227   };
1228   bool Is64Bit = RetVT == MVT::i64;
1229   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1230   const TargetRegisterClass *RC =
1231       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1232   unsigned ResultReg;
1233   if (WantResult)
1234     ResultReg = createResultReg(RC);
1235   else
1236     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1237
1238   const MCInstrDesc &II = TII.get(Opc);
1239   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1240   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1241   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1242       .addReg(LHSReg, getKillRegState(LHSIsKill))
1243       .addReg(RHSReg, getKillRegState(RHSIsKill));
1244   return ResultReg;
1245 }
1246
1247 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1248                                         bool LHSIsKill, uint64_t Imm,
1249                                         bool SetFlags, bool WantResult) {
1250   assert(LHSReg && "Invalid register number.");
1251
1252   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1253     return 0;
1254
1255   unsigned ShiftImm;
1256   if (isUInt<12>(Imm))
1257     ShiftImm = 0;
1258   else if ((Imm & 0xfff000) == Imm) {
1259     ShiftImm = 12;
1260     Imm >>= 12;
1261   } else
1262     return 0;
1263
1264   static const unsigned OpcTable[2][2][2] = {
1265     { { AArch64::SUBWri,  AArch64::SUBXri  },
1266       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1267     { { AArch64::SUBSWri, AArch64::SUBSXri },
1268       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1269   };
1270   bool Is64Bit = RetVT == MVT::i64;
1271   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1272   const TargetRegisterClass *RC;
1273   if (SetFlags)
1274     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1275   else
1276     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1277   unsigned ResultReg;
1278   if (WantResult)
1279     ResultReg = createResultReg(RC);
1280   else
1281     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1282
1283   const MCInstrDesc &II = TII.get(Opc);
1284   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1285   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1286       .addReg(LHSReg, getKillRegState(LHSIsKill))
1287       .addImm(Imm)
1288       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1289   return ResultReg;
1290 }
1291
1292 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1293                                         bool LHSIsKill, unsigned RHSReg,
1294                                         bool RHSIsKill,
1295                                         AArch64_AM::ShiftExtendType ShiftType,
1296                                         uint64_t ShiftImm, bool SetFlags,
1297                                         bool WantResult) {
1298   assert(LHSReg && RHSReg && "Invalid register number.");
1299
1300   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1301     return 0;
1302
1303   static const unsigned OpcTable[2][2][2] = {
1304     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1305       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1306     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1307       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1308   };
1309   bool Is64Bit = RetVT == MVT::i64;
1310   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1311   const TargetRegisterClass *RC =
1312       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1313   unsigned ResultReg;
1314   if (WantResult)
1315     ResultReg = createResultReg(RC);
1316   else
1317     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1318
1319   const MCInstrDesc &II = TII.get(Opc);
1320   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1321   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1322   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1323       .addReg(LHSReg, getKillRegState(LHSIsKill))
1324       .addReg(RHSReg, getKillRegState(RHSIsKill))
1325       .addImm(getShifterImm(ShiftType, ShiftImm));
1326   return ResultReg;
1327 }
1328
1329 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1330                                         bool LHSIsKill, unsigned RHSReg,
1331                                         bool RHSIsKill,
1332                                         AArch64_AM::ShiftExtendType ExtType,
1333                                         uint64_t ShiftImm, bool SetFlags,
1334                                         bool WantResult) {
1335   assert(LHSReg && RHSReg && "Invalid register number.");
1336
1337   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1338     return 0;
1339
1340   static const unsigned OpcTable[2][2][2] = {
1341     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1342       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1343     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1344       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1345   };
1346   bool Is64Bit = RetVT == MVT::i64;
1347   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1348   const TargetRegisterClass *RC = nullptr;
1349   if (SetFlags)
1350     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1351   else
1352     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1353   unsigned ResultReg;
1354   if (WantResult)
1355     ResultReg = createResultReg(RC);
1356   else
1357     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1358
1359   const MCInstrDesc &II = TII.get(Opc);
1360   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1361   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1362   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1363       .addReg(LHSReg, getKillRegState(LHSIsKill))
1364       .addReg(RHSReg, getKillRegState(RHSIsKill))
1365       .addImm(getArithExtendImm(ExtType, ShiftImm));
1366   return ResultReg;
1367 }
1368
1369 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1370   Type *Ty = LHS->getType();
1371   EVT EVT = TLI.getValueType(Ty, true);
1372   if (!EVT.isSimple())
1373     return false;
1374   MVT VT = EVT.getSimpleVT();
1375
1376   switch (VT.SimpleTy) {
1377   default:
1378     return false;
1379   case MVT::i1:
1380   case MVT::i8:
1381   case MVT::i16:
1382   case MVT::i32:
1383   case MVT::i64:
1384     return emitICmp(VT, LHS, RHS, IsZExt);
1385   case MVT::f32:
1386   case MVT::f64:
1387     return emitFCmp(VT, LHS, RHS);
1388   }
1389 }
1390
1391 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1392                                bool IsZExt) {
1393   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1394                  IsZExt) != 0;
1395 }
1396
1397 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1398                                   uint64_t Imm) {
1399   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1400                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1401 }
1402
1403 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1404   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1405     return false;
1406
1407   // Check to see if the 2nd operand is a constant that we can encode directly
1408   // in the compare.
1409   bool UseImm = false;
1410   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1411     if (CFP->isZero() && !CFP->isNegative())
1412       UseImm = true;
1413
1414   unsigned LHSReg = getRegForValue(LHS);
1415   if (!LHSReg)
1416     return false;
1417   bool LHSIsKill = hasTrivialKill(LHS);
1418
1419   if (UseImm) {
1420     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1421     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1422         .addReg(LHSReg, getKillRegState(LHSIsKill));
1423     return true;
1424   }
1425
1426   unsigned RHSReg = getRegForValue(RHS);
1427   if (!RHSReg)
1428     return false;
1429   bool RHSIsKill = hasTrivialKill(RHS);
1430
1431   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1432   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1433       .addReg(LHSReg, getKillRegState(LHSIsKill))
1434       .addReg(RHSReg, getKillRegState(RHSIsKill));
1435   return true;
1436 }
1437
1438 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1439                                   bool SetFlags, bool WantResult, bool IsZExt) {
1440   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1441                     IsZExt);
1442 }
1443
1444 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1445                                   bool SetFlags, bool WantResult, bool IsZExt) {
1446   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1447                     IsZExt);
1448 }
1449
1450 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1451                                       bool LHSIsKill, unsigned RHSReg,
1452                                       bool RHSIsKill, bool WantResult) {
1453   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1454                        RHSIsKill, /*SetFlags=*/true, WantResult);
1455 }
1456
1457 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1458                                       bool LHSIsKill, unsigned RHSReg,
1459                                       bool RHSIsKill,
1460                                       AArch64_AM::ShiftExtendType ShiftType,
1461                                       uint64_t ShiftImm, bool WantResult) {
1462   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1463                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1464                        WantResult);
1465 }
1466
/// \brief Emit a logical operation (\p ISDOpc) on two IR values, folding the
/// right-hand side into the instruction whenever possible.
///
/// The forms are tried in this order: logical immediate, shifted register for
/// a multiply by a power of two, shifted register for a left shift by a
/// constant, and finally plain register-register. For i8/i16 the
/// register-register result is masked back to the narrow width.
///
/// \returns the register holding the result, or 0 if an operand could not be
/// placed in a register.
unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  // Immediate form. emitLogicalOp_ri returns 0 when the constant is not a
  // valid logical immediate, in which case the register forms are tried.
  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = C->getZExtValue();
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
  }
  if (ResultReg)
    return ResultReg;

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      // Make sure the power-of-two constant ends up in MulRHS.
      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      // x * 2^n is emitted as x << n.
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();

      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                              RHSIsKill, ShiftVal);
    }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        uint64_t ShiftVal = C->getZExtValue();
        unsigned RHSReg = getRegForValue(SI->getOperand(0));
        if (!RHSReg)
          return 0;
        bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
        return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                                RHSIsKill, ShiftVal);
      }

  // Nothing could be folded: fall back to the register-register form.
  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  // The operation itself is performed on at least 32 bits.
  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
  // Mask i8/i16 results back down to their width.
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  }
  return ResultReg;
}
1544
1545 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1546                                            unsigned LHSReg, bool LHSIsKill,
1547                                            uint64_t Imm) {
1548   assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1549          "ISD nodes are not consecutive!");
1550   static const unsigned OpcTable[3][2] = {
1551     { AArch64::ANDWri, AArch64::ANDXri },
1552     { AArch64::ORRWri, AArch64::ORRXri },
1553     { AArch64::EORWri, AArch64::EORXri }
1554   };
1555   const TargetRegisterClass *RC;
1556   unsigned Opc;
1557   unsigned RegSize;
1558   switch (RetVT.SimpleTy) {
1559   default:
1560     return 0;
1561   case MVT::i1:
1562   case MVT::i8:
1563   case MVT::i16:
1564   case MVT::i32: {
1565     unsigned Idx = ISDOpc - ISD::AND;
1566     Opc = OpcTable[Idx][0];
1567     RC = &AArch64::GPR32spRegClass;
1568     RegSize = 32;
1569     break;
1570   }
1571   case MVT::i64:
1572     Opc = OpcTable[ISDOpc - ISD::AND][1];
1573     RC = &AArch64::GPR64spRegClass;
1574     RegSize = 64;
1575     break;
1576   }
1577
1578   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1579     return 0;
1580
1581   unsigned ResultReg =
1582       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1583                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1584   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1585     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1586     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1587   }
1588   return ResultReg;
1589 }
1590
1591 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1592                                            unsigned LHSReg, bool LHSIsKill,
1593                                            unsigned RHSReg, bool RHSIsKill,
1594                                            uint64_t ShiftImm) {
1595   assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1596          "ISD nodes are not consecutive!");
1597   static const unsigned OpcTable[3][2] = {
1598     { AArch64::ANDWrs, AArch64::ANDXrs },
1599     { AArch64::ORRWrs, AArch64::ORRXrs },
1600     { AArch64::EORWrs, AArch64::EORXrs }
1601   };
1602   const TargetRegisterClass *RC;
1603   unsigned Opc;
1604   switch (RetVT.SimpleTy) {
1605   default:
1606     return 0;
1607   case MVT::i1:
1608   case MVT::i8:
1609   case MVT::i16:
1610   case MVT::i32:
1611     Opc = OpcTable[ISDOpc - ISD::AND][0];
1612     RC = &AArch64::GPR32RegClass;
1613     break;
1614   case MVT::i64:
1615     Opc = OpcTable[ISDOpc - ISD::AND][1];
1616     RC = &AArch64::GPR64RegClass;
1617     break;
1618   }
1619   unsigned ResultReg =
1620       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1621                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1622   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1623     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1624     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1625   }
1626   return ResultReg;
1627 }
1628
1629 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1630                                      uint64_t Imm) {
1631   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1632 }
1633
1634 bool AArch64FastISel::emitLoad(MVT VT, MVT RetVT, unsigned &ResultReg,
1635                                Address Addr, bool WantZExt,
1636                                MachineMemOperand *MMO) {
1637   // Simplify this down to something we can handle.
1638   if (!simplifyAddress(Addr, VT))
1639     return false;
1640
1641   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1642   if (!ScaleFactor)
1643     llvm_unreachable("Unexpected value type.");
1644
1645   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1646   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1647   bool UseScaled = true;
1648   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1649     UseScaled = false;
1650     ScaleFactor = 1;
1651   }
1652
1653   static const unsigned GPOpcTable[2][8][4] = {
1654     // Sign-extend.
1655     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1656         AArch64::LDURXi  },
1657       { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1658         AArch64::LDURXi  },
1659       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1660         AArch64::LDRXui  },
1661       { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1662         AArch64::LDRXui  },
1663       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1664         AArch64::LDRXroX },
1665       { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1666         AArch64::LDRXroX },
1667       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1668         AArch64::LDRXroW },
1669       { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1670         AArch64::LDRXroW }
1671     },
1672     // Zero-extend.
1673     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1674         AArch64::LDURXi  },
1675       { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1676         AArch64::LDURXi  },
1677       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1678         AArch64::LDRXui  },
1679       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1680         AArch64::LDRXui  },
1681       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1682         AArch64::LDRXroX },
1683       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1684         AArch64::LDRXroX },
1685       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1686         AArch64::LDRXroW },
1687       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1688         AArch64::LDRXroW }
1689     }
1690   };
1691
1692   static const unsigned FPOpcTable[4][2] = {
1693     { AArch64::LDURSi,  AArch64::LDURDi  },
1694     { AArch64::LDRSui,  AArch64::LDRDui  },
1695     { AArch64::LDRSroX, AArch64::LDRDroX },
1696     { AArch64::LDRSroW, AArch64::LDRDroW }
1697   };
1698
1699   unsigned Opc;
1700   const TargetRegisterClass *RC;
1701   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1702                       Addr.getOffsetReg();
1703   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1704   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1705       Addr.getExtendType() == AArch64_AM::SXTW)
1706     Idx++;
1707
1708   bool IsRet64Bit = RetVT == MVT::i64;
1709   switch (VT.SimpleTy) {
1710   default:
1711     llvm_unreachable("Unexpected value type.");
1712   case MVT::i1: // Intentional fall-through.
1713   case MVT::i8:
1714     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1715     RC = (IsRet64Bit && !WantZExt) ?
1716              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1717     break;
1718   case MVT::i16:
1719     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1720     RC = (IsRet64Bit && !WantZExt) ?
1721              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1722     break;
1723   case MVT::i32:
1724     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1725     RC = (IsRet64Bit && !WantZExt) ?
1726              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1727     break;
1728   case MVT::i64:
1729     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1730     RC = &AArch64::GPR64RegClass;
1731     break;
1732   case MVT::f32:
1733     Opc = FPOpcTable[Idx][0];
1734     RC = &AArch64::FPR32RegClass;
1735     break;
1736   case MVT::f64:
1737     Opc = FPOpcTable[Idx][1];
1738     RC = &AArch64::FPR64RegClass;
1739     break;
1740   }
1741
1742   // Create the base instruction, then add the operands.
1743   ResultReg = createResultReg(RC);
1744   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1745                                     TII.get(Opc), ResultReg);
1746   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1747
1748   // For zero-extending loads to 64bit we emit a 32bit load and then convert
1749   // the w-reg to an x-reg. In the end this is just an noop and will be removed.
1750   if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1751     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1752     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1753             TII.get(AArch64::SUBREG_TO_REG), Reg64)
1754         .addImm(0)
1755         .addReg(ResultReg, getKillRegState(true))
1756         .addImm(AArch64::sub_32);
1757     ResultReg = Reg64;
1758   }
1759
1760   // Loading an i1 requires special handling.
1761   if (VT == MVT::i1) {
1762     unsigned ANDReg = emitAnd_ri(IsRet64Bit ? MVT::i64 : MVT::i32, ResultReg,
1763                                  /*IsKill=*/true, 1);
1764     assert(ANDReg && "Unexpected AND instruction emission failure.");
1765     ResultReg = ANDReg;
1766   }
1767   return true;
1768 }
1769
1770 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1771   MVT VT;
1772   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1773     return false;
1774
1775   if (VT.isVector())
1776     return selectOperator(I, I->getOpcode());
1777
1778   unsigned ResultReg;
1779   switch (I->getOpcode()) {
1780   default:
1781     llvm_unreachable("Unexpected instruction.");
1782   case Instruction::Add:
1783     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1784     break;
1785   case Instruction::Sub:
1786     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1787     break;
1788   }
1789   if (!ResultReg)
1790     return false;
1791
1792   updateValueMap(I, ResultReg);
1793   return true;
1794 }
1795
1796 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1797   MVT VT;
1798   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1799     return false;
1800
1801   if (VT.isVector())
1802     return selectOperator(I, I->getOpcode());
1803
1804   unsigned ResultReg;
1805   switch (I->getOpcode()) {
1806   default:
1807     llvm_unreachable("Unexpected instruction.");
1808   case Instruction::And:
1809     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1810     break;
1811   case Instruction::Or:
1812     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1813     break;
1814   case Instruction::Xor:
1815     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1816     break;
1817   }
1818   if (!ResultReg)
1819     return false;
1820
1821   updateValueMap(I, ResultReg);
1822   return true;
1823 }
1824
1825 bool AArch64FastISel::selectLoad(const Instruction *I) {
1826   MVT VT;
1827   // Verify we have a legal type before going any further.  Currently, we handle
1828   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1829   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1830   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1831       cast<LoadInst>(I)->isAtomic())
1832     return false;
1833
1834   // See if we can handle this address.
1835   Address Addr;
1836   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1837     return false;
1838
1839   bool WantZExt = true;
1840   MVT RetVT = VT;
1841   if (I->hasOneUse()) {
1842     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1843       if (!isTypeSupported(ZE->getType(), RetVT, /*IsVectorAllowed=*/false))
1844         RetVT = VT;
1845     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1846       if (!isTypeSupported(SE->getType(), RetVT, /*IsVectorAllowed=*/false))
1847         RetVT = VT;
1848       WantZExt = false;
1849     }
1850   }
1851
1852   unsigned ResultReg;
1853   if (!emitLoad(VT, RetVT, ResultReg, Addr, WantZExt,
1854                 createMachineMemOperandFor(I)))
1855     return false;
1856
1857   updateValueMap(I, ResultReg);
1858   return true;
1859 }
1860
1861 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
1862                                 MachineMemOperand *MMO) {
1863   // Simplify this down to something we can handle.
1864   if (!simplifyAddress(Addr, VT))
1865     return false;
1866
1867   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1868   if (!ScaleFactor)
1869     llvm_unreachable("Unexpected value type.");
1870
1871   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1872   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1873   bool UseScaled = true;
1874   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1875     UseScaled = false;
1876     ScaleFactor = 1;
1877   }
1878
1879   static const unsigned OpcTable[4][6] = {
1880     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
1881       AArch64::STURSi,   AArch64::STURDi },
1882     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
1883       AArch64::STRSui,   AArch64::STRDui },
1884     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
1885       AArch64::STRSroX,  AArch64::STRDroX },
1886     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
1887       AArch64::STRSroW,  AArch64::STRDroW }
1888   };
1889
1890   unsigned Opc;
1891   bool VTIsi1 = false;
1892   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1893                       Addr.getOffsetReg();
1894   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1895   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1896       Addr.getExtendType() == AArch64_AM::SXTW)
1897     Idx++;
1898
1899   switch (VT.SimpleTy) {
1900   default: llvm_unreachable("Unexpected value type.");
1901   case MVT::i1:  VTIsi1 = true;
1902   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
1903   case MVT::i16: Opc = OpcTable[Idx][1]; break;
1904   case MVT::i32: Opc = OpcTable[Idx][2]; break;
1905   case MVT::i64: Opc = OpcTable[Idx][3]; break;
1906   case MVT::f32: Opc = OpcTable[Idx][4]; break;
1907   case MVT::f64: Opc = OpcTable[Idx][5]; break;
1908   }
1909
1910   // Storing an i1 requires special handling.
1911   if (VTIsi1 && SrcReg != AArch64::WZR) {
1912     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
1913     assert(ANDReg && "Unexpected AND instruction emission failure.");
1914     SrcReg = ANDReg;
1915   }
1916   // Create the base instruction, then add the operands.
1917   const MCInstrDesc &II = TII.get(Opc);
1918   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
1919   MachineInstrBuilder MIB =
1920       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
1921   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
1922
1923   return true;
1924 }
1925
1926 bool AArch64FastISel::selectStore(const Instruction *I) {
1927   MVT VT;
1928   const Value *Op0 = I->getOperand(0);
1929   // Verify we have a legal type before going any further.  Currently, we handle
1930   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1931   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1932   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) ||
1933       cast<StoreInst>(I)->isAtomic())
1934     return false;
1935
1936   // Get the value to be stored into a register. Use the zero register directly
1937   // when possible to avoid an unnecessary copy and a wasted register.
1938   unsigned SrcReg = 0;
1939   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
1940     if (CI->isZero())
1941       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1942   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
1943     if (CF->isZero() && !CF->isNegative()) {
1944       VT = MVT::getIntegerVT(VT.getSizeInBits());
1945       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1946     }
1947   }
1948
1949   if (!SrcReg)
1950     SrcReg = getRegForValue(Op0);
1951
1952   if (!SrcReg)
1953     return false;
1954
1955   // See if we can handle this address.
1956   Address Addr;
1957   if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
1958     return false;
1959
1960   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
1961     return false;
1962   return true;
1963 }
1964
1965 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
1966   switch (Pred) {
1967   case CmpInst::FCMP_ONE:
1968   case CmpInst::FCMP_UEQ:
1969   default:
1970     // AL is our "false" for now. The other two need more compares.
1971     return AArch64CC::AL;
1972   case CmpInst::ICMP_EQ:
1973   case CmpInst::FCMP_OEQ:
1974     return AArch64CC::EQ;
1975   case CmpInst::ICMP_SGT:
1976   case CmpInst::FCMP_OGT:
1977     return AArch64CC::GT;
1978   case CmpInst::ICMP_SGE:
1979   case CmpInst::FCMP_OGE:
1980     return AArch64CC::GE;
1981   case CmpInst::ICMP_UGT:
1982   case CmpInst::FCMP_UGT:
1983     return AArch64CC::HI;
1984   case CmpInst::FCMP_OLT:
1985     return AArch64CC::MI;
1986   case CmpInst::ICMP_ULE:
1987   case CmpInst::FCMP_OLE:
1988     return AArch64CC::LS;
1989   case CmpInst::FCMP_ORD:
1990     return AArch64CC::VC;
1991   case CmpInst::FCMP_UNO:
1992     return AArch64CC::VS;
1993   case CmpInst::FCMP_UGE:
1994     return AArch64CC::PL;
1995   case CmpInst::ICMP_SLT:
1996   case CmpInst::FCMP_ULT:
1997     return AArch64CC::LT;
1998   case CmpInst::ICMP_SLE:
1999   case CmpInst::FCMP_ULE:
2000     return AArch64CC::LE;
2001   case CmpInst::FCMP_UNE:
2002   case CmpInst::ICMP_NE:
2003     return AArch64CC::NE;
2004   case CmpInst::ICMP_UGE:
2005     return AArch64CC::HS;
2006   case CmpInst::ICMP_ULT:
2007     return AArch64CC::LO;
2008   }
2009 }
2010
2011 /// \brief Try to emit a combined compare-and-branch instruction.
2012 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2013   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2014   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2015   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2016
2017   const Value *LHS = CI->getOperand(0);
2018   const Value *RHS = CI->getOperand(1);
2019
2020   Type *Ty = LHS->getType();
2021     if (!Ty->isIntegerTy())
2022       return false;
2023
2024   unsigned BW = cast<IntegerType>(Ty)->getBitWidth();
2025   if (BW != 1 && BW != 8 && BW != 16 && BW != 32 && BW != 64)
2026     return false;
2027
2028   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2029   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2030
2031   // Try to take advantage of fallthrough opportunities.
2032   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2033     std::swap(TBB, FBB);
2034     Predicate = CmpInst::getInversePredicate(Predicate);
2035   }
2036
2037   int TestBit = -1;
2038   bool IsCmpNE;
2039   if ((Predicate == CmpInst::ICMP_EQ) || (Predicate == CmpInst::ICMP_NE)) {
2040     if (const auto *C = dyn_cast<ConstantInt>(LHS))
2041       if (C->isNullValue())
2042         std::swap(LHS, RHS);
2043
2044     if (!isa<ConstantInt>(RHS))
2045       return false;
2046
2047     if (!cast<ConstantInt>(RHS)->isNullValue())
2048       return false;
2049
2050     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2051       if (AI->getOpcode() == Instruction::And) {
2052         const Value *AndLHS = AI->getOperand(0);
2053         const Value *AndRHS = AI->getOperand(1);
2054
2055         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2056           if (C->getValue().isPowerOf2())
2057             std::swap(AndLHS, AndRHS);
2058
2059         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2060           if (C->getValue().isPowerOf2()) {
2061             TestBit = C->getValue().logBase2();
2062             LHS = AndLHS;
2063           }
2064       }
2065     IsCmpNE = Predicate == CmpInst::ICMP_NE;
2066   } else if (Predicate == CmpInst::ICMP_SLT) {
2067     if (!isa<ConstantInt>(RHS))
2068       return false;
2069
2070     if (!cast<ConstantInt>(RHS)->isNullValue())
2071       return false;
2072
2073     TestBit = BW - 1;
2074     IsCmpNE = true;
2075   } else if (Predicate == CmpInst::ICMP_SGT) {
2076     if (!isa<ConstantInt>(RHS))
2077       return false;
2078
2079     if (cast<ConstantInt>(RHS)->getValue() != -1)
2080       return false;
2081
2082     TestBit = BW - 1;
2083     IsCmpNE = false;
2084   } else
2085     return false;
2086
2087   static const unsigned OpcTable[2][2][2] = {
2088     { {AArch64::CBZW,  AArch64::CBZX },
2089       {AArch64::CBNZW, AArch64::CBNZX} },
2090     { {AArch64::TBZW,  AArch64::TBZX },
2091       {AArch64::TBNZW, AArch64::TBNZX} }
2092   };
2093
2094   bool IsBitTest = TestBit != -1;
2095   bool Is64Bit = BW == 64;
2096   if (TestBit < 32 && TestBit >= 0)
2097     Is64Bit = false;
2098
2099   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2100   const MCInstrDesc &II = TII.get(Opc);
2101
2102   unsigned SrcReg = getRegForValue(LHS);
2103   if (!SrcReg)
2104     return false;
2105   bool SrcIsKill = hasTrivialKill(LHS);
2106
2107   if (BW == 64 && !Is64Bit) {
2108     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2109                                         AArch64::sub_32);
2110     SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
2111   }
2112
2113   // Emit the combined compare and branch instruction.
2114   MachineInstrBuilder MIB =
2115       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2116           .addReg(SrcReg, getKillRegState(SrcIsKill));
2117   if (IsBitTest)
2118     MIB.addImm(TestBit);
2119   MIB.addMBB(TBB);
2120
2121   // Obtain the branch weight and add the TrueBB to the successor list.
2122   uint32_t BranchWeight = 0;
2123   if (FuncInfo.BPI)
2124     BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2125                                                TBB->getBasicBlock());
2126   FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2127   fastEmitBranch(FBB, DbgLoc);
2128
2129   return true;
2130 }
2131
2132 bool AArch64FastISel::selectBranch(const Instruction *I) {
2133   const BranchInst *BI = cast<BranchInst>(I);
2134   if (BI->isUnconditional()) {
2135     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2136     fastEmitBranch(MSucc, BI->getDebugLoc());
2137     return true;
2138   }
2139
2140   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2141   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2142
2143   AArch64CC::CondCode CC = AArch64CC::NE;
2144   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2145     if (CI->hasOneUse() && isValueAvailable(CI)) {
2146       // Try to optimize or fold the cmp.
2147       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2148       switch (Predicate) {
2149       default:
2150         break;
2151       case CmpInst::FCMP_FALSE:
2152         fastEmitBranch(FBB, DbgLoc);
2153         return true;
2154       case CmpInst::FCMP_TRUE:
2155         fastEmitBranch(TBB, DbgLoc);
2156         return true;
2157       }
2158
2159       // Try to emit a combined compare-and-branch first.
2160       if (emitCompareAndBranch(BI))
2161         return true;
2162
2163       // Try to take advantage of fallthrough opportunities.
2164       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2165         std::swap(TBB, FBB);
2166         Predicate = CmpInst::getInversePredicate(Predicate);
2167       }
2168
2169       // Emit the cmp.
2170       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2171         return false;
2172
2173       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2174       // instruction.
2175       CC = getCompareCC(Predicate);
2176       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2177       switch (Predicate) {
2178       default:
2179         break;
2180       case CmpInst::FCMP_UEQ:
2181         ExtraCC = AArch64CC::EQ;
2182         CC = AArch64CC::VS;
2183         break;
2184       case CmpInst::FCMP_ONE:
2185         ExtraCC = AArch64CC::MI;
2186         CC = AArch64CC::GT;
2187         break;
2188       }
2189       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2190
2191       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2192       if (ExtraCC != AArch64CC::AL) {
2193         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2194             .addImm(ExtraCC)
2195             .addMBB(TBB);
2196       }
2197
2198       // Emit the branch.
2199       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2200           .addImm(CC)
2201           .addMBB(TBB);
2202
2203       // Obtain the branch weight and add the TrueBB to the successor list.
2204       uint32_t BranchWeight = 0;
2205       if (FuncInfo.BPI)
2206         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2207                                                   TBB->getBasicBlock());
2208       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2209
2210       fastEmitBranch(FBB, DbgLoc);
2211       return true;
2212     }
2213   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
2214     MVT SrcVT;
2215     if (TI->hasOneUse() && isValueAvailable(TI) &&
2216         isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) {
2217       unsigned CondReg = getRegForValue(TI->getOperand(0));
2218       if (!CondReg)
2219         return false;
2220       bool CondIsKill = hasTrivialKill(TI->getOperand(0));
2221
2222       // Issue an extract_subreg to get the lower 32-bits.
2223       if (SrcVT == MVT::i64) {
2224         CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
2225                                              AArch64::sub_32);
2226         CondIsKill = true;
2227       }
2228
2229       unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
2230       assert(ANDReg && "Unexpected AND instruction emission failure.");
2231       emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
2232
2233       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2234         std::swap(TBB, FBB);
2235         CC = AArch64CC::EQ;
2236       }
2237       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2238           .addImm(CC)
2239           .addMBB(TBB);
2240
2241       // Obtain the branch weight and add the TrueBB to the successor list.
2242       uint32_t BranchWeight = 0;
2243       if (FuncInfo.BPI)
2244         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2245                                                   TBB->getBasicBlock());
2246       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2247
2248       fastEmitBranch(FBB, DbgLoc);
2249       return true;
2250     }
2251   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2252     uint64_t Imm = CI->getZExtValue();
2253     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2254     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2255         .addMBB(Target);
2256
2257     // Obtain the branch weight and add the target to the successor list.
2258     uint32_t BranchWeight = 0;
2259     if (FuncInfo.BPI)
2260       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2261                                                  Target->getBasicBlock());
2262     FuncInfo.MBB->addSuccessor(Target, BranchWeight);
2263     return true;
2264   } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2265     // Fake request the condition, otherwise the intrinsic might be completely
2266     // optimized away.
2267     unsigned CondReg = getRegForValue(BI->getCondition());
2268     if (!CondReg)
2269       return false;
2270
2271     // Emit the branch.
2272     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2273       .addImm(CC)
2274       .addMBB(TBB);
2275
2276     // Obtain the branch weight and add the TrueBB to the successor list.
2277     uint32_t BranchWeight = 0;
2278     if (FuncInfo.BPI)
2279       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2280                                                  TBB->getBasicBlock());
2281     FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2282
2283     fastEmitBranch(FBB, DbgLoc);
2284     return true;
2285   }
2286
2287   unsigned CondReg = getRegForValue(BI->getCondition());
2288   if (CondReg == 0)
2289     return false;
2290   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2291
2292   // We've been divorced from our compare!  Our block was split, and
2293   // now our compare lives in a predecessor block.  We musn't
2294   // re-compare here, as the children of the compare aren't guaranteed
2295   // live across the block boundary (we *could* check for this).
2296   // Regardless, the compare has been done in the predecessor block,
2297   // and it left a value for us in a virtual register.  Ergo, we test
2298   // the one-bit value left in the virtual register.
2299   emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0);
2300
2301   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2302     std::swap(TBB, FBB);
2303     CC = AArch64CC::EQ;
2304   }
2305
2306   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2307       .addImm(CC)
2308       .addMBB(TBB);
2309
2310   // Obtain the branch weight and add the TrueBB to the successor list.
2311   uint32_t BranchWeight = 0;
2312   if (FuncInfo.BPI)
2313     BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2314                                                TBB->getBasicBlock());
2315   FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2316
2317   fastEmitBranch(FBB, DbgLoc);
2318   return true;
2319 }
2320
2321 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2322   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2323   unsigned AddrReg = getRegForValue(BI->getOperand(0));
2324   if (AddrReg == 0)
2325     return false;
2326
2327   // Emit the indirect branch.
2328   const MCInstrDesc &II = TII.get(AArch64::BR);
2329   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2330   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2331
2332   // Make sure the CFG is up-to-date.
2333   for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
2334     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
2335
2336   return true;
2337 }
2338
2339 bool AArch64FastISel::selectCmp(const Instruction *I) {
2340   const CmpInst *CI = cast<CmpInst>(I);
2341
2342   // Try to optimize or fold the cmp.
2343   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2344   unsigned ResultReg = 0;
2345   switch (Predicate) {
2346   default:
2347     break;
2348   case CmpInst::FCMP_FALSE:
2349     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2350     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2351             TII.get(TargetOpcode::COPY), ResultReg)
2352         .addReg(AArch64::WZR, getKillRegState(true));
2353     break;
2354   case CmpInst::FCMP_TRUE:
2355     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2356     break;
2357   }
2358
2359   if (ResultReg) {
2360     updateValueMap(I, ResultReg);
2361     return true;
2362   }
2363
2364   // Emit the cmp.
2365   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2366     return false;
2367
2368   ResultReg = createResultReg(&AArch64::GPR32RegClass);
2369
2370   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2371   // condition codes are inverted, because they are used by CSINC.
2372   static unsigned CondCodeTable[2][2] = {
2373     { AArch64CC::NE, AArch64CC::VC },
2374     { AArch64CC::PL, AArch64CC::LE }
2375   };
2376   unsigned *CondCodes = nullptr;
2377   switch (Predicate) {
2378   default:
2379     break;
2380   case CmpInst::FCMP_UEQ:
2381     CondCodes = &CondCodeTable[0][0];
2382     break;
2383   case CmpInst::FCMP_ONE:
2384     CondCodes = &CondCodeTable[1][0];
2385     break;
2386   }
2387
2388   if (CondCodes) {
2389     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2390     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2391             TmpReg1)
2392         .addReg(AArch64::WZR, getKillRegState(true))
2393         .addReg(AArch64::WZR, getKillRegState(true))
2394         .addImm(CondCodes[0]);
2395     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2396             ResultReg)
2397         .addReg(TmpReg1, getKillRegState(true))
2398         .addReg(AArch64::WZR, getKillRegState(true))
2399         .addImm(CondCodes[1]);
2400
2401     updateValueMap(I, ResultReg);
2402     return true;
2403   }
2404
2405   // Now set a register based on the comparison.
2406   AArch64CC::CondCode CC = getCompareCC(Predicate);
2407   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2408   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2409   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2410           ResultReg)
2411       .addReg(AArch64::WZR, getKillRegState(true))
2412       .addReg(AArch64::WZR, getKillRegState(true))
2413       .addImm(invertedCC);
2414
2415   updateValueMap(I, ResultReg);
2416   return true;
2417 }
2418
2419 bool AArch64FastISel::selectSelect(const Instruction *I) {
2420   const SelectInst *SI = cast<SelectInst>(I);
2421
2422   EVT DestEVT = TLI.getValueType(SI->getType(), true);
2423   if (!DestEVT.isSimple())
2424     return false;
2425
2426   MVT DestVT = DestEVT.getSimpleVT();
2427   if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
2428       DestVT != MVT::f64)
2429     return false;
2430
2431   unsigned SelectOpc;
2432   const TargetRegisterClass *RC = nullptr;
2433   switch (DestVT.SimpleTy) {
2434   default: return false;
2435   case MVT::i32:
2436     SelectOpc = AArch64::CSELWr;    RC = &AArch64::GPR32RegClass; break;
2437   case MVT::i64:
2438     SelectOpc = AArch64::CSELXr;    RC = &AArch64::GPR64RegClass; break;
2439   case MVT::f32:
2440     SelectOpc = AArch64::FCSELSrrr; RC = &AArch64::FPR32RegClass; break;
2441   case MVT::f64:
2442     SelectOpc = AArch64::FCSELDrrr; RC = &AArch64::FPR64RegClass; break;
2443   }
2444
2445   const Value *Cond = SI->getCondition();
2446   bool NeedTest = true;
2447   AArch64CC::CondCode CC = AArch64CC::NE;
2448   if (foldXALUIntrinsic(CC, I, Cond))
2449     NeedTest = false;
2450
2451   unsigned CondReg = getRegForValue(Cond);
2452   if (!CondReg)
2453     return false;
2454   bool CondIsKill = hasTrivialKill(Cond);
2455
2456   if (NeedTest) {
2457     unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
2458     assert(ANDReg && "Unexpected AND instruction emission failure.");
2459     emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
2460   }
2461
2462   unsigned TrueReg = getRegForValue(SI->getTrueValue());
2463   bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
2464
2465   unsigned FalseReg = getRegForValue(SI->getFalseValue());
2466   bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
2467
2468   if (!TrueReg || !FalseReg)
2469     return false;
2470
2471   unsigned ResultReg = fastEmitInst_rri(SelectOpc, RC, TrueReg, TrueIsKill,
2472                                         FalseReg, FalseIsKill, CC);
2473   updateValueMap(I, ResultReg);
2474   return true;
2475 }
2476
2477 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2478   Value *V = I->getOperand(0);
2479   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2480     return false;
2481
2482   unsigned Op = getRegForValue(V);
2483   if (Op == 0)
2484     return false;
2485
2486   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2487   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2488           ResultReg).addReg(Op);
2489   updateValueMap(I, ResultReg);
2490   return true;
2491 }
2492
2493 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2494   Value *V = I->getOperand(0);
2495   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2496     return false;
2497
2498   unsigned Op = getRegForValue(V);
2499   if (Op == 0)
2500     return false;
2501
2502   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2503   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2504           ResultReg).addReg(Op);
2505   updateValueMap(I, ResultReg);
2506   return true;
2507 }
2508
2509 // FPToUI and FPToSI
2510 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2511   MVT DestVT;
2512   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2513     return false;
2514
2515   unsigned SrcReg = getRegForValue(I->getOperand(0));
2516   if (SrcReg == 0)
2517     return false;
2518
2519   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2520   if (SrcVT == MVT::f128)
2521     return false;
2522
2523   unsigned Opc;
2524   if (SrcVT == MVT::f64) {
2525     if (Signed)
2526       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2527     else
2528       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2529   } else {
2530     if (Signed)
2531       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2532     else
2533       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2534   }
2535   unsigned ResultReg = createResultReg(
2536       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2537   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2538       .addReg(SrcReg);
2539   updateValueMap(I, ResultReg);
2540   return true;
2541 }
2542
2543 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2544   MVT DestVT;
2545   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2546     return false;
2547   assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2548           "Unexpected value type.");
2549
2550   unsigned SrcReg = getRegForValue(I->getOperand(0));
2551   if (!SrcReg)
2552     return false;
2553   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2554
2555   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2556
2557   // Handle sign-extension.
2558   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2559     SrcReg =
2560         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2561     if (!SrcReg)
2562       return false;
2563     SrcIsKill = true;
2564   }
2565
2566   unsigned Opc;
2567   if (SrcVT == MVT::i64) {
2568     if (Signed)
2569       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2570     else
2571       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2572   } else {
2573     if (Signed)
2574       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2575     else
2576       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2577   }
2578
2579   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2580                                       SrcIsKill);
2581   updateValueMap(I, ResultReg);
2582   return true;
2583 }
2584
2585 bool AArch64FastISel::fastLowerArguments() {
2586   if (!FuncInfo.CanLowerReturn)
2587     return false;
2588
2589   const Function *F = FuncInfo.Fn;
2590   if (F->isVarArg())
2591     return false;
2592
2593   CallingConv::ID CC = F->getCallingConv();
2594   if (CC != CallingConv::C)
2595     return false;
2596
2597   // Only handle simple cases of up to 8 GPR and FPR each.
2598   unsigned GPRCnt = 0;
2599   unsigned FPRCnt = 0;
2600   unsigned Idx = 0;
2601   for (auto const &Arg : F->args()) {
2602     // The first argument is at index 1.
2603     ++Idx;
2604     if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
2605         F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
2606         F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
2607         F->getAttributes().hasAttribute(Idx, Attribute::Nest))
2608       return false;
2609
2610     Type *ArgTy = Arg.getType();
2611     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2612       return false;
2613
2614     EVT ArgVT = TLI.getValueType(ArgTy);
2615     if (!ArgVT.isSimple())
2616       return false;
2617
2618     MVT VT = ArgVT.getSimpleVT().SimpleTy;
2619     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2620       return false;
2621
2622     if (VT.isVector() &&
2623         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2624       return false;
2625
2626     if (VT >= MVT::i1 && VT <= MVT::i64)
2627       ++GPRCnt;
2628     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2629              VT.is128BitVector())
2630       ++FPRCnt;
2631     else
2632       return false;
2633
2634     if (GPRCnt > 8 || FPRCnt > 8)
2635       return false;
2636   }
2637
2638   static const MCPhysReg Registers[6][8] = {
2639     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2640       AArch64::W5, AArch64::W6, AArch64::W7 },
2641     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2642       AArch64::X5, AArch64::X6, AArch64::X7 },
2643     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2644       AArch64::H5, AArch64::H6, AArch64::H7 },
2645     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2646       AArch64::S5, AArch64::S6, AArch64::S7 },
2647     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2648       AArch64::D5, AArch64::D6, AArch64::D7 },
2649     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2650       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2651   };
2652
2653   unsigned GPRIdx = 0;
2654   unsigned FPRIdx = 0;
2655   for (auto const &Arg : F->args()) {
2656     MVT VT = TLI.getSimpleValueType(Arg.getType());
2657     unsigned SrcReg;
2658     const TargetRegisterClass *RC;
2659     if (VT >= MVT::i1 && VT <= MVT::i32) {
2660       SrcReg = Registers[0][GPRIdx++];
2661       RC = &AArch64::GPR32RegClass;
2662       VT = MVT::i32;
2663     } else if (VT == MVT::i64) {
2664       SrcReg = Registers[1][GPRIdx++];
2665       RC = &AArch64::GPR64RegClass;
2666     } else if (VT == MVT::f16) {
2667       SrcReg = Registers[2][FPRIdx++];
2668       RC = &AArch64::FPR16RegClass;
2669     } else if (VT ==  MVT::f32) {
2670       SrcReg = Registers[3][FPRIdx++];
2671       RC = &AArch64::FPR32RegClass;
2672     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2673       SrcReg = Registers[4][FPRIdx++];
2674       RC = &AArch64::FPR64RegClass;
2675     } else if (VT.is128BitVector()) {
2676       SrcReg = Registers[5][FPRIdx++];
2677       RC = &AArch64::FPR128RegClass;
2678     } else
2679       llvm_unreachable("Unexpected value type.");
2680
2681     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2682     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2683     // Without this, EmitLiveInCopies may eliminate the livein if its only
2684     // use is a bitcast (which isn't turned into an instruction).
2685     unsigned ResultReg = createResultReg(RC);
2686     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2687             TII.get(TargetOpcode::COPY), ResultReg)
2688         .addReg(DstReg, getKillRegState(true));
2689     updateValueMap(&Arg, ResultReg);
2690   }
2691   return true;
2692 }
2693
2694 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
2695                                       SmallVectorImpl<MVT> &OutVTs,
2696                                       unsigned &NumBytes) {
2697   CallingConv::ID CC = CLI.CallConv;
2698   SmallVector<CCValAssign, 16> ArgLocs;
2699   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2700   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2701
2702   // Get a count of how many bytes are to be pushed on the stack.
2703   NumBytes = CCInfo.getNextStackOffset();
2704
2705   // Issue CALLSEQ_START
2706   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
2707   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
2708     .addImm(NumBytes);
2709
2710   // Process the args.
2711   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2712     CCValAssign &VA = ArgLocs[i];
2713     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
2714     MVT ArgVT = OutVTs[VA.getValNo()];
2715
2716     unsigned ArgReg = getRegForValue(ArgVal);
2717     if (!ArgReg)
2718       return false;
2719
2720     // Handle arg promotion: SExt, ZExt, AExt.
2721     switch (VA.getLocInfo()) {
2722     case CCValAssign::Full:
2723       break;
2724     case CCValAssign::SExt: {
2725       MVT DestVT = VA.getLocVT();
2726       MVT SrcVT = ArgVT;
2727       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
2728       if (!ArgReg)
2729         return false;
2730       break;
2731     }
2732     case CCValAssign::AExt:
2733     // Intentional fall-through.
2734     case CCValAssign::ZExt: {
2735       MVT DestVT = VA.getLocVT();
2736       MVT SrcVT = ArgVT;
2737       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
2738       if (!ArgReg)
2739         return false;
2740       break;
2741     }
2742     default:
2743       llvm_unreachable("Unknown arg promotion!");
2744     }
2745
2746     // Now copy/store arg to correct locations.
2747     if (VA.isRegLoc() && !VA.needsCustom()) {
2748       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2749               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
2750       CLI.OutRegs.push_back(VA.getLocReg());
2751     } else if (VA.needsCustom()) {
2752       // FIXME: Handle custom args.
2753       return false;
2754     } else {
2755       assert(VA.isMemLoc() && "Assuming store on stack.");
2756
2757       // Don't emit stores for undef values.
2758       if (isa<UndefValue>(ArgVal))
2759         continue;
2760
2761       // Need to store on the stack.
2762       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
2763
2764       unsigned BEAlign = 0;
2765       if (ArgSize < 8 && !Subtarget->isLittleEndian())
2766         BEAlign = 8 - ArgSize;
2767
2768       Address Addr;
2769       Addr.setKind(Address::RegBase);
2770       Addr.setReg(AArch64::SP);
2771       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
2772
2773       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
2774       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
2775         MachinePointerInfo::getStack(Addr.getOffset()),
2776         MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
2777
2778       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
2779         return false;
2780     }
2781   }
2782   return true;
2783 }
2784
2785 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
2786                                  unsigned NumBytes) {
2787   CallingConv::ID CC = CLI.CallConv;
2788
2789   // Issue CALLSEQ_END
2790   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
2791   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
2792     .addImm(NumBytes).addImm(0);
2793
2794   // Now the return value.
2795   if (RetVT != MVT::isVoid) {
2796     SmallVector<CCValAssign, 16> RVLocs;
2797     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
2798     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
2799
2800     // Only handle a single return value.
2801     if (RVLocs.size() != 1)
2802       return false;
2803
2804     // Copy all of the result registers out of their specified physreg.
2805     MVT CopyVT = RVLocs[0].getValVT();
2806     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
2807     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2808             TII.get(TargetOpcode::COPY), ResultReg)
2809         .addReg(RVLocs[0].getLocReg());
2810     CLI.InRegs.push_back(RVLocs[0].getLocReg());
2811
2812     CLI.ResultReg = ResultReg;
2813     CLI.NumResultRegs = 1;
2814   }
2815
2816   return true;
2817 }
2818
2819 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
2820   CallingConv::ID CC  = CLI.CallConv;
2821   bool IsTailCall     = CLI.IsTailCall;
2822   bool IsVarArg       = CLI.IsVarArg;
2823   const Value *Callee = CLI.Callee;
2824   const char *SymName = CLI.SymName;
2825
2826   if (!Callee && !SymName)
2827     return false;
2828
2829   // Allow SelectionDAG isel to handle tail calls.
2830   if (IsTailCall)
2831     return false;
2832
2833   CodeModel::Model CM = TM.getCodeModel();
2834   // Only support the small and large code model.
2835   if (CM != CodeModel::Small && CM != CodeModel::Large)
2836     return false;
2837
2838   // FIXME: Add large code model support for ELF.
2839   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
2840     return false;
2841
2842   // Let SDISel handle vararg functions.
2843   if (IsVarArg)
2844     return false;
2845
2846   // FIXME: Only handle *simple* calls for now.
2847   MVT RetVT;
2848   if (CLI.RetTy->isVoidTy())
2849     RetVT = MVT::isVoid;
2850   else if (!isTypeLegal(CLI.RetTy, RetVT))
2851     return false;
2852
2853   for (auto Flag : CLI.OutFlags)
2854     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
2855       return false;
2856
2857   // Set up the argument vectors.
2858   SmallVector<MVT, 16> OutVTs;
2859   OutVTs.reserve(CLI.OutVals.size());
2860
2861   for (auto *Val : CLI.OutVals) {
2862     MVT VT;
2863     if (!isTypeLegal(Val->getType(), VT) &&
2864         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
2865       return false;
2866
2867     // We don't handle vector parameters yet.
2868     if (VT.isVector() || VT.getSizeInBits() > 64)
2869       return false;
2870
2871     OutVTs.push_back(VT);
2872   }
2873
2874   Address Addr;
2875   if (Callee && !computeCallAddress(Callee, Addr))
2876     return false;
2877
2878   // Handle the arguments now that we've gotten them.
2879   unsigned NumBytes;
2880   if (!processCallArgs(CLI, OutVTs, NumBytes))
2881     return false;
2882
2883   // Issue the call.
2884   MachineInstrBuilder MIB;
2885   if (CM == CodeModel::Small) {
2886     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
2887     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
2888     if (SymName)
2889       MIB.addExternalSymbol(SymName, 0);
2890     else if (Addr.getGlobalValue())
2891       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
2892     else if (Addr.getReg()) {
2893       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
2894       MIB.addReg(Reg);
2895     } else
2896       return false;
2897   } else {
2898     unsigned CallReg = 0;
2899     if (SymName) {
2900       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
2901       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
2902               ADRPReg)
2903         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
2904
2905       CallReg = createResultReg(&AArch64::GPR64RegClass);
2906       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
2907               CallReg)
2908         .addReg(ADRPReg)
2909         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
2910                            AArch64II::MO_NC);
2911     } else if (Addr.getGlobalValue())
2912       CallReg = materializeGV(Addr.getGlobalValue());
2913     else if (Addr.getReg())
2914       CallReg = Addr.getReg();
2915
2916     if (!CallReg)
2917       return false;
2918
2919     const MCInstrDesc &II = TII.get(AArch64::BLR);
2920     CallReg = constrainOperandRegClass(II, CallReg, 0);
2921     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
2922   }
2923
2924   // Add implicit physical register uses to the call.
2925   for (auto Reg : CLI.OutRegs)
2926     MIB.addReg(Reg, RegState::Implicit);
2927
2928   // Add a register mask with the call-preserved registers.
2929   // Proper defs for return values will be added by setPhysRegsDeadExcept().
2930   MIB.addRegMask(TRI.getCallPreservedMask(CC));
2931
2932   CLI.Call = MIB;
2933
2934   // Finish off the call including any return values.
2935   return finishCall(CLI, RetVT, NumBytes);
2936 }
2937
2938 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
2939   if (Alignment)
2940     return Len / Alignment <= 4;
2941   else
2942     return Len < 32;
2943 }
2944
2945 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
2946                                          uint64_t Len, unsigned Alignment) {
2947   // Make sure we don't bloat code by inlining very large memcpy's.
2948   if (!isMemCpySmall(Len, Alignment))
2949     return false;
2950
2951   int64_t UnscaledOffset = 0;
2952   Address OrigDest = Dest;
2953   Address OrigSrc = Src;
2954
2955   while (Len) {
2956     MVT VT;
2957     if (!Alignment || Alignment >= 8) {
2958       if (Len >= 8)
2959         VT = MVT::i64;
2960       else if (Len >= 4)
2961         VT = MVT::i32;
2962       else if (Len >= 2)
2963         VT = MVT::i16;
2964       else {
2965         VT = MVT::i8;
2966       }
2967     } else {
2968       // Bound based on alignment.
2969       if (Len >= 4 && Alignment == 4)
2970         VT = MVT::i32;
2971       else if (Len >= 2 && Alignment == 2)
2972         VT = MVT::i16;
2973       else {
2974         VT = MVT::i8;
2975       }
2976     }
2977
2978     bool RV;
2979     unsigned ResultReg;
2980     RV = emitLoad(VT, VT, ResultReg, Src);
2981     if (!RV)
2982       return false;
2983
2984     RV = emitStore(VT, ResultReg, Dest);
2985     if (!RV)
2986       return false;
2987
2988     int64_t Size = VT.getSizeInBits() / 8;
2989     Len -= Size;
2990     UnscaledOffset += Size;
2991
2992     // We need to recompute the unscaled offset for each iteration.
2993     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
2994     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
2995   }
2996
2997   return true;
2998 }
2999
3000 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
3001 /// into the user. The condition code will only be updated on success.
3002 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3003                                         const Instruction *I,
3004                                         const Value *Cond) {
3005   if (!isa<ExtractValueInst>(Cond))
3006     return false;
3007
3008   const auto *EV = cast<ExtractValueInst>(Cond);
3009   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3010     return false;
3011
3012   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3013   MVT RetVT;
3014   const Function *Callee = II->getCalledFunction();
3015   Type *RetTy =
3016   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3017   if (!isTypeLegal(RetTy, RetVT))
3018     return false;
3019
3020   if (RetVT != MVT::i32 && RetVT != MVT::i64)
3021     return false;
3022
3023   const Value *LHS = II->getArgOperand(0);
3024   const Value *RHS = II->getArgOperand(1);
3025
3026   // Canonicalize immediate to the RHS.
3027   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3028       isCommutativeIntrinsic(II))
3029     std::swap(LHS, RHS);
3030
3031   // Simplify multiplies.
3032   unsigned IID = II->getIntrinsicID();
3033   switch (IID) {
3034   default:
3035     break;
3036   case Intrinsic::smul_with_overflow:
3037     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3038       if (C->getValue() == 2)
3039         IID = Intrinsic::sadd_with_overflow;
3040     break;
3041   case Intrinsic::umul_with_overflow:
3042     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3043       if (C->getValue() == 2)
3044         IID = Intrinsic::uadd_with_overflow;
3045     break;
3046   }
3047
3048   AArch64CC::CondCode TmpCC;
3049   switch (IID) {
3050   default:
3051     return false;
3052   case Intrinsic::sadd_with_overflow:
3053   case Intrinsic::ssub_with_overflow:
3054     TmpCC = AArch64CC::VS;
3055     break;
3056   case Intrinsic::uadd_with_overflow:
3057     TmpCC = AArch64CC::HS;
3058     break;
3059   case Intrinsic::usub_with_overflow:
3060     TmpCC = AArch64CC::LO;
3061     break;
3062   case Intrinsic::smul_with_overflow:
3063   case Intrinsic::umul_with_overflow:
3064     TmpCC = AArch64CC::NE;
3065     break;
3066   }
3067
3068   // Check if both instructions are in the same basic block.
3069   if (!isValueAvailable(II))
3070     return false;
3071
3072   // Make sure nothing is in the way
3073   BasicBlock::const_iterator Start = I;
3074   BasicBlock::const_iterator End = II;
3075   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3076     // We only expect extractvalue instructions between the intrinsic and the
3077     // instruction to be selected.
3078     if (!isa<ExtractValueInst>(Itr))
3079       return false;
3080
3081     // Check that the extractvalue operand comes from the intrinsic.
3082     const auto *EVI = cast<ExtractValueInst>(Itr);
3083     if (EVI->getAggregateOperand() != II)
3084       return false;
3085   }
3086
3087   CC = TmpCC;
3088   return true;
3089 }
3090
3091 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3092   // FIXME: Handle more intrinsics.
3093   switch (II->getIntrinsicID()) {
3094   default: return false;
3095   case Intrinsic::frameaddress: {
3096     MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
3097     MFI->setFrameAddressIsTaken(true);
3098
3099     const AArch64RegisterInfo *RegInfo =
3100         static_cast<const AArch64RegisterInfo *>(
3101             TM.getSubtargetImpl()->getRegisterInfo());
3102     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3103     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3104     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3105             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3106     // Recursively load frame address
3107     // ldr x0, [fp]
3108     // ldr x0, [x0]
3109     // ldr x0, [x0]
3110     // ...
3111     unsigned DestReg;
3112     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3113     while (Depth--) {
3114       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3115                                 SrcReg, /*IsKill=*/true, 0);
3116       assert(DestReg && "Unexpected LDR instruction emission failure.");
3117       SrcReg = DestReg;
3118     }
3119
3120     updateValueMap(II, SrcReg);
3121     return true;
3122   }
3123   case Intrinsic::memcpy:
3124   case Intrinsic::memmove: {
3125     const auto *MTI = cast<MemTransferInst>(II);
3126     // Don't handle volatile.
3127     if (MTI->isVolatile())
3128       return false;
3129
3130     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
3131     // we would emit dead code because we don't currently handle memmoves.
3132     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3133     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3134       // Small memcpy's are common enough that we want to do them without a call
3135       // if possible.
3136       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3137       unsigned Alignment = MTI->getAlignment();
3138       if (isMemCpySmall(Len, Alignment)) {
3139         Address Dest, Src;
3140         if (!computeAddress(MTI->getRawDest(), Dest) ||
3141             !computeAddress(MTI->getRawSource(), Src))
3142           return false;
3143         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3144           return true;
3145       }
3146     }
3147
3148     if (!MTI->getLength()->getType()->isIntegerTy(64))
3149       return false;
3150
3151     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3152       // Fast instruction selection doesn't support the special
3153       // address spaces.
3154       return false;
3155
3156     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3157     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
3158   }
3159   case Intrinsic::memset: {
3160     const MemSetInst *MSI = cast<MemSetInst>(II);
3161     // Don't handle volatile.
3162     if (MSI->isVolatile())
3163       return false;
3164
3165     if (!MSI->getLength()->getType()->isIntegerTy(64))
3166       return false;
3167
3168     if (MSI->getDestAddressSpace() > 255)
3169       // Fast instruction selection doesn't support the special
3170       // address spaces.
3171       return false;
3172
3173     return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
3174   }
3175   case Intrinsic::sin:
3176   case Intrinsic::cos:
3177   case Intrinsic::pow: {
3178     MVT RetVT;
3179     if (!isTypeLegal(II->getType(), RetVT))
3180       return false;
3181
3182     if (RetVT != MVT::f32 && RetVT != MVT::f64)
3183       return false;
3184
3185     static const RTLIB::Libcall LibCallTable[3][2] = {
3186       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3187       { RTLIB::COS_F32, RTLIB::COS_F64 },
3188       { RTLIB::POW_F32, RTLIB::POW_F64 }
3189     };
3190     RTLIB::Libcall LC;
3191     bool Is64Bit = RetVT == MVT::f64;
3192     switch (II->getIntrinsicID()) {
3193     default:
3194       llvm_unreachable("Unexpected intrinsic.");
3195     case Intrinsic::sin:
3196       LC = LibCallTable[0][Is64Bit];
3197       break;
3198     case Intrinsic::cos:
3199       LC = LibCallTable[1][Is64Bit];
3200       break;
3201     case Intrinsic::pow:
3202       LC = LibCallTable[2][Is64Bit];
3203       break;
3204     }
3205
3206     ArgListTy Args;
3207     Args.reserve(II->getNumArgOperands());
3208
3209     // Populate the argument list.
3210     for (auto &Arg : II->arg_operands()) {
3211       ArgListEntry Entry;
3212       Entry.Val = Arg;
3213       Entry.Ty = Arg->getType();
3214       Args.push_back(Entry);
3215     }
3216
3217     CallLoweringInfo CLI;
3218     CLI.setCallee(TLI.getLibcallCallingConv(LC), II->getType(),
3219                   TLI.getLibcallName(LC), std::move(Args));
3220     if (!lowerCallTo(CLI))
3221       return false;
3222     updateValueMap(II, CLI.ResultReg);
3223     return true;
3224   }
3225   case Intrinsic::trap: {
3226     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3227         .addImm(1);
3228     return true;
3229   }
3230   case Intrinsic::sqrt: {
3231     Type *RetTy = II->getCalledFunction()->getReturnType();
3232
3233     MVT VT;
3234     if (!isTypeLegal(RetTy, VT))
3235       return false;
3236
3237     unsigned Op0Reg = getRegForValue(II->getOperand(0));
3238     if (!Op0Reg)
3239       return false;
3240     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3241
3242     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3243     if (!ResultReg)
3244       return false;
3245
3246     updateValueMap(II, ResultReg);
3247     return true;
3248   }
3249   case Intrinsic::sadd_with_overflow:
3250   case Intrinsic::uadd_with_overflow:
3251   case Intrinsic::ssub_with_overflow:
3252   case Intrinsic::usub_with_overflow:
3253   case Intrinsic::smul_with_overflow:
3254   case Intrinsic::umul_with_overflow: {
3255     // This implements the basic lowering of the xalu with overflow intrinsics.
3256     const Function *Callee = II->getCalledFunction();
3257     auto *Ty = cast<StructType>(Callee->getReturnType());
3258     Type *RetTy = Ty->getTypeAtIndex(0U);
3259
3260     MVT VT;
3261     if (!isTypeLegal(RetTy, VT))
3262       return false;
3263
3264     if (VT != MVT::i32 && VT != MVT::i64)
3265       return false;
3266
3267     const Value *LHS = II->getArgOperand(0);
3268     const Value *RHS = II->getArgOperand(1);
3269     // Canonicalize immediate to the RHS.
3270     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3271         isCommutativeIntrinsic(II))
3272       std::swap(LHS, RHS);
3273
3274     // Simplify multiplies.
3275     unsigned IID = II->getIntrinsicID();
3276     switch (IID) {
3277     default:
3278       break;
3279     case Intrinsic::smul_with_overflow:
3280       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3281         if (C->getValue() == 2) {
3282           IID = Intrinsic::sadd_with_overflow;
3283           RHS = LHS;
3284         }
3285       break;
3286     case Intrinsic::umul_with_overflow:
3287       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3288         if (C->getValue() == 2) {
3289           IID = Intrinsic::uadd_with_overflow;
3290           RHS = LHS;
3291         }
3292       break;
3293     }
3294
3295     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3296     AArch64CC::CondCode CC = AArch64CC::Invalid;
3297     switch (IID) {
3298     default: llvm_unreachable("Unexpected intrinsic!");
3299     case Intrinsic::sadd_with_overflow:
3300       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3301       CC = AArch64CC::VS;
3302       break;
3303     case Intrinsic::uadd_with_overflow:
3304       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3305       CC = AArch64CC::HS;
3306       break;
3307     case Intrinsic::ssub_with_overflow:
3308       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3309       CC = AArch64CC::VS;
3310       break;
3311     case Intrinsic::usub_with_overflow:
3312       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3313       CC = AArch64CC::LO;
3314       break;
3315     case Intrinsic::smul_with_overflow: {
3316       CC = AArch64CC::NE;
3317       unsigned LHSReg = getRegForValue(LHS);
3318       if (!LHSReg)
3319         return false;
3320       bool LHSIsKill = hasTrivialKill(LHS);
3321
3322       unsigned RHSReg = getRegForValue(RHS);
3323       if (!RHSReg)
3324         return false;
3325       bool RHSIsKill = hasTrivialKill(RHS);
3326
3327       if (VT == MVT::i32) {
3328         MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3329         unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3330                                        /*IsKill=*/false, 32);
3331         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3332                                             AArch64::sub_32);
3333         ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3334                                               AArch64::sub_32);
3335         emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3336                     AArch64_AM::ASR, 31, /*WantResult=*/false);
3337       } else {
3338         assert(VT == MVT::i64 && "Unexpected value type.");
3339         MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3340         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3341                                         RHSReg, RHSIsKill);
3342         emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3343                     AArch64_AM::ASR, 63, /*WantResult=*/false);
3344       }
3345       break;
3346     }
3347     case Intrinsic::umul_with_overflow: {
3348       CC = AArch64CC::NE;
3349       unsigned LHSReg = getRegForValue(LHS);
3350       if (!LHSReg)
3351         return false;
3352       bool LHSIsKill = hasTrivialKill(LHS);
3353
3354       unsigned RHSReg = getRegForValue(RHS);
3355       if (!RHSReg)
3356         return false;
3357       bool RHSIsKill = hasTrivialKill(RHS);
3358
3359       if (VT == MVT::i32) {
3360         MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3361         emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3362                     /*IsKill=*/false, AArch64_AM::LSR, 32,
3363                     /*WantResult=*/false);
3364         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3365                                             AArch64::sub_32);
3366       } else {
3367         assert(VT == MVT::i64 && "Unexpected value type.");
3368         MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3369         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3370                                         RHSReg, RHSIsKill);
3371         emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3372                     /*IsKill=*/false, /*WantResult=*/false);
3373       }
3374       break;
3375     }
3376     }
3377
3378     if (MulReg) {
3379       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3380       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3381               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3382     }
3383
3384     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3385                                   AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3386                                   /*IsKill=*/true, getInvertedCondCode(CC));
3387     (void)ResultReg2;
3388     assert((ResultReg1 + 1) == ResultReg2 &&
3389            "Nonconsecutive result registers.");
3390     updateValueMap(II, ResultReg1, 2);
3391     return true;
3392   }
3393   }
3394   return false;
3395 }
3396
3397 bool AArch64FastISel::selectRet(const Instruction *I) {
3398   const ReturnInst *Ret = cast<ReturnInst>(I);
3399   const Function &F = *I->getParent()->getParent();
3400
3401   if (!FuncInfo.CanLowerReturn)
3402     return false;
3403
3404   if (F.isVarArg())
3405     return false;
3406
3407   // Build a list of return value registers.
3408   SmallVector<unsigned, 4> RetRegs;
3409
3410   if (Ret->getNumOperands() > 0) {
3411     CallingConv::ID CC = F.getCallingConv();
3412     SmallVector<ISD::OutputArg, 4> Outs;
3413     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
3414
3415     // Analyze operands of the call, assigning locations to each operand.
3416     SmallVector<CCValAssign, 16> ValLocs;
3417     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3418     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3419                                                      : RetCC_AArch64_AAPCS;
3420     CCInfo.AnalyzeReturn(Outs, RetCC);
3421
3422     // Only handle a single return value for now.
3423     if (ValLocs.size() != 1)
3424       return false;
3425
3426     CCValAssign &VA = ValLocs[0];
3427     const Value *RV = Ret->getOperand(0);
3428
3429     // Don't bother handling odd stuff for now.
3430     if ((VA.getLocInfo() != CCValAssign::Full) &&
3431         (VA.getLocInfo() != CCValAssign::BCvt))
3432       return false;
3433
3434     // Only handle register returns for now.
3435     if (!VA.isRegLoc())
3436       return false;
3437
3438     unsigned Reg = getRegForValue(RV);
3439     if (Reg == 0)
3440       return false;
3441
3442     unsigned SrcReg = Reg + VA.getValNo();
3443     unsigned DestReg = VA.getLocReg();
3444     // Avoid a cross-class copy. This is very unlikely.
3445     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3446       return false;
3447
3448     EVT RVEVT = TLI.getValueType(RV->getType());
3449     if (!RVEVT.isSimple())
3450       return false;
3451
3452     // Vectors (of > 1 lane) in big endian need tricky handling.
3453     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3454         !Subtarget->isLittleEndian())
3455       return false;
3456
3457     MVT RVVT = RVEVT.getSimpleVT();
3458     if (RVVT == MVT::f128)
3459       return false;
3460
3461     MVT DestVT = VA.getValVT();
3462     // Special handling for extended integers.
3463     if (RVVT != DestVT) {
3464       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3465         return false;
3466
3467       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3468         return false;
3469
3470       bool IsZExt = Outs[0].Flags.isZExt();
3471       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3472       if (SrcReg == 0)
3473         return false;
3474     }
3475
3476     // Make the copy.
3477     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3478             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3479
3480     // Add register to return instruction.
3481     RetRegs.push_back(VA.getLocReg());
3482   }
3483
3484   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3485                                     TII.get(AArch64::RET_ReallyLR));
3486   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
3487     MIB.addReg(RetRegs[i], RegState::Implicit);
3488   return true;
3489 }
3490
3491 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3492   Type *DestTy = I->getType();
3493   Value *Op = I->getOperand(0);
3494   Type *SrcTy = Op->getType();
3495
3496   EVT SrcEVT = TLI.getValueType(SrcTy, true);
3497   EVT DestEVT = TLI.getValueType(DestTy, true);
3498   if (!SrcEVT.isSimple())
3499     return false;
3500   if (!DestEVT.isSimple())
3501     return false;
3502
3503   MVT SrcVT = SrcEVT.getSimpleVT();
3504   MVT DestVT = DestEVT.getSimpleVT();
3505
3506   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3507       SrcVT != MVT::i8)
3508     return false;
3509   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3510       DestVT != MVT::i1)
3511     return false;
3512
3513   unsigned SrcReg = getRegForValue(Op);
3514   if (!SrcReg)
3515     return false;
3516   bool SrcIsKill = hasTrivialKill(Op);
3517
3518   // If we're truncating from i64 to a smaller non-legal type then generate an
3519   // AND. Otherwise, we know the high bits are undefined and a truncate only
3520   // generate a COPY. We cannot mark the source register also as result
3521   // register, because this can incorrectly transfer the kill flag onto the
3522   // source register.
3523   unsigned ResultReg;
3524   if (SrcVT == MVT::i64) {
3525     uint64_t Mask = 0;
3526     switch (DestVT.SimpleTy) {
3527     default:
3528       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3529       return false;
3530     case MVT::i1:
3531       Mask = 0x1;
3532       break;
3533     case MVT::i8:
3534       Mask = 0xff;
3535       break;
3536     case MVT::i16:
3537       Mask = 0xffff;
3538       break;
3539     }
3540     // Issue an extract_subreg to get the lower 32-bits.
3541     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3542                                                 AArch64::sub_32);
3543     // Create the AND instruction which performs the actual truncation.
3544     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3545     assert(ResultReg && "Unexpected AND instruction emission failure.");
3546   } else {
3547     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3548     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3549             TII.get(TargetOpcode::COPY), ResultReg)
3550         .addReg(SrcReg, getKillRegState(SrcIsKill));
3551   }
3552
3553   updateValueMap(I, ResultReg);
3554   return true;
3555 }
3556
3557 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3558   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3559           DestVT == MVT::i64) &&
3560          "Unexpected value type.");
3561   // Handle i8 and i16 as i32.
3562   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3563     DestVT = MVT::i32;
3564
3565   if (IsZExt) {
3566     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3567     assert(ResultReg && "Unexpected AND instruction emission failure.");
3568     if (DestVT == MVT::i64) {
3569       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
3570       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
3571       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3572       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3573               TII.get(AArch64::SUBREG_TO_REG), Reg64)
3574           .addImm(0)
3575           .addReg(ResultReg)
3576           .addImm(AArch64::sub_32);
3577       ResultReg = Reg64;
3578     }
3579     return ResultReg;
3580   } else {
3581     if (DestVT == MVT::i64) {
3582       // FIXME: We're SExt i1 to i64.
3583       return 0;
3584     }
3585     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3586                             /*TODO:IsKill=*/false, 0, 0);
3587   }
3588 }
3589
3590 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3591                                       unsigned Op1, bool Op1IsKill) {
3592   unsigned Opc, ZReg;
3593   switch (RetVT.SimpleTy) {
3594   default: return 0;
3595   case MVT::i8:
3596   case MVT::i16:
3597   case MVT::i32:
3598     RetVT = MVT::i32;
3599     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3600   case MVT::i64:
3601     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3602   }
3603
3604   const TargetRegisterClass *RC =
3605       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3606   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3607                           /*IsKill=*/ZReg, true);
3608 }
3609
3610 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3611                                         unsigned Op1, bool Op1IsKill) {
3612   if (RetVT != MVT::i64)
3613     return 0;
3614
3615   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3616                           Op0, Op0IsKill, Op1, Op1IsKill,
3617                           AArch64::XZR, /*IsKill=*/true);
3618 }
3619
3620 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3621                                         unsigned Op1, bool Op1IsKill) {
3622   if (RetVT != MVT::i64)
3623     return 0;
3624
3625   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3626                           Op0, Op0IsKill, Op1, Op1IsKill,
3627                           AArch64::XZR, /*IsKill=*/true);
3628 }
3629
3630 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3631                                      unsigned Op1Reg, bool Op1IsKill) {
3632   unsigned Opc = 0;
3633   bool NeedTrunc = false;
3634   uint64_t Mask = 0;
3635   switch (RetVT.SimpleTy) {
3636   default: return 0;
3637   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
3638   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
3639   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
3640   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
3641   }
3642
3643   const TargetRegisterClass *RC =
3644       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3645   if (NeedTrunc) {
3646     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3647     Op1IsKill = true;
3648   }
3649   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3650                                        Op1IsKill);
3651   if (NeedTrunc)
3652     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3653   return ResultReg;
3654 }
3655
3656 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3657                                      bool Op0IsKill, uint64_t Shift,
3658                                      bool IsZext) {
3659   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3660          "Unexpected source/return type pair.");
3661   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
3662           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
3663          "Unexpected source value type.");
3664   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3665           RetVT == MVT::i64) && "Unexpected return value type.");
3666
3667   bool Is64Bit = (RetVT == MVT::i64);
3668   unsigned RegSize = Is64Bit ? 64 : 32;
3669   unsigned DstBits = RetVT.getSizeInBits();
3670   unsigned SrcBits = SrcVT.getSizeInBits();
3671
3672   // Don't deal with undefined shifts.
3673   if (Shift >= DstBits)
3674     return 0;
3675
3676   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3677   // {S|U}BFM Wd, Wn, #r, #s
3678   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
3679
3680   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3681   // %2 = shl i16 %1, 4
3682   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
3683   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
3684   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
3685   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
3686
3687   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3688   // %2 = shl i16 %1, 8
3689   // Wd<32+7-24,32-24> = Wn<7:0>
3690   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
3691   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
3692   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
3693
3694   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3695   // %2 = shl i16 %1, 12
3696   // Wd<32+3-20,32-20> = Wn<3:0>
3697   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
3698   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
3699   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
3700
3701   unsigned ImmR = RegSize - Shift;
3702   // Limit the width to the length of the source type.
3703   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
3704   static const unsigned OpcTable[2][2] = {
3705     {AArch64::SBFMWri, AArch64::SBFMXri},
3706     {AArch64::UBFMWri, AArch64::UBFMXri}
3707   };
3708   unsigned Opc = OpcTable[IsZext][Is64Bit];
3709   const TargetRegisterClass *RC =
3710       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3711   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3712     unsigned TmpReg = MRI.createVirtualRegister(RC);
3713     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3714             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3715         .addImm(0)
3716         .addReg(Op0, getKillRegState(Op0IsKill))
3717         .addImm(AArch64::sub_32);
3718     Op0 = TmpReg;
3719     Op0IsKill = true;
3720   }
3721   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3722 }
3723
3724 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3725                                      unsigned Op1Reg, bool Op1IsKill) {
3726   unsigned Opc = 0;
3727   bool NeedTrunc = false;
3728   uint64_t Mask = 0;
3729   switch (RetVT.SimpleTy) {
3730   default: return 0;
3731   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
3732   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
3733   case MVT::i32: Opc = AArch64::LSRVWr; break;
3734   case MVT::i64: Opc = AArch64::LSRVXr; break;
3735   }
3736
3737   const TargetRegisterClass *RC =
3738       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3739   if (NeedTrunc) {
3740     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
3741     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3742     Op0IsKill = Op1IsKill = true;
3743   }
3744   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3745                                        Op1IsKill);
3746   if (NeedTrunc)
3747     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3748   return ResultReg;
3749 }
3750
3751 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3752                                      bool Op0IsKill, uint64_t Shift,
3753                                      bool IsZExt) {
3754   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3755          "Unexpected source/return type pair.");
3756   assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3757           SrcVT == MVT::i64) && "Unexpected source value type.");
3758   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3759           RetVT == MVT::i64) && "Unexpected return value type.");
3760
3761   bool Is64Bit = (RetVT == MVT::i64);
3762   unsigned RegSize = Is64Bit ? 64 : 32;
3763   unsigned DstBits = RetVT.getSizeInBits();
3764   unsigned SrcBits = SrcVT.getSizeInBits();
3765
3766   // Don't deal with undefined shifts.
3767   if (Shift >= DstBits)
3768     return 0;
3769
3770   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3771   // {S|U}BFM Wd, Wn, #r, #s
3772   // Wd<s-r:0> = Wn<s:r> when r <= s
3773
3774   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3775   // %2 = lshr i16 %1, 4
3776   // Wd<7-4:0> = Wn<7:4>
3777   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
3778   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
3779   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
3780
3781   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3782   // %2 = lshr i16 %1, 8
3783   // Wd<7-7,0> = Wn<7:7>
3784   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
3785   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3786   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3787
3788   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3789   // %2 = lshr i16 %1, 12
3790   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
3791   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
3792   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3793   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3794
3795   if (Shift >= SrcBits && IsZExt)
3796     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
3797
3798   // It is not possible to fold a sign-extend into the LShr instruction. In this
3799   // case emit a sign-extend.
3800   if (!IsZExt) {
3801     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
3802     if (!Op0)
3803       return 0;
3804     Op0IsKill = true;
3805     SrcVT = RetVT;
3806     SrcBits = SrcVT.getSizeInBits();
3807     IsZExt = true;
3808   }
3809
3810   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
3811   unsigned ImmS = SrcBits - 1;
3812   static const unsigned OpcTable[2][2] = {
3813     {AArch64::SBFMWri, AArch64::SBFMXri},
3814     {AArch64::UBFMWri, AArch64::UBFMXri}
3815   };
3816   unsigned Opc = OpcTable[IsZExt][Is64Bit];
3817   const TargetRegisterClass *RC =
3818       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3819   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3820     unsigned TmpReg = MRI.createVirtualRegister(RC);
3821     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3822             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3823         .addImm(0)
3824         .addReg(Op0, getKillRegState(Op0IsKill))
3825         .addImm(AArch64::sub_32);
3826     Op0 = TmpReg;
3827     Op0IsKill = true;
3828   }
3829   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3830 }
3831
3832 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3833                                      unsigned Op1Reg, bool Op1IsKill) {
3834   unsigned Opc = 0;
3835   bool NeedTrunc = false;
3836   uint64_t Mask = 0;
3837   switch (RetVT.SimpleTy) {
3838   default: return 0;
3839   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
3840   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
3841   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
3842   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
3843   }
3844
3845   const TargetRegisterClass *RC =
3846       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3847   if (NeedTrunc) {
3848     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
3849     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3850     Op0IsKill = Op1IsKill = true;
3851   }
3852   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3853                                        Op1IsKill);
3854   if (NeedTrunc)
3855     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3856   return ResultReg;
3857 }
3858
3859 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3860                                      bool Op0IsKill, uint64_t Shift,
3861                                      bool IsZExt) {
3862   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3863          "Unexpected source/return type pair.");
3864   assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3865           SrcVT == MVT::i64) && "Unexpected source value type.");
3866   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3867           RetVT == MVT::i64) && "Unexpected return value type.");
3868
3869   bool Is64Bit = (RetVT == MVT::i64);
3870   unsigned RegSize = Is64Bit ? 64 : 32;
3871   unsigned DstBits = RetVT.getSizeInBits();
3872   unsigned SrcBits = SrcVT.getSizeInBits();
3873
3874   // Don't deal with undefined shifts.
3875   if (Shift >= DstBits)
3876     return 0;
3877
3878   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3879   // {S|U}BFM Wd, Wn, #r, #s
3880   // Wd<s-r:0> = Wn<s:r> when r <= s
3881
3882   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3883   // %2 = ashr i16 %1, 4
3884   // Wd<7-4:0> = Wn<7:4>
3885   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
3886   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
3887   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
3888
3889   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3890   // %2 = ashr i16 %1, 8
3891   // Wd<7-7,0> = Wn<7:7>
3892   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
3893   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3894   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3895
3896   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3897   // %2 = ashr i16 %1, 12
3898   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
3899   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
3900   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3901   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3902
3903   if (Shift >= SrcBits && IsZExt)
3904     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
3905
3906   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
3907   unsigned ImmS = SrcBits - 1;
3908   static const unsigned OpcTable[2][2] = {
3909     {AArch64::SBFMWri, AArch64::SBFMXri},
3910     {AArch64::UBFMWri, AArch64::UBFMXri}
3911   };
3912   unsigned Opc = OpcTable[IsZExt][Is64Bit];
3913   const TargetRegisterClass *RC =
3914       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3915   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3916     unsigned TmpReg = MRI.createVirtualRegister(RC);
3917     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3918             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3919         .addImm(0)
3920         .addReg(Op0, getKillRegState(Op0IsKill))
3921         .addImm(AArch64::sub_32);
3922     Op0 = TmpReg;
3923     Op0IsKill = true;
3924   }
3925   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3926 }
3927
3928 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
3929                                      bool IsZExt) {
3930   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
3931
3932   // FastISel does not have plumbing to deal with extensions where the SrcVT or
3933   // DestVT are odd things, so test to make sure that they are both types we can
3934   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
3935   // bail out to SelectionDAG.
3936   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
3937        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
3938       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
3939        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
3940     return 0;
3941
3942   unsigned Opc;
3943   unsigned Imm = 0;
3944
3945   switch (SrcVT.SimpleTy) {
3946   default:
3947     return 0;
3948   case MVT::i1:
3949     return emiti1Ext(SrcReg, DestVT, IsZExt);
3950   case MVT::i8:
3951     if (DestVT == MVT::i64)
3952       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3953     else
3954       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
3955     Imm = 7;
3956     break;
3957   case MVT::i16:
3958     if (DestVT == MVT::i64)
3959       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3960     else
3961       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
3962     Imm = 15;
3963     break;
3964   case MVT::i32:
3965     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
3966     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3967     Imm = 31;
3968     break;
3969   }
3970
3971   // Handle i8 and i16 as i32.
3972   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3973     DestVT = MVT::i32;
3974   else if (DestVT == MVT::i64) {
3975     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3976     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3977             TII.get(AArch64::SUBREG_TO_REG), Src64)
3978         .addImm(0)
3979         .addReg(SrcReg)
3980         .addImm(AArch64::sub_32);
3981     SrcReg = Src64;
3982   }
3983
3984   const TargetRegisterClass *RC =
3985       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3986   return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
3987 }
3988
3989 bool AArch64FastISel::selectIntExt(const Instruction *I) {
3990   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
3991          "Unexpected integer extend instruction.");
3992   MVT RetVT;
3993   MVT SrcVT;
3994   if (!isTypeSupported(I->getType(), RetVT))
3995     return false;
3996
3997   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
3998     return false;
3999
4000   unsigned SrcReg = getRegForValue(I->getOperand(0));
4001   if (!SrcReg)
4002     return false;
4003   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4004
4005   // The load instruction selection code handles the sign-/zero-extension.
4006   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0))) {
4007     if (LI->hasOneUse()) {
4008       updateValueMap(I, SrcReg);
4009       return true;
4010     }
4011   }
4012
4013   bool IsZExt = isa<ZExtInst>(I);
4014   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4015     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4016       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4017         unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4018         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4019                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4020             .addImm(0)
4021             .addReg(SrcReg, getKillRegState(SrcIsKill))
4022             .addImm(AArch64::sub_32);
4023         SrcReg = ResultReg;
4024       }
4025       updateValueMap(I, SrcReg);
4026       return true;
4027     }
4028   }
4029
4030   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4031   if (!ResultReg)
4032     return false;
4033
4034   updateValueMap(I, ResultReg);
4035   return true;
4036 }
4037
4038 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4039   EVT DestEVT = TLI.getValueType(I->getType(), true);
4040   if (!DestEVT.isSimple())
4041     return false;
4042
4043   MVT DestVT = DestEVT.getSimpleVT();
4044   if (DestVT != MVT::i64 && DestVT != MVT::i32)
4045     return false;
4046
4047   unsigned DivOpc;
4048   bool Is64bit = (DestVT == MVT::i64);
4049   switch (ISDOpcode) {
4050   default:
4051     return false;
4052   case ISD::SREM:
4053     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4054     break;
4055   case ISD::UREM:
4056     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4057     break;
4058   }
4059   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4060   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4061   if (!Src0Reg)
4062     return false;
4063   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4064
4065   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4066   if (!Src1Reg)
4067     return false;
4068   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4069
4070   const TargetRegisterClass *RC =
4071       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4072   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4073                                      Src1Reg, /*IsKill=*/false);
4074   assert(QuotReg && "Unexpected DIV instruction emission failure.");
4075   // The remainder is computed as numerator - (quotient * denominator) using the
4076   // MSUB instruction.
4077   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4078                                         Src1Reg, Src1IsKill, Src0Reg,
4079                                         Src0IsKill);
4080   updateValueMap(I, ResultReg);
4081   return true;
4082 }
4083
4084 bool AArch64FastISel::selectMul(const Instruction *I) {
4085   MVT VT;
4086   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4087     return false;
4088
4089   if (VT.isVector())
4090     return selectBinaryOp(I, ISD::MUL);
4091
4092   const Value *Src0 = I->getOperand(0);
4093   const Value *Src1 = I->getOperand(1);
4094   if (const auto *C = dyn_cast<ConstantInt>(Src0))
4095     if (C->getValue().isPowerOf2())
4096       std::swap(Src0, Src1);
4097
4098   // Try to simplify to a shift instruction.
4099   if (const auto *C = dyn_cast<ConstantInt>(Src1))
4100     if (C->getValue().isPowerOf2()) {
4101       uint64_t ShiftVal = C->getValue().logBase2();
4102       MVT SrcVT = VT;
4103       bool IsZExt = true;
4104       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4105         if (!isIntExtFree(ZExt)) {
4106           MVT VT;
4107           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4108             SrcVT = VT;
4109             IsZExt = true;
4110             Src0 = ZExt->getOperand(0);
4111           }
4112         }
4113       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4114         if (!isIntExtFree(SExt)) {
4115           MVT VT;
4116           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4117             SrcVT = VT;
4118             IsZExt = false;
4119             Src0 = SExt->getOperand(0);
4120           }
4121         }
4122       }
4123
4124       unsigned Src0Reg = getRegForValue(Src0);
4125       if (!Src0Reg)
4126         return false;
4127       bool Src0IsKill = hasTrivialKill(Src0);
4128
4129       unsigned ResultReg =
4130           emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4131
4132       if (ResultReg) {
4133         updateValueMap(I, ResultReg);
4134         return true;
4135       }
4136     }
4137
4138   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4139   if (!Src0Reg)
4140     return false;
4141   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4142
4143   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4144   if (!Src1Reg)
4145     return false;
4146   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4147
4148   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4149
4150   if (!ResultReg)
4151     return false;
4152
4153   updateValueMap(I, ResultReg);
4154   return true;
4155 }
4156
4157 bool AArch64FastISel::selectShift(const Instruction *I) {
4158   MVT RetVT;
4159   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4160     return false;
4161
4162   if (RetVT.isVector())
4163     return selectOperator(I, I->getOpcode());
4164
4165   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4166     unsigned ResultReg = 0;
4167     uint64_t ShiftVal = C->getZExtValue();
4168     MVT SrcVT = RetVT;
4169     bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true;
4170     const Value *Op0 = I->getOperand(0);
4171     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4172       if (!isIntExtFree(ZExt)) {
4173         MVT TmpVT;
4174         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4175           SrcVT = TmpVT;
4176           IsZExt = true;
4177           Op0 = ZExt->getOperand(0);
4178         }
4179       }
4180     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4181       if (!isIntExtFree(SExt)) {
4182         MVT TmpVT;
4183         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4184           SrcVT = TmpVT;
4185           IsZExt = false;
4186           Op0 = SExt->getOperand(0);
4187         }
4188       }
4189     }
4190
4191     unsigned Op0Reg = getRegForValue(Op0);
4192     if (!Op0Reg)
4193       return false;
4194     bool Op0IsKill = hasTrivialKill(Op0);
4195
4196     switch (I->getOpcode()) {
4197     default: llvm_unreachable("Unexpected instruction.");
4198     case Instruction::Shl:
4199       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4200       break;
4201     case Instruction::AShr:
4202       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4203       break;
4204     case Instruction::LShr:
4205       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4206       break;
4207     }
4208     if (!ResultReg)
4209       return false;
4210
4211     updateValueMap(I, ResultReg);
4212     return true;
4213   }
4214
4215   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4216   if (!Op0Reg)
4217     return false;
4218   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4219
4220   unsigned Op1Reg = getRegForValue(I->getOperand(1));
4221   if (!Op1Reg)
4222     return false;
4223   bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4224
4225   unsigned ResultReg = 0;
4226   switch (I->getOpcode()) {
4227   default: llvm_unreachable("Unexpected instruction.");
4228   case Instruction::Shl:
4229     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4230     break;
4231   case Instruction::AShr:
4232     ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4233     break;
4234   case Instruction::LShr:
4235     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4236     break;
4237   }
4238
4239   if (!ResultReg)
4240     return false;
4241
4242   updateValueMap(I, ResultReg);
4243   return true;
4244 }
4245
4246 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4247   MVT RetVT, SrcVT;
4248
4249   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4250     return false;
4251   if (!isTypeLegal(I->getType(), RetVT))
4252     return false;
4253
4254   unsigned Opc;
4255   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4256     Opc = AArch64::FMOVWSr;
4257   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4258     Opc = AArch64::FMOVXDr;
4259   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4260     Opc = AArch64::FMOVSWr;
4261   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4262     Opc = AArch64::FMOVDXr;
4263   else
4264     return false;
4265
4266   const TargetRegisterClass *RC = nullptr;
4267   switch (RetVT.SimpleTy) {
4268   default: llvm_unreachable("Unexpected value type.");
4269   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4270   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4271   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4272   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4273   }
4274   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4275   if (!Op0Reg)
4276     return false;
4277   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4278   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4279
4280   if (!ResultReg)
4281     return false;
4282
4283   updateValueMap(I, ResultReg);
4284   return true;
4285 }
4286
4287 bool AArch64FastISel::selectFRem(const Instruction *I) {
4288   MVT RetVT;
4289   if (!isTypeLegal(I->getType(), RetVT))
4290     return false;
4291
4292   RTLIB::Libcall LC;
4293   switch (RetVT.SimpleTy) {
4294   default:
4295     return false;
4296   case MVT::f32:
4297     LC = RTLIB::REM_F32;
4298     break;
4299   case MVT::f64:
4300     LC = RTLIB::REM_F64;
4301     break;
4302   }
4303
4304   ArgListTy Args;
4305   Args.reserve(I->getNumOperands());
4306
4307   // Populate the argument list.
4308   for (auto &Arg : I->operands()) {
4309     ArgListEntry Entry;
4310     Entry.Val = Arg;
4311     Entry.Ty = Arg->getType();
4312     Args.push_back(Entry);
4313   }
4314
4315   CallLoweringInfo CLI;
4316   CLI.setCallee(TLI.getLibcallCallingConv(LC), I->getType(),
4317                 TLI.getLibcallName(LC), std::move(Args));
4318   if (!lowerCallTo(CLI))
4319     return false;
4320   updateValueMap(I, CLI.ResultReg);
4321   return true;
4322 }
4323
4324 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4325   MVT VT;
4326   if (!isTypeLegal(I->getType(), VT))
4327     return false;
4328
4329   if (!isa<ConstantInt>(I->getOperand(1)))
4330     return selectBinaryOp(I, ISD::SDIV);
4331
4332   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4333   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4334       !(C.isPowerOf2() || (-C).isPowerOf2()))
4335     return selectBinaryOp(I, ISD::SDIV);
4336
4337   unsigned Lg2 = C.countTrailingZeros();
4338   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4339   if (!Src0Reg)
4340     return false;
4341   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4342
4343   if (cast<BinaryOperator>(I)->isExact()) {
4344     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4345     if (!ResultReg)
4346       return false;
4347     updateValueMap(I, ResultReg);
4348     return true;
4349   }
4350
4351   unsigned Pow2MinusOne = (1 << Lg2) - 1;
4352   unsigned AddReg = emitAddSub_ri(/*UseAdd=*/true, VT, Src0Reg,
4353                                   /*IsKill=*/false, Pow2MinusOne);
4354   if (!AddReg)
4355     return false;
4356
4357   // (Src0 < 0) ? Pow2 - 1 : 0;
4358   if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4359     return false;
4360
4361   unsigned SelectOpc;
4362   const TargetRegisterClass *RC;
4363   if (VT == MVT::i64) {
4364     SelectOpc = AArch64::CSELXr;
4365     RC = &AArch64::GPR64RegClass;
4366   } else {
4367     SelectOpc = AArch64::CSELWr;
4368     RC = &AArch64::GPR32RegClass;
4369   }
4370   unsigned SelectReg =
4371       fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4372                        Src0IsKill, AArch64CC::LT);
4373   if (!SelectReg)
4374     return false;
4375
4376   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4377   // negate the result.
4378   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4379   unsigned ResultReg;
4380   if (C.isNegative())
4381     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4382                               SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4383   else
4384     ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4385
4386   if (!ResultReg)
4387     return false;
4388
4389   updateValueMap(I, ResultReg);
4390   return true;
4391 }
4392
4393 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
4394   switch (I->getOpcode()) {
4395   default:
4396     break;
4397   case Instruction::Add:
4398   case Instruction::Sub:
4399     return selectAddSub(I);
4400   case Instruction::Mul:
4401     return selectMul(I);
4402   case Instruction::SDiv:
4403     return selectSDiv(I);
4404   case Instruction::SRem:
4405     if (!selectBinaryOp(I, ISD::SREM))
4406       return selectRem(I, ISD::SREM);
4407     return true;
4408   case Instruction::URem:
4409     if (!selectBinaryOp(I, ISD::UREM))
4410       return selectRem(I, ISD::UREM);
4411     return true;
4412   case Instruction::Shl:
4413   case Instruction::LShr:
4414   case Instruction::AShr:
4415     return selectShift(I);
4416   case Instruction::And:
4417   case Instruction::Or:
4418   case Instruction::Xor:
4419     return selectLogicalOp(I);
4420   case Instruction::Br:
4421     return selectBranch(I);
4422   case Instruction::IndirectBr:
4423     return selectIndirectBr(I);
4424   case Instruction::BitCast:
4425     if (!FastISel::selectBitCast(I))
4426       return selectBitCast(I);
4427     return true;
4428   case Instruction::FPToSI:
4429     if (!selectCast(I, ISD::FP_TO_SINT))
4430       return selectFPToInt(I, /*Signed=*/true);
4431     return true;
4432   case Instruction::FPToUI:
4433     return selectFPToInt(I, /*Signed=*/false);
4434   case Instruction::ZExt:
4435   case Instruction::SExt:
4436     return selectIntExt(I);
4437   case Instruction::Trunc:
4438     if (!selectCast(I, ISD::TRUNCATE))
4439       return selectTrunc(I);
4440     return true;
4441   case Instruction::FPExt:
4442     return selectFPExt(I);
4443   case Instruction::FPTrunc:
4444     return selectFPTrunc(I);
4445   case Instruction::SIToFP:
4446     if (!selectCast(I, ISD::SINT_TO_FP))
4447       return selectIntToFP(I, /*Signed=*/true);
4448     return true;
4449   case Instruction::UIToFP:
4450     return selectIntToFP(I, /*Signed=*/false);
4451   case Instruction::Load:
4452     return selectLoad(I);
4453   case Instruction::Store:
4454     return selectStore(I);
4455   case Instruction::FCmp:
4456   case Instruction::ICmp:
4457     return selectCmp(I);
4458   case Instruction::Select:
4459     return selectSelect(I);
4460   case Instruction::Ret:
4461     return selectRet(I);
4462   case Instruction::FRem:
4463     return selectFRem(I);
4464   }
4465
4466   // fall-back to target-independent instruction selection.
4467   return selectOperator(I, I->getOpcode());
4468   // Silence warnings.
4469   (void)&CC_AArch64_DarwinPCS_VarArg;
4470 }
4471
4472 namespace llvm {
4473 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
4474                                         const TargetLibraryInfo *LibInfo) {
4475   return new AArch64FastISel(FuncInfo, LibInfo);
4476 }
4477 }