lib/Target/AArch64/AArch64FastISel.cpp

   1 //===-- AArch6464FastISel.cpp - AArch64 FastISel implementation -----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the AArch64-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // AArch64GenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AArch64.h"
  17 #include "AArch64Subtarget.h"
  18 #include "AArch64TargetMachine.h"
  19 #include "MCTargetDesc/AArch64AddressingModes.h"
  20 #include "llvm/Analysis/BranchProbabilityInfo.h"
  21 #include "llvm/CodeGen/CallingConvLower.h"
  22 #include "llvm/CodeGen/FastISel.h"
  23 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  24 #include "llvm/CodeGen/MachineConstantPool.h"
  25 #include "llvm/CodeGen/MachineFrameInfo.h"
  26 #include "llvm/CodeGen/MachineInstrBuilder.h"
  27 #include "llvm/CodeGen/MachineRegisterInfo.h"
  28 #include "llvm/IR/CallingConv.h"
  29 #include "llvm/IR/DataLayout.h"
  30 #include "llvm/IR/DerivedTypes.h"
  31 #include "llvm/IR/Function.h"
  32 #include "llvm/IR/GetElementPtrTypeIterator.h"
  33 #include "llvm/IR/GlobalAlias.h"
  34 #include "llvm/IR/GlobalVariable.h"
  35 #include "llvm/IR/Instructions.h"
  36 #include "llvm/IR/IntrinsicInst.h"
  37 #include "llvm/IR/Operator.h"
  38 #include "llvm/Support/CommandLine.h"
  39 using namespace llvm;
  40
  41 namespace {
  42
  43 class AArch64FastISel final : public FastISel {
  44   class Address {
  45   public:
  46     typedef enum {
  47       RegBase,
  48       FrameIndexBase
  49     } BaseKind;
  50
  51   private:
  52     BaseKind Kind;
  53     AArch64_AM::ShiftExtendType ExtType;
  54     union {
  55       unsigned Reg;
  56       int FI;
  57     } Base;
  58     unsigned OffsetReg;
  59     unsigned Shift;
  60     int64_t Offset;
  61     const GlobalValue *GV;
  62
  63   public:
  64     Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
  65       OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
  66     void setKind(BaseKind K) { Kind = K; }
  67     BaseKind getKind() const { return Kind; }
  68     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
  69     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
  70     bool isRegBase() const { return Kind == RegBase; }
  71     bool isFIBase() const { return Kind == FrameIndexBase; }
  72     void setReg(unsigned Reg) {
  73       assert(isRegBase() && "Invalid base register access!");
  74       Base.Reg = Reg;
  75     }
  76     unsigned getReg() const {
  77       assert(isRegBase() && "Invalid base register access!");
  78       return Base.Reg;
  79     }
  80     void setOffsetReg(unsigned Reg) {
  81       assert(isRegBase() && "Invalid offset register access!");
  82       OffsetReg = Reg;
  83     }
  84     unsigned getOffsetReg() const {
  85       assert(isRegBase() && "Invalid offset register access!");
  86       return OffsetReg;
  87     }
  88     void setFI(unsigned FI) {
  89       assert(isFIBase() && "Invalid base frame index  access!");
  90       Base.FI = FI;
  91     }
  92     unsigned getFI() const {
  93       assert(isFIBase() && "Invalid base frame index access!");
  94       return Base.FI;
  95     }
  96     void setOffset(int64_t O) { Offset = O; }
  97     int64_t getOffset() { return Offset; }
  98     void setShift(unsigned S) { Shift = S; }
  99     unsigned getShift() { return Shift; }
 100
 101     void setGlobalValue(const GlobalValue *G) { GV = G; }
 102     const GlobalValue *getGlobalValue() { return GV; }
 103   };
 104
 105   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
 106   /// make the right decision when generating code for different targets.
 107   const AArch64Subtarget *Subtarget;
 108   LLVMContext *Context;
 109
 110   bool fastLowerArguments() override;
 111   bool fastLowerCall(CallLoweringInfo &CLI) override;
 112   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
 113
 114 private:
 115   // Selection routines.
 116   bool selectAddSub(const Instruction *I);
 117   bool selectLogicalOp(const Instruction *I);
 118   bool selectLoad(const Instruction *I);
 119   bool selectStore(const Instruction *I);
 120   bool selectBranch(const Instruction *I);
 121   bool selectIndirectBr(const Instruction *I);
 122   bool selectCmp(const Instruction *I);
 123   bool selectSelect(const Instruction *I);
 124   bool selectFPExt(const Instruction *I);
 125   bool selectFPTrunc(const Instruction *I);
 126   bool selectFPToInt(const Instruction *I, bool Signed);
 127   bool selectIntToFP(const Instruction *I, bool Signed);
 128   bool selectRem(const Instruction *I, unsigned ISDOpcode);
 129   bool selectRet(const Instruction *I);
 130   bool selectTrunc(const Instruction *I);
 131   bool selectIntExt(const Instruction *I);
 132   bool selectMul(const Instruction *I);
 133   bool selectShift(const Instruction *I);
 134   bool selectBitCast(const Instruction *I);
 135   bool selectFRem(const Instruction *I);
 136   bool selectSDiv(const Instruction *I);
 137
 138   // Utility helper routines.
 139   bool isTypeLegal(Type *Ty, MVT &VT);
 140   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
 141   bool isValueAvailable(const Value *V) const;
 142   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
 143   bool computeCallAddress(const Value *V, Address &Addr);
 144   bool simplifyAddress(Address &Addr, MVT VT);
 145   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 146                             unsigned Flags, unsigned ScaleFactor,
 147                             MachineMemOperand *MMO);
 148   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
 149   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
 150                           unsigned Alignment);
 151   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
 152                          const Value *Cond);
 153   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
 154
 155   // Emit helper routines.
 156   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
 157                       const Value *RHS, bool SetFlags = false,
 158                       bool WantResult = true,  bool IsZExt = false);
 159   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
 160                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 161                          bool SetFlags = false, bool WantResult = true);
 162   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
 163                          bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
 164                          bool WantResult = true);
 165   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
 166                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 167                          AArch64_AM::ShiftExtendType ShiftType,
 168                          uint64_t ShiftImm, bool SetFlags = false,
 169                          bool WantResult = true);
 170   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
 171                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 172                           AArch64_AM::ShiftExtendType ExtType,
 173                           uint64_t ShiftImm, bool SetFlags = false,
 174                          bool WantResult = true);
 175
 176   // Emit functions.
 177   bool emitCompareAndBranch(const BranchInst *BI);
 178   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
 179   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
 180   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 181   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
 182   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
 183                     MachineMemOperand *MMO = nullptr);
 184   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
 185                  MachineMemOperand *MMO = nullptr);
 186   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
 187   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
 188   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
 189                    bool SetFlags = false, bool WantResult = true,
 190                    bool IsZExt = false);
 191   unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
 192   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
 193                    bool SetFlags = false, bool WantResult = true,
 194                    bool IsZExt = false);
 195   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 196                        unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
 197   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 198                        unsigned RHSReg, bool RHSIsKill,
 199                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
 200                        bool WantResult = true);
 201   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
 202                          const Value *RHS);
 203   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 204                             bool LHSIsKill, uint64_t Imm);
 205   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 206                             bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 207                             uint64_t ShiftImm);
 208   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 209   unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 210                       unsigned Op1, bool Op1IsKill);
 211   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 212                         unsigned Op1, bool Op1IsKill);
 213   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 214                         unsigned Op1, bool Op1IsKill);
 215   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 216                       unsigned Op1Reg, bool Op1IsKill);
 217   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 218                       uint64_t Imm, bool IsZExt = true);
 219   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 220                       unsigned Op1Reg, bool Op1IsKill);
 221   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 222                       uint64_t Imm, bool IsZExt = true);
 223   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 224                       unsigned Op1Reg, bool Op1IsKill);
 225   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 226                       uint64_t Imm, bool IsZExt = false);
 227
 228   unsigned materializeInt(const ConstantInt *CI, MVT VT);
 229   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
 230   unsigned materializeGV(const GlobalValue *GV);
 231
 232   // Call handling routines.
 233 private:
 234   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
 235   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
 236                        unsigned &NumBytes);
 237   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
 238
 239 public:
 240   // Backend specific FastISel code.
 241   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
 242   unsigned fastMaterializeConstant(const Constant *C) override;
 243   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
 244
 245   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
 246                          const TargetLibraryInfo *LibInfo)
 247       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
 248     Subtarget = &TM.getSubtarget<AArch64Subtarget>();
 249     Context = &FuncInfo.Fn->getContext();
 250   }
 251
 252   bool fastSelectInstruction(const Instruction *I) override;
 253
 254 #include "AArch64GenFastISel.inc"
 255 };
 256
 257 } // end anonymous namespace
 258
 259 #include "AArch64GenCallingConv.inc"
 260
 261 /// \brief Check if the sign-/zero-extend will be a noop.
 262 static bool isIntExtFree(const Instruction *I) {
 263   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
 264          "Unexpected integer extend instruction.");
 265   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
 266          "Unexpected value type.");
 267   bool IsZExt = isa<ZExtInst>(I);
 268
 269   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
 270     if (LI->hasOneUse())
 271       return true;
 272
 273   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
 274     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
 275       return true;
 276
 277   return false;
 278 }
 279
 280 /// \brief Determine the implicit scale factor that is applied by a memory
 281 /// operation for a given value type.
 282 static unsigned getImplicitScaleFactor(MVT VT) {
 283   switch (VT.SimpleTy) {
 284   default:
 285     return 0;    // invalid
 286   case MVT::i1:  // fall-through
 287   case MVT::i8:
 288     return 1;
 289   case MVT::i16:
 290     return 2;
 291   case MVT::i32: // fall-through
 292   case MVT::f32:
 293     return 4;
 294   case MVT::i64: // fall-through
 295   case MVT::f64:
 296     return 8;
 297   }
 298 }
 299
 300 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
 301   if (CC == CallingConv::WebKit_JS)
 302     return CC_AArch64_WebKit_JS;
 303   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 304 }
 305
 306 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
 307   assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
 308          "Alloca should always return a pointer.");
 309
 310   // Don't handle dynamic allocas.
 311   if (!FuncInfo.StaticAllocaMap.count(AI))
 312     return 0;
 313
 314   DenseMap<const AllocaInst *, int>::iterator SI =
 315       FuncInfo.StaticAllocaMap.find(AI);
 316
 317   if (SI != FuncInfo.StaticAllocaMap.end()) {
 318     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 319     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 320             ResultReg)
 321         .addFrameIndex(SI->second)
 322         .addImm(0)
 323         .addImm(0);
 324     return ResultReg;
 325   }
 326
 327   return 0;
 328 }
 329
 330 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
 331   if (VT > MVT::i64)
 332     return 0;
 333
 334   if (!CI->isZero())
 335     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
 336
 337   // Create a copy from the zero register to materialize a "0" value.
 338   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
 339                                                    : &AArch64::GPR32RegClass;
 340   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
 341   unsigned ResultReg = createResultReg(RC);
 342   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
 343           ResultReg).addReg(ZeroReg, getKillRegState(true));
 344   return ResultReg;
 345 }
 346
 347 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
 348   // Positive zero (+0.0) has to be materialized with a fmov from the zero
 349   // register, because the immediate version of fmov cannot encode zero.
 350   if (CFP->isNullValue())
 351     return fastMaterializeFloatZero(CFP);
 352
 353   if (VT != MVT::f32 && VT != MVT::f64)
 354     return 0;
 355
 356   const APFloat Val = CFP->getValueAPF();
 357   bool Is64Bit = (VT == MVT::f64);
 358   // This checks to see if we can use FMOV instructions to materialize
 359   // a constant, otherwise we have to materialize via the constant pool.
 360   if (TLI.isFPImmLegal(Val, VT)) {
 361     int Imm =
 362         Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
 363     assert((Imm != -1) && "Cannot encode floating-point constant.");
 364     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
 365     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
 366   }
 367
 368   // Materialize via constant pool.  MachineConstantPool wants an explicit
 369   // alignment.
 370   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
 371   if (Align == 0)
 372     Align = DL.getTypeAllocSize(CFP->getType());
 373
 374   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 375   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 376   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 377           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
 378
 379   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
 380   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 381   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 382       .addReg(ADRPReg)
 383       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 384   return ResultReg;
 385 }
 386
 387 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
 388   // We can't handle thread-local variables quickly yet.
 389   if (GV->isThreadLocal())
 390     return 0;
 391
 392   // MachO still uses GOT for large code-model accesses, but ELF requires
 393   // movz/movk sequences, which FastISel doesn't handle yet.
 394   if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
 395     return 0;
 396
 397   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
 398
 399   EVT DestEVT = TLI.getValueType(GV->getType(), true);
 400   if (!DestEVT.isSimple())
 401     return 0;
 402
 403   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 404   unsigned ResultReg;
 405
 406   if (OpFlags & AArch64II::MO_GOT) {
 407     // ADRP + LDRX
 408     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 409             ADRPReg)
 410       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
 411
 412     ResultReg = createResultReg(&AArch64::GPR64RegClass);
 413     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
 414             ResultReg)
 415       .addReg(ADRPReg)
 416       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
 417                         AArch64II::MO_NC);
 418   } else if (OpFlags & AArch64II::MO_CONSTPOOL) {
 419     // We can't handle addresses loaded from a constant pool quickly yet.
 420     return 0;
 421   } else {
 422     // ADRP + ADDX
 423     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 424             ADRPReg)
 425       .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
 426
 427     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 428     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 429             ResultReg)
 430       .addReg(ADRPReg)
 431       .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
 432       .addImm(0);
 433   }
 434   return ResultReg;
 435 }
 436
 437 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
 438   EVT CEVT = TLI.getValueType(C->getType(), true);
 439
 440   // Only handle simple types.
 441   if (!CEVT.isSimple())
 442     return 0;
 443   MVT VT = CEVT.getSimpleVT();
 444
 445   if (const auto *CI = dyn_cast<ConstantInt>(C))
 446     return materializeInt(CI, VT);
 447   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 448     return materializeFP(CFP, VT);
 449   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
 450     return materializeGV(GV);
 451
 452   return 0;
 453 }
 454
 455 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
 456   assert(CFP->isNullValue() &&
 457          "Floating-point constant is not a positive zero.");
 458   MVT VT;
 459   if (!isTypeLegal(CFP->getType(), VT))
 460     return 0;
 461
 462   if (VT != MVT::f32 && VT != MVT::f64)
 463     return 0;
 464
 465   bool Is64Bit = (VT == MVT::f64);
 466   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
 467   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
 468   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
 469 }
 470
 471 /// \brief Check if the multiply is by a power-of-2 constant.
 472 static bool isMulPowOf2(const Value *I) {
 473   if (const auto *MI = dyn_cast<MulOperator>(I)) {
 474     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
 475       if (C->getValue().isPowerOf2())
 476         return true;
 477     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
 478       if (C->getValue().isPowerOf2())
 479         return true;
 480   }
 481   return false;
 482 }
 483
 484 // Computes the address to get to an object.
 485 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
 486 {
 487   const User *U = nullptr;
 488   unsigned Opcode = Instruction::UserOp1;
 489   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
 490     // Don't walk into other basic blocks unless the object is an alloca from
 491     // another block, otherwise it may not have a virtual register assigned.
 492     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
 493         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 494       Opcode = I->getOpcode();
 495       U = I;
 496     }
 497   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
 498     Opcode = C->getOpcode();
 499     U = C;
 500   }
 501
 502   if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
 503     if (Ty->getAddressSpace() > 255)
 504       // Fast instruction selection doesn't support the special
 505       // address spaces.
 506       return false;
 507
 508   switch (Opcode) {
 509   default:
 510     break;
 511   case Instruction::BitCast: {
 512     // Look through bitcasts.
 513     return computeAddress(U->getOperand(0), Addr, Ty);
 514   }
 515   case Instruction::IntToPtr: {
 516     // Look past no-op inttoptrs.
 517     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 518       return computeAddress(U->getOperand(0), Addr, Ty);
 519     break;
 520   }
 521   case Instruction::PtrToInt: {
 522     // Look past no-op ptrtoints.
 523     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
 524       return computeAddress(U->getOperand(0), Addr, Ty);
 525     break;
 526   }
 527   case Instruction::GetElementPtr: {
 528     Address SavedAddr = Addr;
 529     uint64_t TmpOffset = Addr.getOffset();
 530
 531     // Iterate through the GEP folding the constants into offsets where
 532     // we can.
 533     gep_type_iterator GTI = gep_type_begin(U);
 534     for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
 535          ++i, ++GTI) {
 536       const Value *Op = *i;
 537       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
 538         const StructLayout *SL = DL.getStructLayout(STy);
 539         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
 540         TmpOffset += SL->getElementOffset(Idx);
 541       } else {
 542         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
 543         for (;;) {
 544           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
 545             // Constant-offset addressing.
 546             TmpOffset += CI->getSExtValue() * S;
 547             break;
 548           }
 549           if (canFoldAddIntoGEP(U, Op)) {
 550             // A compatible add with a constant operand. Fold the constant.
 551             ConstantInt *CI =
 552                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
 553             TmpOffset += CI->getSExtValue() * S;
 554             // Iterate on the other operand.
 555             Op = cast<AddOperator>(Op)->getOperand(0);
 556             continue;
 557           }
 558           // Unsupported
 559           goto unsupported_gep;
 560         }
 561       }
 562     }
 563
 564     // Try to grab the base operand now.
 565     Addr.setOffset(TmpOffset);
 566     if (computeAddress(U->getOperand(0), Addr, Ty))
 567       return true;
 568
 569     // We failed, restore everything and try the other options.
 570     Addr = SavedAddr;
 571
 572   unsupported_gep:
 573     break;
 574   }
 575   case Instruction::Alloca: {
 576     const AllocaInst *AI = cast<AllocaInst>(Obj);
 577     DenseMap<const AllocaInst *, int>::iterator SI =
 578         FuncInfo.StaticAllocaMap.find(AI);
 579     if (SI != FuncInfo.StaticAllocaMap.end()) {
 580       Addr.setKind(Address::FrameIndexBase);
 581       Addr.setFI(SI->second);
 582       return true;
 583     }
 584     break;
 585   }
 586   case Instruction::Add: {
 587     // Adds of constants are common and easy enough.
 588     const Value *LHS = U->getOperand(0);
 589     const Value *RHS = U->getOperand(1);
 590
 591     if (isa<ConstantInt>(LHS))
 592       std::swap(LHS, RHS);
 593
 594     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 595       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
 596       return computeAddress(LHS, Addr, Ty);
 597     }
 598
 599     Address Backup = Addr;
 600     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
 601       return true;
 602     Addr = Backup;
 603
 604     break;
 605   }
 606   case Instruction::Sub: {
 607     // Subs of constants are common and easy enough.
 608     const Value *LHS = U->getOperand(0);
 609     const Value *RHS = U->getOperand(1);
 610
 611     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 612       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
 613       return computeAddress(LHS, Addr, Ty);
 614     }
 615     break;
 616   }
 617   case Instruction::Shl: {
 618     if (Addr.getOffsetReg())
 619       break;
 620
 621     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
 622     if (!CI)
 623       break;
 624
 625     unsigned Val = CI->getZExtValue();
 626     if (Val < 1 || Val > 3)
 627       break;
 628
 629     uint64_t NumBytes = 0;
 630     if (Ty && Ty->isSized()) {
 631       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 632       NumBytes = NumBits / 8;
 633       if (!isPowerOf2_64(NumBits))
 634         NumBytes = 0;
 635     }
 636
 637     if (NumBytes != (1ULL << Val))
 638       break;
 639
 640     Addr.setShift(Val);
 641     Addr.setExtendType(AArch64_AM::LSL);
 642
 643     const Value *Src = U->getOperand(0);
 644     if (const auto *I = dyn_cast<Instruction>(Src))
 645       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 646         Src = I;
 647
 648     // Fold the zext or sext when it won't become a noop.
 649     if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
 650       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 651           Addr.setExtendType(AArch64_AM::UXTW);
 652           Src = ZE->getOperand(0);
 653       }
 654     } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
 655       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
 656         Addr.setExtendType(AArch64_AM::SXTW);
 657         Src = SE->getOperand(0);
 658       }
 659     }
 660
 661     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
 662       if (AI->getOpcode() == Instruction::And) {
 663         const Value *LHS = AI->getOperand(0);
 664         const Value *RHS = AI->getOperand(1);
 665
 666         if (const auto *C = dyn_cast<ConstantInt>(LHS))
 667           if (C->getValue() == 0xffffffff)
 668             std::swap(LHS, RHS);
 669
 670         if (const auto *C = dyn_cast<ConstantInt>(RHS))
 671           if (C->getValue() == 0xffffffff) {
 672             Addr.setExtendType(AArch64_AM::UXTW);
 673             unsigned Reg = getRegForValue(LHS);
 674             if (!Reg)
 675               return false;
 676             bool RegIsKill = hasTrivialKill(LHS);
 677             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
 678                                              AArch64::sub_32);
 679             Addr.setOffsetReg(Reg);
 680             return true;
 681           }
 682       }
 683
 684     unsigned Reg = getRegForValue(Src);
 685     if (!Reg)
 686       return false;
 687     Addr.setOffsetReg(Reg);
 688     return true;
 689   }
 690   case Instruction::Mul: {
 691     if (Addr.getOffsetReg())
 692       break;
 693
 694     if (!isMulPowOf2(U))
 695       break;
 696
 697     const Value *LHS = U->getOperand(0);
 698     const Value *RHS = U->getOperand(1);
 699
 700     // Canonicalize power-of-2 value to the RHS.
 701     if (const auto *C = dyn_cast<ConstantInt>(LHS))
 702       if (C->getValue().isPowerOf2())
 703         std::swap(LHS, RHS);
 704
 705     assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
 706     const auto *C = cast<ConstantInt>(RHS);
 707     unsigned Val = C->getValue().logBase2();
 708     if (Val < 1 || Val > 3)
 709       break;
 710
 711     uint64_t NumBytes = 0;
 712     if (Ty && Ty->isSized()) {
 713       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 714       NumBytes = NumBits / 8;
 715       if (!isPowerOf2_64(NumBits))
 716         NumBytes = 0;
 717     }
 718
 719     if (NumBytes != (1ULL << Val))
 720       break;
 721
 722     Addr.setShift(Val);
 723     Addr.setExtendType(AArch64_AM::LSL);
 724
 725     const Value *Src = LHS;
 726     if (const auto *I = dyn_cast<Instruction>(Src))
 727       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 728         Src = I;
 729
 730
 731     // Fold the zext or sext when it won't become a noop.
 732     if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
 733       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 734         Addr.setExtendType(AArch64_AM::UXTW);
 735         Src = ZE->getOperand(0);
 736       }
 737     } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
 738       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
 739         Addr.setExtendType(AArch64_AM::SXTW);
 740         Src = SE->getOperand(0);
 741       }
 742     }
 743
 744     unsigned Reg = getRegForValue(Src);
 745     if (!Reg)
 746       return false;
 747     Addr.setOffsetReg(Reg);
 748     return true;
 749   }
 750   case Instruction::And: {
 751     if (Addr.getOffsetReg())
 752       break;
 753
 754     if (DL.getTypeSizeInBits(Ty) != 8)
 755       break;
 756
 757     const Value *LHS = U->getOperand(0);
 758     const Value *RHS = U->getOperand(1);
 759
 760     if (const auto *C = dyn_cast<ConstantInt>(LHS))
 761       if (C->getValue() == 0xffffffff)
 762         std::swap(LHS, RHS);
 763
 764     if (const auto *C = dyn_cast<ConstantInt>(RHS))
 765       if (C->getValue() == 0xffffffff) {
 766         Addr.setShift(0);
 767         Addr.setExtendType(AArch64_AM::LSL);
 768         Addr.setExtendType(AArch64_AM::UXTW);
 769
 770         unsigned Reg = getRegForValue(LHS);
 771         if (!Reg)
 772           return false;
 773         bool RegIsKill = hasTrivialKill(LHS);
 774         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
 775                                          AArch64::sub_32);
 776         Addr.setOffsetReg(Reg);
 777         return true;
 778       }
 779     break;
 780   }
 781   case Instruction::SExt:
 782   case Instruction::ZExt: {
 783     if (!Addr.getReg() || Addr.getOffsetReg())
 784       break;
 785
 786     const Value *Src = nullptr;
 787     // Fold the zext or sext when it won't become a noop.
 788     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
 789       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 790         Addr.setExtendType(AArch64_AM::UXTW);
 791         Src = ZE->getOperand(0);
 792       }
 793     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
 794       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
 795         Addr.setExtendType(AArch64_AM::SXTW);
 796         Src = SE->getOperand(0);
 797       }
 798     }
 799
 800     if (!Src)
 801       break;
 802
 803     Addr.setShift(0);
 804     unsigned Reg = getRegForValue(Src);
 805     if (!Reg)
 806       return false;
 807     Addr.setOffsetReg(Reg);
 808     return true;
 809   }
 810   } // end switch
 811
 812   if (Addr.getReg()) {
 813     if (!Addr.getOffsetReg()) {
 814       unsigned Reg = getRegForValue(Obj);
 815       if (!Reg)
 816         return false;
 817       Addr.setOffsetReg(Reg);
 818       return true;
 819     }
 820     return false;
 821   }
 822
 823   unsigned Reg = getRegForValue(Obj);
 824   if (!Reg)
 825     return false;
 826   Addr.setReg(Reg);
 827   return true;
 828 }
 829
 830 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
 831   const User *U = nullptr;
 832   unsigned Opcode = Instruction::UserOp1;
 833   bool InMBB = true;
 834
 835   if (const auto *I = dyn_cast<Instruction>(V)) {
 836     Opcode = I->getOpcode();
 837     U = I;
 838     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
 839   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
 840     Opcode = C->getOpcode();
 841     U = C;
 842   }
 843
 844   switch (Opcode) {
 845   default: break;
 846   case Instruction::BitCast:
 847     // Look past bitcasts if its operand is in the same BB.
 848     if (InMBB)
 849       return computeCallAddress(U->getOperand(0), Addr);
 850     break;
 851   case Instruction::IntToPtr:
 852     // Look past no-op inttoptrs if its operand is in the same BB.
 853     if (InMBB &&
 854         TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 855       return computeCallAddress(U->getOperand(0), Addr);
 856     break;
 857   case Instruction::PtrToInt:
 858     // Look past no-op ptrtoints if its operand is in the same BB.
 859     if (InMBB &&
 860         TLI.getValueType(U->getType()) == TLI.getPointerTy())
 861       return computeCallAddress(U->getOperand(0), Addr);
 862     break;
 863   }
 864
 865   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
 866     Addr.setGlobalValue(GV);
 867     return true;
 868   }
 869
 870   // If all else fails, try to materialize the value in a register.
 871   if (!Addr.getGlobalValue()) {
 872     Addr.setReg(getRegForValue(V));
 873     return Addr.getReg() != 0;
 874   }
 875
 876   return false;
 877 }
 878
 879
 880 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
 881   EVT evt = TLI.getValueType(Ty, true);
 882
 883   // Only handle simple types.
 884   if (evt == MVT::Other || !evt.isSimple())
 885     return false;
 886   VT = evt.getSimpleVT();
 887
 888   // This is a legal type, but it's not something we handle in fast-isel.
 889   if (VT == MVT::f128)
 890     return false;
 891
 892   // Handle all other legal types, i.e. a register that will directly hold this
 893   // value.
 894   return TLI.isTypeLegal(VT);
 895 }
 896
 897 /// \brief Determine if the value type is supported by FastISel.
 898 ///
 899 /// FastISel for AArch64 can handle more value types than are legal. This adds
 900 /// simple value type such as i1, i8, and i16.
 901 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
 902   if (Ty->isVectorTy() && !IsVectorAllowed)
 903     return false;
 904
 905   if (isTypeLegal(Ty, VT))
 906     return true;
 907
 908   // If this is a type than can be sign or zero-extended to a basic operation
 909   // go ahead and accept it now.
 910   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
 911     return true;
 912
 913   return false;
 914 }
 915
 916 bool AArch64FastISel::isValueAvailable(const Value *V) const {
 917   if (!isa<Instruction>(V))
 918     return true;
 919
 920   const auto *I = cast<Instruction>(V);
 921   if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 922     return true;
 923
 924   return false;
 925 }
 926
 927 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
 928   unsigned ScaleFactor = getImplicitScaleFactor(VT);
 929   if (!ScaleFactor)
 930     return false;
 931
 932   bool ImmediateOffsetNeedsLowering = false;
 933   bool RegisterOffsetNeedsLowering = false;
 934   int64_t Offset = Addr.getOffset();
 935   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
 936     ImmediateOffsetNeedsLowering = true;
 937   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
 938            !isUInt<12>(Offset / ScaleFactor))
 939     ImmediateOffsetNeedsLowering = true;
 940
 941   // Cannot encode an offset register and an immediate offset in the same
 942   // instruction. Fold the immediate offset into the load/store instruction and
 943   // emit an additonal add to take care of the offset register.
 944   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() &&
 945       Addr.getOffsetReg())
 946     RegisterOffsetNeedsLowering = true;
 947
 948   // Cannot encode zero register as base.
 949   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
 950     RegisterOffsetNeedsLowering = true;
 951
 952   // If this is a stack pointer and the offset needs to be simplified then put
 953   // the alloca address into a register, set the base type back to register and
 954   // continue. This should almost never happen.
 955   if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) {
 956     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 957     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 958             ResultReg)
 959       .addFrameIndex(Addr.getFI())
 960       .addImm(0)
 961       .addImm(0);
 962     Addr.setKind(Address::RegBase);
 963     Addr.setReg(ResultReg);
 964   }
 965
 966   if (RegisterOffsetNeedsLowering) {
 967     unsigned ResultReg = 0;
 968     if (Addr.getReg()) {
 969       if (Addr.getExtendType() == AArch64_AM::SXTW ||
 970           Addr.getExtendType() == AArch64_AM::UXTW   )
 971         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
 972                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
 973                                   /*TODO:IsKill=*/false, Addr.getExtendType(),
 974                                   Addr.getShift());
 975       else
 976         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
 977                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
 978                                   /*TODO:IsKill=*/false, AArch64_AM::LSL,
 979                                   Addr.getShift());
 980     } else {
 981       if (Addr.getExtendType() == AArch64_AM::UXTW)
 982         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
 983                                /*Op0IsKill=*/false, Addr.getShift(),
 984                                /*IsZExt=*/true);
 985       else if (Addr.getExtendType() == AArch64_AM::SXTW)
 986         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
 987                                /*Op0IsKill=*/false, Addr.getShift(),
 988                                /*IsZExt=*/false);
 989       else
 990         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
 991                                /*Op0IsKill=*/false, Addr.getShift());
 992     }
 993     if (!ResultReg)
 994       return false;
 995
 996     Addr.setReg(ResultReg);
 997     Addr.setOffsetReg(0);
 998     Addr.setShift(0);
 999     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1000   }
1001
1002   // Since the offset is too large for the load/store instruction get the
1003   // reg+offset into a register.
1004   if (ImmediateOffsetNeedsLowering) {
1005     unsigned ResultReg;
1006     if (Addr.getReg())
1007       // Try to fold the immediate into the add instruction.
1008       ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1009     else
1010       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1011
1012     if (!ResultReg)
1013       return false;
1014     Addr.setReg(ResultReg);
1015     Addr.setOffset(0);
1016   }
1017   return true;
1018 }
1019
1020 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1021                                            const MachineInstrBuilder &MIB,
1022                                            unsigned Flags,
1023                                            unsigned ScaleFactor,
1024                                            MachineMemOperand *MMO) {
1025   int64_t Offset = Addr.getOffset() / ScaleFactor;
1026   // Frame base works a bit differently. Handle it separately.
1027   if (Addr.isFIBase()) {
1028     int FI = Addr.getFI();
1029     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1030     // and alignment should be based on the VT.
1031     MMO = FuncInfo.MF->getMachineMemOperand(
1032       MachinePointerInfo::getFixedStack(FI, Offset), Flags,
1033       MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1034     // Now add the rest of the operands.
1035     MIB.addFrameIndex(FI).addImm(Offset);
1036   } else {
1037     assert(Addr.isRegBase() && "Unexpected address kind.");
1038     const MCInstrDesc &II = MIB->getDesc();
1039     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1040     Addr.setReg(
1041       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1042     Addr.setOffsetReg(
1043       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1044     if (Addr.getOffsetReg()) {
1045       assert(Addr.getOffset() == 0 && "Unexpected offset");
1046       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1047                       Addr.getExtendType() == AArch64_AM::SXTX;
1048       MIB.addReg(Addr.getReg());
1049       MIB.addReg(Addr.getOffsetReg());
1050       MIB.addImm(IsSigned);
1051       MIB.addImm(Addr.getShift() != 0);
1052     } else {
1053       MIB.addReg(Addr.getReg());
1054       MIB.addImm(Offset);
1055     }
1056   }
1057
1058   if (MMO)
1059     MIB.addMemOperand(MMO);
1060 }
1061
1062 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1063                                      const Value *RHS, bool SetFlags,
1064                                      bool WantResult,  bool IsZExt) {
1065   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1066   bool NeedExtend = false;
1067   switch (RetVT.SimpleTy) {
1068   default:
1069     return 0;
1070   case MVT::i1:
1071     NeedExtend = true;
1072     break;
1073   case MVT::i8:
1074     NeedExtend = true;
1075     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1076     break;
1077   case MVT::i16:
1078     NeedExtend = true;
1079     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1080     break;
1081   case MVT::i32:  // fall-through
1082   case MVT::i64:
1083     break;
1084   }
1085   MVT SrcVT = RetVT;
1086   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1087
1088   // Canonicalize immediates to the RHS first.
1089   if (UseAdd && isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1090     std::swap(LHS, RHS);
1091
1092   // Canonicalize mul by power of 2 to the RHS.
1093   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1094     if (isMulPowOf2(LHS))
1095       std::swap(LHS, RHS);
1096
1097   // Canonicalize shift immediate to the RHS.
1098   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1099     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1100       if (isa<ConstantInt>(SI->getOperand(1)))
1101         if (SI->getOpcode() == Instruction::Shl  ||
1102             SI->getOpcode() == Instruction::LShr ||
1103             SI->getOpcode() == Instruction::AShr   )
1104           std::swap(LHS, RHS);
1105
1106   unsigned LHSReg = getRegForValue(LHS);
1107   if (!LHSReg)
1108     return 0;
1109   bool LHSIsKill = hasTrivialKill(LHS);
1110
1111   if (NeedExtend)
1112     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1113
1114   unsigned ResultReg = 0;
1115   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1116     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1117     if (C->isNegative())
1118       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1119                                 SetFlags, WantResult);
1120     else
1121       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1122                                 WantResult);
1123   }
1124   if (ResultReg)
1125     return ResultReg;
1126
1127   // Only extend the RHS within the instruction if there is a valid extend type.
1128   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1129       isValueAvailable(RHS)) {
1130     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1131       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1132         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1133           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1134           if (!RHSReg)
1135             return 0;
1136           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1137           return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1138                                RHSIsKill, ExtendType, C->getZExtValue(),
1139                                SetFlags, WantResult);
1140         }
1141     unsigned RHSReg = getRegForValue(RHS);
1142     if (!RHSReg)
1143       return 0;
1144     bool RHSIsKill = hasTrivialKill(RHS);
1145     return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1146                          ExtendType, 0, SetFlags, WantResult);
1147   }
1148
1149   // Check if the mul can be folded into the instruction.
1150   if (RHS->hasOneUse() && isValueAvailable(RHS))
1151     if (isMulPowOf2(RHS)) {
1152       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1153       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1154
1155       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1156         if (C->getValue().isPowerOf2())
1157           std::swap(MulLHS, MulRHS);
1158
1159       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1160       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1161       unsigned RHSReg = getRegForValue(MulLHS);
1162       if (!RHSReg)
1163         return 0;
1164       bool RHSIsKill = hasTrivialKill(MulLHS);
1165       return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1166                            AArch64_AM::LSL, ShiftVal, SetFlags, WantResult);
1167     }
1168
1169   // Check if the shift can be folded into the instruction.
1170   if (RHS->hasOneUse() && isValueAvailable(RHS))
1171     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1172       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1173         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1174         switch (SI->getOpcode()) {
1175         default: break;
1176         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1177         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1178         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1179         }
1180         uint64_t ShiftVal = C->getZExtValue();
1181         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1182           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1183           if (!RHSReg)
1184             return 0;
1185           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1186           return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1187                                RHSIsKill, ShiftType, ShiftVal, SetFlags,
1188                                WantResult);
1189         }
1190       }
1191     }
1192
1193   unsigned RHSReg = getRegForValue(RHS);
1194   if (!RHSReg)
1195     return 0;
1196   bool RHSIsKill = hasTrivialKill(RHS);
1197
1198   if (NeedExtend)
1199     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1200
1201   return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1202                        SetFlags, WantResult);
1203 }
1204
1205 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1206                                         bool LHSIsKill, unsigned RHSReg,
1207                                         bool RHSIsKill, bool SetFlags,
1208                                         bool WantResult) {
1209   assert(LHSReg && RHSReg && "Invalid register number.");
1210
1211   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1212     return 0;
1213
1214   static const unsigned OpcTable[2][2][2] = {
1215     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1216       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1217     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1218       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1219   };
1220   bool Is64Bit = RetVT == MVT::i64;
1221   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1222   const TargetRegisterClass *RC =
1223       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1224   unsigned ResultReg;
1225   if (WantResult)
1226     ResultReg = createResultReg(RC);
1227   else
1228     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1229
1230   const MCInstrDesc &II = TII.get(Opc);
1231   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1232   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1233   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1234       .addReg(LHSReg, getKillRegState(LHSIsKill))
1235       .addReg(RHSReg, getKillRegState(RHSIsKill));
1236   return ResultReg;
1237 }
1238
1239 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1240                                         bool LHSIsKill, uint64_t Imm,
1241                                         bool SetFlags, bool WantResult) {
1242   assert(LHSReg && "Invalid register number.");
1243
1244   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1245     return 0;
1246
1247   unsigned ShiftImm;
1248   if (isUInt<12>(Imm))
1249     ShiftImm = 0;
1250   else if ((Imm & 0xfff000) == Imm) {
1251     ShiftImm = 12;
1252     Imm >>= 12;
1253   } else
1254     return 0;
1255
1256   static const unsigned OpcTable[2][2][2] = {
1257     { { AArch64::SUBWri,  AArch64::SUBXri  },
1258       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1259     { { AArch64::SUBSWri, AArch64::SUBSXri },
1260       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1261   };
1262   bool Is64Bit = RetVT == MVT::i64;
1263   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1264   const TargetRegisterClass *RC;
1265   if (SetFlags)
1266     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1267   else
1268     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1269   unsigned ResultReg;
1270   if (WantResult)
1271     ResultReg = createResultReg(RC);
1272   else
1273     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1274
1275   const MCInstrDesc &II = TII.get(Opc);
1276   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1277   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1278       .addReg(LHSReg, getKillRegState(LHSIsKill))
1279       .addImm(Imm)
1280       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1281   return ResultReg;
1282 }
1283
1284 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1285                                         bool LHSIsKill, unsigned RHSReg,
1286                                         bool RHSIsKill,
1287                                         AArch64_AM::ShiftExtendType ShiftType,
1288                                         uint64_t ShiftImm, bool SetFlags,
1289                                         bool WantResult) {
1290   assert(LHSReg && RHSReg && "Invalid register number.");
1291
1292   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1293     return 0;
1294
1295   static const unsigned OpcTable[2][2][2] = {
1296     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1297       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1298     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1299       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1300   };
1301   bool Is64Bit = RetVT == MVT::i64;
1302   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1303   const TargetRegisterClass *RC =
1304       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1305   unsigned ResultReg;
1306   if (WantResult)
1307     ResultReg = createResultReg(RC);
1308   else
1309     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1310
1311   const MCInstrDesc &II = TII.get(Opc);
1312   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1313   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1314   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1315       .addReg(LHSReg, getKillRegState(LHSIsKill))
1316       .addReg(RHSReg, getKillRegState(RHSIsKill))
1317       .addImm(getShifterImm(ShiftType, ShiftImm));
1318   return ResultReg;
1319 }
1320
1321 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1322                                         bool LHSIsKill, unsigned RHSReg,
1323                                         bool RHSIsKill,
1324                                         AArch64_AM::ShiftExtendType ExtType,
1325                                         uint64_t ShiftImm, bool SetFlags,
1326                                         bool WantResult) {
1327   assert(LHSReg && RHSReg && "Invalid register number.");
1328
1329   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1330     return 0;
1331
1332   static const unsigned OpcTable[2][2][2] = {
1333     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1334       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1335     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1336       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1337   };
1338   bool Is64Bit = RetVT == MVT::i64;
1339   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1340   const TargetRegisterClass *RC = nullptr;
1341   if (SetFlags)
1342     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1343   else
1344     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1345   unsigned ResultReg;
1346   if (WantResult)
1347     ResultReg = createResultReg(RC);
1348   else
1349     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1350
1351   const MCInstrDesc &II = TII.get(Opc);
1352   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1353   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1354   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1355       .addReg(LHSReg, getKillRegState(LHSIsKill))
1356       .addReg(RHSReg, getKillRegState(RHSIsKill))
1357       .addImm(getArithExtendImm(ExtType, ShiftImm));
1358   return ResultReg;
1359 }
1360
1361 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1362   Type *Ty = LHS->getType();
1363   EVT EVT = TLI.getValueType(Ty, true);
1364   if (!EVT.isSimple())
1365     return false;
1366   MVT VT = EVT.getSimpleVT();
1367
1368   switch (VT.SimpleTy) {
1369   default:
1370     return false;
1371   case MVT::i1:
1372   case MVT::i8:
1373   case MVT::i16:
1374   case MVT::i32:
1375   case MVT::i64:
1376     return emitICmp(VT, LHS, RHS, IsZExt);
1377   case MVT::f32:
1378   case MVT::f64:
1379     return emitFCmp(VT, LHS, RHS);
1380   }
1381 }
1382
1383 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1384                                bool IsZExt) {
1385   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1386                  IsZExt) != 0;
1387 }
1388
1389 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1390                                   uint64_t Imm) {
1391   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1392                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1393 }
1394
1395 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1396   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1397     return false;
1398
1399   // Check to see if the 2nd operand is a constant that we can encode directly
1400   // in the compare.
1401   bool UseImm = false;
1402   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1403     if (CFP->isZero() && !CFP->isNegative())
1404       UseImm = true;
1405
1406   unsigned LHSReg = getRegForValue(LHS);
1407   if (!LHSReg)
1408     return false;
1409   bool LHSIsKill = hasTrivialKill(LHS);
1410
1411   if (UseImm) {
1412     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1413     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1414         .addReg(LHSReg, getKillRegState(LHSIsKill));
1415     return true;
1416   }
1417
1418   unsigned RHSReg = getRegForValue(RHS);
1419   if (!RHSReg)
1420     return false;
1421   bool RHSIsKill = hasTrivialKill(RHS);
1422
1423   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1424   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1425       .addReg(LHSReg, getKillRegState(LHSIsKill))
1426       .addReg(RHSReg, getKillRegState(RHSIsKill));
1427   return true;
1428 }
1429
1430 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1431                                   bool SetFlags, bool WantResult, bool IsZExt) {
1432   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1433                     IsZExt);
1434 }
1435
1436 /// \brief This method is a wrapper to simplify add emission.
1437 ///
1438 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1439 /// that fails, then try to materialize the immediate into a register and use
1440 /// emitAddSub_rr instead.
1441 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1442                                       int64_t Imm) {
1443   unsigned ResultReg;
1444   if (Imm < 0)
1445     ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1446   else
1447     ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1448
1449   if (ResultReg)
1450     return ResultReg;
1451
1452   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1453   if (!CReg)
1454     return 0;
1455
1456   ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1457   return ResultReg;
1458 }
1459
1460 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1461                                   bool SetFlags, bool WantResult, bool IsZExt) {
1462   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1463                     IsZExt);
1464 }
1465
1466 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1467                                       bool LHSIsKill, unsigned RHSReg,
1468                                       bool RHSIsKill, bool WantResult) {
1469   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1470                        RHSIsKill, /*SetFlags=*/true, WantResult);
1471 }
1472
1473 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1474                                       bool LHSIsKill, unsigned RHSReg,
1475                                       bool RHSIsKill,
1476                                       AArch64_AM::ShiftExtendType ShiftType,
1477                                       uint64_t ShiftImm, bool WantResult) {
1478   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1479                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1480                        WantResult);
1481 }
1482
1483 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1484                                         const Value *LHS, const Value *RHS) {
1485   // Canonicalize immediates to the RHS first.
1486   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1487     std::swap(LHS, RHS);
1488
1489   // Canonicalize mul by power-of-2 to the RHS.
1490   if (LHS->hasOneUse() && isValueAvailable(LHS))
1491     if (isMulPowOf2(LHS))
1492       std::swap(LHS, RHS);
1493
1494   // Canonicalize shift immediate to the RHS.
1495   if (LHS->hasOneUse() && isValueAvailable(LHS))
1496     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1497       if (isa<ConstantInt>(SI->getOperand(1)))
1498         std::swap(LHS, RHS);
1499
1500   unsigned LHSReg = getRegForValue(LHS);
1501   if (!LHSReg)
1502     return 0;
1503   bool LHSIsKill = hasTrivialKill(LHS);
1504
1505   unsigned ResultReg = 0;
1506   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1507     uint64_t Imm = C->getZExtValue();
1508     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1509   }
1510   if (ResultReg)
1511     return ResultReg;
1512
1513   // Check if the mul can be folded into the instruction.
1514   if (RHS->hasOneUse() && isValueAvailable(RHS))
1515     if (isMulPowOf2(RHS)) {
1516       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1517       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1518
1519       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1520         if (C->getValue().isPowerOf2())
1521           std::swap(MulLHS, MulRHS);
1522
1523       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1524       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1525
1526       unsigned RHSReg = getRegForValue(MulLHS);
1527       if (!RHSReg)
1528         return 0;
1529       bool RHSIsKill = hasTrivialKill(MulLHS);
1530       return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1531                               RHSIsKill, ShiftVal);
1532     }
1533
1534   // Check if the shift can be folded into the instruction.
1535   if (RHS->hasOneUse() && isValueAvailable(RHS))
1536     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1537       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1538         uint64_t ShiftVal = C->getZExtValue();
1539         unsigned RHSReg = getRegForValue(SI->getOperand(0));
1540         if (!RHSReg)
1541           return 0;
1542         bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1543         return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1544                                 RHSIsKill, ShiftVal);
1545       }
1546
1547   unsigned RHSReg = getRegForValue(RHS);
1548   if (!RHSReg)
1549     return 0;
1550   bool RHSIsKill = hasTrivialKill(RHS);
1551
1552   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1553   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1554   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1555     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1556     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1557   }
1558   return ResultReg;
1559 }
1560
1561 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1562                                            unsigned LHSReg, bool LHSIsKill,
1563                                            uint64_t Imm) {
1564   assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1565          "ISD nodes are not consecutive!");
1566   static const unsigned OpcTable[3][2] = {
1567     { AArch64::ANDWri, AArch64::ANDXri },
1568     { AArch64::ORRWri, AArch64::ORRXri },
1569     { AArch64::EORWri, AArch64::EORXri }
1570   };
1571   const TargetRegisterClass *RC;
1572   unsigned Opc;
1573   unsigned RegSize;
1574   switch (RetVT.SimpleTy) {
1575   default:
1576     return 0;
1577   case MVT::i1:
1578   case MVT::i8:
1579   case MVT::i16:
1580   case MVT::i32: {
1581     unsigned Idx = ISDOpc - ISD::AND;
1582     Opc = OpcTable[Idx][0];
1583     RC = &AArch64::GPR32spRegClass;
1584     RegSize = 32;
1585     break;
1586   }
1587   case MVT::i64:
1588     Opc = OpcTable[ISDOpc - ISD::AND][1];
1589     RC = &AArch64::GPR64spRegClass;
1590     RegSize = 64;
1591     break;
1592   }
1593
1594   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1595     return 0;
1596
1597   unsigned ResultReg =
1598       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1599                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1600   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1601     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1602     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1603   }
1604   return ResultReg;
1605 }
1606
1607 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1608                                            unsigned LHSReg, bool LHSIsKill,
1609                                            unsigned RHSReg, bool RHSIsKill,
1610                                            uint64_t ShiftImm) {
1611   assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1612          "ISD nodes are not consecutive!");
1613   static const unsigned OpcTable[3][2] = {
1614     { AArch64::ANDWrs, AArch64::ANDXrs },
1615     { AArch64::ORRWrs, AArch64::ORRXrs },
1616     { AArch64::EORWrs, AArch64::EORXrs }
1617   };
1618   const TargetRegisterClass *RC;
1619   unsigned Opc;
1620   switch (RetVT.SimpleTy) {
1621   default:
1622     return 0;
1623   case MVT::i1:
1624   case MVT::i8:
1625   case MVT::i16:
1626   case MVT::i32:
1627     Opc = OpcTable[ISDOpc - ISD::AND][0];
1628     RC = &AArch64::GPR32RegClass;
1629     break;
1630   case MVT::i64:
1631     Opc = OpcTable[ISDOpc - ISD::AND][1];
1632     RC = &AArch64::GPR64RegClass;
1633     break;
1634   }
1635   unsigned ResultReg =
1636       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1637                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1638   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1639     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1640     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1641   }
1642   return ResultReg;
1643 }
1644
1645 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1646                                      uint64_t Imm) {
1647   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1648 }
1649
1650 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1651                                    bool WantZExt, MachineMemOperand *MMO) {
1652   // Simplify this down to something we can handle.
1653   if (!simplifyAddress(Addr, VT))
1654     return 0;
1655
1656   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1657   if (!ScaleFactor)
1658     llvm_unreachable("Unexpected value type.");
1659
1660   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1661   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1662   bool UseScaled = true;
1663   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1664     UseScaled = false;
1665     ScaleFactor = 1;
1666   }
1667
1668   static const unsigned GPOpcTable[2][8][4] = {
1669     // Sign-extend.
1670     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1671         AArch64::LDURXi  },
1672       { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1673         AArch64::LDURXi  },
1674       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1675         AArch64::LDRXui  },
1676       { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1677         AArch64::LDRXui  },
1678       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1679         AArch64::LDRXroX },
1680       { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1681         AArch64::LDRXroX },
1682       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1683         AArch64::LDRXroW },
1684       { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1685         AArch64::LDRXroW }
1686     },
1687     // Zero-extend.
1688     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1689         AArch64::LDURXi  },
1690       { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1691         AArch64::LDURXi  },
1692       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1693         AArch64::LDRXui  },
1694       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1695         AArch64::LDRXui  },
1696       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1697         AArch64::LDRXroX },
1698       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1699         AArch64::LDRXroX },
1700       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1701         AArch64::LDRXroW },
1702       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1703         AArch64::LDRXroW }
1704     }
1705   };
1706
1707   static const unsigned FPOpcTable[4][2] = {
1708     { AArch64::LDURSi,  AArch64::LDURDi  },
1709     { AArch64::LDRSui,  AArch64::LDRDui  },
1710     { AArch64::LDRSroX, AArch64::LDRDroX },
1711     { AArch64::LDRSroW, AArch64::LDRDroW }
1712   };
1713
1714   unsigned Opc;
1715   const TargetRegisterClass *RC;
1716   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1717                       Addr.getOffsetReg();
1718   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1719   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1720       Addr.getExtendType() == AArch64_AM::SXTW)
1721     Idx++;
1722
1723   bool IsRet64Bit = RetVT == MVT::i64;
1724   switch (VT.SimpleTy) {
1725   default:
1726     llvm_unreachable("Unexpected value type.");
1727   case MVT::i1: // Intentional fall-through.
1728   case MVT::i8:
1729     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1730     RC = (IsRet64Bit && !WantZExt) ?
1731              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1732     break;
1733   case MVT::i16:
1734     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1735     RC = (IsRet64Bit && !WantZExt) ?
1736              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1737     break;
1738   case MVT::i32:
1739     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1740     RC = (IsRet64Bit && !WantZExt) ?
1741              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1742     break;
1743   case MVT::i64:
1744     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1745     RC = &AArch64::GPR64RegClass;
1746     break;
1747   case MVT::f32:
1748     Opc = FPOpcTable[Idx][0];
1749     RC = &AArch64::FPR32RegClass;
1750     break;
1751   case MVT::f64:
1752     Opc = FPOpcTable[Idx][1];
1753     RC = &AArch64::FPR64RegClass;
1754     break;
1755   }
1756
1757   // Create the base instruction, then add the operands.
1758   unsigned ResultReg = createResultReg(RC);
1759   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1760                                     TII.get(Opc), ResultReg);
1761   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1762
1763   // Loading an i1 requires special handling.
1764   if (VT == MVT::i1) {
1765     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1766     assert(ANDReg && "Unexpected AND instruction emission failure.");
1767     ResultReg = ANDReg;
1768   }
1769
1770   // For zero-extending loads to 64bit we emit a 32bit load and then convert
1771   // the 32bit reg to a 64bit reg.
1772   if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1773     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1774     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1775             TII.get(AArch64::SUBREG_TO_REG), Reg64)
1776         .addImm(0)
1777         .addReg(ResultReg, getKillRegState(true))
1778         .addImm(AArch64::sub_32);
1779     ResultReg = Reg64;
1780   }
1781   return ResultReg;
1782 }
1783
1784 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1785   MVT VT;
1786   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1787     return false;
1788
1789   if (VT.isVector())
1790     return selectOperator(I, I->getOpcode());
1791
1792   unsigned ResultReg;
1793   switch (I->getOpcode()) {
1794   default:
1795     llvm_unreachable("Unexpected instruction.");
1796   case Instruction::Add:
1797     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1798     break;
1799   case Instruction::Sub:
1800     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1801     break;
1802   }
1803   if (!ResultReg)
1804     return false;
1805
1806   updateValueMap(I, ResultReg);
1807   return true;
1808 }
1809
1810 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1811   MVT VT;
1812   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1813     return false;
1814
1815   if (VT.isVector())
1816     return selectOperator(I, I->getOpcode());
1817
1818   unsigned ResultReg;
1819   switch (I->getOpcode()) {
1820   default:
1821     llvm_unreachable("Unexpected instruction.");
1822   case Instruction::And:
1823     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1824     break;
1825   case Instruction::Or:
1826     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1827     break;
1828   case Instruction::Xor:
1829     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1830     break;
1831   }
1832   if (!ResultReg)
1833     return false;
1834
1835   updateValueMap(I, ResultReg);
1836   return true;
1837 }
1838
1839 bool AArch64FastISel::selectLoad(const Instruction *I) {
1840   MVT VT;
1841   // Verify we have a legal type before going any further.  Currently, we handle
1842   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1843   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1844   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1845       cast<LoadInst>(I)->isAtomic())
1846     return false;
1847
1848   // See if we can handle this address.
1849   Address Addr;
1850   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1851     return false;
1852
1853   // Fold the following sign-/zero-extend into the load instruction.
1854   bool WantZExt = true;
1855   MVT RetVT = VT;
1856   const Value *IntExtVal = nullptr;
1857   if (I->hasOneUse()) {
1858     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1859       if (isTypeSupported(ZE->getType(), RetVT))
1860         IntExtVal = ZE;
1861       else
1862         RetVT = VT;
1863     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1864       if (isTypeSupported(SE->getType(), RetVT))
1865         IntExtVal = SE;
1866       else
1867         RetVT = VT;
1868       WantZExt = false;
1869     }
1870   }
1871
1872   unsigned ResultReg =
1873       emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1874   if (!ResultReg)
1875     return false;
1876
1877   // There are a few different cases we have to handle, because the load or the
1878   // sign-/zero-extend might not be selected by FastISel if we fall-back to
1879   // SelectionDAG. There is also an ordering issue when both instructions are in
1880   // different basic blocks.
1881   // 1.) The load instruction is selected by FastISel, but the integer extend
1882   //     not. This usually happens when the integer extend is in a different
1883   //     basic block and SelectionDAG took over for that basic block.
1884   // 2.) The load instruction is selected before the integer extend. This only
1885   //     happens when the integer extend is in a different basic block.
1886   // 3.) The load instruction is selected by SelectionDAG and the integer extend
1887   //     by FastISel. This happens if there are instructions between the load
1888   //     and the integer extend that couldn't be selected by FastISel.
1889   if (IntExtVal) {
1890     // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
1891     // could select it. Emit a copy to subreg if necessary. FastISel will remove
1892     // it when it selects the integer extend.
1893     unsigned Reg = lookUpRegForValue(IntExtVal);
1894     if (!Reg) {
1895       if (RetVT == MVT::i64 && VT <= MVT::i32) {
1896         if (WantZExt) {
1897           // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
1898           std::prev(FuncInfo.InsertPt)->eraseFromParent();
1899           ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
1900         } else
1901           ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
1902                                                  /*IsKill=*/true,
1903                                                  AArch64::sub_32);
1904       }
1905       updateValueMap(I, ResultReg);
1906       return true;
1907     }
1908
1909     // The integer extend has already been emitted - delete all the instructions
1910     // that have been emitted by the integer extend lowering code and use the
1911     // result from the load instruction directly.
1912     while (Reg) {
1913       auto *MI = MRI.getUniqueVRegDef(Reg);
1914       if (!MI)
1915         break;
1916       Reg = 0;
1917       for (auto &Opnd : MI->uses()) {
1918         if (Opnd.isReg()) {
1919           Reg = Opnd.getReg();
1920           break;
1921         }
1922       }
1923       MI->eraseFromParent();
1924     }
1925     updateValueMap(IntExtVal, ResultReg);
1926     return true;
1927   }
1928
1929   updateValueMap(I, ResultReg);
1930   return true;
1931 }
1932
1933 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
1934                                 MachineMemOperand *MMO) {
1935   // Simplify this down to something we can handle.
1936   if (!simplifyAddress(Addr, VT))
1937     return false;
1938
1939   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1940   if (!ScaleFactor)
1941     llvm_unreachable("Unexpected value type.");
1942
1943   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1944   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1945   bool UseScaled = true;
1946   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1947     UseScaled = false;
1948     ScaleFactor = 1;
1949   }
1950
1951   static const unsigned OpcTable[4][6] = {
1952     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
1953       AArch64::STURSi,   AArch64::STURDi },
1954     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
1955       AArch64::STRSui,   AArch64::STRDui },
1956     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
1957       AArch64::STRSroX,  AArch64::STRDroX },
1958     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
1959       AArch64::STRSroW,  AArch64::STRDroW }
1960   };
1961
1962   unsigned Opc;
1963   bool VTIsi1 = false;
1964   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1965                       Addr.getOffsetReg();
1966   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1967   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1968       Addr.getExtendType() == AArch64_AM::SXTW)
1969     Idx++;
1970
1971   switch (VT.SimpleTy) {
1972   default: llvm_unreachable("Unexpected value type.");
1973   case MVT::i1:  VTIsi1 = true;
1974   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
1975   case MVT::i16: Opc = OpcTable[Idx][1]; break;
1976   case MVT::i32: Opc = OpcTable[Idx][2]; break;
1977   case MVT::i64: Opc = OpcTable[Idx][3]; break;
1978   case MVT::f32: Opc = OpcTable[Idx][4]; break;
1979   case MVT::f64: Opc = OpcTable[Idx][5]; break;
1980   }
1981
1982   // Storing an i1 requires special handling.
1983   if (VTIsi1 && SrcReg != AArch64::WZR) {
1984     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
1985     assert(ANDReg && "Unexpected AND instruction emission failure.");
1986     SrcReg = ANDReg;
1987   }
1988   // Create the base instruction, then add the operands.
1989   const MCInstrDesc &II = TII.get(Opc);
1990   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
1991   MachineInstrBuilder MIB =
1992       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
1993   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
1994
1995   return true;
1996 }
1997
1998 bool AArch64FastISel::selectStore(const Instruction *I) {
1999   MVT VT;
2000   const Value *Op0 = I->getOperand(0);
2001   // Verify we have a legal type before going any further.  Currently, we handle
2002   // simple types that will directly fit in a register (i32/f32/i64/f64) or
2003   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2004   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) ||
2005       cast<StoreInst>(I)->isAtomic())
2006     return false;
2007
2008   // Get the value to be stored into a register. Use the zero register directly
2009   // when possible to avoid an unnecessary copy and a wasted register.
2010   unsigned SrcReg = 0;
2011   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2012     if (CI->isZero())
2013       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2014   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2015     if (CF->isZero() && !CF->isNegative()) {
2016       VT = MVT::getIntegerVT(VT.getSizeInBits());
2017       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2018     }
2019   }
2020
2021   if (!SrcReg)
2022     SrcReg = getRegForValue(Op0);
2023
2024   if (!SrcReg)
2025     return false;
2026
2027   // See if we can handle this address.
2028   Address Addr;
2029   if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
2030     return false;
2031
2032   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2033     return false;
2034   return true;
2035 }
2036
2037 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2038   switch (Pred) {
2039   case CmpInst::FCMP_ONE:
2040   case CmpInst::FCMP_UEQ:
2041   default:
2042     // AL is our "false" for now. The other two need more compares.
2043     return AArch64CC::AL;
2044   case CmpInst::ICMP_EQ:
2045   case CmpInst::FCMP_OEQ:
2046     return AArch64CC::EQ;
2047   case CmpInst::ICMP_SGT:
2048   case CmpInst::FCMP_OGT:
2049     return AArch64CC::GT;
2050   case CmpInst::ICMP_SGE:
2051   case CmpInst::FCMP_OGE:
2052     return AArch64CC::GE;
2053   case CmpInst::ICMP_UGT:
2054   case CmpInst::FCMP_UGT:
2055     return AArch64CC::HI;
2056   case CmpInst::FCMP_OLT:
2057     return AArch64CC::MI;
2058   case CmpInst::ICMP_ULE:
2059   case CmpInst::FCMP_OLE:
2060     return AArch64CC::LS;
2061   case CmpInst::FCMP_ORD:
2062     return AArch64CC::VC;
2063   case CmpInst::FCMP_UNO:
2064     return AArch64CC::VS;
2065   case CmpInst::FCMP_UGE:
2066     return AArch64CC::PL;
2067   case CmpInst::ICMP_SLT:
2068   case CmpInst::FCMP_ULT:
2069     return AArch64CC::LT;
2070   case CmpInst::ICMP_SLE:
2071   case CmpInst::FCMP_ULE:
2072     return AArch64CC::LE;
2073   case CmpInst::FCMP_UNE:
2074   case CmpInst::ICMP_NE:
2075     return AArch64CC::NE;
2076   case CmpInst::ICMP_UGE:
2077     return AArch64CC::HS;
2078   case CmpInst::ICMP_ULT:
2079     return AArch64CC::LO;
2080   }
2081 }
2082
2083 /// \brief Try to emit a combined compare-and-branch instruction.
2084 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2085   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2086   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2087   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2088
2089   const Value *LHS = CI->getOperand(0);
2090   const Value *RHS = CI->getOperand(1);
2091
2092   Type *Ty = LHS->getType();
2093     if (!Ty->isIntegerTy())
2094       return false;
2095
2096   unsigned BW = cast<IntegerType>(Ty)->getBitWidth();
2097   if (BW != 1 && BW != 8 && BW != 16 && BW != 32 && BW != 64)
2098     return false;
2099
2100   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2101   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2102
2103   // Try to take advantage of fallthrough opportunities.
2104   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2105     std::swap(TBB, FBB);
2106     Predicate = CmpInst::getInversePredicate(Predicate);
2107   }
2108
2109   int TestBit = -1;
2110   bool IsCmpNE;
2111   if ((Predicate == CmpInst::ICMP_EQ) || (Predicate == CmpInst::ICMP_NE)) {
2112     if (const auto *C = dyn_cast<ConstantInt>(LHS))
2113       if (C->isNullValue())
2114         std::swap(LHS, RHS);
2115
2116     if (!isa<ConstantInt>(RHS))
2117       return false;
2118
2119     if (!cast<ConstantInt>(RHS)->isNullValue())
2120       return false;
2121
2122     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2123       if (AI->getOpcode() == Instruction::And) {
2124         const Value *AndLHS = AI->getOperand(0);
2125         const Value *AndRHS = AI->getOperand(1);
2126
2127         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2128           if (C->getValue().isPowerOf2())
2129             std::swap(AndLHS, AndRHS);
2130
2131         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2132           if (C->getValue().isPowerOf2()) {
2133             TestBit = C->getValue().logBase2();
2134             LHS = AndLHS;
2135           }
2136       }
2137     IsCmpNE = Predicate == CmpInst::ICMP_NE;
2138   } else if (Predicate == CmpInst::ICMP_SLT) {
2139     if (!isa<ConstantInt>(RHS))
2140       return false;
2141
2142     if (!cast<ConstantInt>(RHS)->isNullValue())
2143       return false;
2144
2145     TestBit = BW - 1;
2146     IsCmpNE = true;
2147   } else if (Predicate == CmpInst::ICMP_SGT) {
2148     if (!isa<ConstantInt>(RHS))
2149       return false;
2150
2151     if (cast<ConstantInt>(RHS)->getValue() != -1)
2152       return false;
2153
2154     TestBit = BW - 1;
2155     IsCmpNE = false;
2156   } else
2157     return false;
2158
2159   static const unsigned OpcTable[2][2][2] = {
2160     { {AArch64::CBZW,  AArch64::CBZX },
2161       {AArch64::CBNZW, AArch64::CBNZX} },
2162     { {AArch64::TBZW,  AArch64::TBZX },
2163       {AArch64::TBNZW, AArch64::TBNZX} }
2164   };
2165
2166   bool IsBitTest = TestBit != -1;
2167   bool Is64Bit = BW == 64;
2168   if (TestBit < 32 && TestBit >= 0)
2169     Is64Bit = false;
2170
2171   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2172   const MCInstrDesc &II = TII.get(Opc);
2173
2174   unsigned SrcReg = getRegForValue(LHS);
2175   if (!SrcReg)
2176     return false;
2177   bool SrcIsKill = hasTrivialKill(LHS);
2178
2179   if (BW == 64 && !Is64Bit)
2180     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2181                                         AArch64::sub_32);
2182
2183   // Emit the combined compare and branch instruction.
2184   SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
2185   MachineInstrBuilder MIB =
2186       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2187           .addReg(SrcReg, getKillRegState(SrcIsKill));
2188   if (IsBitTest)
2189     MIB.addImm(TestBit);
2190   MIB.addMBB(TBB);
2191
2192   // Obtain the branch weight and add the TrueBB to the successor list.
2193   uint32_t BranchWeight = 0;
2194   if (FuncInfo.BPI)
2195     BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2196                                                TBB->getBasicBlock());
2197   FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2198   fastEmitBranch(FBB, DbgLoc);
2199
2200   return true;
2201 }
2202
2203 bool AArch64FastISel::selectBranch(const Instruction *I) {
2204   const BranchInst *BI = cast<BranchInst>(I);
2205   if (BI->isUnconditional()) {
2206     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2207     fastEmitBranch(MSucc, BI->getDebugLoc());
2208     return true;
2209   }
2210
2211   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2212   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2213
2214   AArch64CC::CondCode CC = AArch64CC::NE;
2215   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2216     if (CI->hasOneUse() && isValueAvailable(CI)) {
2217       // Try to optimize or fold the cmp.
2218       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2219       switch (Predicate) {
2220       default:
2221         break;
2222       case CmpInst::FCMP_FALSE:
2223         fastEmitBranch(FBB, DbgLoc);
2224         return true;
2225       case CmpInst::FCMP_TRUE:
2226         fastEmitBranch(TBB, DbgLoc);
2227         return true;
2228       }
2229
2230       // Try to emit a combined compare-and-branch first.
2231       if (emitCompareAndBranch(BI))
2232         return true;
2233
2234       // Try to take advantage of fallthrough opportunities.
2235       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2236         std::swap(TBB, FBB);
2237         Predicate = CmpInst::getInversePredicate(Predicate);
2238       }
2239
2240       // Emit the cmp.
2241       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2242         return false;
2243
2244       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2245       // instruction.
2246       CC = getCompareCC(Predicate);
2247       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2248       switch (Predicate) {
2249       default:
2250         break;
2251       case CmpInst::FCMP_UEQ:
2252         ExtraCC = AArch64CC::EQ;
2253         CC = AArch64CC::VS;
2254         break;
2255       case CmpInst::FCMP_ONE:
2256         ExtraCC = AArch64CC::MI;
2257         CC = AArch64CC::GT;
2258         break;
2259       }
2260       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2261
2262       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2263       if (ExtraCC != AArch64CC::AL) {
2264         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2265             .addImm(ExtraCC)
2266             .addMBB(TBB);
2267       }
2268
2269       // Emit the branch.
2270       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2271           .addImm(CC)
2272           .addMBB(TBB);
2273
2274       // Obtain the branch weight and add the TrueBB to the successor list.
2275       uint32_t BranchWeight = 0;
2276       if (FuncInfo.BPI)
2277         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2278                                                   TBB->getBasicBlock());
2279       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2280
2281       fastEmitBranch(FBB, DbgLoc);
2282       return true;
2283     }
2284   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
2285     MVT SrcVT;
2286     if (TI->hasOneUse() && isValueAvailable(TI) &&
2287         isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) {
2288       unsigned CondReg = getRegForValue(TI->getOperand(0));
2289       if (!CondReg)
2290         return false;
2291       bool CondIsKill = hasTrivialKill(TI->getOperand(0));
2292
2293       // Issue an extract_subreg to get the lower 32-bits.
2294       if (SrcVT == MVT::i64) {
2295         CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
2296                                              AArch64::sub_32);
2297         CondIsKill = true;
2298       }
2299
2300       unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
2301       assert(ANDReg && "Unexpected AND instruction emission failure.");
2302       emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
2303
2304       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2305         std::swap(TBB, FBB);
2306         CC = AArch64CC::EQ;
2307       }
2308       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2309           .addImm(CC)
2310           .addMBB(TBB);
2311
2312       // Obtain the branch weight and add the TrueBB to the successor list.
2313       uint32_t BranchWeight = 0;
2314       if (FuncInfo.BPI)
2315         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2316                                                   TBB->getBasicBlock());
2317       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2318
2319       fastEmitBranch(FBB, DbgLoc);
2320       return true;
2321     }
2322   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2323     uint64_t Imm = CI->getZExtValue();
2324     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2325     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2326         .addMBB(Target);
2327
2328     // Obtain the branch weight and add the target to the successor list.
2329     uint32_t BranchWeight = 0;
2330     if (FuncInfo.BPI)
2331       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2332                                                  Target->getBasicBlock());
2333     FuncInfo.MBB->addSuccessor(Target, BranchWeight);
2334     return true;
2335   } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2336     // Fake request the condition, otherwise the intrinsic might be completely
2337     // optimized away.
2338     unsigned CondReg = getRegForValue(BI->getCondition());
2339     if (!CondReg)
2340       return false;
2341
2342     // Emit the branch.
2343     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2344       .addImm(CC)
2345       .addMBB(TBB);
2346
2347     // Obtain the branch weight and add the TrueBB to the successor list.
2348     uint32_t BranchWeight = 0;
2349     if (FuncInfo.BPI)
2350       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2351                                                  TBB->getBasicBlock());
2352     FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2353
2354     fastEmitBranch(FBB, DbgLoc);
2355     return true;
2356   }
2357
2358   unsigned CondReg = getRegForValue(BI->getCondition());
2359   if (CondReg == 0)
2360     return false;
2361   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2362
2363   // We've been divorced from our compare!  Our block was split, and
2364   // now our compare lives in a predecessor block.  We musn't
2365   // re-compare here, as the children of the compare aren't guaranteed
2366   // live across the block boundary (we *could* check for this).
2367   // Regardless, the compare has been done in the predecessor block,
2368   // and it left a value for us in a virtual register.  Ergo, we test
2369   // the one-bit value left in the virtual register.
2370   emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0);
2371
2372   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2373     std::swap(TBB, FBB);
2374     CC = AArch64CC::EQ;
2375   }
2376
2377   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2378       .addImm(CC)
2379       .addMBB(TBB);
2380
2381   // Obtain the branch weight and add the TrueBB to the successor list.
2382   uint32_t BranchWeight = 0;
2383   if (FuncInfo.BPI)
2384     BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2385                                                TBB->getBasicBlock());
2386   FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2387
2388   fastEmitBranch(FBB, DbgLoc);
2389   return true;
2390 }
2391
2392 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2393   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2394   unsigned AddrReg = getRegForValue(BI->getOperand(0));
2395   if (AddrReg == 0)
2396     return false;
2397
2398   // Emit the indirect branch.
2399   const MCInstrDesc &II = TII.get(AArch64::BR);
2400   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2401   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2402
2403   // Make sure the CFG is up-to-date.
2404   for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
2405     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
2406
2407   return true;
2408 }
2409
2410 bool AArch64FastISel::selectCmp(const Instruction *I) {
2411   const CmpInst *CI = cast<CmpInst>(I);
2412
2413   // Try to optimize or fold the cmp.
2414   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2415   unsigned ResultReg = 0;
2416   switch (Predicate) {
2417   default:
2418     break;
2419   case CmpInst::FCMP_FALSE:
2420     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2421     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2422             TII.get(TargetOpcode::COPY), ResultReg)
2423         .addReg(AArch64::WZR, getKillRegState(true));
2424     break;
2425   case CmpInst::FCMP_TRUE:
2426     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2427     break;
2428   }
2429
2430   if (ResultReg) {
2431     updateValueMap(I, ResultReg);
2432     return true;
2433   }
2434
2435   // Emit the cmp.
2436   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2437     return false;
2438
2439   ResultReg = createResultReg(&AArch64::GPR32RegClass);
2440
2441   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2442   // condition codes are inverted, because they are used by CSINC.
2443   static unsigned CondCodeTable[2][2] = {
2444     { AArch64CC::NE, AArch64CC::VC },
2445     { AArch64CC::PL, AArch64CC::LE }
2446   };
2447   unsigned *CondCodes = nullptr;
2448   switch (Predicate) {
2449   default:
2450     break;
2451   case CmpInst::FCMP_UEQ:
2452     CondCodes = &CondCodeTable[0][0];
2453     break;
2454   case CmpInst::FCMP_ONE:
2455     CondCodes = &CondCodeTable[1][0];
2456     break;
2457   }
2458
2459   if (CondCodes) {
2460     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2461     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2462             TmpReg1)
2463         .addReg(AArch64::WZR, getKillRegState(true))
2464         .addReg(AArch64::WZR, getKillRegState(true))
2465         .addImm(CondCodes[0]);
2466     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2467             ResultReg)
2468         .addReg(TmpReg1, getKillRegState(true))
2469         .addReg(AArch64::WZR, getKillRegState(true))
2470         .addImm(CondCodes[1]);
2471
2472     updateValueMap(I, ResultReg);
2473     return true;
2474   }
2475
2476   // Now set a register based on the comparison.
2477   AArch64CC::CondCode CC = getCompareCC(Predicate);
2478   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2479   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2480   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2481           ResultReg)
2482       .addReg(AArch64::WZR, getKillRegState(true))
2483       .addReg(AArch64::WZR, getKillRegState(true))
2484       .addImm(invertedCC);
2485
2486   updateValueMap(I, ResultReg);
2487   return true;
2488 }
2489
2490 bool AArch64FastISel::selectSelect(const Instruction *I) {
2491   const SelectInst *SI = cast<SelectInst>(I);
2492
2493   EVT DestEVT = TLI.getValueType(SI->getType(), true);
2494   if (!DestEVT.isSimple())
2495     return false;
2496
2497   MVT DestVT = DestEVT.getSimpleVT();
2498   if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
2499       DestVT != MVT::f64)
2500     return false;
2501
2502   unsigned SelectOpc;
2503   const TargetRegisterClass *RC = nullptr;
2504   switch (DestVT.SimpleTy) {
2505   default: return false;
2506   case MVT::i32:
2507     SelectOpc = AArch64::CSELWr;    RC = &AArch64::GPR32RegClass; break;
2508   case MVT::i64:
2509     SelectOpc = AArch64::CSELXr;    RC = &AArch64::GPR64RegClass; break;
2510   case MVT::f32:
2511     SelectOpc = AArch64::FCSELSrrr; RC = &AArch64::FPR32RegClass; break;
2512   case MVT::f64:
2513     SelectOpc = AArch64::FCSELDrrr; RC = &AArch64::FPR64RegClass; break;
2514   }
2515
2516   const Value *Cond = SI->getCondition();
2517   bool NeedTest = true;
2518   AArch64CC::CondCode CC = AArch64CC::NE;
2519   if (foldXALUIntrinsic(CC, I, Cond))
2520     NeedTest = false;
2521
2522   unsigned CondReg = getRegForValue(Cond);
2523   if (!CondReg)
2524     return false;
2525   bool CondIsKill = hasTrivialKill(Cond);
2526
2527   if (NeedTest) {
2528     unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
2529     assert(ANDReg && "Unexpected AND instruction emission failure.");
2530     emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
2531   }
2532
2533   unsigned TrueReg = getRegForValue(SI->getTrueValue());
2534   bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
2535
2536   unsigned FalseReg = getRegForValue(SI->getFalseValue());
2537   bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
2538
2539   if (!TrueReg || !FalseReg)
2540     return false;
2541
2542   unsigned ResultReg = fastEmitInst_rri(SelectOpc, RC, TrueReg, TrueIsKill,
2543                                         FalseReg, FalseIsKill, CC);
2544   updateValueMap(I, ResultReg);
2545   return true;
2546 }
2547
2548 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2549   Value *V = I->getOperand(0);
2550   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2551     return false;
2552
2553   unsigned Op = getRegForValue(V);
2554   if (Op == 0)
2555     return false;
2556
2557   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2558   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2559           ResultReg).addReg(Op);
2560   updateValueMap(I, ResultReg);
2561   return true;
2562 }
2563
2564 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2565   Value *V = I->getOperand(0);
2566   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2567     return false;
2568
2569   unsigned Op = getRegForValue(V);
2570   if (Op == 0)
2571     return false;
2572
2573   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2574   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2575           ResultReg).addReg(Op);
2576   updateValueMap(I, ResultReg);
2577   return true;
2578 }
2579
2580 // FPToUI and FPToSI
2581 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2582   MVT DestVT;
2583   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2584     return false;
2585
2586   unsigned SrcReg = getRegForValue(I->getOperand(0));
2587   if (SrcReg == 0)
2588     return false;
2589
2590   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2591   if (SrcVT == MVT::f128)
2592     return false;
2593
2594   unsigned Opc;
2595   if (SrcVT == MVT::f64) {
2596     if (Signed)
2597       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2598     else
2599       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2600   } else {
2601     if (Signed)
2602       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2603     else
2604       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2605   }
2606   unsigned ResultReg = createResultReg(
2607       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2608   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2609       .addReg(SrcReg);
2610   updateValueMap(I, ResultReg);
2611   return true;
2612 }
2613
2614 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2615   MVT DestVT;
2616   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2617     return false;
2618   assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2619           "Unexpected value type.");
2620
2621   unsigned SrcReg = getRegForValue(I->getOperand(0));
2622   if (!SrcReg)
2623     return false;
2624   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2625
2626   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2627
2628   // Handle sign-extension.
2629   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2630     SrcReg =
2631         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2632     if (!SrcReg)
2633       return false;
2634     SrcIsKill = true;
2635   }
2636
2637   unsigned Opc;
2638   if (SrcVT == MVT::i64) {
2639     if (Signed)
2640       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2641     else
2642       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2643   } else {
2644     if (Signed)
2645       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2646     else
2647       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2648   }
2649
2650   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2651                                       SrcIsKill);
2652   updateValueMap(I, ResultReg);
2653   return true;
2654 }
2655
2656 bool AArch64FastISel::fastLowerArguments() {
2657   if (!FuncInfo.CanLowerReturn)
2658     return false;
2659
2660   const Function *F = FuncInfo.Fn;
2661   if (F->isVarArg())
2662     return false;
2663
2664   CallingConv::ID CC = F->getCallingConv();
2665   if (CC != CallingConv::C)
2666     return false;
2667
2668   // Only handle simple cases of up to 8 GPR and FPR each.
2669   unsigned GPRCnt = 0;
2670   unsigned FPRCnt = 0;
2671   unsigned Idx = 0;
2672   for (auto const &Arg : F->args()) {
2673     // The first argument is at index 1.
2674     ++Idx;
2675     if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
2676         F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
2677         F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
2678         F->getAttributes().hasAttribute(Idx, Attribute::Nest))
2679       return false;
2680
2681     Type *ArgTy = Arg.getType();
2682     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2683       return false;
2684
2685     EVT ArgVT = TLI.getValueType(ArgTy);
2686     if (!ArgVT.isSimple())
2687       return false;
2688
2689     MVT VT = ArgVT.getSimpleVT().SimpleTy;
2690     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2691       return false;
2692
2693     if (VT.isVector() &&
2694         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2695       return false;
2696
2697     if (VT >= MVT::i1 && VT <= MVT::i64)
2698       ++GPRCnt;
2699     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2700              VT.is128BitVector())
2701       ++FPRCnt;
2702     else
2703       return false;
2704
2705     if (GPRCnt > 8 || FPRCnt > 8)
2706       return false;
2707   }
2708
2709   static const MCPhysReg Registers[6][8] = {
2710     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2711       AArch64::W5, AArch64::W6, AArch64::W7 },
2712     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2713       AArch64::X5, AArch64::X6, AArch64::X7 },
2714     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2715       AArch64::H5, AArch64::H6, AArch64::H7 },
2716     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2717       AArch64::S5, AArch64::S6, AArch64::S7 },
2718     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2719       AArch64::D5, AArch64::D6, AArch64::D7 },
2720     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2721       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2722   };
2723
2724   unsigned GPRIdx = 0;
2725   unsigned FPRIdx = 0;
2726   for (auto const &Arg : F->args()) {
2727     MVT VT = TLI.getSimpleValueType(Arg.getType());
2728     unsigned SrcReg;
2729     const TargetRegisterClass *RC;
2730     if (VT >= MVT::i1 && VT <= MVT::i32) {
2731       SrcReg = Registers[0][GPRIdx++];
2732       RC = &AArch64::GPR32RegClass;
2733       VT = MVT::i32;
2734     } else if (VT == MVT::i64) {
2735       SrcReg = Registers[1][GPRIdx++];
2736       RC = &AArch64::GPR64RegClass;
2737     } else if (VT == MVT::f16) {
2738       SrcReg = Registers[2][FPRIdx++];
2739       RC = &AArch64::FPR16RegClass;
2740     } else if (VT ==  MVT::f32) {
2741       SrcReg = Registers[3][FPRIdx++];
2742       RC = &AArch64::FPR32RegClass;
2743     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2744       SrcReg = Registers[4][FPRIdx++];
2745       RC = &AArch64::FPR64RegClass;
2746     } else if (VT.is128BitVector()) {
2747       SrcReg = Registers[5][FPRIdx++];
2748       RC = &AArch64::FPR128RegClass;
2749     } else
2750       llvm_unreachable("Unexpected value type.");
2751
2752     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2753     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2754     // Without this, EmitLiveInCopies may eliminate the livein if its only
2755     // use is a bitcast (which isn't turned into an instruction).
2756     unsigned ResultReg = createResultReg(RC);
2757     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2758             TII.get(TargetOpcode::COPY), ResultReg)
2759         .addReg(DstReg, getKillRegState(true));
2760     updateValueMap(&Arg, ResultReg);
2761   }
2762   return true;
2763 }
2764
2765 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
2766                                       SmallVectorImpl<MVT> &OutVTs,
2767                                       unsigned &NumBytes) {
2768   CallingConv::ID CC = CLI.CallConv;
2769   SmallVector<CCValAssign, 16> ArgLocs;
2770   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2771   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2772
2773   // Get a count of how many bytes are to be pushed on the stack.
2774   NumBytes = CCInfo.getNextStackOffset();
2775
2776   // Issue CALLSEQ_START
2777   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
2778   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
2779     .addImm(NumBytes);
2780
2781   // Process the args.
2782   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2783     CCValAssign &VA = ArgLocs[i];
2784     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
2785     MVT ArgVT = OutVTs[VA.getValNo()];
2786
2787     unsigned ArgReg = getRegForValue(ArgVal);
2788     if (!ArgReg)
2789       return false;
2790
2791     // Handle arg promotion: SExt, ZExt, AExt.
2792     switch (VA.getLocInfo()) {
2793     case CCValAssign::Full:
2794       break;
2795     case CCValAssign::SExt: {
2796       MVT DestVT = VA.getLocVT();
2797       MVT SrcVT = ArgVT;
2798       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
2799       if (!ArgReg)
2800         return false;
2801       break;
2802     }
2803     case CCValAssign::AExt:
2804     // Intentional fall-through.
2805     case CCValAssign::ZExt: {
2806       MVT DestVT = VA.getLocVT();
2807       MVT SrcVT = ArgVT;
2808       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
2809       if (!ArgReg)
2810         return false;
2811       break;
2812     }
2813     default:
2814       llvm_unreachable("Unknown arg promotion!");
2815     }
2816
2817     // Now copy/store arg to correct locations.
2818     if (VA.isRegLoc() && !VA.needsCustom()) {
2819       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2820               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
2821       CLI.OutRegs.push_back(VA.getLocReg());
2822     } else if (VA.needsCustom()) {
2823       // FIXME: Handle custom args.
2824       return false;
2825     } else {
2826       assert(VA.isMemLoc() && "Assuming store on stack.");
2827
2828       // Don't emit stores for undef values.
2829       if (isa<UndefValue>(ArgVal))
2830         continue;
2831
2832       // Need to store on the stack.
2833       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
2834
2835       unsigned BEAlign = 0;
2836       if (ArgSize < 8 && !Subtarget->isLittleEndian())
2837         BEAlign = 8 - ArgSize;
2838
2839       Address Addr;
2840       Addr.setKind(Address::RegBase);
2841       Addr.setReg(AArch64::SP);
2842       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
2843
2844       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
2845       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
2846         MachinePointerInfo::getStack(Addr.getOffset()),
2847         MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
2848
2849       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
2850         return false;
2851     }
2852   }
2853   return true;
2854 }
2855
2856 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
2857                                  unsigned NumBytes) {
2858   CallingConv::ID CC = CLI.CallConv;
2859
2860   // Issue CALLSEQ_END
2861   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
2862   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
2863     .addImm(NumBytes).addImm(0);
2864
2865   // Now the return value.
2866   if (RetVT != MVT::isVoid) {
2867     SmallVector<CCValAssign, 16> RVLocs;
2868     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
2869     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
2870
2871     // Only handle a single return value.
2872     if (RVLocs.size() != 1)
2873       return false;
2874
2875     // Copy all of the result registers out of their specified physreg.
2876     MVT CopyVT = RVLocs[0].getValVT();
2877     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
2878     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2879             TII.get(TargetOpcode::COPY), ResultReg)
2880         .addReg(RVLocs[0].getLocReg());
2881     CLI.InRegs.push_back(RVLocs[0].getLocReg());
2882
2883     CLI.ResultReg = ResultReg;
2884     CLI.NumResultRegs = 1;
2885   }
2886
2887   return true;
2888 }
2889
2890 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
2891   CallingConv::ID CC  = CLI.CallConv;
2892   bool IsTailCall     = CLI.IsTailCall;
2893   bool IsVarArg       = CLI.IsVarArg;
2894   const Value *Callee = CLI.Callee;
2895   const char *SymName = CLI.SymName;
2896
2897   if (!Callee && !SymName)
2898     return false;
2899
2900   // Allow SelectionDAG isel to handle tail calls.
2901   if (IsTailCall)
2902     return false;
2903
2904   CodeModel::Model CM = TM.getCodeModel();
2905   // Only support the small and large code model.
2906   if (CM != CodeModel::Small && CM != CodeModel::Large)
2907     return false;
2908
2909   // FIXME: Add large code model support for ELF.
2910   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
2911     return false;
2912
2913   // Let SDISel handle vararg functions.
2914   if (IsVarArg)
2915     return false;
2916
2917   // FIXME: Only handle *simple* calls for now.
2918   MVT RetVT;
2919   if (CLI.RetTy->isVoidTy())
2920     RetVT = MVT::isVoid;
2921   else if (!isTypeLegal(CLI.RetTy, RetVT))
2922     return false;
2923
2924   for (auto Flag : CLI.OutFlags)
2925     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
2926       return false;
2927
2928   // Set up the argument vectors.
2929   SmallVector<MVT, 16> OutVTs;
2930   OutVTs.reserve(CLI.OutVals.size());
2931
2932   for (auto *Val : CLI.OutVals) {
2933     MVT VT;
2934     if (!isTypeLegal(Val->getType(), VT) &&
2935         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
2936       return false;
2937
2938     // We don't handle vector parameters yet.
2939     if (VT.isVector() || VT.getSizeInBits() > 64)
2940       return false;
2941
2942     OutVTs.push_back(VT);
2943   }
2944
2945   Address Addr;
2946   if (Callee && !computeCallAddress(Callee, Addr))
2947     return false;
2948
2949   // Handle the arguments now that we've gotten them.
2950   unsigned NumBytes;
2951   if (!processCallArgs(CLI, OutVTs, NumBytes))
2952     return false;
2953
2954   // Issue the call.
2955   MachineInstrBuilder MIB;
2956   if (CM == CodeModel::Small) {
2957     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
2958     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
2959     if (SymName)
2960       MIB.addExternalSymbol(SymName, 0);
2961     else if (Addr.getGlobalValue())
2962       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
2963     else if (Addr.getReg()) {
2964       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
2965       MIB.addReg(Reg);
2966     } else
2967       return false;
2968   } else {
2969     unsigned CallReg = 0;
2970     if (SymName) {
2971       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
2972       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
2973               ADRPReg)
2974         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
2975
2976       CallReg = createResultReg(&AArch64::GPR64RegClass);
2977       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
2978               CallReg)
2979         .addReg(ADRPReg)
2980         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
2981                            AArch64II::MO_NC);
2982     } else if (Addr.getGlobalValue())
2983       CallReg = materializeGV(Addr.getGlobalValue());
2984     else if (Addr.getReg())
2985       CallReg = Addr.getReg();
2986
2987     if (!CallReg)
2988       return false;
2989
2990     const MCInstrDesc &II = TII.get(AArch64::BLR);
2991     CallReg = constrainOperandRegClass(II, CallReg, 0);
2992     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
2993   }
2994
2995   // Add implicit physical register uses to the call.
2996   for (auto Reg : CLI.OutRegs)
2997     MIB.addReg(Reg, RegState::Implicit);
2998
2999   // Add a register mask with the call-preserved registers.
3000   // Proper defs for return values will be added by setPhysRegsDeadExcept().
3001   MIB.addRegMask(TRI.getCallPreservedMask(CC));
3002
3003   CLI.Call = MIB;
3004
3005   // Finish off the call including any return values.
3006   return finishCall(CLI, RetVT, NumBytes);
3007 }
3008
3009 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3010   if (Alignment)
3011     return Len / Alignment <= 4;
3012   else
3013     return Len < 32;
3014 }
3015
3016 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3017                                          uint64_t Len, unsigned Alignment) {
3018   // Make sure we don't bloat code by inlining very large memcpy's.
3019   if (!isMemCpySmall(Len, Alignment))
3020     return false;
3021
3022   int64_t UnscaledOffset = 0;
3023   Address OrigDest = Dest;
3024   Address OrigSrc = Src;
3025
3026   while (Len) {
3027     MVT VT;
3028     if (!Alignment || Alignment >= 8) {
3029       if (Len >= 8)
3030         VT = MVT::i64;
3031       else if (Len >= 4)
3032         VT = MVT::i32;
3033       else if (Len >= 2)
3034         VT = MVT::i16;
3035       else {
3036         VT = MVT::i8;
3037       }
3038     } else {
3039       // Bound based on alignment.
3040       if (Len >= 4 && Alignment == 4)
3041         VT = MVT::i32;
3042       else if (Len >= 2 && Alignment == 2)
3043         VT = MVT::i16;
3044       else {
3045         VT = MVT::i8;
3046       }
3047     }
3048
3049     unsigned ResultReg = emitLoad(VT, VT, Src);
3050     if (!ResultReg)
3051       return false;
3052
3053     if (!emitStore(VT, ResultReg, Dest))
3054       return false;
3055
3056     int64_t Size = VT.getSizeInBits() / 8;
3057     Len -= Size;
3058     UnscaledOffset += Size;
3059
3060     // We need to recompute the unscaled offset for each iteration.
3061     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3062     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3063   }
3064
3065   return true;
3066 }
3067
3068 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
3069 /// into the user. The condition code will only be updated on success.
3070 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3071                                         const Instruction *I,
3072                                         const Value *Cond) {
3073   if (!isa<ExtractValueInst>(Cond))
3074     return false;
3075
3076   const auto *EV = cast<ExtractValueInst>(Cond);
3077   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3078     return false;
3079
3080   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3081   MVT RetVT;
3082   const Function *Callee = II->getCalledFunction();
3083   Type *RetTy =
3084   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3085   if (!isTypeLegal(RetTy, RetVT))
3086     return false;
3087
3088   if (RetVT != MVT::i32 && RetVT != MVT::i64)
3089     return false;
3090
3091   const Value *LHS = II->getArgOperand(0);
3092   const Value *RHS = II->getArgOperand(1);
3093
3094   // Canonicalize immediate to the RHS.
3095   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3096       isCommutativeIntrinsic(II))
3097     std::swap(LHS, RHS);
3098
3099   // Simplify multiplies.
3100   unsigned IID = II->getIntrinsicID();
3101   switch (IID) {
3102   default:
3103     break;
3104   case Intrinsic::smul_with_overflow:
3105     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3106       if (C->getValue() == 2)
3107         IID = Intrinsic::sadd_with_overflow;
3108     break;
3109   case Intrinsic::umul_with_overflow:
3110     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3111       if (C->getValue() == 2)
3112         IID = Intrinsic::uadd_with_overflow;
3113     break;
3114   }
3115
3116   AArch64CC::CondCode TmpCC;
3117   switch (IID) {
3118   default:
3119     return false;
3120   case Intrinsic::sadd_with_overflow:
3121   case Intrinsic::ssub_with_overflow:
3122     TmpCC = AArch64CC::VS;
3123     break;
3124   case Intrinsic::uadd_with_overflow:
3125     TmpCC = AArch64CC::HS;
3126     break;
3127   case Intrinsic::usub_with_overflow:
3128     TmpCC = AArch64CC::LO;
3129     break;
3130   case Intrinsic::smul_with_overflow:
3131   case Intrinsic::umul_with_overflow:
3132     TmpCC = AArch64CC::NE;
3133     break;
3134   }
3135
3136   // Check if both instructions are in the same basic block.
3137   if (!isValueAvailable(II))
3138     return false;
3139
3140   // Make sure nothing is in the way
3141   BasicBlock::const_iterator Start = I;
3142   BasicBlock::const_iterator End = II;
3143   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3144     // We only expect extractvalue instructions between the intrinsic and the
3145     // instruction to be selected.
3146     if (!isa<ExtractValueInst>(Itr))
3147       return false;
3148
3149     // Check that the extractvalue operand comes from the intrinsic.
3150     const auto *EVI = cast<ExtractValueInst>(Itr);
3151     if (EVI->getAggregateOperand() != II)
3152       return false;
3153   }
3154
3155   CC = TmpCC;
3156   return true;
3157 }
3158
3159 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3160   // FIXME: Handle more intrinsics.
3161   switch (II->getIntrinsicID()) {
3162   default: return false;
3163   case Intrinsic::frameaddress: {
3164     MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
3165     MFI->setFrameAddressIsTaken(true);
3166
3167     const AArch64RegisterInfo *RegInfo =
3168         static_cast<const AArch64RegisterInfo *>(
3169             TM.getSubtargetImpl()->getRegisterInfo());
3170     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3171     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3172     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3173             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3174     // Recursively load frame address
3175     // ldr x0, [fp]
3176     // ldr x0, [x0]
3177     // ldr x0, [x0]
3178     // ...
3179     unsigned DestReg;
3180     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3181     while (Depth--) {
3182       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3183                                 SrcReg, /*IsKill=*/true, 0);
3184       assert(DestReg && "Unexpected LDR instruction emission failure.");
3185       SrcReg = DestReg;
3186     }
3187
3188     updateValueMap(II, SrcReg);
3189     return true;
3190   }
3191   case Intrinsic::memcpy:
3192   case Intrinsic::memmove: {
3193     const auto *MTI = cast<MemTransferInst>(II);
3194     // Don't handle volatile.
3195     if (MTI->isVolatile())
3196       return false;
3197
3198     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
3199     // we would emit dead code because we don't currently handle memmoves.
3200     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3201     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3202       // Small memcpy's are common enough that we want to do them without a call
3203       // if possible.
3204       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3205       unsigned Alignment = MTI->getAlignment();
3206       if (isMemCpySmall(Len, Alignment)) {
3207         Address Dest, Src;
3208         if (!computeAddress(MTI->getRawDest(), Dest) ||
3209             !computeAddress(MTI->getRawSource(), Src))
3210           return false;
3211         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3212           return true;
3213       }
3214     }
3215
3216     if (!MTI->getLength()->getType()->isIntegerTy(64))
3217       return false;
3218
3219     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3220       // Fast instruction selection doesn't support the special
3221       // address spaces.
3222       return false;
3223
3224     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3225     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
3226   }
3227   case Intrinsic::memset: {
3228     const MemSetInst *MSI = cast<MemSetInst>(II);
3229     // Don't handle volatile.
3230     if (MSI->isVolatile())
3231       return false;
3232
3233     if (!MSI->getLength()->getType()->isIntegerTy(64))
3234       return false;
3235
3236     if (MSI->getDestAddressSpace() > 255)
3237       // Fast instruction selection doesn't support the special
3238       // address spaces.
3239       return false;
3240
3241     return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
3242   }
3243   case Intrinsic::sin:
3244   case Intrinsic::cos:
3245   case Intrinsic::pow: {
3246     MVT RetVT;
3247     if (!isTypeLegal(II->getType(), RetVT))
3248       return false;
3249
3250     if (RetVT != MVT::f32 && RetVT != MVT::f64)
3251       return false;
3252
3253     static const RTLIB::Libcall LibCallTable[3][2] = {
3254       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3255       { RTLIB::COS_F32, RTLIB::COS_F64 },
3256       { RTLIB::POW_F32, RTLIB::POW_F64 }
3257     };
3258     RTLIB::Libcall LC;
3259     bool Is64Bit = RetVT == MVT::f64;
3260     switch (II->getIntrinsicID()) {
3261     default:
3262       llvm_unreachable("Unexpected intrinsic.");
3263     case Intrinsic::sin:
3264       LC = LibCallTable[0][Is64Bit];
3265       break;
3266     case Intrinsic::cos:
3267       LC = LibCallTable[1][Is64Bit];
3268       break;
3269     case Intrinsic::pow:
3270       LC = LibCallTable[2][Is64Bit];
3271       break;
3272     }
3273
3274     ArgListTy Args;
3275     Args.reserve(II->getNumArgOperands());
3276
3277     // Populate the argument list.
3278     for (auto &Arg : II->arg_operands()) {
3279       ArgListEntry Entry;
3280       Entry.Val = Arg;
3281       Entry.Ty = Arg->getType();
3282       Args.push_back(Entry);
3283     }
3284
3285     CallLoweringInfo CLI;
3286     CLI.setCallee(TLI.getLibcallCallingConv(LC), II->getType(),
3287                   TLI.getLibcallName(LC), std::move(Args));
3288     if (!lowerCallTo(CLI))
3289       return false;
3290     updateValueMap(II, CLI.ResultReg);
3291     return true;
3292   }
3293   case Intrinsic::trap: {
3294     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3295         .addImm(1);
3296     return true;
3297   }
3298   case Intrinsic::sqrt: {
3299     Type *RetTy = II->getCalledFunction()->getReturnType();
3300
3301     MVT VT;
3302     if (!isTypeLegal(RetTy, VT))
3303       return false;
3304
3305     unsigned Op0Reg = getRegForValue(II->getOperand(0));
3306     if (!Op0Reg)
3307       return false;
3308     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3309
3310     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3311     if (!ResultReg)
3312       return false;
3313
3314     updateValueMap(II, ResultReg);
3315     return true;
3316   }
3317   case Intrinsic::sadd_with_overflow:
3318   case Intrinsic::uadd_with_overflow:
3319   case Intrinsic::ssub_with_overflow:
3320   case Intrinsic::usub_with_overflow:
3321   case Intrinsic::smul_with_overflow:
3322   case Intrinsic::umul_with_overflow: {
3323     // This implements the basic lowering of the xalu with overflow intrinsics.
3324     const Function *Callee = II->getCalledFunction();
3325     auto *Ty = cast<StructType>(Callee->getReturnType());
3326     Type *RetTy = Ty->getTypeAtIndex(0U);
3327
3328     MVT VT;
3329     if (!isTypeLegal(RetTy, VT))
3330       return false;
3331
3332     if (VT != MVT::i32 && VT != MVT::i64)
3333       return false;
3334
3335     const Value *LHS = II->getArgOperand(0);
3336     const Value *RHS = II->getArgOperand(1);
3337     // Canonicalize immediate to the RHS.
3338     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3339         isCommutativeIntrinsic(II))
3340       std::swap(LHS, RHS);
3341
3342     // Simplify multiplies.
3343     unsigned IID = II->getIntrinsicID();
3344     switch (IID) {
3345     default:
3346       break;
3347     case Intrinsic::smul_with_overflow:
3348       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3349         if (C->getValue() == 2) {
3350           IID = Intrinsic::sadd_with_overflow;
3351           RHS = LHS;
3352         }
3353       break;
3354     case Intrinsic::umul_with_overflow:
3355       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3356         if (C->getValue() == 2) {
3357           IID = Intrinsic::uadd_with_overflow;
3358           RHS = LHS;
3359         }
3360       break;
3361     }
3362
3363     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3364     AArch64CC::CondCode CC = AArch64CC::Invalid;
3365     switch (IID) {
3366     default: llvm_unreachable("Unexpected intrinsic!");
3367     case Intrinsic::sadd_with_overflow:
3368       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3369       CC = AArch64CC::VS;
3370       break;
3371     case Intrinsic::uadd_with_overflow:
3372       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3373       CC = AArch64CC::HS;
3374       break;
3375     case Intrinsic::ssub_with_overflow:
3376       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3377       CC = AArch64CC::VS;
3378       break;
3379     case Intrinsic::usub_with_overflow:
3380       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3381       CC = AArch64CC::LO;
3382       break;
3383     case Intrinsic::smul_with_overflow: {
3384       CC = AArch64CC::NE;
3385       unsigned LHSReg = getRegForValue(LHS);
3386       if (!LHSReg)
3387         return false;
3388       bool LHSIsKill = hasTrivialKill(LHS);
3389
3390       unsigned RHSReg = getRegForValue(RHS);
3391       if (!RHSReg)
3392         return false;
3393       bool RHSIsKill = hasTrivialKill(RHS);
3394
3395       if (VT == MVT::i32) {
3396         MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3397         unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3398                                        /*IsKill=*/false, 32);
3399         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3400                                             AArch64::sub_32);
3401         ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3402                                               AArch64::sub_32);
3403         emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3404                     AArch64_AM::ASR, 31, /*WantResult=*/false);
3405       } else {
3406         assert(VT == MVT::i64 && "Unexpected value type.");
3407         MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3408         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3409                                         RHSReg, RHSIsKill);
3410         emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3411                     AArch64_AM::ASR, 63, /*WantResult=*/false);
3412       }
3413       break;
3414     }
3415     case Intrinsic::umul_with_overflow: {
3416       CC = AArch64CC::NE;
3417       unsigned LHSReg = getRegForValue(LHS);
3418       if (!LHSReg)
3419         return false;
3420       bool LHSIsKill = hasTrivialKill(LHS);
3421
3422       unsigned RHSReg = getRegForValue(RHS);
3423       if (!RHSReg)
3424         return false;
3425       bool RHSIsKill = hasTrivialKill(RHS);
3426
3427       if (VT == MVT::i32) {
3428         MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3429         emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3430                     /*IsKill=*/false, AArch64_AM::LSR, 32,
3431                     /*WantResult=*/false);
3432         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3433                                             AArch64::sub_32);
3434       } else {
3435         assert(VT == MVT::i64 && "Unexpected value type.");
3436         MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3437         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3438                                         RHSReg, RHSIsKill);
3439         emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3440                     /*IsKill=*/false, /*WantResult=*/false);
3441       }
3442       break;
3443     }
3444     }
3445
3446     if (MulReg) {
3447       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3448       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3449               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3450     }
3451
3452     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3453                                   AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3454                                   /*IsKill=*/true, getInvertedCondCode(CC));
3455     (void)ResultReg2;
3456     assert((ResultReg1 + 1) == ResultReg2 &&
3457            "Nonconsecutive result registers.");
3458     updateValueMap(II, ResultReg1, 2);
3459     return true;
3460   }
3461   }
3462   return false;
3463 }
3464
3465 bool AArch64FastISel::selectRet(const Instruction *I) {
3466   const ReturnInst *Ret = cast<ReturnInst>(I);
3467   const Function &F = *I->getParent()->getParent();
3468
3469   if (!FuncInfo.CanLowerReturn)
3470     return false;
3471
3472   if (F.isVarArg())
3473     return false;
3474
3475   // Build a list of return value registers.
3476   SmallVector<unsigned, 4> RetRegs;
3477
3478   if (Ret->getNumOperands() > 0) {
3479     CallingConv::ID CC = F.getCallingConv();
3480     SmallVector<ISD::OutputArg, 4> Outs;
3481     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
3482
3483     // Analyze operands of the call, assigning locations to each operand.
3484     SmallVector<CCValAssign, 16> ValLocs;
3485     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3486     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3487                                                      : RetCC_AArch64_AAPCS;
3488     CCInfo.AnalyzeReturn(Outs, RetCC);
3489
3490     // Only handle a single return value for now.
3491     if (ValLocs.size() != 1)
3492       return false;
3493
3494     CCValAssign &VA = ValLocs[0];
3495     const Value *RV = Ret->getOperand(0);
3496
3497     // Don't bother handling odd stuff for now.
3498     if ((VA.getLocInfo() != CCValAssign::Full) &&
3499         (VA.getLocInfo() != CCValAssign::BCvt))
3500       return false;
3501
3502     // Only handle register returns for now.
3503     if (!VA.isRegLoc())
3504       return false;
3505
3506     unsigned Reg = getRegForValue(RV);
3507     if (Reg == 0)
3508       return false;
3509
3510     unsigned SrcReg = Reg + VA.getValNo();
3511     unsigned DestReg = VA.getLocReg();
3512     // Avoid a cross-class copy. This is very unlikely.
3513     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3514       return false;
3515
3516     EVT RVEVT = TLI.getValueType(RV->getType());
3517     if (!RVEVT.isSimple())
3518       return false;
3519
3520     // Vectors (of > 1 lane) in big endian need tricky handling.
3521     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3522         !Subtarget->isLittleEndian())
3523       return false;
3524
3525     MVT RVVT = RVEVT.getSimpleVT();
3526     if (RVVT == MVT::f128)
3527       return false;
3528
3529     MVT DestVT = VA.getValVT();
3530     // Special handling for extended integers.
3531     if (RVVT != DestVT) {
3532       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3533         return false;
3534
3535       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3536         return false;
3537
3538       bool IsZExt = Outs[0].Flags.isZExt();
3539       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3540       if (SrcReg == 0)
3541         return false;
3542     }
3543
3544     // Make the copy.
3545     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3546             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3547
3548     // Add register to return instruction.
3549     RetRegs.push_back(VA.getLocReg());
3550   }
3551
3552   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3553                                     TII.get(AArch64::RET_ReallyLR));
3554   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
3555     MIB.addReg(RetRegs[i], RegState::Implicit);
3556   return true;
3557 }
3558
3559 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3560   Type *DestTy = I->getType();
3561   Value *Op = I->getOperand(0);
3562   Type *SrcTy = Op->getType();
3563
3564   EVT SrcEVT = TLI.getValueType(SrcTy, true);
3565   EVT DestEVT = TLI.getValueType(DestTy, true);
3566   if (!SrcEVT.isSimple())
3567     return false;
3568   if (!DestEVT.isSimple())
3569     return false;
3570
3571   MVT SrcVT = SrcEVT.getSimpleVT();
3572   MVT DestVT = DestEVT.getSimpleVT();
3573
3574   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3575       SrcVT != MVT::i8)
3576     return false;
3577   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3578       DestVT != MVT::i1)
3579     return false;
3580
3581   unsigned SrcReg = getRegForValue(Op);
3582   if (!SrcReg)
3583     return false;
3584   bool SrcIsKill = hasTrivialKill(Op);
3585
3586   // If we're truncating from i64 to a smaller non-legal type then generate an
3587   // AND. Otherwise, we know the high bits are undefined and a truncate only
3588   // generate a COPY. We cannot mark the source register also as result
3589   // register, because this can incorrectly transfer the kill flag onto the
3590   // source register.
3591   unsigned ResultReg;
3592   if (SrcVT == MVT::i64) {
3593     uint64_t Mask = 0;
3594     switch (DestVT.SimpleTy) {
3595     default:
3596       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3597       return false;
3598     case MVT::i1:
3599       Mask = 0x1;
3600       break;
3601     case MVT::i8:
3602       Mask = 0xff;
3603       break;
3604     case MVT::i16:
3605       Mask = 0xffff;
3606       break;
3607     }
3608     // Issue an extract_subreg to get the lower 32-bits.
3609     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3610                                                 AArch64::sub_32);
3611     // Create the AND instruction which performs the actual truncation.
3612     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3613     assert(ResultReg && "Unexpected AND instruction emission failure.");
3614   } else {
3615     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3616     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3617             TII.get(TargetOpcode::COPY), ResultReg)
3618         .addReg(SrcReg, getKillRegState(SrcIsKill));
3619   }
3620
3621   updateValueMap(I, ResultReg);
3622   return true;
3623 }
3624
3625 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3626   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3627           DestVT == MVT::i64) &&
3628          "Unexpected value type.");
3629   // Handle i8 and i16 as i32.
3630   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3631     DestVT = MVT::i32;
3632
3633   if (IsZExt) {
3634     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3635     assert(ResultReg && "Unexpected AND instruction emission failure.");
3636     if (DestVT == MVT::i64) {
3637       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
3638       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
3639       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3640       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3641               TII.get(AArch64::SUBREG_TO_REG), Reg64)
3642           .addImm(0)
3643           .addReg(ResultReg)
3644           .addImm(AArch64::sub_32);
3645       ResultReg = Reg64;
3646     }
3647     return ResultReg;
3648   } else {
3649     if (DestVT == MVT::i64) {
3650       // FIXME: We're SExt i1 to i64.
3651       return 0;
3652     }
3653     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3654                             /*TODO:IsKill=*/false, 0, 0);
3655   }
3656 }
3657
3658 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3659                                       unsigned Op1, bool Op1IsKill) {
3660   unsigned Opc, ZReg;
3661   switch (RetVT.SimpleTy) {
3662   default: return 0;
3663   case MVT::i8:
3664   case MVT::i16:
3665   case MVT::i32:
3666     RetVT = MVT::i32;
3667     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3668   case MVT::i64:
3669     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3670   }
3671
3672   const TargetRegisterClass *RC =
3673       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3674   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3675                           /*IsKill=*/ZReg, true);
3676 }
3677
3678 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3679                                         unsigned Op1, bool Op1IsKill) {
3680   if (RetVT != MVT::i64)
3681     return 0;
3682
3683   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3684                           Op0, Op0IsKill, Op1, Op1IsKill,
3685                           AArch64::XZR, /*IsKill=*/true);
3686 }
3687
3688 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3689                                         unsigned Op1, bool Op1IsKill) {
3690   if (RetVT != MVT::i64)
3691     return 0;
3692
3693   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3694                           Op0, Op0IsKill, Op1, Op1IsKill,
3695                           AArch64::XZR, /*IsKill=*/true);
3696 }
3697
3698 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3699                                      unsigned Op1Reg, bool Op1IsKill) {
3700   unsigned Opc = 0;
3701   bool NeedTrunc = false;
3702   uint64_t Mask = 0;
3703   switch (RetVT.SimpleTy) {
3704   default: return 0;
3705   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
3706   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
3707   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
3708   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
3709   }
3710
3711   const TargetRegisterClass *RC =
3712       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3713   if (NeedTrunc) {
3714     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3715     Op1IsKill = true;
3716   }
3717   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3718                                        Op1IsKill);
3719   if (NeedTrunc)
3720     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3721   return ResultReg;
3722 }
3723
3724 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3725                                      bool Op0IsKill, uint64_t Shift,
3726                                      bool IsZext) {
3727   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3728          "Unexpected source/return type pair.");
3729   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
3730           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
3731          "Unexpected source value type.");
3732   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3733           RetVT == MVT::i64) && "Unexpected return value type.");
3734
3735   bool Is64Bit = (RetVT == MVT::i64);
3736   unsigned RegSize = Is64Bit ? 64 : 32;
3737   unsigned DstBits = RetVT.getSizeInBits();
3738   unsigned SrcBits = SrcVT.getSizeInBits();
3739
3740   // Don't deal with undefined shifts.
3741   if (Shift >= DstBits)
3742     return 0;
3743
3744   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3745   // {S|U}BFM Wd, Wn, #r, #s
3746   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
3747
3748   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3749   // %2 = shl i16 %1, 4
3750   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
3751   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
3752   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
3753   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
3754
3755   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3756   // %2 = shl i16 %1, 8
3757   // Wd<32+7-24,32-24> = Wn<7:0>
3758   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
3759   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
3760   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
3761
3762   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3763   // %2 = shl i16 %1, 12
3764   // Wd<32+3-20,32-20> = Wn<3:0>
3765   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
3766   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
3767   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
3768
3769   unsigned ImmR = RegSize - Shift;
3770   // Limit the width to the length of the source type.
3771   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
3772   static const unsigned OpcTable[2][2] = {
3773     {AArch64::SBFMWri, AArch64::SBFMXri},
3774     {AArch64::UBFMWri, AArch64::UBFMXri}
3775   };
3776   unsigned Opc = OpcTable[IsZext][Is64Bit];
3777   const TargetRegisterClass *RC =
3778       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3779   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3780     unsigned TmpReg = MRI.createVirtualRegister(RC);
3781     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3782             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3783         .addImm(0)
3784         .addReg(Op0, getKillRegState(Op0IsKill))
3785         .addImm(AArch64::sub_32);
3786     Op0 = TmpReg;
3787     Op0IsKill = true;
3788   }
3789   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3790 }
3791
3792 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3793                                      unsigned Op1Reg, bool Op1IsKill) {
3794   unsigned Opc = 0;
3795   bool NeedTrunc = false;
3796   uint64_t Mask = 0;
3797   switch (RetVT.SimpleTy) {
3798   default: return 0;
3799   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
3800   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
3801   case MVT::i32: Opc = AArch64::LSRVWr; break;
3802   case MVT::i64: Opc = AArch64::LSRVXr; break;
3803   }
3804
3805   const TargetRegisterClass *RC =
3806       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3807   if (NeedTrunc) {
3808     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
3809     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3810     Op0IsKill = Op1IsKill = true;
3811   }
3812   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3813                                        Op1IsKill);
3814   if (NeedTrunc)
3815     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3816   return ResultReg;
3817 }
3818
3819 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3820                                      bool Op0IsKill, uint64_t Shift,
3821                                      bool IsZExt) {
3822   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3823          "Unexpected source/return type pair.");
3824   assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3825           SrcVT == MVT::i64) && "Unexpected source value type.");
3826   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3827           RetVT == MVT::i64) && "Unexpected return value type.");
3828
3829   bool Is64Bit = (RetVT == MVT::i64);
3830   unsigned RegSize = Is64Bit ? 64 : 32;
3831   unsigned DstBits = RetVT.getSizeInBits();
3832   unsigned SrcBits = SrcVT.getSizeInBits();
3833
3834   // Don't deal with undefined shifts.
3835   if (Shift >= DstBits)
3836     return 0;
3837
3838   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3839   // {S|U}BFM Wd, Wn, #r, #s
3840   // Wd<s-r:0> = Wn<s:r> when r <= s
3841
3842   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3843   // %2 = lshr i16 %1, 4
3844   // Wd<7-4:0> = Wn<7:4>
3845   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
3846   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
3847   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
3848
3849   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3850   // %2 = lshr i16 %1, 8
3851   // Wd<7-7,0> = Wn<7:7>
3852   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
3853   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3854   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3855
3856   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3857   // %2 = lshr i16 %1, 12
3858   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
3859   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
3860   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3861   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3862
3863   if (Shift >= SrcBits && IsZExt)
3864     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
3865
3866   // It is not possible to fold a sign-extend into the LShr instruction. In this
3867   // case emit a sign-extend.
3868   if (!IsZExt) {
3869     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
3870     if (!Op0)
3871       return 0;
3872     Op0IsKill = true;
3873     SrcVT = RetVT;
3874     SrcBits = SrcVT.getSizeInBits();
3875     IsZExt = true;
3876   }
3877
3878   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
3879   unsigned ImmS = SrcBits - 1;
3880   static const unsigned OpcTable[2][2] = {
3881     {AArch64::SBFMWri, AArch64::SBFMXri},
3882     {AArch64::UBFMWri, AArch64::UBFMXri}
3883   };
3884   unsigned Opc = OpcTable[IsZExt][Is64Bit];
3885   const TargetRegisterClass *RC =
3886       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3887   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3888     unsigned TmpReg = MRI.createVirtualRegister(RC);
3889     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3890             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3891         .addImm(0)
3892         .addReg(Op0, getKillRegState(Op0IsKill))
3893         .addImm(AArch64::sub_32);
3894     Op0 = TmpReg;
3895     Op0IsKill = true;
3896   }
3897   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3898 }
3899
3900 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3901                                      unsigned Op1Reg, bool Op1IsKill) {
3902   unsigned Opc = 0;
3903   bool NeedTrunc = false;
3904   uint64_t Mask = 0;
3905   switch (RetVT.SimpleTy) {
3906   default: return 0;
3907   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
3908   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
3909   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
3910   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
3911   }
3912
3913   const TargetRegisterClass *RC =
3914       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3915   if (NeedTrunc) {
3916     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
3917     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3918     Op0IsKill = Op1IsKill = true;
3919   }
3920   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3921                                        Op1IsKill);
3922   if (NeedTrunc)
3923     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3924   return ResultReg;
3925 }
3926
3927 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3928                                      bool Op0IsKill, uint64_t Shift,
3929                                      bool IsZExt) {
3930   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3931          "Unexpected source/return type pair.");
3932   assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3933           SrcVT == MVT::i64) && "Unexpected source value type.");
3934   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3935           RetVT == MVT::i64) && "Unexpected return value type.");
3936
3937   bool Is64Bit = (RetVT == MVT::i64);
3938   unsigned RegSize = Is64Bit ? 64 : 32;
3939   unsigned DstBits = RetVT.getSizeInBits();
3940   unsigned SrcBits = SrcVT.getSizeInBits();
3941
3942   // Don't deal with undefined shifts.
3943   if (Shift >= DstBits)
3944     return 0;
3945
3946   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3947   // {S|U}BFM Wd, Wn, #r, #s
3948   // Wd<s-r:0> = Wn<s:r> when r <= s
3949
3950   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3951   // %2 = ashr i16 %1, 4
3952   // Wd<7-4:0> = Wn<7:4>
3953   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
3954   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
3955   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
3956
3957   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3958   // %2 = ashr i16 %1, 8
3959   // Wd<7-7,0> = Wn<7:7>
3960   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
3961   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3962   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3963
3964   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3965   // %2 = ashr i16 %1, 12
3966   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
3967   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
3968   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3969   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3970
3971   if (Shift >= SrcBits && IsZExt)
3972     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
3973
3974   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
3975   unsigned ImmS = SrcBits - 1;
3976   static const unsigned OpcTable[2][2] = {
3977     {AArch64::SBFMWri, AArch64::SBFMXri},
3978     {AArch64::UBFMWri, AArch64::UBFMXri}
3979   };
3980   unsigned Opc = OpcTable[IsZExt][Is64Bit];
3981   const TargetRegisterClass *RC =
3982       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3983   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3984     unsigned TmpReg = MRI.createVirtualRegister(RC);
3985     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3986             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3987         .addImm(0)
3988         .addReg(Op0, getKillRegState(Op0IsKill))
3989         .addImm(AArch64::sub_32);
3990     Op0 = TmpReg;
3991     Op0IsKill = true;
3992   }
3993   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3994 }
3995
3996 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
3997                                      bool IsZExt) {
3998   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
3999
4000   // FastISel does not have plumbing to deal with extensions where the SrcVT or
4001   // DestVT are odd things, so test to make sure that they are both types we can
4002   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4003   // bail out to SelectionDAG.
4004   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4005        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4006       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
4007        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
4008     return 0;
4009
4010   unsigned Opc;
4011   unsigned Imm = 0;
4012
4013   switch (SrcVT.SimpleTy) {
4014   default:
4015     return 0;
4016   case MVT::i1:
4017     return emiti1Ext(SrcReg, DestVT, IsZExt);
4018   case MVT::i8:
4019     if (DestVT == MVT::i64)
4020       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4021     else
4022       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4023     Imm = 7;
4024     break;
4025   case MVT::i16:
4026     if (DestVT == MVT::i64)
4027       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4028     else
4029       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4030     Imm = 15;
4031     break;
4032   case MVT::i32:
4033     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4034     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4035     Imm = 31;
4036     break;
4037   }
4038
4039   // Handle i8 and i16 as i32.
4040   if (DestVT == MVT::i8 || DestVT == MVT::i16)
4041     DestVT = MVT::i32;
4042   else if (DestVT == MVT::i64) {
4043     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4044     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4045             TII.get(AArch64::SUBREG_TO_REG), Src64)
4046         .addImm(0)
4047         .addReg(SrcReg)
4048         .addImm(AArch64::sub_32);
4049     SrcReg = Src64;
4050   }
4051
4052   const TargetRegisterClass *RC =
4053       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4054   return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4055 }
4056
4057 static bool isZExtLoad(const MachineInstr *LI) {
4058   switch (LI->getOpcode()) {
4059   default:
4060     return false;
4061   case AArch64::LDURBBi:
4062   case AArch64::LDURHHi:
4063   case AArch64::LDURWi:
4064   case AArch64::LDRBBui:
4065   case AArch64::LDRHHui:
4066   case AArch64::LDRWui:
4067   case AArch64::LDRBBroX:
4068   case AArch64::LDRHHroX:
4069   case AArch64::LDRWroX:
4070   case AArch64::LDRBBroW:
4071   case AArch64::LDRHHroW:
4072   case AArch64::LDRWroW:
4073     return true;
4074   }
4075 }
4076
4077 static bool isSExtLoad(const MachineInstr *LI) {
4078   switch (LI->getOpcode()) {
4079   default:
4080     return false;
4081   case AArch64::LDURSBWi:
4082   case AArch64::LDURSHWi:
4083   case AArch64::LDURSBXi:
4084   case AArch64::LDURSHXi:
4085   case AArch64::LDURSWi:
4086   case AArch64::LDRSBWui:
4087   case AArch64::LDRSHWui:
4088   case AArch64::LDRSBXui:
4089   case AArch64::LDRSHXui:
4090   case AArch64::LDRSWui:
4091   case AArch64::LDRSBWroX:
4092   case AArch64::LDRSHWroX:
4093   case AArch64::LDRSBXroX:
4094   case AArch64::LDRSHXroX:
4095   case AArch64::LDRSWroX:
4096   case AArch64::LDRSBWroW:
4097   case AArch64::LDRSHWroW:
4098   case AArch64::LDRSBXroW:
4099   case AArch64::LDRSHXroW:
4100   case AArch64::LDRSWroW:
4101     return true;
4102   }
4103 }
4104
4105 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4106                                          MVT SrcVT) {
4107   const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4108   if (!LI || !LI->hasOneUse())
4109     return false;
4110
4111   // Check if the load instruction has already been selected.
4112   unsigned Reg = lookUpRegForValue(LI);
4113   if (!Reg)
4114     return false;
4115
4116   MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4117   if (!MI)
4118     return false;
4119
4120   // Check if the correct load instruction has been emitted - SelectionDAG might
4121   // have emitted a zero-extending load, but we need a sign-extending load.
4122   bool IsZExt = isa<ZExtInst>(I);
4123   const auto *LoadMI = MI;
4124   if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4125       LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4126     unsigned LoadReg = MI->getOperand(1).getReg();
4127     LoadMI = MRI.getUniqueVRegDef(LoadReg);
4128     assert(LoadMI && "Expected valid instruction");
4129   }
4130   if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4131     return false;
4132
4133   // Nothing to be done.
4134   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4135     updateValueMap(I, Reg);
4136     return true;
4137   }
4138
4139   if (IsZExt) {
4140     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4141     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4142             TII.get(AArch64::SUBREG_TO_REG), Reg64)
4143         .addImm(0)
4144         .addReg(Reg, getKillRegState(true))
4145         .addImm(AArch64::sub_32);
4146     Reg = Reg64;
4147   } else {
4148     assert((MI->getOpcode() == TargetOpcode::COPY &&
4149             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4150            "Expected copy instruction");
4151     Reg = MI->getOperand(1).getReg();
4152     MI->eraseFromParent();
4153   }
4154   updateValueMap(I, Reg);
4155   return true;
4156 }
4157
4158 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4159   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4160          "Unexpected integer extend instruction.");
4161   MVT RetVT;
4162   MVT SrcVT;
4163   if (!isTypeSupported(I->getType(), RetVT))
4164     return false;
4165
4166   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4167     return false;
4168
4169   // Try to optimize already sign-/zero-extended values from load instructions.
4170   if (optimizeIntExtLoad(I, RetVT, SrcVT))
4171     return true;
4172
4173   unsigned SrcReg = getRegForValue(I->getOperand(0));
4174   if (!SrcReg)
4175     return false;
4176   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4177
4178   // Try to optimize already sign-/zero-extended values from function arguments.
4179   bool IsZExt = isa<ZExtInst>(I);
4180   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4181     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4182       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4183         unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4184         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4185                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4186             .addImm(0)
4187             .addReg(SrcReg, getKillRegState(SrcIsKill))
4188             .addImm(AArch64::sub_32);
4189         SrcReg = ResultReg;
4190       }
4191       updateValueMap(I, SrcReg);
4192       return true;
4193     }
4194   }
4195
4196   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4197   if (!ResultReg)
4198     return false;
4199
4200   updateValueMap(I, ResultReg);
4201   return true;
4202 }
4203
4204 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4205   EVT DestEVT = TLI.getValueType(I->getType(), true);
4206   if (!DestEVT.isSimple())
4207     return false;
4208
4209   MVT DestVT = DestEVT.getSimpleVT();
4210   if (DestVT != MVT::i64 && DestVT != MVT::i32)
4211     return false;
4212
4213   unsigned DivOpc;
4214   bool Is64bit = (DestVT == MVT::i64);
4215   switch (ISDOpcode) {
4216   default:
4217     return false;
4218   case ISD::SREM:
4219     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4220     break;
4221   case ISD::UREM:
4222     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4223     break;
4224   }
4225   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4226   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4227   if (!Src0Reg)
4228     return false;
4229   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4230
4231   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4232   if (!Src1Reg)
4233     return false;
4234   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4235
4236   const TargetRegisterClass *RC =
4237       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4238   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4239                                      Src1Reg, /*IsKill=*/false);
4240   assert(QuotReg && "Unexpected DIV instruction emission failure.");
4241   // The remainder is computed as numerator - (quotient * denominator) using the
4242   // MSUB instruction.
4243   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4244                                         Src1Reg, Src1IsKill, Src0Reg,
4245                                         Src0IsKill);
4246   updateValueMap(I, ResultReg);
4247   return true;
4248 }
4249
4250 bool AArch64FastISel::selectMul(const Instruction *I) {
4251   MVT VT;
4252   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4253     return false;
4254
4255   if (VT.isVector())
4256     return selectBinaryOp(I, ISD::MUL);
4257
4258   const Value *Src0 = I->getOperand(0);
4259   const Value *Src1 = I->getOperand(1);
4260   if (const auto *C = dyn_cast<ConstantInt>(Src0))
4261     if (C->getValue().isPowerOf2())
4262       std::swap(Src0, Src1);
4263
4264   // Try to simplify to a shift instruction.
4265   if (const auto *C = dyn_cast<ConstantInt>(Src1))
4266     if (C->getValue().isPowerOf2()) {
4267       uint64_t ShiftVal = C->getValue().logBase2();
4268       MVT SrcVT = VT;
4269       bool IsZExt = true;
4270       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4271         if (!isIntExtFree(ZExt)) {
4272           MVT VT;
4273           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4274             SrcVT = VT;
4275             IsZExt = true;
4276             Src0 = ZExt->getOperand(0);
4277           }
4278         }
4279       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4280         if (!isIntExtFree(SExt)) {
4281           MVT VT;
4282           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4283             SrcVT = VT;
4284             IsZExt = false;
4285             Src0 = SExt->getOperand(0);
4286           }
4287         }
4288       }
4289
4290       unsigned Src0Reg = getRegForValue(Src0);
4291       if (!Src0Reg)
4292         return false;
4293       bool Src0IsKill = hasTrivialKill(Src0);
4294
4295       unsigned ResultReg =
4296           emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4297
4298       if (ResultReg) {
4299         updateValueMap(I, ResultReg);
4300         return true;
4301       }
4302     }
4303
4304   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4305   if (!Src0Reg)
4306     return false;
4307   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4308
4309   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4310   if (!Src1Reg)
4311     return false;
4312   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4313
4314   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4315
4316   if (!ResultReg)
4317     return false;
4318
4319   updateValueMap(I, ResultReg);
4320   return true;
4321 }
4322
4323 bool AArch64FastISel::selectShift(const Instruction *I) {
4324   MVT RetVT;
4325   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4326     return false;
4327
4328   if (RetVT.isVector())
4329     return selectOperator(I, I->getOpcode());
4330
4331   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4332     unsigned ResultReg = 0;
4333     uint64_t ShiftVal = C->getZExtValue();
4334     MVT SrcVT = RetVT;
4335     bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true;
4336     const Value *Op0 = I->getOperand(0);
4337     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4338       if (!isIntExtFree(ZExt)) {
4339         MVT TmpVT;
4340         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4341           SrcVT = TmpVT;
4342           IsZExt = true;
4343           Op0 = ZExt->getOperand(0);
4344         }
4345       }
4346     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4347       if (!isIntExtFree(SExt)) {
4348         MVT TmpVT;
4349         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4350           SrcVT = TmpVT;
4351           IsZExt = false;
4352           Op0 = SExt->getOperand(0);
4353         }
4354       }
4355     }
4356
4357     unsigned Op0Reg = getRegForValue(Op0);
4358     if (!Op0Reg)
4359       return false;
4360     bool Op0IsKill = hasTrivialKill(Op0);
4361
4362     switch (I->getOpcode()) {
4363     default: llvm_unreachable("Unexpected instruction.");
4364     case Instruction::Shl:
4365       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4366       break;
4367     case Instruction::AShr:
4368       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4369       break;
4370     case Instruction::LShr:
4371       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4372       break;
4373     }
4374     if (!ResultReg)
4375       return false;
4376
4377     updateValueMap(I, ResultReg);
4378     return true;
4379   }
4380
4381   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4382   if (!Op0Reg)
4383     return false;
4384   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4385
4386   unsigned Op1Reg = getRegForValue(I->getOperand(1));
4387   if (!Op1Reg)
4388     return false;
4389   bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4390
4391   unsigned ResultReg = 0;
4392   switch (I->getOpcode()) {
4393   default: llvm_unreachable("Unexpected instruction.");
4394   case Instruction::Shl:
4395     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4396     break;
4397   case Instruction::AShr:
4398     ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4399     break;
4400   case Instruction::LShr:
4401     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4402     break;
4403   }
4404
4405   if (!ResultReg)
4406     return false;
4407
4408   updateValueMap(I, ResultReg);
4409   return true;
4410 }
4411
4412 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4413   MVT RetVT, SrcVT;
4414
4415   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4416     return false;
4417   if (!isTypeLegal(I->getType(), RetVT))
4418     return false;
4419
4420   unsigned Opc;
4421   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4422     Opc = AArch64::FMOVWSr;
4423   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4424     Opc = AArch64::FMOVXDr;
4425   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4426     Opc = AArch64::FMOVSWr;
4427   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4428     Opc = AArch64::FMOVDXr;
4429   else
4430     return false;
4431
4432   const TargetRegisterClass *RC = nullptr;
4433   switch (RetVT.SimpleTy) {
4434   default: llvm_unreachable("Unexpected value type.");
4435   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4436   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4437   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4438   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4439   }
4440   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4441   if (!Op0Reg)
4442     return false;
4443   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4444   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4445
4446   if (!ResultReg)
4447     return false;
4448
4449   updateValueMap(I, ResultReg);
4450   return true;
4451 }
4452
4453 bool AArch64FastISel::selectFRem(const Instruction *I) {
4454   MVT RetVT;
4455   if (!isTypeLegal(I->getType(), RetVT))
4456     return false;
4457
4458   RTLIB::Libcall LC;
4459   switch (RetVT.SimpleTy) {
4460   default:
4461     return false;
4462   case MVT::f32:
4463     LC = RTLIB::REM_F32;
4464     break;
4465   case MVT::f64:
4466     LC = RTLIB::REM_F64;
4467     break;
4468   }
4469
4470   ArgListTy Args;
4471   Args.reserve(I->getNumOperands());
4472
4473   // Populate the argument list.
4474   for (auto &Arg : I->operands()) {
4475     ArgListEntry Entry;
4476     Entry.Val = Arg;
4477     Entry.Ty = Arg->getType();
4478     Args.push_back(Entry);
4479   }
4480
4481   CallLoweringInfo CLI;
4482   CLI.setCallee(TLI.getLibcallCallingConv(LC), I->getType(),
4483                 TLI.getLibcallName(LC), std::move(Args));
4484   if (!lowerCallTo(CLI))
4485     return false;
4486   updateValueMap(I, CLI.ResultReg);
4487   return true;
4488 }
4489
4490 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4491   MVT VT;
4492   if (!isTypeLegal(I->getType(), VT))
4493     return false;
4494
4495   if (!isa<ConstantInt>(I->getOperand(1)))
4496     return selectBinaryOp(I, ISD::SDIV);
4497
4498   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4499   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4500       !(C.isPowerOf2() || (-C).isPowerOf2()))
4501     return selectBinaryOp(I, ISD::SDIV);
4502
4503   unsigned Lg2 = C.countTrailingZeros();
4504   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4505   if (!Src0Reg)
4506     return false;
4507   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4508
4509   if (cast<BinaryOperator>(I)->isExact()) {
4510     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4511     if (!ResultReg)
4512       return false;
4513     updateValueMap(I, ResultReg);
4514     return true;
4515   }
4516
4517   unsigned Pow2MinusOne = (1 << Lg2) - 1;
4518   unsigned AddReg = emitAddSub_ri(/*UseAdd=*/true, VT, Src0Reg,
4519                                   /*IsKill=*/false, Pow2MinusOne);
4520   if (!AddReg)
4521     return false;
4522
4523   // (Src0 < 0) ? Pow2 - 1 : 0;
4524   if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4525     return false;
4526
4527   unsigned SelectOpc;
4528   const TargetRegisterClass *RC;
4529   if (VT == MVT::i64) {
4530     SelectOpc = AArch64::CSELXr;
4531     RC = &AArch64::GPR64RegClass;
4532   } else {
4533     SelectOpc = AArch64::CSELWr;
4534     RC = &AArch64::GPR32RegClass;
4535   }
4536   unsigned SelectReg =
4537       fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4538                        Src0IsKill, AArch64CC::LT);
4539   if (!SelectReg)
4540     return false;
4541
4542   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4543   // negate the result.
4544   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4545   unsigned ResultReg;
4546   if (C.isNegative())
4547     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4548                               SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4549   else
4550     ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4551
4552   if (!ResultReg)
4553     return false;
4554
4555   updateValueMap(I, ResultReg);
4556   return true;
4557 }
4558
4559 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
4560   switch (I->getOpcode()) {
4561   default:
4562     break;
4563   case Instruction::Add:
4564   case Instruction::Sub:
4565     return selectAddSub(I);
4566   case Instruction::Mul:
4567     return selectMul(I);
4568   case Instruction::SDiv:
4569     return selectSDiv(I);
4570   case Instruction::SRem:
4571     if (!selectBinaryOp(I, ISD::SREM))
4572       return selectRem(I, ISD::SREM);
4573     return true;
4574   case Instruction::URem:
4575     if (!selectBinaryOp(I, ISD::UREM))
4576       return selectRem(I, ISD::UREM);
4577     return true;
4578   case Instruction::Shl:
4579   case Instruction::LShr:
4580   case Instruction::AShr:
4581     return selectShift(I);
4582   case Instruction::And:
4583   case Instruction::Or:
4584   case Instruction::Xor:
4585     return selectLogicalOp(I);
4586   case Instruction::Br:
4587     return selectBranch(I);
4588   case Instruction::IndirectBr:
4589     return selectIndirectBr(I);
4590   case Instruction::BitCast:
4591     if (!FastISel::selectBitCast(I))
4592       return selectBitCast(I);
4593     return true;
4594   case Instruction::FPToSI:
4595     if (!selectCast(I, ISD::FP_TO_SINT))
4596       return selectFPToInt(I, /*Signed=*/true);
4597     return true;
4598   case Instruction::FPToUI:
4599     return selectFPToInt(I, /*Signed=*/false);
4600   case Instruction::ZExt:
4601   case Instruction::SExt:
4602     return selectIntExt(I);
4603   case Instruction::Trunc:
4604     if (!selectCast(I, ISD::TRUNCATE))
4605       return selectTrunc(I);
4606     return true;
4607   case Instruction::FPExt:
4608     return selectFPExt(I);
4609   case Instruction::FPTrunc:
4610     return selectFPTrunc(I);
4611   case Instruction::SIToFP:
4612     if (!selectCast(I, ISD::SINT_TO_FP))
4613       return selectIntToFP(I, /*Signed=*/true);
4614     return true;
4615   case Instruction::UIToFP:
4616     return selectIntToFP(I, /*Signed=*/false);
4617   case Instruction::Load:
4618     return selectLoad(I);
4619   case Instruction::Store:
4620     return selectStore(I);
4621   case Instruction::FCmp:
4622   case Instruction::ICmp:
4623     return selectCmp(I);
4624   case Instruction::Select:
4625     return selectSelect(I);
4626   case Instruction::Ret:
4627     return selectRet(I);
4628   case Instruction::FRem:
4629     return selectFRem(I);
4630   }
4631
4632   // fall-back to target-independent instruction selection.
4633   return selectOperator(I, I->getOpcode());
4634   // Silence warnings.
4635   (void)&CC_AArch64_DarwinPCS_VarArg;
4636 }
4637
4638 namespace llvm {
4639 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
4640                                         const TargetLibraryInfo *LibInfo) {
4641   return new AArch64FastISel(FuncInfo, LibInfo);
4642 }
4643 }