//===-- AArch64FastISel.cpp - AArch64 FastISel implementation ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

namespace {

class AArch64FastISel final : public FastISel {
  class Address {
  public:
    typedef enum {
      RegBase,
      FrameIndexBase
    } BaseKind;

  private:
    BaseKind Kind;
    AArch64_AM::ShiftExtendType ExtType;
    union {
      unsigned Reg;
      int FI;
    } Base;
    unsigned OffsetReg;
    unsigned Shift;
    int64_t Offset;
    const GlobalValue *GV;

  public:
    Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
      OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }
    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }
    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }
    void setOffsetReg(unsigned Reg) {
      OffsetReg = Reg;
    }
    unsigned getOffsetReg() const {
      return OffsetReg;
    }
    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }
    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }
    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() { return Offset; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() { return Shift; }

    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() { return GV; }
  };
  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
  // Selection routines.
  bool selectAddSub(const Instruction *I);
  bool selectLogicalOp(const Instruction *I);
  bool selectLoad(const Instruction *I);
  bool selectStore(const Instruction *I);
  bool selectBranch(const Instruction *I);
  bool selectIndirectBr(const Instruction *I);
  bool selectCmp(const Instruction *I);
  bool selectSelect(const Instruction *I);
  bool selectFPExt(const Instruction *I);
  bool selectFPTrunc(const Instruction *I);
  bool selectFPToInt(const Instruction *I, bool Signed);
  bool selectIntToFP(const Instruction *I, bool Signed);
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
  bool selectRet(const Instruction *I);
  bool selectTrunc(const Instruction *I);
  bool selectIntExt(const Instruction *I);
  bool selectMul(const Instruction *I);
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);
  bool selectGetElementPtr(const Instruction *I);

  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            unsigned Flags, unsigned ScaleFactor,
                            MachineMemOperand *MMO);
  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          unsigned Alignment);
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                         const Value *Cond);
  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
  bool optimizeSelect(const SelectInst *SI);
  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
  // Emit helper routines.
  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true, bool IsZExt = false);
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         bool SetFlags = false, bool WantResult = true);
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         AArch64_AM::ShiftExtendType ShiftType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         AArch64_AM::ShiftExtendType ExtType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);

  // Emit functions.
  bool emitCompareAndBranch(const BranchInst *BI);
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
                    MachineMemOperand *MMO = nullptr);
  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                       unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                       unsigned RHSReg, bool RHSIsKill,
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
                       bool WantResult = true);
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
                         const Value *RHS);
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, uint64_t Imm);
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                            uint64_t ShiftImm);
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                      unsigned Op1, bool Op1IsKill);
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = true);
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = true);
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = false);

  unsigned materializeInt(const ConstantInt *CI, MVT VT);
  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
  unsigned materializeGV(const GlobalValue *GV);
  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                       unsigned &NumBytes);
  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);

public:
  // Backend specific FastISel code.
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;

  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
                           const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
    Subtarget = &TM.getSubtarget<AArch64Subtarget>();
    Context = &FuncInfo.Fn->getContext();
  }

  bool fastSelectInstruction(const Instruction *I) override;

#include "AArch64GenFastISel.inc"
};

} // end anonymous namespace

#include "AArch64GenCallingConv.inc"
/// \brief Check if the sign-/zero-extend will be a noop.
static bool isIntExtFree(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
         "Unexpected value type.");
  bool IsZExt = isa<ZExtInst>(I);

  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
    if (LI->hasOneUse())
      return true;

  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
      return true;

  return false;
}
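// For illustration (sketch, not from the original source): a zero-extend of a
// single-use load is free because the load itself can do the extension, e.g.
//   %b = load i8, i8* %p
//   %w = zext i8 %b to i32
// selects to a single 'ldrb w0, [x0]', which already zero-fills the upper
// bits of the 32-bit register, so no separate extend instruction is needed.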
/// \brief Determine the implicit scale factor that is applied by a memory
/// operation for a given value type.
static unsigned getImplicitScaleFactor(MVT VT) {
  switch (VT.SimpleTy) {
  default: return 0; // Invalid scale factor.
  case MVT::i1: // fall-through
  case MVT::i8: return 1;
  case MVT::i16: return 2;
  case MVT::i32: // fall-through
  case MVT::f32: return 4;
  case MVT::i64: // fall-through
  case MVT::f64: return 8;
  }
}
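// Illustrative use (assembly approximate): the scale factor is what a scaled
// register-offset access applies implicitly, e.g. for an i32 load
//   ldr w0, [x1, x2, lsl #2]   // index register scaled by 4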
CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
  if (CC == CallingConv::WebKit_JS)
    return CC_AArch64_WebKit_JS;
  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
}
unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
         "Alloca should always return a pointer.");

  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI))
    return 0;

  DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);

  if (SI != FuncInfo.StaticAllocaMap.end()) {
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(SI->second)
        .addImm(0)
        .addImm(0);
    return ResultReg;
  }

  return 0;
}
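// Once frame indices have been resolved, the ADDXri above becomes a plain
// SP-relative add, roughly 'add x0, sp, #16' (offset illustrative).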
unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
  if (VT > MVT::i64)
    return 0;

  if (!CI->isZero())
    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  // Create a copy from the zero register to materialize a "0" value.
  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
                                                   : &AArch64::GPR32RegClass;
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
          ResultReg).addReg(ZeroReg, getKillRegState(true));
  return ResultReg;
}
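// A zero materialized this way typically prints as 'mov w0, wzr' (or
// 'mov x0, xzr' for i64); staying a COPY keeps it visible to later
// register-coalescing, which a movz would not be.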
unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
  // Positive zero (+0.0) has to be materialized with a fmov from the zero
  // register, because the immediate version of fmov cannot encode zero.
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  const APFloat Val = CFP->getValueAPF();
  bool Is64Bit = (VT == MVT::f64);
  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    int Imm =
        Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
    assert((Imm != -1) && "Cannot encode floating-point constant.");
    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
  }

  // Materialize via constant pool. MachineConstantPool wants an explicit
  // alignment.
  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
  if (Align == 0)
    Align = DL.getTypeAllocSize(CFP->getType());

  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(ADRPReg)
      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return ResultReg;
}
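// The constant-pool path emits roughly the following pair (ELF syntax, label
// illustrative):
//   adrp x8, .LCPI0_0                 // 4KB page of the constant-pool entry
//   ldr  d0, [x8, :lo12:.LCPI0_0]     // low 12 bits of the entry's address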
unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
  // We can't handle thread-local variables quickly yet.
  if (GV->isThreadLocal())
    return 0;

  // MachO still uses GOT for large code-model accesses, but ELF requires
  // movz/movk sequences, which FastISel doesn't handle yet.
  if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
    return 0;

  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(GV->getType(), true);
  if (!DestEVT.isSimple())
    return 0;

  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  unsigned ResultReg;

  if (OpFlags & AArch64II::MO_GOT) {
    // ADRP + LDRX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);

    ResultReg = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                                 AArch64II::MO_NC);
  } else if (OpFlags & AArch64II::MO_CONSTPOOL) {
    // We can't handle addresses loaded from a constant pool quickly yet.
    return 0;
  } else {
    // ADRP + ADDX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);

    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
        .addImm(0);
  }
  return ResultReg;
}
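// For a directly referenced global the non-GOT path boils down to, e.g.:
//   adrp x8, sym              // page containing the symbol
//   add  x8, x8, :lo12:sym    // add the page offset
// while the GOT path replaces the add with a load through the GOT slot,
// roughly 'ldr x8, [x8, :got_lo12:sym]' (assembly illustrative).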
unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return materializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return materializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return materializeGV(GV);

  return 0;
}
unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
  assert(CFP->isNullValue() &&
         "Floating-point constant is not a positive zero.");
  MVT VT;
  if (!isTypeLegal(CFP->getType(), VT))
    return 0;

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  bool Is64Bit = (VT == MVT::f64);
  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
}
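// This lowers +0.0 to a plain GPR-to-FPR move from the zero register, e.g.
//   fmov s0, wzr   // or 'fmov d0, xzr' for f64
// since FMOV's floating-point immediate encoding cannot represent 0.0.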
/// \brief Check if the multiply is by a power-of-2 constant.
static bool isMulPowOf2(const Value *I) {
  if (const auto *MI = dyn_cast<MulOperator>(I)) {
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
      if (C->getValue().isPowerOf2())
        return true;
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
      if (C->getValue().isPowerOf2())
        return true;
  }
  return false;
}
// Computes the address to get to an object.
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
{
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default:
    break;
  case Instruction::BitCast: {
    // Look through bitcasts.
    return computeAddress(U->getOperand(0), Addr, Ty);
  }
  case Instruction::IntToPtr: {
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;
  }
  case Instruction::PtrToInt: {
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;
  }
  case Instruction::GetElementPtr: {
    Address SavedAddr = Addr;
    uint64_t TmpOffset = Addr.getOffset();

    // Iterate through the GEP folding the constants into offsets where
    // possible.
    gep_type_iterator GTI = gep_type_begin(U);
    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
         ++i, ++GTI) {
      const Value *Op = *i;
      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
        for (;;) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(U, Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Op)->getOperand(0);
            continue;
          }
          // Unsupported.
          goto unsupported_gep;
        }
      }
    }

    // Try to grab the base operand now.
    Addr.setOffset(TmpOffset);
    if (computeAddress(U->getOperand(0), Addr, Ty))
      return true;

    // We failed, restore everything and try the other options.
    Addr = SavedAddr;

  unsupported_gep:
    break;
  }
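// For example (IR illustrative): in 'getelementptr i32, i32* %p, i64 3' the
// constant index contributes 3 * 4 bytes to TmpOffset, so the access folds
// into the load/store's immediate field instead of emitting a separate add.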
  case Instruction::Alloca: {
    const AllocaInst *AI = cast<AllocaInst>(Obj);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.setKind(Address::FrameIndexBase);
      Addr.setFI(SI->second);
      return true;
    }
    break;
  }
  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (isa<ConstantInt>(LHS))
      std::swap(LHS, RHS);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }

    Address Backup = Addr;
    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
      return true;

    Addr = Backup;
    break;
  }
  case Instruction::Sub: {
    // Subs of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }
    break;
  }
  case Instruction::Shl: {
    if (Addr.getOffsetReg())
      break;

    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
    if (!CI)
      break;

    unsigned Val = CI->getZExtValue();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = U->getOperand(0);
    if (const auto *I = dyn_cast<Instruction>(Src))
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
        Src = I;

    // Fold the zext or sext when it won't become a noop.
    if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::UXTW);
        Src = ZE->getOperand(0);
      }
    } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::SXTW);
        Src = SE->getOperand(0);
      }
    }

    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
      if (AI->getOpcode() == Instruction::And) {
        const Value *LHS = AI->getOperand(0);
        const Value *RHS = AI->getOperand(1);

        if (const auto *C = dyn_cast<ConstantInt>(LHS))
          if (C->getValue() == 0xffffffff)
            std::swap(LHS, RHS);

        if (const auto *C = dyn_cast<ConstantInt>(RHS))
          if (C->getValue() == 0xffffffff) {
            Addr.setExtendType(AArch64_AM::UXTW);
            unsigned Reg = getRegForValue(LHS);
            if (!Reg)
              return false;
            bool RegIsKill = hasTrivialKill(LHS);
            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
                                             AArch64::sub_32);
            Addr.setOffsetReg(Reg);
            return true;
          }
      }

    unsigned Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
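// When this fold succeeds, the shifted (and possibly extended) index is used
// directly by the memory access, e.g. 'ldr x0, [x1, w2, uxtw #3]' instead of
// a separate lsl/uxtw followed by a plain reg+reg load (assembly
// illustrative).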
  case Instruction::Mul: {
    if (Addr.getOffsetReg())
      break;

    if (!isMulPowOf2(U))
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    // Canonicalize power-of-2 value to the RHS.
    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue().isPowerOf2())
        std::swap(LHS, RHS);

    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
    const auto *C = cast<ConstantInt>(RHS);
    unsigned Val = C->getValue().logBase2();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = LHS;
    if (const auto *I = dyn_cast<Instruction>(Src))
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
        Src = I;

    // Fold the zext or sext when it won't become a noop.
    if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::UXTW);
        Src = ZE->getOperand(0);
      }
    } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::SXTW);
        Src = SE->getOperand(0);
      }
    }

    unsigned Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::And: {
    if (Addr.getOffsetReg())
      break;

    if (DL.getTypeSizeInBits(Ty) != 8)
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue() == 0xffffffff)
        std::swap(LHS, RHS);

    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 0xffffffff) {
        Addr.setShift(0);
        Addr.setExtendType(AArch64_AM::LSL);
        Addr.setExtendType(AArch64_AM::UXTW);

        unsigned Reg = getRegForValue(LHS);
        if (!Reg)
          return false;
        bool RegIsKill = hasTrivialKill(LHS);
        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
                                         AArch64::sub_32);
        Addr.setOffsetReg(Reg);
        return true;
      }
    break;
  }
  case Instruction::SExt:
  case Instruction::ZExt: {
    if (!Addr.getReg() || Addr.getOffsetReg())
      break;

    const Value *Src = nullptr;
    // Fold the zext or sext when it won't become a noop.
    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::UXTW);
        Src = ZE->getOperand(0);
      }
    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::SXTW);
        Src = SE->getOperand(0);
      }
    }

    if (!Src)
      break;

    Addr.setShift(0);
    unsigned Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  }

  if (Addr.isRegBase() && !Addr.getReg()) {
    unsigned Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setReg(Reg);
    return true;
  }

  if (!Addr.getOffsetReg()) {
    unsigned Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }

  return false;
}
bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  bool InMBB = true;

  if (const auto *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if the operand is in the same BB.
    if (InMBB)
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if the operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if the operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  }

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    Addr.setGlobalValue(GV);
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!Addr.getGlobalValue()) {
    Addr.setReg(getRegForValue(V));
    return Addr.getReg() != 0;
  }

  return false;
}
bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple())
    return false;
  VT = evt.getSimpleVT();

  // This is a legal type, but it's not something we handle in fast-isel.
  if (VT == MVT::f128)
    return false;

  // Handle all other legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}
/// \brief Determine if the value type is supported by FastISel.
///
/// FastISel for AArch64 can handle more value types than are legal. This adds
/// simple value types such as i1, i8, and i16.
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
  if (Ty->isVectorTy() && !IsVectorAllowed)
    return false;

  if (isTypeLegal(Ty, VT))
    return true;

  // If this is a type that can be sign- or zero-extended to a basic operation,
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

bool AArch64FastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))
    return true;

  const auto *I = cast<Instruction>(V);
  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
    return true;

  return false;
}
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    return false;

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;
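  // The two immediate encodings distinguished above, roughly:
  //   ldur w0, [x1, #-8]      // unscaled, signed 9-bit offset
  //   ldr  w0, [x1, #16380]   // scaled, unsigned 12-bit offset (4095 * 4)
  // (assembly illustrative).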
  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
  {
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(Addr.getFI())
        .addImm(0)
        .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }
  if (RegisterOffsetNeedsLowering) {
    unsigned ResultReg = 0;
    if (Addr.getReg()) {
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
          Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                  /*TODO:IsKill=*/false, Addr.getExtendType(),
                                  Addr.getShift());
      else
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                  /*TODO:IsKill=*/false, AArch64_AM::LSL,
                                  Addr.getShift());
    } else {
      if (Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
                               /*IsZExt=*/true);
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
                               /*IsZExt=*/false);
      else
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift());
    }
    if (!ResultReg)
      return false;

    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);
    Addr.setShift(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
  }
  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (ImmediateOffsetNeedsLowering) {
    unsigned ResultReg;
    if (Addr.getReg())
      // Try to fold the immediate into the add instruction.
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
    else
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);

    if (!ResultReg)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}
void AArch64FastISel::addLoadStoreOperands(Address &Addr,
                                           const MachineInstrBuilder &MIB,
                                           unsigned Flags,
                                           unsigned ScaleFactor,
                                           MachineMemOperand *MMO) {
  int64_t Offset = Addr.getOffset() / ScaleFactor;
  // Frame base works a bit differently. Handle it separately.
  if (Addr.isFIBase()) {
    int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
    // and alignment should be based on the VT.
    MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI).addImm(Offset);
  } else {
    assert(Addr.isRegBase() && "Unexpected address kind.");
    const MCInstrDesc &II = MIB->getDesc();
    unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
    Addr.setReg(
        constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
    Addr.setOffsetReg(
        constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
    if (Addr.getOffsetReg()) {
      assert(Addr.getOffset() == 0 && "Unexpected offset");
      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
                      Addr.getExtendType() == AArch64_AM::SXTX;
      MIB.addReg(Addr.getReg());
      MIB.addReg(Addr.getOffsetReg());
      MIB.addImm(IsSigned);
      MIB.addImm(Addr.getShift() != 0);
    } else
      MIB.addReg(Addr.getReg()).addImm(Offset);
  }

  if (MMO)
    MIB.addMemOperand(MMO);
}
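// For the reg+reg form the operands built above correspond to, e.g.:
//   ldr w0, [x1, w2, sxtw #2]   // IsSigned picks SXTW/SXTX; the final
//                               // immediate says whether the index is shifted
// (assembly illustrative).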
unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult, bool IsZExt) {
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
  bool NeedExtend = false;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    NeedExtend = true;
    break;
  case MVT::i8:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    break;
  case MVT::i16:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    break;
  case MVT::i32: // fall-through
  case MVT::i64:
    break;
  }
  MVT SrcVT = RetVT;
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);

  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr)
          std::swap(LHS, RHS);
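  // E.g. for 'add %c, (shl %a, 2)' the shift is moved to the RHS so the fold
  // below can emit a single 'add w0, w1, w2, lsl #2' (illustrative; add is
  // commutative, which is why this is only done for UseAdd).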
  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
                                SetFlags, WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
                                WantResult);
  } else if (const auto *C = dyn_cast<Constant>(RHS))
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
                                WantResult);

  if (ResultReg)
    return ResultReg;

  // Only extend the RHS within the instruction if there is a valid extend type.
  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                               RHSIsKill, ExtendType, C->getZExtValue(),
                               SetFlags, WantResult);
        }
    unsigned RHSReg = getRegForValue(RHS);
    if (!RHSReg)
      return 0;
    bool RHSIsKill = hasTrivialKill(RHS);
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                         ExtendType, 0, SetFlags, WantResult);
  }

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                           AArch64_AM::LSL, ShiftVal, SetFlags, WantResult);
    }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
        switch (SI->getOpcode()) {
        default: break;
        case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
        }
        uint64_t ShiftVal = C->getZExtValue();
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                               RHSIsKill, ShiftType, ShiftVal, SetFlags,
                               WantResult);
        }
      }
    }

  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  if (NeedExtend)
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                       SetFlags, WantResult);
}
unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        bool RHSIsKill, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrr,  AArch64::SUBXrr  },
      { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
      { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill));
  return ResultReg;
}
unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, uint64_t Imm,
                                        bool SetFlags, bool WantResult) {
  assert(LHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  unsigned ShiftImm;
  if (isUInt<12>(Imm))
    ShiftImm = 0;
  else if ((Imm & 0xfff000) == Imm) {
    ShiftImm = 12;
    Imm >>= 12;
  } else
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWri,  AArch64::SUBXri  },
      { AArch64::ADDWri,  AArch64::ADDXri  }  },
    { { AArch64::SUBSWri, AArch64::SUBSXri },
      { AArch64::ADDSWri, AArch64::ADDSXri }  }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addImm(Imm)
      .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
  return ResultReg;
}
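// The shifted-immediate form covers constants like 0x45000, e.g.
//   add x0, x1, #0x45, lsl #12
// so no separate constant materialization is needed (assembly illustrative).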
unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        bool RHSIsKill,
                                        AArch64_AM::ShiftExtendType ShiftType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrs,  AArch64::SUBXrs  },
      { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
      { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill))
      .addImm(getShifterImm(ShiftType, ShiftImm));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        bool RHSIsKill,
                                        AArch64_AM::ShiftExtendType ExtType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrx,  AArch64::SUBXrx  },
      { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
      { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC = nullptr;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill))
      .addImm(getArithExtendImm(ExtType, ShiftImm));
  return ResultReg;
}
bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
  Type *Ty = LHS->getType();
  EVT EVT = TLI.getValueType(Ty, true);
  if (!EVT.isSimple())
    return false;
  MVT VT = EVT.getSimpleVT();

  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    return emitICmp(VT, LHS, RHS, IsZExt);
  case MVT::f32:
  case MVT::f64:
    return emitFCmp(VT, LHS, RHS);
  }
}

bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
                               bool IsZExt) {
  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
                 IsZExt) != 0;
}

bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                                  uint64_t Imm) {
  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
                       /*SetFlags=*/true, /*WantResult=*/false) != 0;
}

bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
  if (RetVT != MVT::f32 && RetVT != MVT::f64)
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  bool UseImm = false;
  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
    if (CFP->isZero() && !CFP->isNegative())
      UseImm = true;

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return false;
  bool LHSIsKill = hasTrivialKill(LHS);

  if (UseImm) {
    unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
        .addReg(LHSReg, getKillRegState(LHSIsKill));
    return true;
  }

  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return false;
  bool RHSIsKill = hasTrivialKill(RHS);

  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill));
  return true;
}
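// Comparing against positive zero uses the immediate form, e.g.
//   fcmp s0, #0.0
// which avoids materializing 0.0 into an FP register first.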
unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

/// \brief This method is a wrapper to simplify add emission.
///
/// First try to emit an add with an immediate operand using emitAddSub_ri. If
/// that fails, then try to materialize the immediate into a register and use
/// emitAddSub_rr instead.
unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
                                      int64_t Imm) {
  unsigned ResultReg;
  if (Imm < 0)
    ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
  else
    ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);

  if (ResultReg)
    return ResultReg;

  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
  if (!CReg)
    return 0;

  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
  return ResultReg;
}
unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
                                      bool LHSIsKill, unsigned RHSReg,
                                      bool RHSIsKill, bool WantResult) {
  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
                       RHSIsKill, /*SetFlags=*/true, WantResult);
}

unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
                                      bool LHSIsKill, unsigned RHSReg,
                                      bool RHSIsKill,
                                      AArch64_AM::ShiftExtendType ShiftType,
                                      uint64_t ShiftImm, bool WantResult) {
  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
                       RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
                       WantResult);
}
unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = C->getZExtValue();
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
  }
  if (ResultReg)
    return ResultReg;

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();

      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                              RHSIsKill, ShiftVal);
    }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        uint64_t ShiftVal = C->getZExtValue();
        unsigned RHSReg = getRegForValue(SI->getOperand(0));
        if (!RHSReg)
          return 0;
        bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
        return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                                RHSIsKill, ShiftVal);
      }

  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  }
  return ResultReg;
}
unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
                                           uint64_t Imm) {
  assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
         "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWri, AArch64::ANDXri },
    { AArch64::ORRWri, AArch64::ORRXri },
    { AArch64::EORWri, AArch64::EORXri }
  };
  const TargetRegisterClass *RC;
  unsigned Opc;
  unsigned RegSize;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32: {
    unsigned Idx = ISDOpc - ISD::AND;
    Opc = OpcTable[Idx][0];
    RC = &AArch64::GPR32spRegClass;
    RegSize = 32;
    break;
  }
  case MVT::i64:
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64spRegClass;
    RegSize = 64;
    break;
  }

  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
    return 0;

  unsigned ResultReg =
      fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
                      AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  }
  return ResultReg;
}
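// Only bitmask-encodable immediates reach fastEmitInst_ri above; e.g.
// 'and w0, w1, #0xff' encodes directly, while a value like 0xabcd fails
// isLogicalImmediate and must go through a register instead (illustrative).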
unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
                                           unsigned RHSReg, bool RHSIsKill,
                                           uint64_t ShiftImm) {
  assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
         "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWrs, AArch64::ANDXrs },
    { AArch64::ORRWrs, AArch64::ORRXrs },
    { AArch64::EORWrs, AArch64::EORXrs }
  };
  const TargetRegisterClass *RC;
  unsigned Opc;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    Opc = OpcTable[ISDOpc - ISD::AND][0];
    RC = &AArch64::GPR32RegClass;
    break;
  case MVT::i64:
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64RegClass;
    break;
  }
  unsigned ResultReg =
      fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                       AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  }
  return ResultReg;
}

unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                                     uint64_t Imm) {
  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
}
unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
                                   bool WantZExt, MachineMemOperand *MMO) {
  // Simplify this down to something we can handle.
  if (!simplifyAddress(Addr, VT))
    return 0;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    llvm_unreachable("Unexpected value type.");

  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
  bool UseScaled = true;
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
    UseScaled = false;
    ScaleFactor = 1;
  }

  static const unsigned GPOpcTable[2][8][4] = {
    // Sign-extend.
    { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
        AArch64::LDURXi  },
      { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
        AArch64::LDURXi  },
      { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
        AArch64::LDRXui  },
      { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
        AArch64::LDRXui  },
      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
        AArch64::LDRXroX },
      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
        AArch64::LDRXroW },
      { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
        AArch64::LDRXroW }
    },
    // Zero-extend.
    { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
        AArch64::LDURXi  },
      { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
        AArch64::LDURXi  },
      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
        AArch64::LDRXui  },
      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
        AArch64::LDRXui  },
      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
        AArch64::LDRXroW },
      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
        AArch64::LDRXroW }
    }
  };

  static const unsigned FPOpcTable[4][2] = {
    { AArch64::LDURSi,  AArch64::LDURDi  },
    { AArch64::LDRSui,  AArch64::LDRDui  },
    { AArch64::LDRSroX, AArch64::LDRDroX },
    { AArch64::LDRSroW, AArch64::LDRDroW }
  };

  unsigned Opc;
  const TargetRegisterClass *RC;
  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                      Addr.getOffsetReg();
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
      Addr.getExtendType() == AArch64_AM::SXTW)
    Idx++;

  bool IsRet64Bit = RetVT == MVT::i64;
  switch (VT.SimpleTy) {
  default:
    llvm_unreachable("Unexpected value type.");
  case MVT::i1: // Intentional fall-through.
  case MVT::i8:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i16:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i32:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i64:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
    RC = &AArch64::GPR64RegClass;
    break;
  case MVT::f32:
    Opc = FPOpcTable[Idx][0];
    RC = &AArch64::FPR32RegClass;
    break;
  case MVT::f64:
    Opc = FPOpcTable[Idx][1];
    RC = &AArch64::FPR64RegClass;
    break;
  }
  // Create the base instruction, then add the operands.
  unsigned ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(Opc), ResultReg);
  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);

  // Loading an i1 requires special handling.
  if (VT == MVT::i1) {
    unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure.");
    ResultReg = ANDReg;
  }

  // For zero-extending loads to 64bit we emit a 32bit load and then convert
  // the 32bit reg to a 64bit reg.
  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(0)
        .addReg(ResultReg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    ResultReg = Reg64;
  }
  return ResultReg;
}
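// The SUBREG_TO_REG above encodes that a 32-bit load such as 'ldr w0, [x1]'
// already zeroes the upper 32 bits of x0, so the i64 zero-extend costs no
// extra instruction.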
bool AArch64FastISel::selectAddSub(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectOperator(I, I->getOpcode());

  unsigned ResultReg;
  switch (I->getOpcode()) {
  default:
    llvm_unreachable("Unexpected instruction.");
  case Instruction::Add:
    ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
    break;
  case Instruction::Sub:
    ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
    break;
  }
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectOperator(I, I->getOpcode());

  unsigned ResultReg;
  switch (I->getOpcode()) {
  default:
    llvm_unreachable("Unexpected instruction.");
  case Instruction::And:
    ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
    break;
  case Instruction::Or:
    ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
    break;
  case Instruction::Xor:
    ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
    break;
  }
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
bool AArch64FastISel::selectLoad(const Instruction *I) {
  MVT VT;
  // Verify we have a legal type before going any further. Currently, we handle
  // simple types that will directly fit in a register (i32/f32/i64/f64) or
  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
      cast<LoadInst>(I)->isAtomic())
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
    return false;

  // Fold the following sign-/zero-extend into the load instruction.
  bool WantZExt = true;
  MVT RetVT = VT;
  const Value *IntExtVal = nullptr;
  if (I->hasOneUse()) {
    if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(ZE->getType(), RetVT))
        IntExtVal = ZE;
      else
        RetVT = VT;
    } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(SE->getType(), RetVT))
        IntExtVal = SE;
      else
        RetVT = VT;
      WantZExt = false;
    }
  }

  unsigned ResultReg =
      emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
  if (!ResultReg)
    return false;

  // There are a few different cases we have to handle, because the load or the
  // sign-/zero-extend might not be selected by FastISel if we fall-back to
  // SelectionDAG. There is also an ordering issue when both instructions are in
  // different basic blocks.
  // 1.) The load instruction is selected by FastISel, but the integer extend
  //     not. This usually happens when the integer extend is in a different
  //     basic block and SelectionDAG took over for that basic block.
  // 2.) The load instruction is selected before the integer extend. This only
  //     happens when the integer extend is in a different basic block.
  // 3.) The load instruction is selected by SelectionDAG and the integer extend
  //     by FastISel. This happens if there are instructions between the load
  //     and the integer extend that couldn't be selected by FastISel.
  if (IntExtVal) {
    // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
    // could select it. Emit a copy to subreg if necessary. FastISel will remove
    // it when it selects the integer extend.
    unsigned Reg = lookUpRegForValue(IntExtVal);
    if (!Reg) {
      if (RetVT == MVT::i64 && VT <= MVT::i32) {
        if (WantZExt) {
          // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
          std::prev(FuncInfo.InsertPt)->eraseFromParent();
          ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
        } else
          ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
                                                 /*IsKill=*/true,
                                                 AArch64::sub_32);
      }
      updateValueMap(I, ResultReg);
      return true;
    }

    // The integer extend has already been emitted - delete all the instructions
    // that have been emitted by the integer extend lowering code and use the
    // result from the load instruction directly.
    while (Reg) {
      auto *MI = MRI.getUniqueVRegDef(Reg);
      if (!MI)
        break;
      Reg = 0;
      for (auto &Opnd : MI->uses()) {
        if (Opnd.isReg()) {
          Reg = Opnd.getReg();
          break;
        }
      }
      MI->eraseFromParent();
    }
    updateValueMap(IntExtVal, ResultReg);
    return true;
  }

  updateValueMap(I, ResultReg);
  return true;
}
bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
                                MachineMemOperand *MMO) {
  // Simplify this down to something we can handle.
  if (!simplifyAddress(Addr, VT))
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    llvm_unreachable("Unexpected value type.");

  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
  bool UseScaled = true;
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
    UseScaled = false;
    ScaleFactor = 1;
  }
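
  // The table below is indexed as OpcTable[Idx][VT]: row 0 holds the unscaled
  // 9-bit signed-offset forms (STUR*), row 1 the scaled 12-bit unsigned-offset
  // forms (STR*ui), and rows 2/3 the register-offset forms with an X or an
  // extended W offset register; the columns correspond to the value types
  // i8, i16, i32, i64, f32, and f64.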
  static const unsigned OpcTable[4][6] = {
    { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
      AArch64::STURSi,   AArch64::STURDi },
    { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
      AArch64::STRSui,   AArch64::STRDui },
    { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
      AArch64::STRSroX,  AArch64::STRDroX },
    { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
      AArch64::STRSroW,  AArch64::STRDroW }
  };

  unsigned Opc;
  bool VTIsi1 = false;
  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                      Addr.getOffsetReg();
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
      Addr.getExtendType() == AArch64_AM::SXTW)
    Idx++;

  switch (VT.SimpleTy) {
  default: llvm_unreachable("Unexpected value type.");
  case MVT::i1:  VTIsi1 = true; // Intentional fall-through.
  case MVT::i8:  Opc = OpcTable[Idx][0]; break;
  case MVT::i16: Opc = OpcTable[Idx][1]; break;
  case MVT::i32: Opc = OpcTable[Idx][2]; break;
  case MVT::i64: Opc = OpcTable[Idx][3]; break;
  case MVT::f32: Opc = OpcTable[Idx][4]; break;
  case MVT::f64: Opc = OpcTable[Idx][5]; break;
  }

  // Storing an i1 requires special handling.
  if (VTIsi1 && SrcReg != AArch64::WZR) {
    unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure.");
    SrcReg = ANDReg;
  }
  // Create the base instruction, then add the operands.
  const MCInstrDesc &II = TII.get(Opc);
  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);

  return true;
}
bool AArch64FastISel::selectStore(const Instruction *I) {
  MVT VT;
  const Value *Op0 = I->getOperand(0);
  // Verify we have a legal type before going any further. Currently, we handle
  // simple types that will directly fit in a register (i32/f32/i64/f64) or
  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) ||
      cast<StoreInst>(I)->isAtomic())
    return false;

  // Get the value to be stored into a register. Use the zero register directly
  // when possible to avoid an unnecessary copy and a wasted register.
  unsigned SrcReg = 0;
  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
    if (CI->isZero())
      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
    if (CF->isZero() && !CF->isNegative()) {
      VT = MVT::getIntegerVT(VT.getSizeInBits());
      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
    }
  }

  if (!SrcReg)
    SrcReg = getRegForValue(Op0);

  if (!SrcReg)
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
    return false;

  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
    return false;
  return true;
}
static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
  switch (Pred) {
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UEQ:
  default:
    // AL is our "false" for now. The other two need more compares.
    return AArch64CC::AL;
  case CmpInst::ICMP_EQ:
  case CmpInst::FCMP_OEQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
  case CmpInst::FCMP_OGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
  case CmpInst::FCMP_OGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_UGT:
  case CmpInst::FCMP_UGT:
    return AArch64CC::HI;
  case CmpInst::FCMP_OLT:
    return AArch64CC::MI;
  case CmpInst::ICMP_ULE:
  case CmpInst::FCMP_OLE:
    return AArch64CC::LS;
  case CmpInst::FCMP_ORD:
    return AArch64CC::VC;
  case CmpInst::FCMP_UNO:
    return AArch64CC::VS;
  case CmpInst::FCMP_UGE:
    return AArch64CC::PL;
  case CmpInst::ICMP_SLT:
  case CmpInst::FCMP_ULT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
  case CmpInst::FCMP_ULE:
    return AArch64CC::LE;
  case CmpInst::FCMP_UNE:
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  }
}
/// \brief Try to emit a combined compare-and-branch instruction.
bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);

  const Value *LHS = CI->getOperand(0);
  const Value *RHS = CI->getOperand(1);

  MVT VT;
  if (!isTypeSupported(LHS->getType(), VT))
    return false;

  unsigned BW = VT.getSizeInBits();
  if (BW > 64)
    return false;

  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Try to take advantage of fallthrough opportunities.
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    Predicate = CmpInst::getInversePredicate(Predicate);
  }

  int TestBit = -1;
  bool IsCmpNE;
  switch (Predicate) {
  default:
    return false;
  case CmpInst::ICMP_EQ:
  case CmpInst::ICMP_NE:
    if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
      std::swap(LHS, RHS);

    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
      return false;

    if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
      if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
        const Value *AndLHS = AI->getOperand(0);
        const Value *AndRHS = AI->getOperand(1);

        if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
          if (C->getValue().isPowerOf2())
            std::swap(AndLHS, AndRHS);

        if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
          if (C->getValue().isPowerOf2()) {
            TestBit = C->getValue().logBase2();
            LHS = AndLHS;
          }
      }

    if (VT == MVT::i1)
      TestBit = 0;

    IsCmpNE = Predicate == CmpInst::ICMP_NE;
    break;
  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SGE:
    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
      return false;

    TestBit = BW - 1;
    IsCmpNE = Predicate == CmpInst::ICMP_SLT;
    break;
  case CmpInst::ICMP_SGT:
  case CmpInst::ICMP_SLE:
    if (!isa<ConstantInt>(RHS))
      return false;

    if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
      return false;

    TestBit = BW - 1;
    IsCmpNE = Predicate == CmpInst::ICMP_SLE;
    break;
  }
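
  // Select CBZ/CBNZ for plain compares against zero and TBZ/TBNZ for single
  // bit tests; the indices below are [IsBitTest][IsCmpNE][Is64Bit].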
  static const unsigned OpcTable[2][2][2] = {
    { {AArch64::CBZW,  AArch64::CBZX },
      {AArch64::CBNZW, AArch64::CBNZX} },
    { {AArch64::TBZW,  AArch64::TBZX },
      {AArch64::TBNZW, AArch64::TBNZX} }
  };
  bool IsBitTest = TestBit != -1;
  bool Is64Bit = BW == 64;
  if (TestBit < 32 && TestBit >= 0)
    Is64Bit = false;

  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
  const MCInstrDesc &II = TII.get(Opc);

  unsigned SrcReg = getRegForValue(LHS);
  if (!SrcReg)
    return false;
  bool SrcIsKill = hasTrivialKill(LHS);

  if (BW == 64 && !Is64Bit)
    SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
                                        AArch64::sub_32);

  if ((BW < 32) && !IsBitTest)
    SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);

  // Emit the combined compare and branch instruction.
  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
          .addReg(SrcReg, getKillRegState(SrcIsKill));
  if (IsBitTest)
    MIB.addImm(TestBit);
  MIB.addMBB(TBB);

  // Obtain the branch weight and add the TrueBB to the successor list.
  uint32_t BranchWeight = 0;
  if (FuncInfo.BPI)
    BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
                                               TBB->getBasicBlock());
  FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
  fastEmitBranch(FBB, DbgLoc);

  return true;
}
bool AArch64FastISel::selectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  if (BI->isUnconditional()) {
    MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
    fastEmitBranch(MSucc, BI->getDebugLoc());
    return true;
  }

  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  AArch64CC::CondCode CC = AArch64CC::NE;
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && isValueAvailable(CI)) {
      // Try to optimize or fold the cmp.
      CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
      switch (Predicate) {
      default:
        break;
      case CmpInst::FCMP_FALSE:
        fastEmitBranch(FBB, DbgLoc);
        return true;
      case CmpInst::FCMP_TRUE:
        fastEmitBranch(TBB, DbgLoc);
        return true;
      }

      // Try to emit a combined compare-and-branch first.
      if (emitCompareAndBranch(BI))
        return true;

      // Try to take advantage of fallthrough opportunities.
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      // Emit the cmp.
      if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
        return false;

      // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
      // instruction.
      CC = getCompareCC(Predicate);
      AArch64CC::CondCode ExtraCC = AArch64CC::AL;
      switch (Predicate) {
      default:
        break;
      case CmpInst::FCMP_UEQ:
        ExtraCC = AArch64CC::EQ;
        CC = AArch64CC::VS;
        break;
      case CmpInst::FCMP_ONE:
        ExtraCC = AArch64CC::MI;
        CC = AArch64CC::GT;
        break;
      }
      assert((CC != AArch64CC::AL) && "Unexpected condition code.");

      // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
      if (ExtraCC != AArch64CC::AL) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
            .addImm(ExtraCC)
            .addMBB(TBB);
      }

      // Emit the branch.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
          .addImm(CC)
          .addMBB(TBB);

      // Obtain the branch weight and add the TrueBB to the successor list.
      uint32_t BranchWeight = 0;
      if (FuncInfo.BPI)
        BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
                                                   TBB->getBasicBlock());
      FuncInfo.MBB->addSuccessor(TBB, BranchWeight);

      fastEmitBranch(FBB, DbgLoc);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    MVT SrcVT;
    if (TI->hasOneUse() && isValueAvailable(TI) &&
        isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) {
      unsigned CondReg = getRegForValue(TI->getOperand(0));
      if (!CondReg)
        return false;
      bool CondIsKill = hasTrivialKill(TI->getOperand(0));

      // Issue an extract_subreg to get the lower 32-bits.
      if (SrcVT == MVT::i64) {
        CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
                                             AArch64::sub_32);
        CondIsKill = true;
      }

      unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
      assert(ANDReg && "Unexpected AND instruction emission failure.");
      emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);

      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        CC = AArch64CC::EQ;
      }
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
          .addImm(CC)
          .addMBB(TBB);

      // Obtain the branch weight and add the TrueBB to the successor list.
      uint32_t BranchWeight = 0;
      if (FuncInfo.BPI)
        BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
                                                   TBB->getBasicBlock());
      FuncInfo.MBB->addSuccessor(TBB, BranchWeight);

      fastEmitBranch(FBB, DbgLoc);
      return true;
    }
  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
        .addMBB(Target);

    // Obtain the branch weight and add the target to the successor list.
    uint32_t BranchWeight = 0;
    if (FuncInfo.BPI)
      BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
                                                 Target->getBasicBlock());
    FuncInfo.MBB->addSuccessor(Target, BranchWeight);
    return true;
  } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
    // Fake request the condition, otherwise the intrinsic might be completely
    // optimized away.
    unsigned CondReg = getRegForValue(BI->getCondition());
    if (!CondReg)
      return false;

    // Emit the branch.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
        .addImm(CC)
        .addMBB(TBB);

    // Obtain the branch weight and add the TrueBB to the successor list.
    uint32_t BranchWeight = 0;
    if (FuncInfo.BPI)
      BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
                                                 TBB->getBasicBlock());
    FuncInfo.MBB->addSuccessor(TBB, BranchWeight);

    fastEmitBranch(FBB, DbgLoc);
    return true;
  }

  unsigned CondReg = getRegForValue(BI->getCondition());
  if (CondReg == 0)
    return false;
  bool CondRegIsKill = hasTrivialKill(BI->getCondition());

  // We've been divorced from our compare! Our block was split, and
  // now our compare lives in a predecessor block. We mustn't
  // re-compare here, as the children of the compare aren't guaranteed
  // live across the block boundary (we *could* check for this).
  // Regardless, the compare has been done in the predecessor block,
  // and it left a value for us in a virtual register. Ergo, we test
  // the one-bit value left in the virtual register.
  emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0);

  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    CC = AArch64CC::EQ;
  }

  // Emit the branch.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
      .addImm(CC)
      .addMBB(TBB);

  // Obtain the branch weight and add the TrueBB to the successor list.
  uint32_t BranchWeight = 0;
  if (FuncInfo.BPI)
    BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
                                               TBB->getBasicBlock());
  FuncInfo.MBB->addSuccessor(TBB, BranchWeight);

  fastEmitBranch(FBB, DbgLoc);
  return true;
}
bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
  unsigned AddrReg = getRegForValue(BI->getOperand(0));
  if (AddrReg == 0)
    return false;

  // Emit the indirect branch.
  const MCInstrDesc &II = TII.get(AArch64::BR);
  AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);

  // Make sure the CFG is up-to-date.
  for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);

  return true;
}
bool AArch64FastISel::selectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  // Try to optimize or fold the cmp.
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
  unsigned ResultReg = 0;
  switch (Predicate) {
  default:
    break;
  case CmpInst::FCMP_FALSE:
    ResultReg = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(AArch64::WZR, getKillRegState(true));
    break;
  case CmpInst::FCMP_TRUE:
    ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
    break;
  }

  if (ResultReg) {
    updateValueMap(I, ResultReg);
    return true;
  }

  // Emit the cmp.
  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
    return false;

  ResultReg = createResultReg(&AArch64::GPR32RegClass);

  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
  // condition codes are inverted, because they are used by CSINC.
  static unsigned CondCodeTable[2][2] = {
    { AArch64CC::NE, AArch64CC::VC },
    { AArch64CC::PL, AArch64CC::LE }
  };
  unsigned *CondCodes = nullptr;
  switch (Predicate) {
  default:
    break;
  case CmpInst::FCMP_UEQ:
    CondCodes = &CondCodeTable[0][0];
    break;
  case CmpInst::FCMP_ONE:
    CondCodes = &CondCodeTable[1][0];
    break;
  }
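
  // Materialize the result with two chained CSINCs. CSINC Wd, Wn, Wm, cc
  // yields Wn if cc holds and Wm + 1 otherwise, so with both sources WZR it
  // computes !cc. Because the table stores inverted codes, the first CSINC
  // produces the first half of the disjunction, and the second CSINC either
  // keeps that value or forces a 1 when its (inverted) condition fails,
  // effectively ORing the two conditions together.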
  if (CondCodes) {
    unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
            TmpReg1)
        .addReg(AArch64::WZR, getKillRegState(true))
        .addReg(AArch64::WZR, getKillRegState(true))
        .addImm(CondCodes[0]);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
            ResultReg)
        .addReg(TmpReg1, getKillRegState(true))
        .addReg(AArch64::WZR, getKillRegState(true))
        .addImm(CondCodes[1]);

    updateValueMap(I, ResultReg);
    return true;
  }

  // Now set a register based on the comparison.
  AArch64CC::CondCode CC = getCompareCC(Predicate);
  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
          ResultReg)
      .addReg(AArch64::WZR, getKillRegState(true))
      .addReg(AArch64::WZR, getKillRegState(true))
      .addImm(invertedCC);

  updateValueMap(I, ResultReg);
  return true;
}
/// \brief Optimize selects of i1 if one of the operands has a 'true' or
/// 'false' value.
bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
  if (!SI->getType()->isIntegerTy(1))
    return false;

  const Value *Src1Val, *Src2Val;
  unsigned Opc = 0;
  bool NeedExtraOp = false;
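  // With an i1 select, a constant true/false operand lets us lower to a
  // single logical operation on the remaining two values:
  //   select c, 1, f  ->  c | f       (ORR)
  //   select c, 0, f  ->  f & ~c      (BIC)
  //   select c, t, 1  ->  ~c | t      (EOR c, 1 followed by ORR)
  //   select c, t, 0  ->  c & t       (AND)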
  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
    if (CI->isOne()) {
      Src1Val = SI->getCondition();
      Src2Val = SI->getFalseValue();
      Opc = AArch64::ORRWrr;
    } else {
      assert(CI->isZero());
      Src1Val = SI->getFalseValue();
      Src2Val = SI->getCondition();
      Opc = AArch64::BICWrr;
    }
  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
    if (CI->isOne()) {
      Src1Val = SI->getCondition();
      Src2Val = SI->getTrueValue();
      Opc = AArch64::ORRWrr;
      NeedExtraOp = true;
    } else {
      assert(CI->isZero());
      Src1Val = SI->getCondition();
      Src2Val = SI->getTrueValue();
      Opc = AArch64::ANDWrr;
    }
  }

  if (!Opc)
    return false;

  unsigned Src1Reg = getRegForValue(Src1Val);
  if (!Src1Reg)
    return false;
  bool Src1IsKill = hasTrivialKill(Src1Val);

  unsigned Src2Reg = getRegForValue(Src2Val);
  if (!Src2Reg)
    return false;
  bool Src2IsKill = hasTrivialKill(Src2Val);

  if (NeedExtraOp) {
    Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
    Src1IsKill = true;
  }
  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32spRegClass, Src1Reg,
                                       Src1IsKill, Src2Reg, Src2IsKill);
  updateValueMap(SI, ResultReg);
  return true;
}
bool AArch64FastISel::selectSelect(const Instruction *I) {
  assert(isa<SelectInst>(I) && "Expected a select instruction.");
  MVT VT;
  if (!isTypeSupported(I->getType(), VT))
    return false;

  unsigned Opc;
  const TargetRegisterClass *RC;
  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    Opc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
    break;
  case MVT::i64:
    Opc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
    break;
  case MVT::f32:
    Opc = AArch64::FCSELSrrr;
    RC = &AArch64::FPR32RegClass;
    break;
  case MVT::f64:
    Opc = AArch64::FCSELDrrr;
    RC = &AArch64::FPR64RegClass;
    break;
  }

  const SelectInst *SI = cast<SelectInst>(I);
  const Value *Cond = SI->getCondition();
  AArch64CC::CondCode CC = AArch64CC::NE;
  AArch64CC::CondCode ExtraCC = AArch64CC::AL;

  if (optimizeSelect(SI))
    return true;

  // Try to pickup the flags, so we don't have to emit another compare.
  if (foldXALUIntrinsic(CC, I, Cond)) {
    // Fake request the condition to force emission of the XALU intrinsic.
    unsigned CondReg = getRegForValue(Cond);
    if (!CondReg)
      return false;
  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
             isValueAvailable(Cond)) {
    const auto *Cmp = cast<CmpInst>(Cond);
    // Try to optimize or fold the cmp.
    CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
    const Value *FoldSelect = nullptr;
    switch (Predicate) {
    default:
      break;
    case CmpInst::FCMP_FALSE:
      FoldSelect = SI->getFalseValue();
      break;
    case CmpInst::FCMP_TRUE:
      FoldSelect = SI->getTrueValue();
      break;
    }

    if (FoldSelect) {
      unsigned SrcReg = getRegForValue(FoldSelect);
      if (!SrcReg)
        return false;
      unsigned UseReg = lookUpRegForValue(SI);
      if (UseReg)
        MRI.clearKillFlags(UseReg);

      updateValueMap(I, SrcReg);
      return true;
    }

    // Emit the cmp.
    if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
      return false;

    // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
    CC = getCompareCC(Predicate);
    switch (Predicate) {
    default:
      break;
    case CmpInst::FCMP_UEQ:
      ExtraCC = AArch64CC::EQ;
      CC = AArch64CC::VS;
      break;
    case CmpInst::FCMP_ONE:
      ExtraCC = AArch64CC::MI;
      CC = AArch64CC::GT;
      break;
    }
    assert((CC != AArch64CC::AL) && "Unexpected condition code.");
  } else {
    unsigned CondReg = getRegForValue(Cond);
    if (!CondReg)
      return false;
    bool CondIsKill = hasTrivialKill(Cond);

    // Emit a TST instruction (ANDS wzr, reg, #imm).
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDSWri),
            AArch64::WZR)
        .addReg(CondReg, getKillRegState(CondIsKill))
        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
  }

  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
  bool Src1IsKill = hasTrivialKill(SI->getTrueValue());

  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
  bool Src2IsKill = hasTrivialKill(SI->getFalseValue());

  if (!Src1Reg || !Src2Reg)
    return false;

  if (ExtraCC != AArch64CC::AL) {
    Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
                               Src2IsKill, ExtraCC);
    Src2IsKill = true;
  }
  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
                                        Src2IsKill, CC);
  updateValueMap(I, ResultReg);
  return true;
}
bool AArch64FastISel::selectFPExt(const Instruction *I) {
  Value *V = I->getOperand(0);
  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
    return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0)
    return false;

  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
          ResultReg).addReg(Op);
  updateValueMap(I, ResultReg);
  return true;
}
bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
  Value *V = I->getOperand(0);
  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
    return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0)
    return false;

  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
          ResultReg).addReg(Op);
  updateValueMap(I, ResultReg);
  return true;
}
// FPToUI and FPToSI
bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
  MVT DestVT;
  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
    return false;

  unsigned SrcReg = getRegForValue(I->getOperand(0));
  if (SrcReg == 0)
    return false;

  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
  if (SrcVT == MVT::f128)
    return false;

  unsigned Opc;
  if (SrcVT == MVT::f64) {
    if (Signed)
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
    else
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
  } else {
    if (Signed)
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
    else
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
  }
  unsigned ResultReg = createResultReg(
      DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(SrcReg);
  updateValueMap(I, ResultReg);
  return true;
}
bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
  MVT DestVT;
  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
    return false;
  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
         "Unexpected value type.");

  unsigned SrcReg = getRegForValue(I->getOperand(0));
  if (!SrcReg)
    return false;
  bool SrcIsKill = hasTrivialKill(I->getOperand(0));

  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);

  // Handle sign-extension.
  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
    SrcReg =
        emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
    if (!SrcReg)
      return false;
    SrcIsKill = true;
  }

  unsigned Opc;
  if (SrcVT == MVT::i64) {
    if (Signed)
      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
    else
      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
  } else {
    if (Signed)
      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
    else
      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
  }

  unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
                                      SrcIsKill);
  updateValueMap(I, ResultReg);
  return true;
}
bool AArch64FastISel::fastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  CallingConv::ID CC = F->getCallingConv();
  if (CC != CallingConv::C)
    return false;

  // Only handle simple cases of up to 8 GPR and FPR each.
  unsigned GPRCnt = 0;
  unsigned FPRCnt = 0;
  unsigned Idx = 0;
  for (auto const &Arg : F->args()) {
    // The first argument is at index 1.
    ++Idx;
    if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
        F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
        F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
        F->getAttributes().hasAttribute(Idx, Attribute::Nest))
      return false;

    Type *ArgTy = Arg.getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy())
      return false;

    EVT ArgVT = TLI.getValueType(ArgTy);
    if (!ArgVT.isSimple())
      return false;

    MVT VT = ArgVT.getSimpleVT().SimpleTy;
    if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
      return false;

    if (VT.isVector() &&
        (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
      return false;

    if (VT >= MVT::i1 && VT <= MVT::i64)
      ++GPRCnt;
    else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
             VT.is128BitVector())
      ++FPRCnt;
    else
      return false;

    if (GPRCnt > 8 || FPRCnt > 8)
      return false;
  }
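
  // Physical registers used for argument passing, one row per register class:
  // W/X registers for 32-/64-bit GPR arguments, and H/S/D/Q registers for
  // f16, f32, f64 (and 64-bit vector), and 128-bit vector FPR arguments.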
  static const MCPhysReg Registers[6][8] = {
    { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
      AArch64::W5, AArch64::W6, AArch64::W7 },
    { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
      AArch64::X5, AArch64::X6, AArch64::X7 },
    { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
      AArch64::H5, AArch64::H6, AArch64::H7 },
    { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
      AArch64::S5, AArch64::S6, AArch64::S7 },
    { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
      AArch64::D5, AArch64::D6, AArch64::D7 },
    { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
      AArch64::Q5, AArch64::Q6, AArch64::Q7 }
  };
  unsigned GPRIdx = 0;
  unsigned FPRIdx = 0;
  for (auto const &Arg : F->args()) {
    MVT VT = TLI.getSimpleValueType(Arg.getType());
    unsigned SrcReg;
    const TargetRegisterClass *RC;
    if (VT >= MVT::i1 && VT <= MVT::i32) {
      SrcReg = Registers[0][GPRIdx++];
      RC = &AArch64::GPR32RegClass;
      VT = MVT::i32;
    } else if (VT == MVT::i64) {
      SrcReg = Registers[1][GPRIdx++];
      RC = &AArch64::GPR64RegClass;
    } else if (VT == MVT::f16) {
      SrcReg = Registers[2][FPRIdx++];
      RC = &AArch64::FPR16RegClass;
    } else if (VT == MVT::f32) {
      SrcReg = Registers[3][FPRIdx++];
      RC = &AArch64::FPR32RegClass;
    } else if ((VT == MVT::f64) || VT.is64BitVector()) {
      SrcReg = Registers[4][FPRIdx++];
      RC = &AArch64::FPR64RegClass;
    } else if (VT.is128BitVector()) {
      SrcReg = Registers[5][FPRIdx++];
      RC = &AArch64::FPR128RegClass;
    } else
      llvm_unreachable("Unexpected value type.");

    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(DstReg, getKillRegState(true));
    updateValueMap(&Arg, ResultReg);
  }
  return true;
}
bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
                                      SmallVectorImpl<MVT> &OutVTs,
                                      unsigned &NumBytes) {
  CallingConv::ID CC = CLI.CallConv;
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
      .addImm(NumBytes);

  // Process the args.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const Value *ArgVal = CLI.OutVals[VA.getValNo()];
    MVT ArgVT = OutVTs[VA.getValNo()];

    unsigned ArgReg = getRegForValue(ArgVal);
    if (!ArgReg)
      return false;

    // Handle arg promotion: SExt, ZExt, AExt.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
      if (!ArgReg)
        return false;
      break;
    }
    case CCValAssign::AExt:
    // Intentional fall-through.
    case CCValAssign::ZExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
      if (!ArgReg)
        return false;
      break;
    }
    default:
      llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
      CLI.OutRegs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      // FIXME: Handle custom args.
      return false;
    } else {
      assert(VA.isMemLoc() && "Assuming store on stack.");

      // Don't emit stores for undef values.
      if (isa<UndefValue>(ArgVal))
        continue;

      // Need to store on the stack.
      unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
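
      // On big-endian targets an argument smaller than 8 bytes is passed in
      // the most significant bytes of its stack slot, so the store offset
      // must be bumped by the number of unused bytes in the slot.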
      unsigned BEAlign = 0;
      if (ArgSize < 8 && !Subtarget->isLittleEndian())
        BEAlign = 8 - ArgSize;

      Address Addr;
      Addr.setKind(Address::RegBase);
      Addr.setReg(AArch64::SP);
      Addr.setOffset(VA.getLocMemOffset() + BEAlign);

      unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
      MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
          MachinePointerInfo::getStack(Addr.getOffset()),
          MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);

      if (!emitStore(ArgVT, ArgReg, Addr, MMO))
        return false;
    }
  }
  return true;
}
bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
                                 unsigned NumBytes) {
  CallingConv::ID CC = CLI.CallConv;

  // Issue CALLSEQ_END
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
      .addImm(NumBytes).addImm(0);

  // Now the return value.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));

    // Only handle a single return value.
    if (RVLocs.size() != 1)
      return false;

    // Copy all of the result registers out of their specified physreg.
    MVT CopyVT = RVLocs[0].getValVT();
    unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(RVLocs[0].getLocReg());
    CLI.InRegs.push_back(RVLocs[0].getLocReg());

    CLI.ResultReg = ResultReg;
    CLI.NumResultRegs = 1;
  }

  return true;
}
bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
  CallingConv::ID CC = CLI.CallConv;
  bool IsTailCall = CLI.IsTailCall;
  bool IsVarArg = CLI.IsVarArg;
  const Value *Callee = CLI.Callee;
  const char *SymName = CLI.SymName;

  if (!Callee && !SymName)
    return false;

  // Allow SelectionDAG isel to handle tail calls.
  if (IsTailCall)
    return false;

  CodeModel::Model CM = TM.getCodeModel();
  // Only support the small and large code model.
  if (CM != CodeModel::Small && CM != CodeModel::Large)
    return false;

  // FIXME: Add large code model support for ELF.
  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
    return false;

  // Let SDISel handle vararg functions.
  if (IsVarArg)
    return false;

  // FIXME: Only handle *simple* calls for now.
  MVT RetVT;
  if (CLI.RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(CLI.RetTy, RetVT))
    return false;

  for (auto Flag : CLI.OutFlags)
    if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
      return false;

  // Set up the argument vectors.
  SmallVector<MVT, 16> OutVTs;
  OutVTs.reserve(CLI.OutVals.size());

  for (auto *Val : CLI.OutVals) {
    MVT VT;
    if (!isTypeLegal(Val->getType(), VT) &&
        !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
      return false;

    // We don't handle vector parameters yet.
    if (VT.isVector() || VT.getSizeInBits() > 64)
      return false;

    OutVTs.push_back(VT);
  }

  Address Addr;
  if (Callee && !computeCallAddress(Callee, Addr))
    return false;

  // Handle the arguments now that we've gotten them.
  unsigned NumBytes;
  if (!processCallArgs(CLI, OutVTs, NumBytes))
    return false;

  // Issue the call.
  MachineInstrBuilder MIB;
  if (CM == CodeModel::Small) {
    const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
    if (SymName)
      MIB.addExternalSymbol(SymName, 0);
    else if (Addr.getGlobalValue())
      MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
    else if (Addr.getReg()) {
      unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
      MIB.addReg(Reg);
    } else
      return false;
  } else {
    unsigned CallReg = 0;
    if (SymName) {
      unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
              ADRPReg)
          .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);

      CallReg = createResultReg(&AArch64::GPR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
              CallReg)
          .addReg(ADRPReg)
          .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                             AArch64II::MO_NC);
    } else if (Addr.getGlobalValue())
      CallReg = materializeGV(Addr.getGlobalValue());
    else if (Addr.getReg())
      CallReg = Addr.getReg();

    if (!CallReg)
      return false;

    const MCInstrDesc &II = TII.get(AArch64::BLR);
    CallReg = constrainOperandRegClass(II, CallReg, 0);
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
  }

  // Add implicit physical register uses to the call.
  for (auto Reg : CLI.OutRegs)
    MIB.addReg(Reg, RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(CC));

  CLI.Call = MIB;

  // Finish off the call including any return values.
  return finishCall(CLI, RetVT, NumBytes);
}
bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
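  // Heuristic: only inline the copy when it can be done with at most four
  // loads/stores at the given alignment; larger copies go through a libcall.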
  if (Alignment)
    return Len / Alignment <= 4;
  else
    return Len <= 32;
}
bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
                                         uint64_t Len, unsigned Alignment) {
  // Make sure we don't bloat code by inlining very large memcpy's.
  if (!isMemCpySmall(Len, Alignment))
    return false;

  int64_t UnscaledOffset = 0;
  Address OrigDest = Dest;
  Address OrigSrc = Src;

  while (Len) {
    MVT VT;
    if (!Alignment || Alignment >= 8) {
      if (Len >= 8)      VT = MVT::i64;
      else if (Len >= 4) VT = MVT::i32;
      else if (Len >= 2) VT = MVT::i16;
      else               VT = MVT::i8;
    } else {
      // Bound based on alignment.
      if (Len >= 4 && Alignment == 4)      VT = MVT::i32;
      else if (Len >= 2 && Alignment == 2) VT = MVT::i16;
      else                                 VT = MVT::i8;
    }

    unsigned ResultReg = emitLoad(VT, VT, Src);
    if (!ResultReg)
      return false;

    if (!emitStore(VT, ResultReg, Dest))
      return false;

    int64_t Size = VT.getSizeInBits() / 8;
    Len -= Size;
    UnscaledOffset += Size;

    // We need to recompute the unscaled offset for each iteration.
    Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
    Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
  }

  return true;
}
/// \brief Check if it is possible to fold the condition from the XALU intrinsic
/// into the user. The condition code will only be updated on success.
bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
                                        const Instruction *I,
                                        const Value *Cond) {
  if (!isa<ExtractValueInst>(Cond))
    return false;

  const auto *EV = cast<ExtractValueInst>(Cond);
  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
    return false;

  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
  MVT RetVT;
  const Function *Callee = II->getCalledFunction();
  Type *RetTy =
      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
  if (!isTypeLegal(RetTy, RetVT))
    return false;

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return false;

  const Value *LHS = II->getArgOperand(0);
  const Value *RHS = II->getArgOperand(1);

  // Canonicalize immediate to the RHS.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
      isCommutativeIntrinsic(II))
    std::swap(LHS, RHS);

  // Simplify multiplies.
  unsigned IID = II->getIntrinsicID();
  switch (IID) {
  default:
    break;
  case Intrinsic::smul_with_overflow:
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 2)
        IID = Intrinsic::sadd_with_overflow;
    break;
  case Intrinsic::umul_with_overflow:
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 2)
        IID = Intrinsic::uadd_with_overflow;
    break;
  }

  AArch64CC::CondCode TmpCC;
  switch (IID) {
  default:
    return false;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
    TmpCC = AArch64CC::VS;
    break;
  case Intrinsic::uadd_with_overflow:
    TmpCC = AArch64CC::HS;
    break;
  case Intrinsic::usub_with_overflow:
    TmpCC = AArch64CC::LO;
    break;
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    TmpCC = AArch64CC::NE;
    break;
  }

  // Check if both instructions are in the same basic block.
  if (!isValueAvailable(II))
    return false;

  // Make sure nothing is in the way - check from II to I.
  BasicBlock::const_iterator Start = I;
  BasicBlock::const_iterator End = II;
  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
    // We only expect extractvalue instructions between the intrinsic and the
    // instruction to be selected.
    if (!isa<ExtractValueInst>(Itr))
      return false;

    // Check that the extractvalue operand comes from the intrinsic.
    const auto *EVI = cast<ExtractValueInst>(Itr);
    if (EVI->getAggregateOperand() != II)
      return false;
  }

  CC = TmpCC;
  return true;
}
bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
  // FIXME: Handle more intrinsics.
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::frameaddress: {
    MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
    MFI->setFrameAddressIsTaken(true);

    const AArch64RegisterInfo *RegInfo =
        static_cast<const AArch64RegisterInfo *>(
            TM.getSubtargetImpl()->getRegisterInfo());
    unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
    unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
    // Recursively load frame address
    // ldr x0, [fp]
    // ldr x0, [x0]
    // ldr x0, [x0]
    // ...
    unsigned DestReg;
    unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
    while (Depth--) {
      DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
                                SrcReg, /*IsKill=*/true, 0);
      assert(DestReg && "Unexpected LDR instruction emission failure.");
      SrcReg = DestReg;
    }

    updateValueMap(II, SrcReg);
    return true;
  }
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    const auto *MTI = cast<MemTransferInst>(II);
    // Don't handle volatile.
    if (MTI->isVolatile())
      return false;

    // Disable inlining for memmove before calls to computeAddress. Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
    bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
    if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
      // Small memcpy's are common enough that we want to do them without a
      // call if possible.
      uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
      unsigned Alignment = MTI->getAlignment();
      if (isMemCpySmall(Len, Alignment)) {
        Address Dest, Src;
        if (!computeAddress(MTI->getRawDest(), Dest) ||
            !computeAddress(MTI->getRawSource(), Src))
          return false;
        if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
          return true;
      }
    }

    if (!MTI->getLength()->getType()->isIntegerTy(64))
      return false;

    if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

    const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
    return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
  }
  case Intrinsic::memset: {
    const MemSetInst *MSI = cast<MemSetInst>(II);
    // Don't handle volatile.
    if (MSI->isVolatile())
      return false;

    if (!MSI->getLength()->getType()->isIntegerTy(64))
      return false;

    if (MSI->getDestAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

    return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
  }
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::pow: {
    MVT RetVT;
    if (!isTypeLegal(II->getType(), RetVT))
      return false;

    if (RetVT != MVT::f32 && RetVT != MVT::f64)
      return false;
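
    // Libcall table: one row per intrinsic (sin, cos, pow); column 0 holds
    // the f32 variant and column 1 the f64 variant.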
    static const RTLIB::Libcall LibCallTable[3][2] = {
      { RTLIB::SIN_F32, RTLIB::SIN_F64 },
      { RTLIB::COS_F32, RTLIB::COS_F64 },
      { RTLIB::POW_F32, RTLIB::POW_F64 }
    };
    RTLIB::Libcall LC;
    bool Is64Bit = RetVT == MVT::f64;
    switch (II->getIntrinsicID()) {
    default:
      llvm_unreachable("Unexpected intrinsic.");
    case Intrinsic::sin:
      LC = LibCallTable[0][Is64Bit];
      break;
    case Intrinsic::cos:
      LC = LibCallTable[1][Is64Bit];
      break;
    case Intrinsic::pow:
      LC = LibCallTable[2][Is64Bit];
      break;
    }

    ArgListTy Args;
    Args.reserve(II->getNumArgOperands());

    // Populate the argument list.
    for (auto &Arg : II->arg_operands()) {
      ArgListEntry Entry;
      Entry.Val = Arg;
      Entry.Ty = Arg->getType();
      Args.push_back(Entry);
    }

    CallLoweringInfo CLI;
    CLI.setCallee(TLI.getLibcallCallingConv(LC), II->getType(),
                  TLI.getLibcallName(LC), std::move(Args));
    if (!lowerCallTo(CLI))
      return false;
    updateValueMap(II, CLI.ResultReg);
    return true;
  }
  case Intrinsic::fabs: {
    MVT VT;
    if (!isTypeLegal(II->getType(), VT))
      return false;

    unsigned Opc;
    switch (VT.SimpleTy) {
    default: return false;
    case MVT::f32: Opc = AArch64::FABSSr; break;
    case MVT::f64: Opc = AArch64::FABSDr; break;
    }
    unsigned SrcReg = getRegForValue(II->getOperand(0));
    if (!SrcReg)
      return false;
    bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
        .addReg(SrcReg, getKillRegState(SrcRegIsKill));
    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::trap: {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
        .addImm(1);
    return true;
  }
  case Intrinsic::sqrt: {
    Type *RetTy = II->getCalledFunction()->getReturnType();
    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    unsigned Op0Reg = getRegForValue(II->getOperand(0));
    if (!Op0Reg)
      return false;
    bool Op0IsKill = hasTrivialKill(II->getOperand(0));

    unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
    if (!ResultReg)
      return false;

    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
    // This implements the basic lowering of the xalu with overflow intrinsics.
    const Function *Callee = II->getCalledFunction();
    auto *Ty = cast<StructType>(Callee->getReturnType());
    Type *RetTy = Ty->getTypeAtIndex(0U);

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    if (VT != MVT::i32 && VT != MVT::i64)
      return false;

    const Value *LHS = II->getArgOperand(0);
    const Value *RHS = II->getArgOperand(1);
    // Canonicalize immediate to the RHS.
    if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
        isCommutativeIntrinsic(II))
      std::swap(LHS, RHS);

    // Simplify multiplies.
    unsigned IID = II->getIntrinsicID();
    switch (IID) {
    default:
      break;
    case Intrinsic::smul_with_overflow:
      if (const auto *C = dyn_cast<ConstantInt>(RHS))
        if (C->getValue() == 2) {
          IID = Intrinsic::sadd_with_overflow;
          RHS = LHS;
        }
      break;
    case Intrinsic::umul_with_overflow:
      if (const auto *C = dyn_cast<ConstantInt>(RHS))
        if (C->getValue() == 2) {
          IID = Intrinsic::uadd_with_overflow;
          RHS = LHS;
        }
      break;
    }

    unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
    AArch64CC::CondCode CC = AArch64CC::Invalid;
    switch (IID) {
    default: llvm_unreachable("Unexpected intrinsic!");
    case Intrinsic::sadd_with_overflow:
      ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::VS;
      break;
    case Intrinsic::uadd_with_overflow:
      ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::HS;
      break;
    case Intrinsic::ssub_with_overflow:
      ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::VS;
      break;
    case Intrinsic::usub_with_overflow:
      ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::LO;
      break;
    case Intrinsic::smul_with_overflow: {
      CC = AArch64CC::NE;
      unsigned LHSReg = getRegForValue(LHS);
      if (!LHSReg)
        return false;
      bool LHSIsKill = hasTrivialKill(LHS);

      unsigned RHSReg = getRegForValue(RHS);
      if (!RHSReg)
        return false;
      bool RHSIsKill = hasTrivialKill(RHS);

      if (VT == MVT::i32) {
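        // For i32, compute the full 64-bit product with SMULL. The multiply
        // overflowed iff the high 32 bits of the product differ from the sign
        // extension of the low 32 bits; the SUBS below checks this by
        // comparing the high word against the low word arithmetically shifted
        // right by 31.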
        MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
        unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
                                       /*IsKill=*/false, 32);
        MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
                                            AArch64::sub_32);
        ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
                                              AArch64::sub_32);
        emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
                    AArch64_AM::ASR, 31, /*WantResult=*/false);
      } else {
        assert(VT == MVT::i64 && "Unexpected value type.");
        MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
        unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
                                        RHSReg, RHSIsKill);
        emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
                    AArch64_AM::ASR, 63, /*WantResult=*/false);
      }
      break;
    }
    case Intrinsic::umul_with_overflow: {
      CC = AArch64CC::NE;
      unsigned LHSReg = getRegForValue(LHS);
      if (!LHSReg)
        return false;
      bool LHSIsKill = hasTrivialKill(LHS);

      unsigned RHSReg = getRegForValue(RHS);
      if (!RHSReg)
        return false;
      bool RHSIsKill = hasTrivialKill(RHS);

      if (VT == MVT::i32) {
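        // For i32, compute the full 64-bit product with UMULL; the multiply
        // overflowed iff the high 32 bits of the product are nonzero, which
        // the SUBS against XZR of the product shifted right by 32 checks.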
        MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
        emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
                    /*IsKill=*/false, AArch64_AM::LSR, 32,
                    /*WantResult=*/false);
        MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
                                            AArch64::sub_32);
      } else {
        assert(VT == MVT::i64 && "Unexpected value type.");
        MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
        unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
                                        RHSReg, RHSIsKill);
        emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
                    /*IsKill=*/false, /*WantResult=*/false);
      }
      break;
    }
    }

    if (MulReg) {
      ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
    }

    ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
                                  AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
                                  /*IsKill=*/true, getInvertedCondCode(CC));
    (void)ResultReg2;
    assert((ResultReg1 + 1) == ResultReg2 &&
           "Nonconsecutive result registers.");
    updateValueMap(II, ResultReg1, 2);
    return true;
  }
  }
  return false;
}
bool AArch64FastISel::selectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
    return false;

  if (F.isVarArg())
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  if (Ret->getNumOperands() > 0) {
    CallingConv::ID CC = F.getCallingConv();
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
                                                     : RetCC_AArch64_AAPCS;
    CCInfo.AnalyzeReturn(Outs, RetCC);

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];
    const Value *RV = Ret->getOperand(0);

    // Don't bother handling odd stuff for now.
    if ((VA.getLocInfo() != CCValAssign::Full) &&
        (VA.getLocInfo() != CCValAssign::BCvt))
      return false;

    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    unsigned DestReg = VA.getLocReg();
    // Avoid a cross-class copy. This is very unlikely.
    if (!MRI.getRegClass(SrcReg)->contains(DestReg))
      return false;

    EVT RVEVT = TLI.getValueType(RV->getType());
    if (!RVEVT.isSimple())
      return false;

    // Vectors (of > 1 lane) in big endian need tricky handling.
    if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
        !Subtarget->isLittleEndian())
      return false;

    MVT RVVT = RVEVT.getSimpleVT();
    if (RVVT == MVT::f128)
      return false;

    MVT DestVT = VA.getValVT();
    // Special handling for extended integers.
    if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      bool IsZExt = Outs[0].Flags.isZExt();
      SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
      if (SrcReg == 0)
        return false;
    }

    // Make the copy.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(AArch64::RET_ReallyLR));
  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
    MIB.addReg(RetRegs[i], RegState::Implicit);
  return true;
}
bool AArch64FastISel::selectTrunc(const Instruction *I) {
  Type *DestTy = I->getType();
  Value *Op = I->getOperand(0);
  Type *SrcTy = Op->getType();

  EVT SrcEVT = TLI.getValueType(SrcTy, true);
  EVT DestEVT = TLI.getValueType(DestTy, true);
  if (!SrcEVT.isSimple())
    return false;
  if (!DestEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();

  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i8)
    return false;
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
      DestVT != MVT::i1)
    return false;

  unsigned SrcReg = getRegForValue(Op);
  if (!SrcReg)
    return false;
  bool SrcIsKill = hasTrivialKill(Op);

  // If we're truncating from i64 to a smaller non-legal type then generate an
  // AND. Otherwise, we know the high bits are undefined and a truncate only
  // generates a COPY. We cannot mark the source register also as the result
  // register, because this can incorrectly transfer the kill flag onto the
  // source register.
  unsigned ResultReg;
  if (SrcVT == MVT::i64) {
    uint64_t Mask = 0;
    switch (DestVT.SimpleTy) {
    default:
      // Trunc i64 to i32 is handled by the target-independent fast-isel.
      return false;
    case MVT::i1:  Mask = 0x1;    break;
    case MVT::i8:  Mask = 0xff;   break;
    case MVT::i16: Mask = 0xffff; break;
    }
    // Issue an extract_subreg to get the lower 32-bits.
    unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
                                                AArch64::sub_32);
    // Create the AND instruction which performs the actual truncation.
    ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
    assert(ResultReg && "Unexpected AND instruction emission failure.");
  } else {
    ResultReg = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(SrcReg, getKillRegState(SrcIsKill));
  }

  updateValueMap(I, ResultReg);
  return true;
}
unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
          DestVT == MVT::i64) &&
         "Unexpected value type.");
  // Handle i8 and i16 as i32.
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
    DestVT = MVT::i32;

  if (IsZExt) {
    unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
    assert(ResultReg && "Unexpected AND instruction emission failure.");
    if (DestVT == MVT::i64) {
      // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
      // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
      unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(AArch64::SUBREG_TO_REG), Reg64)
          .addImm(0)
          .addReg(ResultReg)
          .addImm(AArch64::sub_32);
      ResultReg = Reg64;
    }
    return ResultReg;
  } else {
    if (DestVT == MVT::i64) {
      // FIXME: We're SExt i1 to i64.
      return 0;
    }
    return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
                            /*TODO:IsKill=*/false, 0, 0);
  }
}
unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                     unsigned Op1, bool Op1IsKill) {
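  // There is no standalone MUL encoding on AArch64; multiply is emitted as
  // MADD (Rd = Rn * Rm + Ra) with the zero register as the addend.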
  unsigned Opc, ZReg;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    RetVT = MVT::i32;
    Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
  case MVT::i64:
    Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
  }

  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
                          ZReg, /*IsKill=*/true);
}
unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill) {
  if (RetVT != MVT::i64)
    return 0;

  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
                          Op0, Op0IsKill, Op1, Op1IsKill,
                          AArch64::XZR, /*IsKill=*/true);
}
unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill) {
  if (RetVT != MVT::i64)
    return 0;

  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
                          Op0, Op0IsKill, Op1, Op1IsKill,
                          AArch64::XZR, /*IsKill=*/true);
}
unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                                     unsigned Op1Reg, bool Op1IsKill) {
  unsigned Opc = 0;
  bool NeedTrunc = false;
  uint64_t Mask = 0;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
  case MVT::i32: Opc = AArch64::LSLVWr; break;
  case MVT::i64: Opc = AArch64::LSLVXr; break;
  }

  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  if (NeedTrunc) {
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
    Op1IsKill = true;
  }
  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
                                       Op1IsKill);
  if (NeedTrunc)
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  return ResultReg;
}
unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     bool Op0IsKill, uint64_t Shift,
                                     bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      unsigned ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0, getKillRegState(Op0IsKill));
      return ResultReg;
    } else
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
  //
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 4
  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
  //
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 8
  // Wd<32+7-24,32-24> = Wn<7:0>
  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
  //
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 12
  // Wd<32+3-20,32-20> = Wn<3:0>
  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext

  unsigned ImmR = RegSize - Shift;
  // Limit the width to the length of the source type.
  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    unsigned TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0, getKillRegState(Op0IsKill))
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
    Op0IsKill = true;
  }
  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
}
unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                                     unsigned Op1Reg, bool Op1IsKill) {
  unsigned Opc = 0;
  bool NeedTrunc = false;
  uint64_t Mask = 0;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
  case MVT::i32: Opc = AArch64::LSRVWr; break;
  case MVT::i64: Opc = AArch64::LSRVXr; break;
  }

  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  if (NeedTrunc) {
    Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
    Op0IsKill = Op1IsKill = true;
  }
  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
                                       Op1IsKill);
  if (NeedTrunc)
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  return ResultReg;
}

unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     bool Op0IsKill, uint64_t Shift,
                                     bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      unsigned ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0, getKillRegState(Op0IsKill));
      return ResultReg;
    } else
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 4
  // Wd<7-4:0> = Wn<7:4>
  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 8
  // Wd<7-7,0> = Wn<7:7>
  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 12
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  if (Shift >= SrcBits && IsZExt)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  // It is not possible to fold a sign-extend into the LShr instruction. In this
  // case emit a sign-extend.
  if (!IsZExt) {
    Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    if (!Op0)
      return 0;
    Op0IsKill = true;
    SrcVT = RetVT;
    SrcBits = SrcVT.getSizeInBits();
    IsZExt = true;
  }

  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  unsigned ImmS = SrcBits - 1;
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    unsigned TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0, getKillRegState(Op0IsKill))
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
    Op0IsKill = true;
  }
  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
}
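
// For illustration: the first example above, "%2 = lshr i16 %1, 4" of a
// zero-extended i8, yields ImmR = min(7, 4) = 4 and ImmS = 7, i.e.
// UBFM Wd, Wn, #4, #7, which the assembler prints as
//   ubfx wd, wn, #4, #4
// extracting bits 7:4 and zeroing everything above them.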

unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                                     unsigned Op1Reg, bool Op1IsKill) {
  unsigned Opc = 0;
  bool NeedTrunc = false;
  uint64_t Mask = 0;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
  case MVT::i32: Opc = AArch64::ASRVWr; break;
  case MVT::i64: Opc = AArch64::ASRVXr; break;
  }

  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  if (NeedTrunc) {
    Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
    Op0IsKill = Op1IsKill = true;
  }
  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
                                       Op1IsKill);
  if (NeedTrunc)
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  return ResultReg;
}

unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     bool Op0IsKill, uint64_t Shift,
                                     bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      unsigned ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0, getKillRegState(Op0IsKill));
      return ResultReg;
    } else
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 4
  // Wd<7-4:0> = Wn<7:4>
  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 8
  // Wd<7-7,0> = Wn<7:7>
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 12
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  if (Shift >= SrcBits && IsZExt)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  unsigned ImmS = SrcBits - 1;
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    unsigned TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0, getKillRegState(Op0IsKill))
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
    Op0IsKill = true;
  }
  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
}
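
// For illustration: the first example above, "%2 = ashr i16 %1, 4" of a
// sign-extended i8, yields ImmR = min(7, 4) = 4 and ImmS = 7, i.e.
// SBFM Wd, Wn, #4, #7, which the assembler prints as
//   sbfx wd, wn, #4, #4
// extracting bits 7:4 and replicating bit 7 into the upper bits.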

unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                     bool IsZExt) {
  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");

  // FastISel does not have plumbing to deal with extensions where the SrcVT or
  // DestVT are odd things, so test to make sure that they are both types we can
  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
  // bail out to SelectionDAG.
  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
       (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
      ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
       (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
    return 0;

  unsigned Opc;
  unsigned Imm = 0;

  switch (SrcVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    return emiti1Ext(SrcReg, DestVT, IsZExt);
  case MVT::i8:
    if (DestVT == MVT::i64)
      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 7;
    break;
  case MVT::i16:
    if (DestVT == MVT::i64)
      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 15;
    break;
  case MVT::i32:
    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
    Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    Imm = 31;
    break;
  }

  // Handle i8 and i16 as i32.
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
    DestVT = MVT::i32;
  else if (DestVT == MVT::i64) {
    unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Src64)
        .addImm(0)
        .addReg(SrcReg)
        .addImm(AArch64::sub_32);
    SrcReg = Src64;
  }

  const TargetRegisterClass *RC =
      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
}
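
// For illustration: a zero-extend of i8 to i32 comes out as
// UBFM Wd, Wn, #0, #7, printed by the assembler as "uxtb wd, wn"; a
// sign-extend of i16 to i64 becomes SBFM Xd, Xn, #0, #15, i.e. "sxth xd, wn",
// after the source has been wrapped with SUBREG_TO_REG as above.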

static bool isZExtLoad(const MachineInstr *LI) {
  switch (LI->getOpcode()) {
  default:
    return false;
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRBBroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRWroX:
  case AArch64::LDRBBroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRWroW:
    return true;
  }
}

static bool isSExtLoad(const MachineInstr *LI) {
  switch (LI->getOpcode()) {
  default:
    return false;
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSWi:
  case AArch64::LDRSBWui:
  case AArch64::LDRSHWui:
  case AArch64::LDRSBXui:
  case AArch64::LDRSHXui:
  case AArch64::LDRSWui:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
    return true;
  }
}

bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
                                         MVT SrcVT) {
  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
  if (!LI || !LI->hasOneUse())
    return false;

  // Check if the load instruction has already been selected.
  unsigned Reg = lookUpRegForValue(LI);
  if (!Reg)
    return false;

  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
  if (!MI)
    return false;

  // Check if the correct load instruction has been emitted - SelectionDAG might
  // have emitted a zero-extending load, but we need a sign-extending load.
  bool IsZExt = isa<ZExtInst>(I);
  const auto *LoadMI = MI;
  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
      LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
    unsigned LoadReg = MI->getOperand(1).getReg();
    LoadMI = MRI.getUniqueVRegDef(LoadReg);
    assert(LoadMI && "Expected valid instruction");
  }
  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
    return false;

  // Nothing to be done.
  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
    updateValueMap(I, Reg);
    return true;
  }

  if (IsZExt) {
    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(0)
        .addReg(Reg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    Reg = Reg64;
  } else {
    assert((MI->getOpcode() == TargetOpcode::COPY &&
            MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
           "Expected copy instruction");
    Reg = MI->getOperand(1).getReg();
    MI->eraseFromParent();
  }
  updateValueMap(I, Reg);
  return true;
}
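
// For illustration: given IR roughly like
//   %1 = load i8* %p    (already selected to a zero-extending "ldrb")
//   %2 = zext i8 %1 to i64
// the extend is free, since the load has already zeroed the upper bits, so
// instead of a UBFM we only wrap the 32-bit result in a SUBREG_TO_REG to
// produce the 64-bit value.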

bool AArch64FastISel::selectIntExt(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  MVT RetVT;
  MVT SrcVT;
  if (!isTypeSupported(I->getType(), RetVT))
    return false;

  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
    return false;

  // Try to optimize already sign-/zero-extended values from load instructions.
  if (optimizeIntExtLoad(I, RetVT, SrcVT))
    return true;

  unsigned SrcReg = getRegForValue(I->getOperand(0));
  if (!SrcReg)
    return false;
  bool SrcIsKill = hasTrivialKill(I->getOperand(0));

  // Try to optimize already sign-/zero-extended values from function arguments.
  bool IsZExt = isa<ZExtInst>(I);
  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
      if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
        unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(AArch64::SUBREG_TO_REG), ResultReg)
            .addImm(0)
            .addReg(SrcReg, getKillRegState(SrcIsKill))
            .addImm(AArch64::sub_32);
        SrcReg = ResultReg;
      }
      // Conservatively clear all kill flags from all uses, because we are
      // replacing a sign-/zero-extend instruction at IR level with a nop at MI
      // level. The result of the instruction at IR level might have been
      // trivially dead, which is no longer true.
      unsigned UseReg = lookUpRegForValue(I);
      if (UseReg)
        MRI.clearKillFlags(UseReg);

      updateValueMap(I, SrcReg);
      return true;
    }
  }

  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
  EVT DestEVT = TLI.getValueType(I->getType(), true);
  if (!DestEVT.isSimple())
    return false;

  MVT DestVT = DestEVT.getSimpleVT();
  if (DestVT != MVT::i64 && DestVT != MVT::i32)
    return false;

  unsigned DivOpc;
  bool Is64bit = (DestVT == MVT::i64);
  switch (ISDOpcode) {
  default:
    return false;
  case ISD::SREM:
    DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
    break;
  case ISD::UREM:
    DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
    break;
  }
  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
  unsigned Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;
  bool Src0IsKill = hasTrivialKill(I->getOperand(0));

  unsigned Src1Reg = getRegForValue(I->getOperand(1));
  if (!Src1Reg)
    return false;
  bool Src1IsKill = hasTrivialKill(I->getOperand(1));

  const TargetRegisterClass *RC =
      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
                                     Src1Reg, /*IsKill=*/false);
  assert(QuotReg && "Unexpected DIV instruction emission failure.");
  // The remainder is computed as numerator - (quotient * denominator) using the
  // MSUB instruction.
  unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
                                        Src1Reg, Src1IsKill, Src0Reg,
                                        Src0IsKill);
  updateValueMap(I, ResultReg);
  return true;
}
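
// For illustration (register numbers arbitrary), "%r = srem i32 %a, %b" is
// emitted as the two-instruction sequence:
//   sdiv w8, w0, w1
//   msub w0, w8, w1, w0    // w0 - (w8 * w1)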

bool AArch64FastISel::selectMul(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectBinaryOp(I, ISD::MUL);

  const Value *Src0 = I->getOperand(0);
  const Value *Src1 = I->getOperand(1);
  if (const auto *C = dyn_cast<ConstantInt>(Src0))
    if (C->getValue().isPowerOf2())
      std::swap(Src0, Src1);

  // Try to simplify to a shift instruction.
  if (const auto *C = dyn_cast<ConstantInt>(Src1))
    if (C->getValue().isPowerOf2()) {
      uint64_t ShiftVal = C->getValue().logBase2();
      MVT SrcVT = VT;
      bool IsZExt = true;
      if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
        if (!isIntExtFree(ZExt)) {
          MVT VT;
          if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
            SrcVT = VT;
            IsZExt = true;
            Src0 = ZExt->getOperand(0);
          }
        }
      } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
        if (!isIntExtFree(SExt)) {
          MVT VT;
          if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
            SrcVT = VT;
            IsZExt = false;
            Src0 = SExt->getOperand(0);
          }
        }
      }

      unsigned Src0Reg = getRegForValue(Src0);
      if (!Src0Reg)
        return false;
      bool Src0IsKill = hasTrivialKill(Src0);

      unsigned ResultReg =
          emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);

      if (ResultReg) {
        updateValueMap(I, ResultReg);
        return true;
      }
    }

  unsigned Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;
  bool Src0IsKill = hasTrivialKill(I->getOperand(0));

  unsigned Src1Reg = getRegForValue(I->getOperand(1));
  if (!Src1Reg)
    return false;
  bool Src1IsKill = hasTrivialKill(I->getOperand(1));

  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
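
// For illustration: "%2 = mul i32 %1, 8" takes the shift path above and is
// emitted via emitLSL_ri as "lsl w0, w0, #3" (register numbers arbitrary)
// rather than materializing the constant 8 and issuing a MUL.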

bool AArch64FastISel::selectShift(const Instruction *I) {
  MVT RetVT;
  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
    return false;

  if (RetVT.isVector())
    return selectOperator(I, I->getOpcode());

  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
    unsigned ResultReg = 0;
    uint64_t ShiftVal = C->getZExtValue();
    MVT SrcVT = RetVT;
    bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true;
    const Value *Op0 = I->getOperand(0);
    if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
      if (!isIntExtFree(ZExt)) {
        MVT TmpVT;
        if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = true;
          Op0 = ZExt->getOperand(0);
        }
      }
    } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
      if (!isIntExtFree(SExt)) {
        MVT TmpVT;
        if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = false;
          Op0 = SExt->getOperand(0);
        }
      }
    }

    unsigned Op0Reg = getRegForValue(Op0);
    if (!Op0Reg)
      return false;
    bool Op0IsKill = hasTrivialKill(Op0);

    switch (I->getOpcode()) {
    default: llvm_unreachable("Unexpected instruction.");
    case Instruction::Shl:
      ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
      break;
    case Instruction::AShr:
      ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
      break;
    case Instruction::LShr:
      ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
      break;
    }
    if (!ResultReg)
      return false;

    updateValueMap(I, ResultReg);
    return true;
  }

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;
  bool Op0IsKill = hasTrivialKill(I->getOperand(0));

  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (!Op1Reg)
    return false;
  bool Op1IsKill = hasTrivialKill(I->getOperand(1));

  unsigned ResultReg = 0;
  switch (I->getOpcode()) {
  default: llvm_unreachable("Unexpected instruction.");
  case Instruction::Shl:
    ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  case Instruction::AShr:
    ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  case Instruction::LShr:
    ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  }

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectBitCast(const Instruction *I) {
  MVT RetVT, SrcVT;

  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
    return false;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  unsigned Opc;
  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
    Opc = AArch64::FMOVWSr;
  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
    Opc = AArch64::FMOVXDr;
  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
    Opc = AArch64::FMOVSWr;
  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
    Opc = AArch64::FMOVDXr;
  else
    return false;

  const TargetRegisterClass *RC = nullptr;
  switch (RetVT.SimpleTy) {
  default: llvm_unreachable("Unexpected value type.");
  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
  }
  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;
  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
  unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectFRem(const Instruction *I) {
  MVT RetVT;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  RTLIB::Libcall LC;
  switch (RetVT.SimpleTy) {
  default:
    return false;
  case MVT::f32:
    LC = RTLIB::REM_F32;
    break;
  case MVT::f64:
    LC = RTLIB::REM_F64;
    break;
  }

  ArgListTy Args;
  Args.reserve(I->getNumOperands());

  // Populate the argument list.
  for (auto &Arg : I->operands()) {
    ArgListEntry Entry;
    Entry.Val = Arg;
    Entry.Ty = Arg->getType();
    Args.push_back(Entry);
  }

  CallLoweringInfo CLI;
  CLI.setCallee(TLI.getLibcallCallingConv(LC), I->getType(),
                TLI.getLibcallName(LC), std::move(Args));
  if (!lowerCallTo(CLI))
    return false;
  updateValueMap(I, CLI.ResultReg);
  return true;
}
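
// For illustration: "%r = frem float %a, %b" is not lowered inline; it becomes
// a call to the fmodf libcall (RTLIB::REM_F32), with the operands passed as
// ordinary call arguments and the result taken from CLI.ResultReg.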

bool AArch64FastISel::selectSDiv(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  if (!isa<ConstantInt>(I->getOperand(1)))
    return selectBinaryOp(I, ISD::SDIV);

  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
      !(C.isPowerOf2() || (-C).isPowerOf2()))
    return selectBinaryOp(I, ISD::SDIV);

  unsigned Lg2 = C.countTrailingZeros();
  unsigned Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;
  bool Src0IsKill = hasTrivialKill(I->getOperand(0));

  if (cast<BinaryOperator>(I)->isExact()) {
    unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
    if (!ResultReg)
      return false;
    updateValueMap(I, ResultReg);
    return true;
  }

  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
  if (!AddReg)
    return false;

  // (Src0 < 0) ? Pow2 - 1 : 0;
  if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
    return false;

  unsigned SelectOpc;
  const TargetRegisterClass *RC;
  if (VT == MVT::i64) {
    SelectOpc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
  } else {
    SelectOpc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
  }
  unsigned SelectReg =
      fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
                       Src0IsKill, AArch64CC::LT);
  if (!SelectReg)
    return false;

  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
  // negate the result.
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  unsigned ResultReg;
  if (C.isNegative())
    ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
                              SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
  else
    ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
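
// For illustration (register numbers arbitrary), a non-exact
// "%r = sdiv i32 %x, 8" expands to:
//   add  w8, w0, #7          // Src0 + (Pow2 - 1)
//   cmp  w0, #0
//   csel w8, w8, w0, lt      // use the biased value only when Src0 < 0
//   asr  w0, w8, #3          // shift by log2(8)
// For a divisor of -8 the final step is "neg w0, w8, asr #3" instead.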

/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
/// have to duplicate it for AArch64, because otherwise we would fail during the
/// sign-extend emission.
std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
  unsigned IdxN = getRegForValue(Idx);
  if (IdxN == 0)
    // Unhandled operand. Halt "fast" selection and bail.
    return std::pair<unsigned, bool>(0, false);

  bool IdxNIsKill = hasTrivialKill(Idx);

  // If the index is smaller or larger than intptr_t, truncate or extend it.
  MVT PtrVT = TLI.getPointerTy();
  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
  if (IdxVT.bitsLT(PtrVT)) {
    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
    IdxNIsKill = true;
  } else if (IdxVT.bitsGT(PtrVT))
    llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
}

/// This is mostly a copy of the existing FastISel GEP code, but we have to
/// duplicate it for AArch64, because otherwise we would bail out even for
/// simple cases. This is because the standard fastEmit functions don't cover
/// MUL at all and ADD is lowered very inefficiently.
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
  unsigned N = getRegForValue(I->getOperand(0));
  if (!N)
    return false;
  bool NIsKill = hasTrivialKill(I->getOperand(0));

  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
  // into a single N = N + TotalOffset.
  uint64_t TotalOffs = 0;
  Type *Ty = I->getOperand(0)->getType();
  MVT VT = TLI.getPointerTy();
  for (auto OI = std::next(I->op_begin()), E = I->op_end(); OI != E; ++OI) {
    const Value *Idx = *OI;
    if (auto *StTy = dyn_cast<StructType>(Ty)) {
      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
      // N = N + Offset
      if (Field)
        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
      Ty = StTy->getElementType(Field);
    } else {
      Ty = cast<SequentialType>(Ty)->getElementType();
      // If this is a constant subscript, handle it quickly.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->isZero())
          continue;
        // N = N + Offset
        TotalOffs +=
            DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
        continue;
      }
      if (TotalOffs) {
        N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
        if (!N)
          return false;
        NIsKill = true;
        TotalOffs = 0;
      }

      // N = N + Idx * ElementSize;
      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
      std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
      unsigned IdxN = Pair.first;
      bool IdxNIsKill = Pair.second;
      if (!IdxN)
        return false;

      if (ElementSize != 1) {
        unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
        if (!C)
          return false;
        IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
        if (!IdxN)
          return false;
      }
      N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
      if (!N)
        return false;
    }
  }
  if (TotalOffs) {
    N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
    if (!N)
      return false;
  }
  updateValueMap(I, N);
  return true;
}
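
// For illustration: a GEP with only constant indices, such as
//   %f = getelementptr { i32, i32 }* %p, i64 1, i32 1
// folds completely into TotalOffs (8 + 4) and emits a single
// "add x0, x0, #12" via emitAdd_ri_; only variable indices need the
// mul/add sequence above.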

bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
  default:
    break;
  case Instruction::Add:
  case Instruction::Sub:
    return selectAddSub(I);
  case Instruction::Mul:
    return selectMul(I);
  case Instruction::SDiv:
    return selectSDiv(I);
  case Instruction::SRem:
    if (!selectBinaryOp(I, ISD::SREM))
      return selectRem(I, ISD::SREM);
    return true;
  case Instruction::URem:
    if (!selectBinaryOp(I, ISD::UREM))
      return selectRem(I, ISD::UREM);
    return true;
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return selectShift(I);
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    return selectLogicalOp(I);
  case Instruction::Br:
    return selectBranch(I);
  case Instruction::IndirectBr:
    return selectIndirectBr(I);
  case Instruction::BitCast:
    if (!FastISel::selectBitCast(I))
      return selectBitCast(I);
    return true;
  case Instruction::FPToSI:
    if (!selectCast(I, ISD::FP_TO_SINT))
      return selectFPToInt(I, /*Signed=*/true);
    return true;
  case Instruction::FPToUI:
    return selectFPToInt(I, /*Signed=*/false);
  case Instruction::ZExt:
  case Instruction::SExt:
    return selectIntExt(I);
  case Instruction::Trunc:
    if (!selectCast(I, ISD::TRUNCATE))
      return selectTrunc(I);
    return true;
  case Instruction::FPExt:
    return selectFPExt(I);
  case Instruction::FPTrunc:
    return selectFPTrunc(I);
  case Instruction::SIToFP:
    if (!selectCast(I, ISD::SINT_TO_FP))
      return selectIntToFP(I, /*Signed=*/true);
    return true;
  case Instruction::UIToFP:
    return selectIntToFP(I, /*Signed=*/false);
  case Instruction::Load:
    return selectLoad(I);
  case Instruction::Store:
    return selectStore(I);
  case Instruction::FCmp:
  case Instruction::ICmp:
    return selectCmp(I);
  case Instruction::Select:
    return selectSelect(I);
  case Instruction::Ret:
    return selectRet(I);
  case Instruction::FRem:
    return selectFRem(I);
  case Instruction::GetElementPtr:
    return selectGetElementPtr(I);
  }

  // fall-back to target-independent instruction selection.
  return selectOperator(I, I->getOpcode());
  // Silence warnings.
  (void)&CC_AArch64_DarwinPCS_VarArg;
}

namespace llvm {
llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
                                        const TargetLibraryInfo *LibInfo) {
  return new AArch64FastISel(FuncInfo, LibInfo);
}
}