lib/Target/AArch64/AArch64FastISel.cpp

   1 //===-- AArch6464FastISel.cpp - AArch64 FastISel implementation -----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the AArch64-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // AArch64GenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AArch64.h"
  17 #include "AArch64Subtarget.h"
  18 #include "AArch64TargetMachine.h"
  19 #include "MCTargetDesc/AArch64AddressingModes.h"
  20 #include "llvm/Analysis/BranchProbabilityInfo.h"
  21 #include "llvm/CodeGen/CallingConvLower.h"
  22 #include "llvm/CodeGen/FastISel.h"
  23 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  24 #include "llvm/CodeGen/MachineConstantPool.h"
  25 #include "llvm/CodeGen/MachineFrameInfo.h"
  26 #include "llvm/CodeGen/MachineInstrBuilder.h"
  27 #include "llvm/CodeGen/MachineRegisterInfo.h"
  28 #include "llvm/IR/CallingConv.h"
  29 #include "llvm/IR/DataLayout.h"
  30 #include "llvm/IR/DerivedTypes.h"
  31 #include "llvm/IR/Function.h"
  32 #include "llvm/IR/GetElementPtrTypeIterator.h"
  33 #include "llvm/IR/GlobalAlias.h"
  34 #include "llvm/IR/GlobalVariable.h"
  35 #include "llvm/IR/Instructions.h"
  36 #include "llvm/IR/IntrinsicInst.h"
  37 #include "llvm/IR/Operator.h"
  38 #include "llvm/Support/CommandLine.h"
  39 using namespace llvm;
  40
  41 namespace {
  42
  43 class AArch64FastISel final : public FastISel {
  44   class Address {
  45   public:
  46     typedef enum {
  47       RegBase,
  48       FrameIndexBase
  49     } BaseKind;
  50
  51   private:
  52     BaseKind Kind;
  53     AArch64_AM::ShiftExtendType ExtType;
  54     union {
  55       unsigned Reg;
  56       int FI;
  57     } Base;
  58     unsigned OffsetReg;
  59     unsigned Shift;
  60     int64_t Offset;
  61     const GlobalValue *GV;
  62
  63   public:
  64     Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
  65       OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
  66     void setKind(BaseKind K) { Kind = K; }
  67     BaseKind getKind() const { return Kind; }
  68     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
  69     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
  70     bool isRegBase() const { return Kind == RegBase; }
  71     bool isFIBase() const { return Kind == FrameIndexBase; }
  72     void setReg(unsigned Reg) {
  73       assert(isRegBase() && "Invalid base register access!");
  74       Base.Reg = Reg;
  75     }
  76     unsigned getReg() const {
  77       assert(isRegBase() && "Invalid base register access!");
  78       return Base.Reg;
  79     }
  80     void setOffsetReg(unsigned Reg) {
  81       assert(isRegBase() && "Invalid offset register access!");
  82       OffsetReg = Reg;
  83     }
  84     unsigned getOffsetReg() const {
  85       assert(isRegBase() && "Invalid offset register access!");
  86       return OffsetReg;
  87     }
  88     void setFI(unsigned FI) {
  89       assert(isFIBase() && "Invalid base frame index  access!");
  90       Base.FI = FI;
  91     }
  92     unsigned getFI() const {
  93       assert(isFIBase() && "Invalid base frame index access!");
  94       return Base.FI;
  95     }
  96     void setOffset(int64_t O) { Offset = O; }
  97     int64_t getOffset() { return Offset; }
  98     void setShift(unsigned S) { Shift = S; }
  99     unsigned getShift() { return Shift; }
 100
 101     void setGlobalValue(const GlobalValue *G) { GV = G; }
 102     const GlobalValue *getGlobalValue() { return GV; }
 103   };
 104
 105   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
 106   /// make the right decision when generating code for different targets.
 107   const AArch64Subtarget *Subtarget;
 108   LLVMContext *Context;
 109
 110   bool fastLowerArguments() override;
 111   bool fastLowerCall(CallLoweringInfo &CLI) override;
 112   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
 113
 114 private:
 115   // Selection routines.
 116   bool selectAddSub(const Instruction *I);
 117   bool selectLogicalOp(const Instruction *I);
 118   bool selectLoad(const Instruction *I);
 119   bool selectStore(const Instruction *I);
 120   bool selectBranch(const Instruction *I);
 121   bool selectIndirectBr(const Instruction *I);
 122   bool selectCmp(const Instruction *I);
 123   bool selectSelect(const Instruction *I);
 124   bool selectFPExt(const Instruction *I);
 125   bool selectFPTrunc(const Instruction *I);
 126   bool selectFPToInt(const Instruction *I, bool Signed);
 127   bool selectIntToFP(const Instruction *I, bool Signed);
 128   bool selectRem(const Instruction *I, unsigned ISDOpcode);
 129   bool selectRet(const Instruction *I);
 130   bool selectTrunc(const Instruction *I);
 131   bool selectIntExt(const Instruction *I);
 132   bool selectMul(const Instruction *I);
 133   bool selectShift(const Instruction *I);
 134   bool selectBitCast(const Instruction *I);
 135   bool selectFRem(const Instruction *I);
 136   bool selectSDiv(const Instruction *I);
 137
 138   // Utility helper routines.
 139   bool isTypeLegal(Type *Ty, MVT &VT);
 140   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
 141   bool isValueAvailable(const Value *V) const;
 142   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
 143   bool computeCallAddress(const Value *V, Address &Addr);
 144   bool simplifyAddress(Address &Addr, MVT VT);
 145   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 146                             unsigned Flags, unsigned ScaleFactor,
 147                             MachineMemOperand *MMO);
 148   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
 149   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
 150                           unsigned Alignment);
 151   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
 152                          const Value *Cond);
 153
 154   // Emit helper routines.
 155   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
 156                       const Value *RHS, bool SetFlags = false,
 157                       bool WantResult = true,  bool IsZExt = false);
 158   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
 159                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 160                          bool SetFlags = false, bool WantResult = true);
 161   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
 162                          bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
 163                          bool WantResult = true);
 164   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
 165                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 166                          AArch64_AM::ShiftExtendType ShiftType,
 167                          uint64_t ShiftImm, bool SetFlags = false,
 168                          bool WantResult = true);
 169   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
 170                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 171                           AArch64_AM::ShiftExtendType ExtType,
 172                           uint64_t ShiftImm, bool SetFlags = false,
 173                          bool WantResult = true);
 174
 175   // Emit functions.
 176   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
 177   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
 178   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 179   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
 180   bool emitLoad(MVT VT, unsigned &ResultReg, Address Addr, bool WantZExt = true,
 181                 MachineMemOperand *MMO = nullptr);
 182   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
 183                  MachineMemOperand *MMO = nullptr);
 184   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
 185   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
 186   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
 187                    bool SetFlags = false, bool WantResult = true,
 188                    bool IsZExt = false);
 189   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
 190                    bool SetFlags = false, bool WantResult = true,
 191                    bool IsZExt = false);
 192   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 193                        unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
 194   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 195                        unsigned RHSReg, bool RHSIsKill,
 196                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
 197                        bool WantResult = true);
 198   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
 199                          const Value *RHS);
 200   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 201                             bool LHSIsKill, uint64_t Imm);
 202   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 203                             bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 204                             uint64_t ShiftImm);
 205   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 206   unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 207                       unsigned Op1, bool Op1IsKill);
 208   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 209                         unsigned Op1, bool Op1IsKill);
 210   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 211                         unsigned Op1, bool Op1IsKill);
 212   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 213                       unsigned Op1Reg, bool Op1IsKill);
 214   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 215                       uint64_t Imm, bool IsZExt = true);
 216   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 217                       unsigned Op1Reg, bool Op1IsKill);
 218   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 219                       uint64_t Imm, bool IsZExt = true);
 220   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 221                       unsigned Op1Reg, bool Op1IsKill);
 222   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 223                       uint64_t Imm, bool IsZExt = false);
 224
 225   unsigned materializeInt(const ConstantInt *CI, MVT VT);
 226   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
 227   unsigned materializeGV(const GlobalValue *GV);
 228
 229   // Call handling routines.
 230 private:
 231   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
 232   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
 233                        unsigned &NumBytes);
 234   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
 235
 236 public:
 237   // Backend specific FastISel code.
 238   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
 239   unsigned fastMaterializeConstant(const Constant *C) override;
 240   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
 241
 242   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
 243                          const TargetLibraryInfo *LibInfo)
 244       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
 245     Subtarget = &TM.getSubtarget<AArch64Subtarget>();
 246     Context = &FuncInfo.Fn->getContext();
 247   }
 248
 249   bool fastSelectInstruction(const Instruction *I) override;
 250
 251 #include "AArch64GenFastISel.inc"
 252 };
 253
 254 } // end anonymous namespace
 255
 256 #include "AArch64GenCallingConv.inc"
 257
 258 /// \brief Check if the sign-/zero-extend will be a noop.
 259 static bool isIntExtFree(const Instruction *I) {
 260   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
 261          "Unexpected integer extend instruction.");
 262   bool IsZExt = isa<ZExtInst>(I);
 263
 264   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
 265     if (LI->hasOneUse())
 266       return true;
 267
 268   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
 269     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
 270       return true;
 271
 272   return false;
 273 }
 274
 275 /// \brief Determine the implicit scale factor that is applied by a memory
 276 /// operation for a given value type.
 277 static unsigned getImplicitScaleFactor(MVT VT) {
 278   switch (VT.SimpleTy) {
 279   default:
 280     return 0;    // invalid
 281   case MVT::i1:  // fall-through
 282   case MVT::i8:
 283     return 1;
 284   case MVT::i16:
 285     return 2;
 286   case MVT::i32: // fall-through
 287   case MVT::f32:
 288     return 4;
 289   case MVT::i64: // fall-through
 290   case MVT::f64:
 291     return 8;
 292   }
 293 }
 294
 295 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
 296   if (CC == CallingConv::WebKit_JS)
 297     return CC_AArch64_WebKit_JS;
 298   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 299 }
 300
 301 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
 302   assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
 303          "Alloca should always return a pointer.");
 304
 305   // Don't handle dynamic allocas.
 306   if (!FuncInfo.StaticAllocaMap.count(AI))
 307     return 0;
 308
 309   DenseMap<const AllocaInst *, int>::iterator SI =
 310       FuncInfo.StaticAllocaMap.find(AI);
 311
 312   if (SI != FuncInfo.StaticAllocaMap.end()) {
 313     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 314     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 315             ResultReg)
 316         .addFrameIndex(SI->second)
 317         .addImm(0)
 318         .addImm(0);
 319     return ResultReg;
 320   }
 321
 322   return 0;
 323 }
 324
 325 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
 326   if (VT > MVT::i64)
 327     return 0;
 328
 329   if (!CI->isZero())
 330     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
 331
 332   // Create a copy from the zero register to materialize a "0" value.
 333   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
 334                                                    : &AArch64::GPR32RegClass;
 335   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
 336   unsigned ResultReg = createResultReg(RC);
 337   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
 338           ResultReg).addReg(ZeroReg, getKillRegState(true));
 339   return ResultReg;
 340 }
 341
 342 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
 343   // Positive zero (+0.0) has to be materialized with a fmov from the zero
 344   // register, because the immediate version of fmov cannot encode zero.
 345   if (CFP->isNullValue())
 346     return fastMaterializeFloatZero(CFP);
 347
 348   if (VT != MVT::f32 && VT != MVT::f64)
 349     return 0;
 350
 351   const APFloat Val = CFP->getValueAPF();
 352   bool Is64Bit = (VT == MVT::f64);
 353   // This checks to see if we can use FMOV instructions to materialize
 354   // a constant, otherwise we have to materialize via the constant pool.
 355   if (TLI.isFPImmLegal(Val, VT)) {
 356     int Imm =
 357         Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
 358     assert((Imm != -1) && "Cannot encode floating-point constant.");
 359     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
 360     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
 361   }
 362
 363   // Materialize via constant pool.  MachineConstantPool wants an explicit
 364   // alignment.
 365   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
 366   if (Align == 0)
 367     Align = DL.getTypeAllocSize(CFP->getType());
 368
 369   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 370   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 371   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 372           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
 373
 374   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
 375   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 376   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 377       .addReg(ADRPReg)
 378       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 379   return ResultReg;
 380 }
 381
 382 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
 383   // We can't handle thread-local variables quickly yet.
 384   if (GV->isThreadLocal())
 385     return 0;
 386
 387   // MachO still uses GOT for large code-model accesses, but ELF requires
 388   // movz/movk sequences, which FastISel doesn't handle yet.
 389   if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
 390     return 0;
 391
 392   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
 393
 394   EVT DestEVT = TLI.getValueType(GV->getType(), true);
 395   if (!DestEVT.isSimple())
 396     return 0;
 397
 398   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 399   unsigned ResultReg;
 400
 401   if (OpFlags & AArch64II::MO_GOT) {
 402     // ADRP + LDRX
 403     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 404             ADRPReg)
 405       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
 406
 407     ResultReg = createResultReg(&AArch64::GPR64RegClass);
 408     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
 409             ResultReg)
 410       .addReg(ADRPReg)
 411       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
 412                         AArch64II::MO_NC);
 413   } else if (OpFlags & AArch64II::MO_CONSTPOOL) {
 414     // We can't handle addresses loaded from a constant pool quickly yet.
 415     return 0;
 416   } else {
 417     // ADRP + ADDX
 418     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 419             ADRPReg)
 420       .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
 421
 422     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 423     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 424             ResultReg)
 425       .addReg(ADRPReg)
 426       .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
 427       .addImm(0);
 428   }
 429   return ResultReg;
 430 }
 431
 432 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
 433   EVT CEVT = TLI.getValueType(C->getType(), true);
 434
 435   // Only handle simple types.
 436   if (!CEVT.isSimple())
 437     return 0;
 438   MVT VT = CEVT.getSimpleVT();
 439
 440   if (const auto *CI = dyn_cast<ConstantInt>(C))
 441     return materializeInt(CI, VT);
 442   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 443     return materializeFP(CFP, VT);
 444   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
 445     return materializeGV(GV);
 446
 447   return 0;
 448 }
 449
 450 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
 451   assert(CFP->isNullValue() &&
 452          "Floating-point constant is not a positive zero.");
 453   MVT VT;
 454   if (!isTypeLegal(CFP->getType(), VT))
 455     return 0;
 456
 457   if (VT != MVT::f32 && VT != MVT::f64)
 458     return 0;
 459
 460   bool Is64Bit = (VT == MVT::f64);
 461   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
 462   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
 463   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
 464 }
 465
 466 /// \brief Check if the multiply is by a power-of-2 constant.
 467 static bool isMulPowOf2(const Value *I) {
 468   if (const auto *MI = dyn_cast<MulOperator>(I)) {
 469     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
 470       if (C->getValue().isPowerOf2())
 471         return true;
 472     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
 473       if (C->getValue().isPowerOf2())
 474         return true;
 475   }
 476   return false;
 477 }
 478
 479 // Computes the address to get to an object.
 480 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
 481 {
 482   const User *U = nullptr;
 483   unsigned Opcode = Instruction::UserOp1;
 484   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
 485     // Don't walk into other basic blocks unless the object is an alloca from
 486     // another block, otherwise it may not have a virtual register assigned.
 487     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
 488         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 489       Opcode = I->getOpcode();
 490       U = I;
 491     }
 492   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
 493     Opcode = C->getOpcode();
 494     U = C;
 495   }
 496
 497   if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
 498     if (Ty->getAddressSpace() > 255)
 499       // Fast instruction selection doesn't support the special
 500       // address spaces.
 501       return false;
 502
 503   switch (Opcode) {
 504   default:
 505     break;
 506   case Instruction::BitCast: {
 507     // Look through bitcasts.
 508     return computeAddress(U->getOperand(0), Addr, Ty);
 509   }
 510   case Instruction::IntToPtr: {
 511     // Look past no-op inttoptrs.
 512     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 513       return computeAddress(U->getOperand(0), Addr, Ty);
 514     break;
 515   }
 516   case Instruction::PtrToInt: {
 517     // Look past no-op ptrtoints.
 518     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
 519       return computeAddress(U->getOperand(0), Addr, Ty);
 520     break;
 521   }
 522   case Instruction::GetElementPtr: {
 523     Address SavedAddr = Addr;
 524     uint64_t TmpOffset = Addr.getOffset();
 525
 526     // Iterate through the GEP folding the constants into offsets where
 527     // we can.
 528     gep_type_iterator GTI = gep_type_begin(U);
 529     for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
 530          ++i, ++GTI) {
 531       const Value *Op = *i;
 532       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
 533         const StructLayout *SL = DL.getStructLayout(STy);
 534         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
 535         TmpOffset += SL->getElementOffset(Idx);
 536       } else {
 537         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
 538         for (;;) {
 539           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
 540             // Constant-offset addressing.
 541             TmpOffset += CI->getSExtValue() * S;
 542             break;
 543           }
 544           if (canFoldAddIntoGEP(U, Op)) {
 545             // A compatible add with a constant operand. Fold the constant.
 546             ConstantInt *CI =
 547                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
 548             TmpOffset += CI->getSExtValue() * S;
 549             // Iterate on the other operand.
 550             Op = cast<AddOperator>(Op)->getOperand(0);
 551             continue;
 552           }
 553           // Unsupported
 554           goto unsupported_gep;
 555         }
 556       }
 557     }
 558
 559     // Try to grab the base operand now.
 560     Addr.setOffset(TmpOffset);
 561     if (computeAddress(U->getOperand(0), Addr, Ty))
 562       return true;
 563
 564     // We failed, restore everything and try the other options.
 565     Addr = SavedAddr;
 566
 567   unsupported_gep:
 568     break;
 569   }
 570   case Instruction::Alloca: {
 571     const AllocaInst *AI = cast<AllocaInst>(Obj);
 572     DenseMap<const AllocaInst *, int>::iterator SI =
 573         FuncInfo.StaticAllocaMap.find(AI);
 574     if (SI != FuncInfo.StaticAllocaMap.end()) {
 575       Addr.setKind(Address::FrameIndexBase);
 576       Addr.setFI(SI->second);
 577       return true;
 578     }
 579     break;
 580   }
 581   case Instruction::Add: {
 582     // Adds of constants are common and easy enough.
 583     const Value *LHS = U->getOperand(0);
 584     const Value *RHS = U->getOperand(1);
 585
 586     if (isa<ConstantInt>(LHS))
 587       std::swap(LHS, RHS);
 588
 589     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 590       Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
 591       return computeAddress(LHS, Addr, Ty);
 592     }
 593
 594     Address Backup = Addr;
 595     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
 596       return true;
 597     Addr = Backup;
 598
 599     break;
 600   }
 601   case Instruction::Shl: {
 602     if (Addr.getOffsetReg())
 603       break;
 604
 605     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
 606     if (!CI)
 607       break;
 608
 609     unsigned Val = CI->getZExtValue();
 610     if (Val < 1 || Val > 3)
 611       break;
 612
 613     uint64_t NumBytes = 0;
 614     if (Ty && Ty->isSized()) {
 615       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 616       NumBytes = NumBits / 8;
 617       if (!isPowerOf2_64(NumBits))
 618         NumBytes = 0;
 619     }
 620
 621     if (NumBytes != (1ULL << Val))
 622       break;
 623
 624     Addr.setShift(Val);
 625     Addr.setExtendType(AArch64_AM::LSL);
 626
 627     const Value *Src = U->getOperand(0);
 628     if (const auto *I = dyn_cast<Instruction>(Src))
 629       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 630         Src = I;
 631
 632     // Fold the zext or sext when it won't become a noop.
 633     if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
 634       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 635           Addr.setExtendType(AArch64_AM::UXTW);
 636           Src = ZE->getOperand(0);
 637       }
 638     } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
 639       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
 640         Addr.setExtendType(AArch64_AM::SXTW);
 641         Src = SE->getOperand(0);
 642       }
 643     }
 644
 645     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
 646       if (AI->getOpcode() == Instruction::And) {
 647         const Value *LHS = AI->getOperand(0);
 648         const Value *RHS = AI->getOperand(1);
 649
 650         if (const auto *C = dyn_cast<ConstantInt>(LHS))
 651           if (C->getValue() == 0xffffffff)
 652             std::swap(LHS, RHS);
 653
 654         if (const auto *C = dyn_cast<ConstantInt>(RHS))
 655           if (C->getValue() == 0xffffffff) {
 656             Addr.setExtendType(AArch64_AM::UXTW);
 657             unsigned Reg = getRegForValue(LHS);
 658             if (!Reg)
 659               return false;
 660             bool RegIsKill = hasTrivialKill(LHS);
 661             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
 662                                              AArch64::sub_32);
 663             Addr.setOffsetReg(Reg);
 664             return true;
 665           }
 666       }
 667
 668     unsigned Reg = getRegForValue(Src);
 669     if (!Reg)
 670       return false;
 671     Addr.setOffsetReg(Reg);
 672     return true;
 673   }
 674   case Instruction::Mul: {
 675     if (Addr.getOffsetReg())
 676       break;
 677
 678     if (!isMulPowOf2(U))
 679       break;
 680
 681     const Value *LHS = U->getOperand(0);
 682     const Value *RHS = U->getOperand(1);
 683
 684     // Canonicalize power-of-2 value to the RHS.
 685     if (const auto *C = dyn_cast<ConstantInt>(LHS))
 686       if (C->getValue().isPowerOf2())
 687         std::swap(LHS, RHS);
 688
 689     assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
 690     const auto *C = cast<ConstantInt>(RHS);
 691     unsigned Val = C->getValue().logBase2();
 692     if (Val < 1 || Val > 3)
 693       break;
 694
 695     uint64_t NumBytes = 0;
 696     if (Ty && Ty->isSized()) {
 697       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 698       NumBytes = NumBits / 8;
 699       if (!isPowerOf2_64(NumBits))
 700         NumBytes = 0;
 701     }
 702
 703     if (NumBytes != (1ULL << Val))
 704       break;
 705
 706     Addr.setShift(Val);
 707     Addr.setExtendType(AArch64_AM::LSL);
 708
 709     const Value *Src = LHS;
 710     if (const auto *I = dyn_cast<Instruction>(Src))
 711       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 712         Src = I;
 713
 714
 715     // Fold the zext or sext when it won't become a noop.
 716     if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
 717       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 718         Addr.setExtendType(AArch64_AM::UXTW);
 719         Src = ZE->getOperand(0);
 720       }
 721     } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
 722       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
 723         Addr.setExtendType(AArch64_AM::SXTW);
 724         Src = SE->getOperand(0);
 725       }
 726     }
 727
 728     unsigned Reg = getRegForValue(Src);
 729     if (!Reg)
 730       return false;
 731     Addr.setOffsetReg(Reg);
 732     return true;
 733   }
 734   case Instruction::And: {
 735     if (Addr.getOffsetReg())
 736       break;
 737
 738     if (DL.getTypeSizeInBits(Ty) != 8)
 739       break;
 740
 741     const Value *LHS = U->getOperand(0);
 742     const Value *RHS = U->getOperand(1);
 743
 744     if (const auto *C = dyn_cast<ConstantInt>(LHS))
 745       if (C->getValue() == 0xffffffff)
 746         std::swap(LHS, RHS);
 747
 748     if (const auto *C = dyn_cast<ConstantInt>(RHS))
 749       if (C->getValue() == 0xffffffff) {
 750         Addr.setShift(0);
 751         Addr.setExtendType(AArch64_AM::LSL);
 752         Addr.setExtendType(AArch64_AM::UXTW);
 753
 754         unsigned Reg = getRegForValue(LHS);
 755         if (!Reg)
 756           return false;
 757         bool RegIsKill = hasTrivialKill(LHS);
 758         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
 759                                          AArch64::sub_32);
 760         Addr.setOffsetReg(Reg);
 761         return true;
 762       }
 763     break;
 764   }
 765   } // end switch
 766
 767   if (Addr.getReg()) {
 768     if (!Addr.getOffsetReg()) {
 769       unsigned Reg = getRegForValue(Obj);
 770       if (!Reg)
 771         return false;
 772       Addr.setOffsetReg(Reg);
 773       return true;
 774     }
 775     return false;
 776   }
 777
 778   unsigned Reg = getRegForValue(Obj);
 779   if (!Reg)
 780     return false;
 781   Addr.setReg(Reg);
 782   return true;
 783 }
 784
 785 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
 786   const User *U = nullptr;
 787   unsigned Opcode = Instruction::UserOp1;
 788   bool InMBB = true;
 789
 790   if (const auto *I = dyn_cast<Instruction>(V)) {
 791     Opcode = I->getOpcode();
 792     U = I;
 793     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
 794   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
 795     Opcode = C->getOpcode();
 796     U = C;
 797   }
 798
 799   switch (Opcode) {
 800   default: break;
 801   case Instruction::BitCast:
 802     // Look past bitcasts if its operand is in the same BB.
 803     if (InMBB)
 804       return computeCallAddress(U->getOperand(0), Addr);
 805     break;
 806   case Instruction::IntToPtr:
 807     // Look past no-op inttoptrs if its operand is in the same BB.
 808     if (InMBB &&
 809         TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 810       return computeCallAddress(U->getOperand(0), Addr);
 811     break;
 812   case Instruction::PtrToInt:
 813     // Look past no-op ptrtoints if its operand is in the same BB.
 814     if (InMBB &&
 815         TLI.getValueType(U->getType()) == TLI.getPointerTy())
 816       return computeCallAddress(U->getOperand(0), Addr);
 817     break;
 818   }
 819
 820   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
 821     Addr.setGlobalValue(GV);
 822     return true;
 823   }
 824
 825   // If all else fails, try to materialize the value in a register.
 826   if (!Addr.getGlobalValue()) {
 827     Addr.setReg(getRegForValue(V));
 828     return Addr.getReg() != 0;
 829   }
 830
 831   return false;
 832 }
 833
 834
 835 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
 836   EVT evt = TLI.getValueType(Ty, true);
 837
 838   // Only handle simple types.
 839   if (evt == MVT::Other || !evt.isSimple())
 840     return false;
 841   VT = evt.getSimpleVT();
 842
 843   // This is a legal type, but it's not something we handle in fast-isel.
 844   if (VT == MVT::f128)
 845     return false;
 846
 847   // Handle all other legal types, i.e. a register that will directly hold this
 848   // value.
 849   return TLI.isTypeLegal(VT);
 850 }
 851
 852 /// \brief Determine if the value type is supported by FastISel.
 853 ///
 854 /// FastISel for AArch64 can handle more value types than are legal. This adds
 855 /// simple value type such as i1, i8, and i16.
 856 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
 857   if (Ty->isVectorTy() && !IsVectorAllowed)
 858     return false;
 859
 860   if (isTypeLegal(Ty, VT))
 861     return true;
 862
 863   // If this is a type than can be sign or zero-extended to a basic operation
 864   // go ahead and accept it now.
 865   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
 866     return true;
 867
 868   return false;
 869 }
 870
 871 bool AArch64FastISel::isValueAvailable(const Value *V) const {
 872   if (!isa<Instruction>(V))
 873     return true;
 874
 875   const auto *I = cast<Instruction>(V);
 876   if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 877     return true;
 878
 879   return false;
 880 }
 881
 882 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
 883   unsigned ScaleFactor = getImplicitScaleFactor(VT);
 884   if (!ScaleFactor)
 885     return false;
 886
 887   bool ImmediateOffsetNeedsLowering = false;
 888   bool RegisterOffsetNeedsLowering = false;
 889   int64_t Offset = Addr.getOffset();
 890   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
 891     ImmediateOffsetNeedsLowering = true;
 892   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
 893            !isUInt<12>(Offset / ScaleFactor))
 894     ImmediateOffsetNeedsLowering = true;
 895
 896   // Cannot encode an offset register and an immediate offset in the same
 897   // instruction. Fold the immediate offset into the load/store instruction and
 898   // emit an additonal add to take care of the offset register.
 899   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() &&
 900       Addr.getOffsetReg())
 901     RegisterOffsetNeedsLowering = true;
 902
 903   // Cannot encode zero register as base.
 904   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
 905     RegisterOffsetNeedsLowering = true;
 906
 907   // If this is a stack pointer and the offset needs to be simplified then put
 908   // the alloca address into a register, set the base type back to register and
 909   // continue. This should almost never happen.
 910   if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) {
 911     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 912     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 913             ResultReg)
 914       .addFrameIndex(Addr.getFI())
 915       .addImm(0)
 916       .addImm(0);
 917     Addr.setKind(Address::RegBase);
 918     Addr.setReg(ResultReg);
 919   }
 920
 921   if (RegisterOffsetNeedsLowering) {
 922     unsigned ResultReg = 0;
 923     if (Addr.getReg()) {
 924       if (Addr.getExtendType() == AArch64_AM::SXTW ||
 925           Addr.getExtendType() == AArch64_AM::UXTW   )
 926         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
 927                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
 928                                   /*TODO:IsKill=*/false, Addr.getExtendType(),
 929                                   Addr.getShift());
 930       else
 931         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
 932                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
 933                                   /*TODO:IsKill=*/false, AArch64_AM::LSL,
 934                                   Addr.getShift());
 935     } else {
 936       if (Addr.getExtendType() == AArch64_AM::UXTW)
 937         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
 938                                /*Op0IsKill=*/false, Addr.getShift(),
 939                                /*IsZExt=*/true);
 940       else if (Addr.getExtendType() == AArch64_AM::SXTW)
 941         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
 942                                /*Op0IsKill=*/false, Addr.getShift(),
 943                                /*IsZExt=*/false);
 944       else
 945         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
 946                                /*Op0IsKill=*/false, Addr.getShift());
 947     }
 948     if (!ResultReg)
 949       return false;
 950
 951     Addr.setReg(ResultReg);
 952     Addr.setOffsetReg(0);
 953     Addr.setShift(0);
 954     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
 955   }
 956
 957   // Since the offset is too large for the load/store instruction get the
 958   // reg+offset into a register.
 959   if (ImmediateOffsetNeedsLowering) {
 960     unsigned ResultReg;
 961     if (Addr.getReg()) {
 962       // Try to fold the immediate into the add instruction.
 963       if (Offset < 0)
 964         ResultReg = emitAddSub_ri(/*UseAdd=*/false, MVT::i64, Addr.getReg(),
 965                                   /*IsKill=*/false, -Offset);
 966       else
 967         ResultReg = emitAddSub_ri(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
 968                                   /*IsKill=*/false, Offset);
 969       if (!ResultReg) {
 970         unsigned ImmReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
 971         ResultReg = emitAddSub_rr(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
 972                                   /*IsKill=*/false, ImmReg, /*IsKill=*/true);
 973       }
 974     } else
 975       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
 976
 977     if (!ResultReg)
 978       return false;
 979     Addr.setReg(ResultReg);
 980     Addr.setOffset(0);
 981   }
 982   return true;
 983 }
 984
 985 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
 986                                            const MachineInstrBuilder &MIB,
 987                                            unsigned Flags,
 988                                            unsigned ScaleFactor,
 989                                            MachineMemOperand *MMO) {
 990   int64_t Offset = Addr.getOffset() / ScaleFactor;
 991   // Frame base works a bit differently. Handle it separately.
 992   if (Addr.isFIBase()) {
 993     int FI = Addr.getFI();
 994     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
 995     // and alignment should be based on the VT.
 996     MMO = FuncInfo.MF->getMachineMemOperand(
 997       MachinePointerInfo::getFixedStack(FI, Offset), Flags,
 998       MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
 999     // Now add the rest of the operands.
1000     MIB.addFrameIndex(FI).addImm(Offset);
1001   } else {
1002     assert(Addr.isRegBase() && "Unexpected address kind.");
1003     const MCInstrDesc &II = MIB->getDesc();
1004     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1005     Addr.setReg(
1006       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1007     Addr.setOffsetReg(
1008       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1009     if (Addr.getOffsetReg()) {
1010       assert(Addr.getOffset() == 0 && "Unexpected offset");
1011       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1012                       Addr.getExtendType() == AArch64_AM::SXTX;
1013       MIB.addReg(Addr.getReg());
1014       MIB.addReg(Addr.getOffsetReg());
1015       MIB.addImm(IsSigned);
1016       MIB.addImm(Addr.getShift() != 0);
1017     } else {
1018       MIB.addReg(Addr.getReg());
1019       MIB.addImm(Offset);
1020     }
1021   }
1022
1023   if (MMO)
1024     MIB.addMemOperand(MMO);
1025 }
1026
1027 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1028                                      const Value *RHS, bool SetFlags,
1029                                      bool WantResult,  bool IsZExt) {
1030   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1031   bool NeedExtend = false;
1032   switch (RetVT.SimpleTy) {
1033   default:
1034     return 0;
1035   case MVT::i1:
1036     NeedExtend = true;
1037     break;
1038   case MVT::i8:
1039     NeedExtend = true;
1040     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1041     break;
1042   case MVT::i16:
1043     NeedExtend = true;
1044     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1045     break;
1046   case MVT::i32:  // fall-through
1047   case MVT::i64:
1048     break;
1049   }
1050   MVT SrcVT = RetVT;
1051   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1052
1053   // Canonicalize immediates to the RHS first.
1054   if (UseAdd && isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1055     std::swap(LHS, RHS);
1056
1057   // Canonicalize mul by power of 2 to the RHS.
1058   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1059     if (isMulPowOf2(LHS))
1060       std::swap(LHS, RHS);
1061
1062   // Canonicalize shift immediate to the RHS.
1063   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1064     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1065       if (isa<ConstantInt>(SI->getOperand(1)))
1066         if (SI->getOpcode() == Instruction::Shl  ||
1067             SI->getOpcode() == Instruction::LShr ||
1068             SI->getOpcode() == Instruction::AShr   )
1069           std::swap(LHS, RHS);
1070
1071   unsigned LHSReg = getRegForValue(LHS);
1072   if (!LHSReg)
1073     return 0;
1074   bool LHSIsKill = hasTrivialKill(LHS);
1075
1076   if (NeedExtend)
1077     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1078
1079   unsigned ResultReg = 0;
1080   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1081     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1082     if (C->isNegative())
1083       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1084                                 SetFlags, WantResult);
1085     else
1086       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1087                                 WantResult);
1088   }
1089   if (ResultReg)
1090     return ResultReg;
1091
1092   // Only extend the RHS within the instruction if there is a valid extend type.
1093   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1094       isValueAvailable(RHS)) {
1095     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1096       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1097         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1098           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1099           if (!RHSReg)
1100             return 0;
1101           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1102           return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1103                                RHSIsKill, ExtendType, C->getZExtValue(),
1104                                SetFlags, WantResult);
1105         }
1106     unsigned RHSReg = getRegForValue(RHS);
1107     if (!RHSReg)
1108       return 0;
1109     bool RHSIsKill = hasTrivialKill(RHS);
1110     return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1111                          ExtendType, 0, SetFlags, WantResult);
1112   }
1113
1114   // Check if the mul can be folded into the instruction.
1115   if (RHS->hasOneUse() && isValueAvailable(RHS))
1116     if (isMulPowOf2(RHS)) {
1117       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1118       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1119
1120       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1121         if (C->getValue().isPowerOf2())
1122           std::swap(MulLHS, MulRHS);
1123
1124       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1125       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1126       unsigned RHSReg = getRegForValue(MulLHS);
1127       if (!RHSReg)
1128         return 0;
1129       bool RHSIsKill = hasTrivialKill(MulLHS);
1130       return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1131                            AArch64_AM::LSL, ShiftVal, SetFlags, WantResult);
1132     }
1133
1134   // Check if the shift can be folded into the instruction.
1135   if (RHS->hasOneUse() && isValueAvailable(RHS))
1136     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1137       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1138         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1139         switch (SI->getOpcode()) {
1140         default: break;
1141         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1142         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1143         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1144         }
1145         uint64_t ShiftVal = C->getZExtValue();
1146         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1147           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1148           if (!RHSReg)
1149             return 0;
1150           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1151           return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1152                                RHSIsKill, ShiftType, ShiftVal, SetFlags,
1153                                WantResult);
1154         }
1155       }
1156     }
1157
1158   unsigned RHSReg = getRegForValue(RHS);
1159   if (!RHSReg)
1160     return 0;
1161   bool RHSIsKill = hasTrivialKill(RHS);
1162
1163   if (NeedExtend)
1164     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1165
1166   return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1167                        SetFlags, WantResult);
1168 }
1169
1170 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1171                                         bool LHSIsKill, unsigned RHSReg,
1172                                         bool RHSIsKill, bool SetFlags,
1173                                         bool WantResult) {
1174   assert(LHSReg && RHSReg && "Invalid register number.");
1175
1176   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1177     return 0;
1178
1179   static const unsigned OpcTable[2][2][2] = {
1180     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1181       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1182     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1183       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1184   };
1185   bool Is64Bit = RetVT == MVT::i64;
1186   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1187   const TargetRegisterClass *RC =
1188       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1189   unsigned ResultReg;
1190   if (WantResult)
1191     ResultReg = createResultReg(RC);
1192   else
1193     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1194
1195   const MCInstrDesc &II = TII.get(Opc);
1196   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1197   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1198   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1199       .addReg(LHSReg, getKillRegState(LHSIsKill))
1200       .addReg(RHSReg, getKillRegState(RHSIsKill));
1201   return ResultReg;
1202 }
1203
1204 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1205                                         bool LHSIsKill, uint64_t Imm,
1206                                         bool SetFlags, bool WantResult) {
1207   assert(LHSReg && "Invalid register number.");
1208
1209   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1210     return 0;
1211
1212   unsigned ShiftImm;
1213   if (isUInt<12>(Imm))
1214     ShiftImm = 0;
1215   else if ((Imm & 0xfff000) == Imm) {
1216     ShiftImm = 12;
1217     Imm >>= 12;
1218   } else
1219     return 0;
1220
1221   static const unsigned OpcTable[2][2][2] = {
1222     { { AArch64::SUBWri,  AArch64::SUBXri  },
1223       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1224     { { AArch64::SUBSWri, AArch64::SUBSXri },
1225       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1226   };
1227   bool Is64Bit = RetVT == MVT::i64;
1228   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1229   const TargetRegisterClass *RC;
1230   if (SetFlags)
1231     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1232   else
1233     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1234   unsigned ResultReg;
1235   if (WantResult)
1236     ResultReg = createResultReg(RC);
1237   else
1238     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1239
1240   const MCInstrDesc &II = TII.get(Opc);
1241   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1242   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1243       .addReg(LHSReg, getKillRegState(LHSIsKill))
1244       .addImm(Imm)
1245       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1246   return ResultReg;
1247 }
1248
1249 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1250                                         bool LHSIsKill, unsigned RHSReg,
1251                                         bool RHSIsKill,
1252                                         AArch64_AM::ShiftExtendType ShiftType,
1253                                         uint64_t ShiftImm, bool SetFlags,
1254                                         bool WantResult) {
1255   assert(LHSReg && RHSReg && "Invalid register number.");
1256
1257   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1258     return 0;
1259
1260   static const unsigned OpcTable[2][2][2] = {
1261     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1262       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1263     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1264       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1265   };
1266   bool Is64Bit = RetVT == MVT::i64;
1267   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1268   const TargetRegisterClass *RC =
1269       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1270   unsigned ResultReg;
1271   if (WantResult)
1272     ResultReg = createResultReg(RC);
1273   else
1274     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1275
1276   const MCInstrDesc &II = TII.get(Opc);
1277   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1278   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1279   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1280       .addReg(LHSReg, getKillRegState(LHSIsKill))
1281       .addReg(RHSReg, getKillRegState(RHSIsKill))
1282       .addImm(getShifterImm(ShiftType, ShiftImm));
1283   return ResultReg;
1284 }
1285
1286 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1287                                         bool LHSIsKill, unsigned RHSReg,
1288                                         bool RHSIsKill,
1289                                         AArch64_AM::ShiftExtendType ExtType,
1290                                         uint64_t ShiftImm, bool SetFlags,
1291                                         bool WantResult) {
1292   assert(LHSReg && RHSReg && "Invalid register number.");
1293
1294   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1295     return 0;
1296
1297   static const unsigned OpcTable[2][2][2] = {
1298     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1299       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1300     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1301       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1302   };
1303   bool Is64Bit = RetVT == MVT::i64;
1304   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1305   const TargetRegisterClass *RC = nullptr;
1306   if (SetFlags)
1307     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1308   else
1309     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1310   unsigned ResultReg;
1311   if (WantResult)
1312     ResultReg = createResultReg(RC);
1313   else
1314     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1315
1316   const MCInstrDesc &II = TII.get(Opc);
1317   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1318   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1319   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1320       .addReg(LHSReg, getKillRegState(LHSIsKill))
1321       .addReg(RHSReg, getKillRegState(RHSIsKill))
1322       .addImm(getArithExtendImm(ExtType, ShiftImm));
1323   return ResultReg;
1324 }
1325
1326 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1327   Type *Ty = LHS->getType();
1328   EVT EVT = TLI.getValueType(Ty, true);
1329   if (!EVT.isSimple())
1330     return false;
1331   MVT VT = EVT.getSimpleVT();
1332
1333   switch (VT.SimpleTy) {
1334   default:
1335     return false;
1336   case MVT::i1:
1337   case MVT::i8:
1338   case MVT::i16:
1339   case MVT::i32:
1340   case MVT::i64:
1341     return emitICmp(VT, LHS, RHS, IsZExt);
1342   case MVT::f32:
1343   case MVT::f64:
1344     return emitFCmp(VT, LHS, RHS);
1345   }
1346 }
1347
1348 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1349                                bool IsZExt) {
1350   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1351                  IsZExt) != 0;
1352 }
1353
1354 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1355                                   uint64_t Imm) {
1356   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1357                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1358 }
1359
1360 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1361   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1362     return false;
1363
1364   // Check to see if the 2nd operand is a constant that we can encode directly
1365   // in the compare.
1366   bool UseImm = false;
1367   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1368     if (CFP->isZero() && !CFP->isNegative())
1369       UseImm = true;
1370
1371   unsigned LHSReg = getRegForValue(LHS);
1372   if (!LHSReg)
1373     return false;
1374   bool LHSIsKill = hasTrivialKill(LHS);
1375
1376   if (UseImm) {
1377     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1378     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1379         .addReg(LHSReg, getKillRegState(LHSIsKill));
1380     return true;
1381   }
1382
1383   unsigned RHSReg = getRegForValue(RHS);
1384   if (!RHSReg)
1385     return false;
1386   bool RHSIsKill = hasTrivialKill(RHS);
1387
1388   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1389   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1390       .addReg(LHSReg, getKillRegState(LHSIsKill))
1391       .addReg(RHSReg, getKillRegState(RHSIsKill));
1392   return true;
1393 }
1394
1395 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1396                                   bool SetFlags, bool WantResult, bool IsZExt) {
1397   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1398                     IsZExt);
1399 }
1400
1401 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1402                                   bool SetFlags, bool WantResult, bool IsZExt) {
1403   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1404                     IsZExt);
1405 }
1406
1407 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1408                                       bool LHSIsKill, unsigned RHSReg,
1409                                       bool RHSIsKill, bool WantResult) {
1410   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1411                        RHSIsKill, /*SetFlags=*/true, WantResult);
1412 }
1413
1414 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1415                                       bool LHSIsKill, unsigned RHSReg,
1416                                       bool RHSIsKill,
1417                                       AArch64_AM::ShiftExtendType ShiftType,
1418                                       uint64_t ShiftImm, bool WantResult) {
1419   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1420                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1421                        WantResult);
1422 }
1423
1424 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1425                                         const Value *LHS, const Value *RHS) {
1426   // Canonicalize immediates to the RHS first.
1427   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1428     std::swap(LHS, RHS);
1429
1430   // Canonicalize mul by power-of-2 to the RHS.
1431   if (LHS->hasOneUse() && isValueAvailable(LHS))
1432     if (isMulPowOf2(LHS))
1433       std::swap(LHS, RHS);
1434
1435   // Canonicalize shift immediate to the RHS.
1436   if (LHS->hasOneUse() && isValueAvailable(LHS))
1437     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1438       if (isa<ConstantInt>(SI->getOperand(1)))
1439         std::swap(LHS, RHS);
1440
1441   unsigned LHSReg = getRegForValue(LHS);
1442   if (!LHSReg)
1443     return 0;
1444   bool LHSIsKill = hasTrivialKill(LHS);
1445
1446   unsigned ResultReg = 0;
1447   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1448     uint64_t Imm = C->getZExtValue();
1449     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1450   }
1451   if (ResultReg)
1452     return ResultReg;
1453
1454   // Check if the mul can be folded into the instruction.
1455   if (RHS->hasOneUse() && isValueAvailable(RHS))
1456     if (isMulPowOf2(RHS)) {
1457       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1458       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1459
1460       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1461         if (C->getValue().isPowerOf2())
1462           std::swap(MulLHS, MulRHS);
1463
1464       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1465       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1466
1467       unsigned RHSReg = getRegForValue(MulLHS);
1468       if (!RHSReg)
1469         return 0;
1470       bool RHSIsKill = hasTrivialKill(MulLHS);
1471       return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1472                               RHSIsKill, ShiftVal);
1473     }
1474
1475   // Check if the shift can be folded into the instruction.
1476   if (RHS->hasOneUse() && isValueAvailable(RHS))
1477     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1478       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1479         uint64_t ShiftVal = C->getZExtValue();
1480         unsigned RHSReg = getRegForValue(SI->getOperand(0));
1481         if (!RHSReg)
1482           return 0;
1483         bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1484         return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1485                                 RHSIsKill, ShiftVal);
1486       }
1487
1488   unsigned RHSReg = getRegForValue(RHS);
1489   if (!RHSReg)
1490     return 0;
1491   bool RHSIsKill = hasTrivialKill(RHS);
1492
1493   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1494   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1495   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1496     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1497     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1498   }
1499   return ResultReg;
1500 }
1501
1502 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1503                                            unsigned LHSReg, bool LHSIsKill,
1504                                            uint64_t Imm) {
1505   assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1506          "ISD nodes are not consecutive!");
1507   static const unsigned OpcTable[3][2] = {
1508     { AArch64::ANDWri, AArch64::ANDXri },
1509     { AArch64::ORRWri, AArch64::ORRXri },
1510     { AArch64::EORWri, AArch64::EORXri }
1511   };
1512   const TargetRegisterClass *RC;
1513   unsigned Opc;
1514   unsigned RegSize;
1515   switch (RetVT.SimpleTy) {
1516   default:
1517     return 0;
1518   case MVT::i1:
1519   case MVT::i8:
1520   case MVT::i16:
1521   case MVT::i32: {
1522     unsigned Idx = ISDOpc - ISD::AND;
1523     Opc = OpcTable[Idx][0];
1524     RC = &AArch64::GPR32spRegClass;
1525     RegSize = 32;
1526     break;
1527   }
1528   case MVT::i64:
1529     Opc = OpcTable[ISDOpc - ISD::AND][1];
1530     RC = &AArch64::GPR64spRegClass;
1531     RegSize = 64;
1532     break;
1533   }
1534
1535   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1536     return 0;
1537
1538   unsigned ResultReg =
1539       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1540                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1541   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1542     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1543     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1544   }
1545   return ResultReg;
1546 }
1547
1548 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1549                                            unsigned LHSReg, bool LHSIsKill,
1550                                            unsigned RHSReg, bool RHSIsKill,
1551                                            uint64_t ShiftImm) {
1552   assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1553          "ISD nodes are not consecutive!");
1554   static const unsigned OpcTable[3][2] = {
1555     { AArch64::ANDWrs, AArch64::ANDXrs },
1556     { AArch64::ORRWrs, AArch64::ORRXrs },
1557     { AArch64::EORWrs, AArch64::EORXrs }
1558   };
1559   const TargetRegisterClass *RC;
1560   unsigned Opc;
1561   switch (RetVT.SimpleTy) {
1562   default:
1563     return 0;
1564   case MVT::i1:
1565   case MVT::i8:
1566   case MVT::i16:
1567   case MVT::i32:
1568     Opc = OpcTable[ISDOpc - ISD::AND][0];
1569     RC = &AArch64::GPR32RegClass;
1570     break;
1571   case MVT::i64:
1572     Opc = OpcTable[ISDOpc - ISD::AND][1];
1573     RC = &AArch64::GPR64RegClass;
1574     break;
1575   }
1576   unsigned ResultReg =
1577       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1578                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1579   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1580     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1581     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1582   }
1583   return ResultReg;
1584 }
1585
1586 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1587                                      uint64_t Imm) {
1588   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1589 }
1590
1591 bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
1592                                bool WantZExt, MachineMemOperand *MMO) {
1593   // Simplify this down to something we can handle.
1594   if (!simplifyAddress(Addr, VT))
1595     return false;
1596
1597   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1598   if (!ScaleFactor)
1599     llvm_unreachable("Unexpected value type.");
1600
1601   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1602   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1603   bool UseScaled = true;
1604   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1605     UseScaled = false;
1606     ScaleFactor = 1;
1607   }
1608
1609   static const unsigned GPOpcTable[2][4][4] = {
1610     // Sign-extend.
1611     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURSWi,
1612         AArch64::LDURXi  },
1613       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRSWui,
1614         AArch64::LDRXui  },
1615       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRSWroX,
1616         AArch64::LDRXroX },
1617       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRSWroW,
1618         AArch64::LDRXroW },
1619     },
1620     // Zero-extend.
1621     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1622         AArch64::LDURXi  },
1623       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1624         AArch64::LDRXui  },
1625       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1626         AArch64::LDRXroX },
1627       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1628         AArch64::LDRXroW }
1629     }
1630   };
1631
1632   static const unsigned FPOpcTable[4][2] = {
1633     { AArch64::LDURSi,  AArch64::LDURDi  },
1634     { AArch64::LDRSui,  AArch64::LDRDui  },
1635     { AArch64::LDRSroX, AArch64::LDRDroX },
1636     { AArch64::LDRSroW, AArch64::LDRDroW }
1637   };
1638
1639   unsigned Opc;
1640   const TargetRegisterClass *RC;
1641   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1642                       Addr.getOffsetReg();
1643   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1644   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1645       Addr.getExtendType() == AArch64_AM::SXTW)
1646     Idx++;
1647
1648   switch (VT.SimpleTy) {
1649   default:
1650     llvm_unreachable("Unexpected value type.");
1651   case MVT::i1: // Intentional fall-through.
1652   case MVT::i8:
1653     Opc = GPOpcTable[WantZExt][Idx][0];
1654     RC = &AArch64::GPR32RegClass;
1655     break;
1656   case MVT::i16:
1657     Opc = GPOpcTable[WantZExt][Idx][1];
1658     RC = &AArch64::GPR32RegClass;
1659     break;
1660   case MVT::i32:
1661     Opc = GPOpcTable[WantZExt][Idx][2];
1662     RC = WantZExt ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
1663     break;
1664   case MVT::i64:
1665     Opc = GPOpcTable[WantZExt][Idx][3];
1666     RC = &AArch64::GPR64RegClass;
1667     break;
1668   case MVT::f32:
1669     Opc = FPOpcTable[Idx][0];
1670     RC = &AArch64::FPR32RegClass;
1671     break;
1672   case MVT::f64:
1673     Opc = FPOpcTable[Idx][1];
1674     RC = &AArch64::FPR64RegClass;
1675     break;
1676   }
1677
1678   // Create the base instruction, then add the operands.
1679   ResultReg = createResultReg(RC);
1680   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1681                                     TII.get(Opc), ResultReg);
1682   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1683
1684   // For 32bit loads we do sign-extending loads to 64bit and then extract the
1685   // subreg. In the end this is just a NOOP.
1686   if (VT == MVT::i32 && !WantZExt)
1687     ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, /*IsKill=*/true,
1688                                            AArch64::sub_32);
1689
1690   // Loading an i1 requires special handling.
1691   if (VT == MVT::i1) {
1692     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1693     assert(ANDReg && "Unexpected AND instruction emission failure.");
1694     ResultReg = ANDReg;
1695   }
1696   return true;
1697 }
1698
1699 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1700   MVT VT;
1701   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1702     return false;
1703
1704   if (VT.isVector())
1705     return selectOperator(I, I->getOpcode());
1706
1707   unsigned ResultReg;
1708   switch (I->getOpcode()) {
1709   default:
1710     llvm_unreachable("Unexpected instruction.");
1711   case Instruction::Add:
1712     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1713     break;
1714   case Instruction::Sub:
1715     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1716     break;
1717   }
1718   if (!ResultReg)
1719     return false;
1720
1721   updateValueMap(I, ResultReg);
1722   return true;
1723 }
1724
1725 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1726   MVT VT;
1727   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1728     return false;
1729
1730   if (VT.isVector())
1731     return selectOperator(I, I->getOpcode());
1732
1733   unsigned ResultReg;
1734   switch (I->getOpcode()) {
1735   default:
1736     llvm_unreachable("Unexpected instruction.");
1737   case Instruction::And:
1738     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1739     break;
1740   case Instruction::Or:
1741     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1742     break;
1743   case Instruction::Xor:
1744     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1745     break;
1746   }
1747   if (!ResultReg)
1748     return false;
1749
1750   updateValueMap(I, ResultReg);
1751   return true;
1752 }
1753
1754 bool AArch64FastISel::selectLoad(const Instruction *I) {
1755   MVT VT;
1756   // Verify we have a legal type before going any further.  Currently, we handle
1757   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1758   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1759   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1760       cast<LoadInst>(I)->isAtomic())
1761     return false;
1762
1763   // See if we can handle this address.
1764   Address Addr;
1765   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1766     return false;
1767
1768   bool WantZExt = true;
1769   if (I->hasOneUse() && isa<SExtInst>(I->use_begin()->getUser()))
1770     WantZExt = false;
1771
1772   unsigned ResultReg;
1773   if (!emitLoad(VT, ResultReg, Addr, WantZExt, createMachineMemOperandFor(I)))
1774     return false;
1775
1776   updateValueMap(I, ResultReg);
1777   return true;
1778 }
1779
1780 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
1781                                 MachineMemOperand *MMO) {
1782   // Simplify this down to something we can handle.
1783   if (!simplifyAddress(Addr, VT))
1784     return false;
1785
1786   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1787   if (!ScaleFactor)
1788     llvm_unreachable("Unexpected value type.");
1789
1790   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1791   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1792   bool UseScaled = true;
1793   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1794     UseScaled = false;
1795     ScaleFactor = 1;
1796   }
1797
1798   static const unsigned OpcTable[4][6] = {
1799     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
1800       AArch64::STURSi,   AArch64::STURDi },
1801     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
1802       AArch64::STRSui,   AArch64::STRDui },
1803     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
1804       AArch64::STRSroX,  AArch64::STRDroX },
1805     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
1806       AArch64::STRSroW,  AArch64::STRDroW }
1807   };
1808
1809   unsigned Opc;
1810   bool VTIsi1 = false;
1811   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1812                       Addr.getOffsetReg();
1813   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1814   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1815       Addr.getExtendType() == AArch64_AM::SXTW)
1816     Idx++;
1817
1818   switch (VT.SimpleTy) {
1819   default: llvm_unreachable("Unexpected value type.");
1820   case MVT::i1:  VTIsi1 = true;
1821   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
1822   case MVT::i16: Opc = OpcTable[Idx][1]; break;
1823   case MVT::i32: Opc = OpcTable[Idx][2]; break;
1824   case MVT::i64: Opc = OpcTable[Idx][3]; break;
1825   case MVT::f32: Opc = OpcTable[Idx][4]; break;
1826   case MVT::f64: Opc = OpcTable[Idx][5]; break;
1827   }
1828
1829   // Storing an i1 requires special handling.
1830   if (VTIsi1 && SrcReg != AArch64::WZR) {
1831     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
1832     assert(ANDReg && "Unexpected AND instruction emission failure.");
1833     SrcReg = ANDReg;
1834   }
1835   // Create the base instruction, then add the operands.
1836   const MCInstrDesc &II = TII.get(Opc);
1837   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
1838   MachineInstrBuilder MIB =
1839       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
1840   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
1841
1842   return true;
1843 }
1844
1845 bool AArch64FastISel::selectStore(const Instruction *I) {
1846   MVT VT;
1847   const Value *Op0 = I->getOperand(0);
1848   // Verify we have a legal type before going any further.  Currently, we handle
1849   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1850   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1851   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) ||
1852       cast<StoreInst>(I)->isAtomic())
1853     return false;
1854
1855   // Get the value to be stored into a register. Use the zero register directly
1856   // when possible to avoid an unnecessary copy and a wasted register.
1857   unsigned SrcReg = 0;
1858   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
1859     if (CI->isZero())
1860       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1861   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
1862     if (CF->isZero() && !CF->isNegative()) {
1863       VT = MVT::getIntegerVT(VT.getSizeInBits());
1864       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1865     }
1866   }
1867
1868   if (!SrcReg)
1869     SrcReg = getRegForValue(Op0);
1870
1871   if (!SrcReg)
1872     return false;
1873
1874   // See if we can handle this address.
1875   Address Addr;
1876   if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
1877     return false;
1878
1879   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
1880     return false;
1881   return true;
1882 }
1883
1884 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
1885   switch (Pred) {
1886   case CmpInst::FCMP_ONE:
1887   case CmpInst::FCMP_UEQ:
1888   default:
1889     // AL is our "false" for now. The other two need more compares.
1890     return AArch64CC::AL;
1891   case CmpInst::ICMP_EQ:
1892   case CmpInst::FCMP_OEQ:
1893     return AArch64CC::EQ;
1894   case CmpInst::ICMP_SGT:
1895   case CmpInst::FCMP_OGT:
1896     return AArch64CC::GT;
1897   case CmpInst::ICMP_SGE:
1898   case CmpInst::FCMP_OGE:
1899     return AArch64CC::GE;
1900   case CmpInst::ICMP_UGT:
1901   case CmpInst::FCMP_UGT:
1902     return AArch64CC::HI;
1903   case CmpInst::FCMP_OLT:
1904     return AArch64CC::MI;
1905   case CmpInst::ICMP_ULE:
1906   case CmpInst::FCMP_OLE:
1907     return AArch64CC::LS;
1908   case CmpInst::FCMP_ORD:
1909     return AArch64CC::VC;
1910   case CmpInst::FCMP_UNO:
1911     return AArch64CC::VS;
1912   case CmpInst::FCMP_UGE:
1913     return AArch64CC::PL;
1914   case CmpInst::ICMP_SLT:
1915   case CmpInst::FCMP_ULT:
1916     return AArch64CC::LT;
1917   case CmpInst::ICMP_SLE:
1918   case CmpInst::FCMP_ULE:
1919     return AArch64CC::LE;
1920   case CmpInst::FCMP_UNE:
1921   case CmpInst::ICMP_NE:
1922     return AArch64CC::NE;
1923   case CmpInst::ICMP_UGE:
1924     return AArch64CC::HS;
1925   case CmpInst::ICMP_ULT:
1926     return AArch64CC::LO;
1927   }
1928 }
1929
1930 /// \brief Check if the comparison against zero and the following branch can be
1931 /// folded into a single instruction (CBZ or CBNZ).
1932 static bool canFoldZeroIntoBranch(const CmpInst *CI) {
1933   CmpInst::Predicate Predicate = CI->getPredicate();
1934   if ((Predicate != CmpInst::ICMP_EQ) && (Predicate != CmpInst::ICMP_NE))
1935     return false;
1936
1937   Type *Ty = CI->getOperand(0)->getType();
1938   if (!Ty->isIntegerTy())
1939     return false;
1940
1941   unsigned BW = cast<IntegerType>(Ty)->getBitWidth();
1942   if (BW != 1 && BW != 8 && BW != 16 && BW != 32 && BW != 64)
1943     return false;
1944
1945   if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(0)))
1946     if (C->isNullValue())
1947       return true;
1948
1949   if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(1)))
1950     if (C->isNullValue())
1951       return true;
1952
1953   return false;
1954 }
1955
1956 bool AArch64FastISel::selectBranch(const Instruction *I) {
1957   const BranchInst *BI = cast<BranchInst>(I);
1958   if (BI->isUnconditional()) {
1959     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
1960     fastEmitBranch(MSucc, BI->getDebugLoc());
1961     return true;
1962   }
1963
1964   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1965   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1966
1967   AArch64CC::CondCode CC = AArch64CC::NE;
1968   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1969     if (CI->hasOneUse() && isValueAvailable(CI)) {
1970       // Try to optimize or fold the cmp.
1971       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1972       switch (Predicate) {
1973       default:
1974         break;
1975       case CmpInst::FCMP_FALSE:
1976         fastEmitBranch(FBB, DbgLoc);
1977         return true;
1978       case CmpInst::FCMP_TRUE:
1979         fastEmitBranch(TBB, DbgLoc);
1980         return true;
1981       }
1982
1983       // Try to take advantage of fallthrough opportunities.
1984       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1985         std::swap(TBB, FBB);
1986         Predicate = CmpInst::getInversePredicate(Predicate);
1987       }
1988
1989       // Try to optimize comparisons against zero.
1990       if (canFoldZeroIntoBranch(CI)) {
1991         const Value *LHS = CI->getOperand(0);
1992         const Value *RHS = CI->getOperand(1);
1993
1994         // Canonicalize zero values to the RHS.
1995         if (const auto *C = dyn_cast<ConstantInt>(LHS))
1996           if (C->isNullValue())
1997             std::swap(LHS, RHS);
1998
1999         static const unsigned OpcTable[2][2] = {
2000           {AArch64::CBZW,  AArch64::CBZX }, {AArch64::CBNZW, AArch64::CBNZX}
2001         };
2002         bool IsCmpNE = Predicate == CmpInst::ICMP_NE;
2003         bool Is64Bit = LHS->getType()->isIntegerTy(64);
2004         unsigned Opc = OpcTable[IsCmpNE][Is64Bit];
2005
2006         unsigned SrcReg = getRegForValue(LHS);
2007         if (!SrcReg)
2008           return false;
2009         bool SrcIsKill = hasTrivialKill(LHS);
2010
2011         // Emit the combined compare and branch instruction.
2012         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2013             .addReg(SrcReg, getKillRegState(SrcIsKill))
2014             .addMBB(TBB);
2015
2016         // Obtain the branch weight and add the TrueBB to the successor list.
2017         uint32_t BranchWeight = 0;
2018         if (FuncInfo.BPI)
2019           BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2020                                                      TBB->getBasicBlock());
2021         FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2022
2023         fastEmitBranch(FBB, DbgLoc);
2024         return true;
2025       }
2026
2027       // Emit the cmp.
2028       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2029         return false;
2030
2031       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2032       // instruction.
2033       CC = getCompareCC(Predicate);
2034       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2035       switch (Predicate) {
2036       default:
2037         break;
2038       case CmpInst::FCMP_UEQ:
2039         ExtraCC = AArch64CC::EQ;
2040         CC = AArch64CC::VS;
2041         break;
2042       case CmpInst::FCMP_ONE:
2043         ExtraCC = AArch64CC::MI;
2044         CC = AArch64CC::GT;
2045         break;
2046       }
2047       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2048
2049       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2050       if (ExtraCC != AArch64CC::AL) {
2051         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2052             .addImm(ExtraCC)
2053             .addMBB(TBB);
2054       }
2055
2056       // Emit the branch.
2057       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2058           .addImm(CC)
2059           .addMBB(TBB);
2060
2061       // Obtain the branch weight and add the TrueBB to the successor list.
2062       uint32_t BranchWeight = 0;
2063       if (FuncInfo.BPI)
2064         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2065                                                   TBB->getBasicBlock());
2066       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2067
2068       fastEmitBranch(FBB, DbgLoc);
2069       return true;
2070     }
2071   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
2072     MVT SrcVT;
2073     if (TI->hasOneUse() && isValueAvailable(TI) &&
2074         isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) {
2075       unsigned CondReg = getRegForValue(TI->getOperand(0));
2076       if (!CondReg)
2077         return false;
2078       bool CondIsKill = hasTrivialKill(TI->getOperand(0));
2079
2080       // Issue an extract_subreg to get the lower 32-bits.
2081       if (SrcVT == MVT::i64) {
2082         CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
2083                                              AArch64::sub_32);
2084         CondIsKill = true;
2085       }
2086
2087       unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
2088       assert(ANDReg && "Unexpected AND instruction emission failure.");
2089       emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
2090
2091       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2092         std::swap(TBB, FBB);
2093         CC = AArch64CC::EQ;
2094       }
2095       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2096           .addImm(CC)
2097           .addMBB(TBB);
2098
2099       // Obtain the branch weight and add the TrueBB to the successor list.
2100       uint32_t BranchWeight = 0;
2101       if (FuncInfo.BPI)
2102         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2103                                                   TBB->getBasicBlock());
2104       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2105
2106       fastEmitBranch(FBB, DbgLoc);
2107       return true;
2108     }
2109   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2110     uint64_t Imm = CI->getZExtValue();
2111     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2112     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2113         .addMBB(Target);
2114
2115     // Obtain the branch weight and add the target to the successor list.
2116     uint32_t BranchWeight = 0;
2117     if (FuncInfo.BPI)
2118       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2119                                                  Target->getBasicBlock());
2120     FuncInfo.MBB->addSuccessor(Target, BranchWeight);
2121     return true;
2122   } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2123     // Fake request the condition, otherwise the intrinsic might be completely
2124     // optimized away.
2125     unsigned CondReg = getRegForValue(BI->getCondition());
2126     if (!CondReg)
2127       return false;
2128
2129     // Emit the branch.
2130     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2131       .addImm(CC)
2132       .addMBB(TBB);
2133
2134     // Obtain the branch weight and add the TrueBB to the successor list.
2135     uint32_t BranchWeight = 0;
2136     if (FuncInfo.BPI)
2137       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2138                                                  TBB->getBasicBlock());
2139     FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2140
2141     fastEmitBranch(FBB, DbgLoc);
2142     return true;
2143   }
2144
2145   unsigned CondReg = getRegForValue(BI->getCondition());
2146   if (CondReg == 0)
2147     return false;
2148   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2149
2150   // We've been divorced from our compare!  Our block was split, and
2151   // now our compare lives in a predecessor block.  We musn't
2152   // re-compare here, as the children of the compare aren't guaranteed
2153   // live across the block boundary (we *could* check for this).
2154   // Regardless, the compare has been done in the predecessor block,
2155   // and it left a value for us in a virtual register.  Ergo, we test
2156   // the one-bit value left in the virtual register.
2157   emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0);
2158
2159   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2160     std::swap(TBB, FBB);
2161     CC = AArch64CC::EQ;
2162   }
2163
2164   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2165       .addImm(CC)
2166       .addMBB(TBB);
2167
2168   // Obtain the branch weight and add the TrueBB to the successor list.
2169   uint32_t BranchWeight = 0;
2170   if (FuncInfo.BPI)
2171     BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2172                                                TBB->getBasicBlock());
2173   FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2174
2175   fastEmitBranch(FBB, DbgLoc);
2176   return true;
2177 }
2178
2179 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2180   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2181   unsigned AddrReg = getRegForValue(BI->getOperand(0));
2182   if (AddrReg == 0)
2183     return false;
2184
2185   // Emit the indirect branch.
2186   const MCInstrDesc &II = TII.get(AArch64::BR);
2187   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2188   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2189
2190   // Make sure the CFG is up-to-date.
2191   for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
2192     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
2193
2194   return true;
2195 }
2196
2197 bool AArch64FastISel::selectCmp(const Instruction *I) {
2198   const CmpInst *CI = cast<CmpInst>(I);
2199
2200   // Try to optimize or fold the cmp.
2201   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2202   unsigned ResultReg = 0;
2203   switch (Predicate) {
2204   default:
2205     break;
2206   case CmpInst::FCMP_FALSE:
2207     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2208     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2209             TII.get(TargetOpcode::COPY), ResultReg)
2210         .addReg(AArch64::WZR, getKillRegState(true));
2211     break;
2212   case CmpInst::FCMP_TRUE:
2213     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2214     break;
2215   }
2216
2217   if (ResultReg) {
2218     updateValueMap(I, ResultReg);
2219     return true;
2220   }
2221
2222   // Emit the cmp.
2223   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2224     return false;
2225
2226   ResultReg = createResultReg(&AArch64::GPR32RegClass);
2227
2228   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2229   // condition codes are inverted, because they are used by CSINC.
2230   static unsigned CondCodeTable[2][2] = {
2231     { AArch64CC::NE, AArch64CC::VC },
2232     { AArch64CC::PL, AArch64CC::LE }
2233   };
2234   unsigned *CondCodes = nullptr;
2235   switch (Predicate) {
2236   default:
2237     break;
2238   case CmpInst::FCMP_UEQ:
2239     CondCodes = &CondCodeTable[0][0];
2240     break;
2241   case CmpInst::FCMP_ONE:
2242     CondCodes = &CondCodeTable[1][0];
2243     break;
2244   }
2245
2246   if (CondCodes) {
2247     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2248     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2249             TmpReg1)
2250         .addReg(AArch64::WZR, getKillRegState(true))
2251         .addReg(AArch64::WZR, getKillRegState(true))
2252         .addImm(CondCodes[0]);
2253     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2254             ResultReg)
2255         .addReg(TmpReg1, getKillRegState(true))
2256         .addReg(AArch64::WZR, getKillRegState(true))
2257         .addImm(CondCodes[1]);
2258
2259     updateValueMap(I, ResultReg);
2260     return true;
2261   }
2262
2263   // Now set a register based on the comparison.
2264   AArch64CC::CondCode CC = getCompareCC(Predicate);
2265   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2266   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2267   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2268           ResultReg)
2269       .addReg(AArch64::WZR, getKillRegState(true))
2270       .addReg(AArch64::WZR, getKillRegState(true))
2271       .addImm(invertedCC);
2272
2273   updateValueMap(I, ResultReg);
2274   return true;
2275 }
2276
2277 bool AArch64FastISel::selectSelect(const Instruction *I) {
2278   const SelectInst *SI = cast<SelectInst>(I);
2279
2280   EVT DestEVT = TLI.getValueType(SI->getType(), true);
2281   if (!DestEVT.isSimple())
2282     return false;
2283
2284   MVT DestVT = DestEVT.getSimpleVT();
2285   if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
2286       DestVT != MVT::f64)
2287     return false;
2288
2289   unsigned SelectOpc;
2290   const TargetRegisterClass *RC = nullptr;
2291   switch (DestVT.SimpleTy) {
2292   default: return false;
2293   case MVT::i32:
2294     SelectOpc = AArch64::CSELWr;    RC = &AArch64::GPR32RegClass; break;
2295   case MVT::i64:
2296     SelectOpc = AArch64::CSELXr;    RC = &AArch64::GPR64RegClass; break;
2297   case MVT::f32:
2298     SelectOpc = AArch64::FCSELSrrr; RC = &AArch64::FPR32RegClass; break;
2299   case MVT::f64:
2300     SelectOpc = AArch64::FCSELDrrr; RC = &AArch64::FPR64RegClass; break;
2301   }
2302
2303   const Value *Cond = SI->getCondition();
2304   bool NeedTest = true;
2305   AArch64CC::CondCode CC = AArch64CC::NE;
2306   if (foldXALUIntrinsic(CC, I, Cond))
2307     NeedTest = false;
2308
2309   unsigned CondReg = getRegForValue(Cond);
2310   if (!CondReg)
2311     return false;
2312   bool CondIsKill = hasTrivialKill(Cond);
2313
2314   if (NeedTest) {
2315     unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
2316     assert(ANDReg && "Unexpected AND instruction emission failure.");
2317     emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
2318   }
2319
2320   unsigned TrueReg = getRegForValue(SI->getTrueValue());
2321   bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
2322
2323   unsigned FalseReg = getRegForValue(SI->getFalseValue());
2324   bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
2325
2326   if (!TrueReg || !FalseReg)
2327     return false;
2328
2329   unsigned ResultReg = fastEmitInst_rri(SelectOpc, RC, TrueReg, TrueIsKill,
2330                                         FalseReg, FalseIsKill, CC);
2331   updateValueMap(I, ResultReg);
2332   return true;
2333 }
2334
2335 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2336   Value *V = I->getOperand(0);
2337   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2338     return false;
2339
2340   unsigned Op = getRegForValue(V);
2341   if (Op == 0)
2342     return false;
2343
2344   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2345   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2346           ResultReg).addReg(Op);
2347   updateValueMap(I, ResultReg);
2348   return true;
2349 }
2350
2351 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2352   Value *V = I->getOperand(0);
2353   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2354     return false;
2355
2356   unsigned Op = getRegForValue(V);
2357   if (Op == 0)
2358     return false;
2359
2360   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2361   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2362           ResultReg).addReg(Op);
2363   updateValueMap(I, ResultReg);
2364   return true;
2365 }
2366
2367 // FPToUI and FPToSI
2368 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2369   MVT DestVT;
2370   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2371     return false;
2372
2373   unsigned SrcReg = getRegForValue(I->getOperand(0));
2374   if (SrcReg == 0)
2375     return false;
2376
2377   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2378   if (SrcVT == MVT::f128)
2379     return false;
2380
2381   unsigned Opc;
2382   if (SrcVT == MVT::f64) {
2383     if (Signed)
2384       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2385     else
2386       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2387   } else {
2388     if (Signed)
2389       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2390     else
2391       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2392   }
2393   unsigned ResultReg = createResultReg(
2394       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2395   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2396       .addReg(SrcReg);
2397   updateValueMap(I, ResultReg);
2398   return true;
2399 }
2400
2401 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2402   MVT DestVT;
2403   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2404     return false;
2405   assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2406           "Unexpected value type.");
2407
2408   unsigned SrcReg = getRegForValue(I->getOperand(0));
2409   if (!SrcReg)
2410     return false;
2411   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2412
2413   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2414
2415   // Handle sign-extension.
2416   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2417     SrcReg =
2418         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2419     if (!SrcReg)
2420       return false;
2421     SrcIsKill = true;
2422   }
2423
2424   unsigned Opc;
2425   if (SrcVT == MVT::i64) {
2426     if (Signed)
2427       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2428     else
2429       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2430   } else {
2431     if (Signed)
2432       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2433     else
2434       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2435   }
2436
2437   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2438                                       SrcIsKill);
2439   updateValueMap(I, ResultReg);
2440   return true;
2441 }
2442
2443 bool AArch64FastISel::fastLowerArguments() {
2444   if (!FuncInfo.CanLowerReturn)
2445     return false;
2446
2447   const Function *F = FuncInfo.Fn;
2448   if (F->isVarArg())
2449     return false;
2450
2451   CallingConv::ID CC = F->getCallingConv();
2452   if (CC != CallingConv::C)
2453     return false;
2454
2455   // Only handle simple cases of up to 8 GPR and FPR each.
2456   unsigned GPRCnt = 0;
2457   unsigned FPRCnt = 0;
2458   unsigned Idx = 0;
2459   for (auto const &Arg : F->args()) {
2460     // The first argument is at index 1.
2461     ++Idx;
2462     if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
2463         F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
2464         F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
2465         F->getAttributes().hasAttribute(Idx, Attribute::Nest))
2466       return false;
2467
2468     Type *ArgTy = Arg.getType();
2469     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2470       return false;
2471
2472     EVT ArgVT = TLI.getValueType(ArgTy);
2473     if (!ArgVT.isSimple())
2474       return false;
2475
2476     MVT VT = ArgVT.getSimpleVT().SimpleTy;
2477     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2478       return false;
2479
2480     if (VT.isVector() &&
2481         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2482       return false;
2483
2484     if (VT >= MVT::i1 && VT <= MVT::i64)
2485       ++GPRCnt;
2486     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2487              VT.is128BitVector())
2488       ++FPRCnt;
2489     else
2490       return false;
2491
2492     if (GPRCnt > 8 || FPRCnt > 8)
2493       return false;
2494   }
2495
2496   static const MCPhysReg Registers[6][8] = {
2497     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2498       AArch64::W5, AArch64::W6, AArch64::W7 },
2499     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2500       AArch64::X5, AArch64::X6, AArch64::X7 },
2501     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2502       AArch64::H5, AArch64::H6, AArch64::H7 },
2503     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2504       AArch64::S5, AArch64::S6, AArch64::S7 },
2505     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2506       AArch64::D5, AArch64::D6, AArch64::D7 },
2507     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2508       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2509   };
2510
2511   unsigned GPRIdx = 0;
2512   unsigned FPRIdx = 0;
2513   for (auto const &Arg : F->args()) {
2514     MVT VT = TLI.getSimpleValueType(Arg.getType());
2515     unsigned SrcReg;
2516     const TargetRegisterClass *RC;
2517     if (VT >= MVT::i1 && VT <= MVT::i32) {
2518       SrcReg = Registers[0][GPRIdx++];
2519       RC = &AArch64::GPR32RegClass;
2520       VT = MVT::i32;
2521     } else if (VT == MVT::i64) {
2522       SrcReg = Registers[1][GPRIdx++];
2523       RC = &AArch64::GPR64RegClass;
2524     } else if (VT == MVT::f16) {
2525       SrcReg = Registers[2][FPRIdx++];
2526       RC = &AArch64::FPR16RegClass;
2527     } else if (VT ==  MVT::f32) {
2528       SrcReg = Registers[3][FPRIdx++];
2529       RC = &AArch64::FPR32RegClass;
2530     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2531       SrcReg = Registers[4][FPRIdx++];
2532       RC = &AArch64::FPR64RegClass;
2533     } else if (VT.is128BitVector()) {
2534       SrcReg = Registers[5][FPRIdx++];
2535       RC = &AArch64::FPR128RegClass;
2536     } else
2537       llvm_unreachable("Unexpected value type.");
2538
2539     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2540     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2541     // Without this, EmitLiveInCopies may eliminate the livein if its only
2542     // use is a bitcast (which isn't turned into an instruction).
2543     unsigned ResultReg = createResultReg(RC);
2544     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2545             TII.get(TargetOpcode::COPY), ResultReg)
2546         .addReg(DstReg, getKillRegState(true));
2547     updateValueMap(&Arg, ResultReg);
2548   }
2549   return true;
2550 }
2551
2552 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
2553                                       SmallVectorImpl<MVT> &OutVTs,
2554                                       unsigned &NumBytes) {
2555   CallingConv::ID CC = CLI.CallConv;
2556   SmallVector<CCValAssign, 16> ArgLocs;
2557   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2558   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2559
2560   // Get a count of how many bytes are to be pushed on the stack.
2561   NumBytes = CCInfo.getNextStackOffset();
2562
2563   // Issue CALLSEQ_START
2564   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
2565   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
2566     .addImm(NumBytes);
2567
2568   // Process the args.
2569   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2570     CCValAssign &VA = ArgLocs[i];
2571     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
2572     MVT ArgVT = OutVTs[VA.getValNo()];
2573
2574     unsigned ArgReg = getRegForValue(ArgVal);
2575     if (!ArgReg)
2576       return false;
2577
2578     // Handle arg promotion: SExt, ZExt, AExt.
2579     switch (VA.getLocInfo()) {
2580     case CCValAssign::Full:
2581       break;
2582     case CCValAssign::SExt: {
2583       MVT DestVT = VA.getLocVT();
2584       MVT SrcVT = ArgVT;
2585       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
2586       if (!ArgReg)
2587         return false;
2588       break;
2589     }
2590     case CCValAssign::AExt:
2591     // Intentional fall-through.
2592     case CCValAssign::ZExt: {
2593       MVT DestVT = VA.getLocVT();
2594       MVT SrcVT = ArgVT;
2595       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
2596       if (!ArgReg)
2597         return false;
2598       break;
2599     }
2600     default:
2601       llvm_unreachable("Unknown arg promotion!");
2602     }
2603
2604     // Now copy/store arg to correct locations.
2605     if (VA.isRegLoc() && !VA.needsCustom()) {
2606       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2607               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
2608       CLI.OutRegs.push_back(VA.getLocReg());
2609     } else if (VA.needsCustom()) {
2610       // FIXME: Handle custom args.
2611       return false;
2612     } else {
2613       assert(VA.isMemLoc() && "Assuming store on stack.");
2614
2615       // Don't emit stores for undef values.
2616       if (isa<UndefValue>(ArgVal))
2617         continue;
2618
2619       // Need to store on the stack.
2620       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
2621
2622       unsigned BEAlign = 0;
2623       if (ArgSize < 8 && !Subtarget->isLittleEndian())
2624         BEAlign = 8 - ArgSize;
2625
2626       Address Addr;
2627       Addr.setKind(Address::RegBase);
2628       Addr.setReg(AArch64::SP);
2629       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
2630
2631       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
2632       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
2633         MachinePointerInfo::getStack(Addr.getOffset()),
2634         MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
2635
2636       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
2637         return false;
2638     }
2639   }
2640   return true;
2641 }
2642
2643 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
2644                                  unsigned NumBytes) {
2645   CallingConv::ID CC = CLI.CallConv;
2646
2647   // Issue CALLSEQ_END
2648   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
2649   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
2650     .addImm(NumBytes).addImm(0);
2651
2652   // Now the return value.
2653   if (RetVT != MVT::isVoid) {
2654     SmallVector<CCValAssign, 16> RVLocs;
2655     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
2656     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
2657
2658     // Only handle a single return value.
2659     if (RVLocs.size() != 1)
2660       return false;
2661
2662     // Copy all of the result registers out of their specified physreg.
2663     MVT CopyVT = RVLocs[0].getValVT();
2664     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
2665     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2666             TII.get(TargetOpcode::COPY), ResultReg)
2667         .addReg(RVLocs[0].getLocReg());
2668     CLI.InRegs.push_back(RVLocs[0].getLocReg());
2669
2670     CLI.ResultReg = ResultReg;
2671     CLI.NumResultRegs = 1;
2672   }
2673
2674   return true;
2675 }
2676
2677 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
2678   CallingConv::ID CC  = CLI.CallConv;
2679   bool IsTailCall     = CLI.IsTailCall;
2680   bool IsVarArg       = CLI.IsVarArg;
2681   const Value *Callee = CLI.Callee;
2682   const char *SymName = CLI.SymName;
2683
2684   if (!Callee && !SymName)
2685     return false;
2686
2687   // Allow SelectionDAG isel to handle tail calls.
2688   if (IsTailCall)
2689     return false;
2690
2691   CodeModel::Model CM = TM.getCodeModel();
2692   // Only support the small and large code model.
2693   if (CM != CodeModel::Small && CM != CodeModel::Large)
2694     return false;
2695
2696   // FIXME: Add large code model support for ELF.
2697   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
2698     return false;
2699
2700   // Let SDISel handle vararg functions.
2701   if (IsVarArg)
2702     return false;
2703
2704   // FIXME: Only handle *simple* calls for now.
2705   MVT RetVT;
2706   if (CLI.RetTy->isVoidTy())
2707     RetVT = MVT::isVoid;
2708   else if (!isTypeLegal(CLI.RetTy, RetVT))
2709     return false;
2710
2711   for (auto Flag : CLI.OutFlags)
2712     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
2713       return false;
2714
2715   // Set up the argument vectors.
2716   SmallVector<MVT, 16> OutVTs;
2717   OutVTs.reserve(CLI.OutVals.size());
2718
2719   for (auto *Val : CLI.OutVals) {
2720     MVT VT;
2721     if (!isTypeLegal(Val->getType(), VT) &&
2722         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
2723       return false;
2724
2725     // We don't handle vector parameters yet.
2726     if (VT.isVector() || VT.getSizeInBits() > 64)
2727       return false;
2728
2729     OutVTs.push_back(VT);
2730   }
2731
2732   Address Addr;
2733   if (Callee && !computeCallAddress(Callee, Addr))
2734     return false;
2735
2736   // Handle the arguments now that we've gotten them.
2737   unsigned NumBytes;
2738   if (!processCallArgs(CLI, OutVTs, NumBytes))
2739     return false;
2740
2741   // Issue the call.
2742   MachineInstrBuilder MIB;
2743   if (CM == CodeModel::Small) {
2744     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
2745     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
2746     if (SymName)
2747       MIB.addExternalSymbol(SymName, 0);
2748     else if (Addr.getGlobalValue())
2749       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
2750     else if (Addr.getReg()) {
2751       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
2752       MIB.addReg(Reg);
2753     } else
2754       return false;
2755   } else {
2756     unsigned CallReg = 0;
2757     if (SymName) {
2758       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
2759       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
2760               ADRPReg)
2761         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
2762
2763       CallReg = createResultReg(&AArch64::GPR64RegClass);
2764       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
2765               CallReg)
2766         .addReg(ADRPReg)
2767         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
2768                            AArch64II::MO_NC);
2769     } else if (Addr.getGlobalValue())
2770       CallReg = materializeGV(Addr.getGlobalValue());
2771     else if (Addr.getReg())
2772       CallReg = Addr.getReg();
2773
2774     if (!CallReg)
2775       return false;
2776
2777     const MCInstrDesc &II = TII.get(AArch64::BLR);
2778     CallReg = constrainOperandRegClass(II, CallReg, 0);
2779     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
2780   }
2781
2782   // Add implicit physical register uses to the call.
2783   for (auto Reg : CLI.OutRegs)
2784     MIB.addReg(Reg, RegState::Implicit);
2785
2786   // Add a register mask with the call-preserved registers.
2787   // Proper defs for return values will be added by setPhysRegsDeadExcept().
2788   MIB.addRegMask(TRI.getCallPreservedMask(CC));
2789
2790   CLI.Call = MIB;
2791
2792   // Finish off the call including any return values.
2793   return finishCall(CLI, RetVT, NumBytes);
2794 }
2795
2796 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
2797   if (Alignment)
2798     return Len / Alignment <= 4;
2799   else
2800     return Len < 32;
2801 }
2802
2803 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
2804                                          uint64_t Len, unsigned Alignment) {
2805   // Make sure we don't bloat code by inlining very large memcpy's.
2806   if (!isMemCpySmall(Len, Alignment))
2807     return false;
2808
2809   int64_t UnscaledOffset = 0;
2810   Address OrigDest = Dest;
2811   Address OrigSrc = Src;
2812
2813   while (Len) {
2814     MVT VT;
2815     if (!Alignment || Alignment >= 8) {
2816       if (Len >= 8)
2817         VT = MVT::i64;
2818       else if (Len >= 4)
2819         VT = MVT::i32;
2820       else if (Len >= 2)
2821         VT = MVT::i16;
2822       else {
2823         VT = MVT::i8;
2824       }
2825     } else {
2826       // Bound based on alignment.
2827       if (Len >= 4 && Alignment == 4)
2828         VT = MVT::i32;
2829       else if (Len >= 2 && Alignment == 2)
2830         VT = MVT::i16;
2831       else {
2832         VT = MVT::i8;
2833       }
2834     }
2835
2836     bool RV;
2837     unsigned ResultReg;
2838     RV = emitLoad(VT, ResultReg, Src);
2839     if (!RV)
2840       return false;
2841
2842     RV = emitStore(VT, ResultReg, Dest);
2843     if (!RV)
2844       return false;
2845
2846     int64_t Size = VT.getSizeInBits() / 8;
2847     Len -= Size;
2848     UnscaledOffset += Size;
2849
2850     // We need to recompute the unscaled offset for each iteration.
2851     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
2852     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
2853   }
2854
2855   return true;
2856 }
2857
2858 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
2859 /// into the user. The condition code will only be updated on success.
2860 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
2861                                         const Instruction *I,
2862                                         const Value *Cond) {
2863   if (!isa<ExtractValueInst>(Cond))
2864     return false;
2865
2866   const auto *EV = cast<ExtractValueInst>(Cond);
2867   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
2868     return false;
2869
2870   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
2871   MVT RetVT;
2872   const Function *Callee = II->getCalledFunction();
2873   Type *RetTy =
2874   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
2875   if (!isTypeLegal(RetTy, RetVT))
2876     return false;
2877
2878   if (RetVT != MVT::i32 && RetVT != MVT::i64)
2879     return false;
2880
2881   const Value *LHS = II->getArgOperand(0);
2882   const Value *RHS = II->getArgOperand(1);
2883
2884   // Canonicalize immediate to the RHS.
2885   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2886       isCommutativeIntrinsic(II))
2887     std::swap(LHS, RHS);
2888
2889   // Simplify multiplies.
2890   unsigned IID = II->getIntrinsicID();
2891   switch (IID) {
2892   default:
2893     break;
2894   case Intrinsic::smul_with_overflow:
2895     if (const auto *C = dyn_cast<ConstantInt>(RHS))
2896       if (C->getValue() == 2)
2897         IID = Intrinsic::sadd_with_overflow;
2898     break;
2899   case Intrinsic::umul_with_overflow:
2900     if (const auto *C = dyn_cast<ConstantInt>(RHS))
2901       if (C->getValue() == 2)
2902         IID = Intrinsic::uadd_with_overflow;
2903     break;
2904   }
2905
2906   AArch64CC::CondCode TmpCC;
2907   switch (IID) {
2908   default:
2909     return false;
2910   case Intrinsic::sadd_with_overflow:
2911   case Intrinsic::ssub_with_overflow:
2912     TmpCC = AArch64CC::VS;
2913     break;
2914   case Intrinsic::uadd_with_overflow:
2915     TmpCC = AArch64CC::HS;
2916     break;
2917   case Intrinsic::usub_with_overflow:
2918     TmpCC = AArch64CC::LO;
2919     break;
2920   case Intrinsic::smul_with_overflow:
2921   case Intrinsic::umul_with_overflow:
2922     TmpCC = AArch64CC::NE;
2923     break;
2924   }
2925
2926   // Check if both instructions are in the same basic block.
2927   if (!isValueAvailable(II))
2928     return false;
2929
2930   // Make sure nothing is in the way
2931   BasicBlock::const_iterator Start = I;
2932   BasicBlock::const_iterator End = II;
2933   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
2934     // We only expect extractvalue instructions between the intrinsic and the
2935     // instruction to be selected.
2936     if (!isa<ExtractValueInst>(Itr))
2937       return false;
2938
2939     // Check that the extractvalue operand comes from the intrinsic.
2940     const auto *EVI = cast<ExtractValueInst>(Itr);
2941     if (EVI->getAggregateOperand() != II)
2942       return false;
2943   }
2944
2945   CC = TmpCC;
2946   return true;
2947 }
2948
2949 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2950   // FIXME: Handle more intrinsics.
2951   switch (II->getIntrinsicID()) {
2952   default: return false;
2953   case Intrinsic::frameaddress: {
2954     MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
2955     MFI->setFrameAddressIsTaken(true);
2956
2957     const AArch64RegisterInfo *RegInfo =
2958         static_cast<const AArch64RegisterInfo *>(
2959             TM.getSubtargetImpl()->getRegisterInfo());
2960     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
2961     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2962     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2963             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
2964     // Recursively load frame address
2965     // ldr x0, [fp]
2966     // ldr x0, [x0]
2967     // ldr x0, [x0]
2968     // ...
2969     unsigned DestReg;
2970     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2971     while (Depth--) {
2972       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
2973                                 SrcReg, /*IsKill=*/true, 0);
2974       assert(DestReg && "Unexpected LDR instruction emission failure.");
2975       SrcReg = DestReg;
2976     }
2977
2978     updateValueMap(II, SrcReg);
2979     return true;
2980   }
2981   case Intrinsic::memcpy:
2982   case Intrinsic::memmove: {
2983     const auto *MTI = cast<MemTransferInst>(II);
2984     // Don't handle volatile.
2985     if (MTI->isVolatile())
2986       return false;
2987
2988     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
2989     // we would emit dead code because we don't currently handle memmoves.
2990     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
2991     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
2992       // Small memcpy's are common enough that we want to do them without a call
2993       // if possible.
2994       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
2995       unsigned Alignment = MTI->getAlignment();
2996       if (isMemCpySmall(Len, Alignment)) {
2997         Address Dest, Src;
2998         if (!computeAddress(MTI->getRawDest(), Dest) ||
2999             !computeAddress(MTI->getRawSource(), Src))
3000           return false;
3001         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3002           return true;
3003       }
3004     }
3005
3006     if (!MTI->getLength()->getType()->isIntegerTy(64))
3007       return false;
3008
3009     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3010       // Fast instruction selection doesn't support the special
3011       // address spaces.
3012       return false;
3013
3014     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3015     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
3016   }
3017   case Intrinsic::memset: {
3018     const MemSetInst *MSI = cast<MemSetInst>(II);
3019     // Don't handle volatile.
3020     if (MSI->isVolatile())
3021       return false;
3022
3023     if (!MSI->getLength()->getType()->isIntegerTy(64))
3024       return false;
3025
3026     if (MSI->getDestAddressSpace() > 255)
3027       // Fast instruction selection doesn't support the special
3028       // address spaces.
3029       return false;
3030
3031     return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
3032   }
3033   case Intrinsic::sin:
3034   case Intrinsic::cos:
3035   case Intrinsic::pow: {
3036     MVT RetVT;
3037     if (!isTypeLegal(II->getType(), RetVT))
3038       return false;
3039
3040     if (RetVT != MVT::f32 && RetVT != MVT::f64)
3041       return false;
3042
3043     static const RTLIB::Libcall LibCallTable[3][2] = {
3044       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3045       { RTLIB::COS_F32, RTLIB::COS_F64 },
3046       { RTLIB::POW_F32, RTLIB::POW_F64 }
3047     };
3048     RTLIB::Libcall LC;
3049     bool Is64Bit = RetVT == MVT::f64;
3050     switch (II->getIntrinsicID()) {
3051     default:
3052       llvm_unreachable("Unexpected intrinsic.");
3053     case Intrinsic::sin:
3054       LC = LibCallTable[0][Is64Bit];
3055       break;
3056     case Intrinsic::cos:
3057       LC = LibCallTable[1][Is64Bit];
3058       break;
3059     case Intrinsic::pow:
3060       LC = LibCallTable[2][Is64Bit];
3061       break;
3062     }
3063
3064     ArgListTy Args;
3065     Args.reserve(II->getNumArgOperands());
3066
3067     // Populate the argument list.
3068     for (auto &Arg : II->arg_operands()) {
3069       ArgListEntry Entry;
3070       Entry.Val = Arg;
3071       Entry.Ty = Arg->getType();
3072       Args.push_back(Entry);
3073     }
3074
3075     CallLoweringInfo CLI;
3076     CLI.setCallee(TLI.getLibcallCallingConv(LC), II->getType(),
3077                   TLI.getLibcallName(LC), std::move(Args));
3078     if (!lowerCallTo(CLI))
3079       return false;
3080     updateValueMap(II, CLI.ResultReg);
3081     return true;
3082   }
3083   case Intrinsic::trap: {
3084     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3085         .addImm(1);
3086     return true;
3087   }
3088   case Intrinsic::sqrt: {
3089     Type *RetTy = II->getCalledFunction()->getReturnType();
3090
3091     MVT VT;
3092     if (!isTypeLegal(RetTy, VT))
3093       return false;
3094
3095     unsigned Op0Reg = getRegForValue(II->getOperand(0));
3096     if (!Op0Reg)
3097       return false;
3098     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3099
3100     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3101     if (!ResultReg)
3102       return false;
3103
3104     updateValueMap(II, ResultReg);
3105     return true;
3106   }
3107   case Intrinsic::sadd_with_overflow:
3108   case Intrinsic::uadd_with_overflow:
3109   case Intrinsic::ssub_with_overflow:
3110   case Intrinsic::usub_with_overflow:
3111   case Intrinsic::smul_with_overflow:
3112   case Intrinsic::umul_with_overflow: {
3113     // This implements the basic lowering of the xalu with overflow intrinsics.
3114     const Function *Callee = II->getCalledFunction();
3115     auto *Ty = cast<StructType>(Callee->getReturnType());
3116     Type *RetTy = Ty->getTypeAtIndex(0U);
3117
3118     MVT VT;
3119     if (!isTypeLegal(RetTy, VT))
3120       return false;
3121
3122     if (VT != MVT::i32 && VT != MVT::i64)
3123       return false;
3124
3125     const Value *LHS = II->getArgOperand(0);
3126     const Value *RHS = II->getArgOperand(1);
3127     // Canonicalize immediate to the RHS.
3128     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3129         isCommutativeIntrinsic(II))
3130       std::swap(LHS, RHS);
3131
3132     // Simplify multiplies.
3133     unsigned IID = II->getIntrinsicID();
3134     switch (IID) {
3135     default:
3136       break;
3137     case Intrinsic::smul_with_overflow:
3138       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3139         if (C->getValue() == 2) {
3140           IID = Intrinsic::sadd_with_overflow;
3141           RHS = LHS;
3142         }
3143       break;
3144     case Intrinsic::umul_with_overflow:
3145       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3146         if (C->getValue() == 2) {
3147           IID = Intrinsic::uadd_with_overflow;
3148           RHS = LHS;
3149         }
3150       break;
3151     }
3152
3153     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3154     AArch64CC::CondCode CC = AArch64CC::Invalid;
3155     switch (IID) {
3156     default: llvm_unreachable("Unexpected intrinsic!");
3157     case Intrinsic::sadd_with_overflow:
3158       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3159       CC = AArch64CC::VS;
3160       break;
3161     case Intrinsic::uadd_with_overflow:
3162       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3163       CC = AArch64CC::HS;
3164       break;
3165     case Intrinsic::ssub_with_overflow:
3166       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3167       CC = AArch64CC::VS;
3168       break;
3169     case Intrinsic::usub_with_overflow:
3170       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3171       CC = AArch64CC::LO;
3172       break;
3173     case Intrinsic::smul_with_overflow: {
3174       CC = AArch64CC::NE;
3175       unsigned LHSReg = getRegForValue(LHS);
3176       if (!LHSReg)
3177         return false;
3178       bool LHSIsKill = hasTrivialKill(LHS);
3179
3180       unsigned RHSReg = getRegForValue(RHS);
3181       if (!RHSReg)
3182         return false;
3183       bool RHSIsKill = hasTrivialKill(RHS);
3184
3185       if (VT == MVT::i32) {
3186         MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3187         unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3188                                        /*IsKill=*/false, 32);
3189         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3190                                             AArch64::sub_32);
3191         ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3192                                               AArch64::sub_32);
3193         emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3194                     AArch64_AM::ASR, 31, /*WantResult=*/false);
3195       } else {
3196         assert(VT == MVT::i64 && "Unexpected value type.");
3197         MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3198         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3199                                         RHSReg, RHSIsKill);
3200         emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3201                     AArch64_AM::ASR, 63, /*WantResult=*/false);
3202       }
3203       break;
3204     }
3205     case Intrinsic::umul_with_overflow: {
3206       CC = AArch64CC::NE;
3207       unsigned LHSReg = getRegForValue(LHS);
3208       if (!LHSReg)
3209         return false;
3210       bool LHSIsKill = hasTrivialKill(LHS);
3211
3212       unsigned RHSReg = getRegForValue(RHS);
3213       if (!RHSReg)
3214         return false;
3215       bool RHSIsKill = hasTrivialKill(RHS);
3216
3217       if (VT == MVT::i32) {
3218         MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3219         emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3220                     /*IsKill=*/false, AArch64_AM::LSR, 32,
3221                     /*WantResult=*/false);
3222         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3223                                             AArch64::sub_32);
3224       } else {
3225         assert(VT == MVT::i64 && "Unexpected value type.");
3226         MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3227         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3228                                         RHSReg, RHSIsKill);
3229         emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3230                     /*IsKill=*/false, /*WantResult=*/false);
3231       }
3232       break;
3233     }
3234     }
3235
3236     if (MulReg) {
3237       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3238       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3239               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3240     }
3241
3242     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3243                                   AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3244                                   /*IsKill=*/true, getInvertedCondCode(CC));
3245     assert((ResultReg1 + 1) == ResultReg2 &&
3246            "Nonconsecutive result registers.");
3247     updateValueMap(II, ResultReg1, 2);
3248     return true;
3249   }
3250   }
3251   return false;
3252 }
3253
3254 bool AArch64FastISel::selectRet(const Instruction *I) {
3255   const ReturnInst *Ret = cast<ReturnInst>(I);
3256   const Function &F = *I->getParent()->getParent();
3257
3258   if (!FuncInfo.CanLowerReturn)
3259     return false;
3260
3261   if (F.isVarArg())
3262     return false;
3263
3264   // Build a list of return value registers.
3265   SmallVector<unsigned, 4> RetRegs;
3266
3267   if (Ret->getNumOperands() > 0) {
3268     CallingConv::ID CC = F.getCallingConv();
3269     SmallVector<ISD::OutputArg, 4> Outs;
3270     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
3271
3272     // Analyze operands of the call, assigning locations to each operand.
3273     SmallVector<CCValAssign, 16> ValLocs;
3274     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3275     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3276                                                      : RetCC_AArch64_AAPCS;
3277     CCInfo.AnalyzeReturn(Outs, RetCC);
3278
3279     // Only handle a single return value for now.
3280     if (ValLocs.size() != 1)
3281       return false;
3282
3283     CCValAssign &VA = ValLocs[0];
3284     const Value *RV = Ret->getOperand(0);
3285
3286     // Don't bother handling odd stuff for now.
3287     if ((VA.getLocInfo() != CCValAssign::Full) &&
3288         (VA.getLocInfo() != CCValAssign::BCvt))
3289       return false;
3290
3291     // Only handle register returns for now.
3292     if (!VA.isRegLoc())
3293       return false;
3294
3295     unsigned Reg = getRegForValue(RV);
3296     if (Reg == 0)
3297       return false;
3298
3299     unsigned SrcReg = Reg + VA.getValNo();
3300     unsigned DestReg = VA.getLocReg();
3301     // Avoid a cross-class copy. This is very unlikely.
3302     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3303       return false;
3304
3305     EVT RVEVT = TLI.getValueType(RV->getType());
3306     if (!RVEVT.isSimple())
3307       return false;
3308
3309     // Vectors (of > 1 lane) in big endian need tricky handling.
3310     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3311         !Subtarget->isLittleEndian())
3312       return false;
3313
3314     MVT RVVT = RVEVT.getSimpleVT();
3315     if (RVVT == MVT::f128)
3316       return false;
3317
3318     MVT DestVT = VA.getValVT();
3319     // Special handling for extended integers.
3320     if (RVVT != DestVT) {
3321       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3322         return false;
3323
3324       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3325         return false;
3326
3327       bool IsZExt = Outs[0].Flags.isZExt();
3328       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3329       if (SrcReg == 0)
3330         return false;
3331     }
3332
3333     // Make the copy.
3334     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3335             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3336
3337     // Add register to return instruction.
3338     RetRegs.push_back(VA.getLocReg());
3339   }
3340
3341   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3342                                     TII.get(AArch64::RET_ReallyLR));
3343   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
3344     MIB.addReg(RetRegs[i], RegState::Implicit);
3345   return true;
3346 }
3347
3348 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3349   Type *DestTy = I->getType();
3350   Value *Op = I->getOperand(0);
3351   Type *SrcTy = Op->getType();
3352
3353   EVT SrcEVT = TLI.getValueType(SrcTy, true);
3354   EVT DestEVT = TLI.getValueType(DestTy, true);
3355   if (!SrcEVT.isSimple())
3356     return false;
3357   if (!DestEVT.isSimple())
3358     return false;
3359
3360   MVT SrcVT = SrcEVT.getSimpleVT();
3361   MVT DestVT = DestEVT.getSimpleVT();
3362
3363   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3364       SrcVT != MVT::i8)
3365     return false;
3366   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3367       DestVT != MVT::i1)
3368     return false;
3369
3370   unsigned SrcReg = getRegForValue(Op);
3371   if (!SrcReg)
3372     return false;
3373   bool SrcIsKill = hasTrivialKill(Op);
3374
3375   // If we're truncating from i64 to a smaller non-legal type then generate an
3376   // AND. Otherwise, we know the high bits are undefined and a truncate only
3377   // generate a COPY. We cannot mark the source register also as result
3378   // register, because this can incorrectly transfer the kill flag onto the
3379   // source register.
3380   unsigned ResultReg;
3381   if (SrcVT == MVT::i64) {
3382     uint64_t Mask = 0;
3383     switch (DestVT.SimpleTy) {
3384     default:
3385       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3386       return false;
3387     case MVT::i1:
3388       Mask = 0x1;
3389       break;
3390     case MVT::i8:
3391       Mask = 0xff;
3392       break;
3393     case MVT::i16:
3394       Mask = 0xffff;
3395       break;
3396     }
3397     // Issue an extract_subreg to get the lower 32-bits.
3398     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3399                                                 AArch64::sub_32);
3400     // Create the AND instruction which performs the actual truncation.
3401     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3402     assert(ResultReg && "Unexpected AND instruction emission failure.");
3403   } else {
3404     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3405     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3406             TII.get(TargetOpcode::COPY), ResultReg)
3407         .addReg(SrcReg, getKillRegState(SrcIsKill));
3408   }
3409
3410   updateValueMap(I, ResultReg);
3411   return true;
3412 }
3413
3414 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3415   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3416           DestVT == MVT::i64) &&
3417          "Unexpected value type.");
3418   // Handle i8 and i16 as i32.
3419   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3420     DestVT = MVT::i32;
3421
3422   if (IsZExt) {
3423     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3424     assert(ResultReg && "Unexpected AND instruction emission failure.");
3425     if (DestVT == MVT::i64) {
3426       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
3427       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
3428       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3429       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3430               TII.get(AArch64::SUBREG_TO_REG), Reg64)
3431           .addImm(0)
3432           .addReg(ResultReg)
3433           .addImm(AArch64::sub_32);
3434       ResultReg = Reg64;
3435     }
3436     return ResultReg;
3437   } else {
3438     if (DestVT == MVT::i64) {
3439       // FIXME: We're SExt i1 to i64.
3440       return 0;
3441     }
3442     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3443                             /*TODO:IsKill=*/false, 0, 0);
3444   }
3445 }
3446
3447 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3448                                       unsigned Op1, bool Op1IsKill) {
3449   unsigned Opc, ZReg;
3450   switch (RetVT.SimpleTy) {
3451   default: return 0;
3452   case MVT::i8:
3453   case MVT::i16:
3454   case MVT::i32:
3455     RetVT = MVT::i32;
3456     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3457   case MVT::i64:
3458     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3459   }
3460
3461   const TargetRegisterClass *RC =
3462       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3463   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3464                           /*IsKill=*/ZReg, true);
3465 }
3466
3467 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3468                                         unsigned Op1, bool Op1IsKill) {
3469   if (RetVT != MVT::i64)
3470     return 0;
3471
3472   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3473                           Op0, Op0IsKill, Op1, Op1IsKill,
3474                           AArch64::XZR, /*IsKill=*/true);
3475 }
3476
3477 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3478                                         unsigned Op1, bool Op1IsKill) {
3479   if (RetVT != MVT::i64)
3480     return 0;
3481
3482   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3483                           Op0, Op0IsKill, Op1, Op1IsKill,
3484                           AArch64::XZR, /*IsKill=*/true);
3485 }
3486
3487 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3488                                      unsigned Op1Reg, bool Op1IsKill) {
3489   unsigned Opc = 0;
3490   bool NeedTrunc = false;
3491   uint64_t Mask = 0;
3492   switch (RetVT.SimpleTy) {
3493   default: return 0;
3494   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
3495   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
3496   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
3497   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
3498   }
3499
3500   const TargetRegisterClass *RC =
3501       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3502   if (NeedTrunc) {
3503     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3504     Op1IsKill = true;
3505   }
3506   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3507                                        Op1IsKill);
3508   if (NeedTrunc)
3509     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3510   return ResultReg;
3511 }
3512
3513 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3514                                      bool Op0IsKill, uint64_t Shift,
3515                                      bool IsZext) {
3516   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3517          "Unexpected source/return type pair.");
3518   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
3519           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
3520          "Unexpected source value type.");
3521   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3522           RetVT == MVT::i64) && "Unexpected return value type.");
3523
3524   bool Is64Bit = (RetVT == MVT::i64);
3525   unsigned RegSize = Is64Bit ? 64 : 32;
3526   unsigned DstBits = RetVT.getSizeInBits();
3527   unsigned SrcBits = SrcVT.getSizeInBits();
3528
3529   // Don't deal with undefined shifts.
3530   if (Shift >= DstBits)
3531     return 0;
3532
3533   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3534   // {S|U}BFM Wd, Wn, #r, #s
3535   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
3536
3537   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3538   // %2 = shl i16 %1, 4
3539   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
3540   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
3541   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
3542   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
3543
3544   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3545   // %2 = shl i16 %1, 8
3546   // Wd<32+7-24,32-24> = Wn<7:0>
3547   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
3548   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
3549   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
3550
3551   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3552   // %2 = shl i16 %1, 12
3553   // Wd<32+3-20,32-20> = Wn<3:0>
3554   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
3555   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
3556   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
3557
3558   unsigned ImmR = RegSize - Shift;
3559   // Limit the width to the length of the source type.
3560   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
3561   static const unsigned OpcTable[2][2] = {
3562     {AArch64::SBFMWri, AArch64::SBFMXri},
3563     {AArch64::UBFMWri, AArch64::UBFMXri}
3564   };
3565   unsigned Opc = OpcTable[IsZext][Is64Bit];
3566   const TargetRegisterClass *RC =
3567       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3568   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3569     unsigned TmpReg = MRI.createVirtualRegister(RC);
3570     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3571             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3572         .addImm(0)
3573         .addReg(Op0, getKillRegState(Op0IsKill))
3574         .addImm(AArch64::sub_32);
3575     Op0 = TmpReg;
3576     Op0IsKill = true;
3577   }
3578   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3579 }
3580
3581 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3582                                      unsigned Op1Reg, bool Op1IsKill) {
3583   unsigned Opc = 0;
3584   bool NeedTrunc = false;
3585   uint64_t Mask = 0;
3586   switch (RetVT.SimpleTy) {
3587   default: return 0;
3588   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
3589   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
3590   case MVT::i32: Opc = AArch64::LSRVWr; break;
3591   case MVT::i64: Opc = AArch64::LSRVXr; break;
3592   }
3593
3594   const TargetRegisterClass *RC =
3595       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3596   if (NeedTrunc) {
3597     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
3598     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3599     Op0IsKill = Op1IsKill = true;
3600   }
3601   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3602                                        Op1IsKill);
3603   if (NeedTrunc)
3604     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3605   return ResultReg;
3606 }
3607
3608 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3609                                      bool Op0IsKill, uint64_t Shift,
3610                                      bool IsZExt) {
3611   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3612          "Unexpected source/return type pair.");
3613   assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3614           SrcVT == MVT::i64) && "Unexpected source value type.");
3615   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3616           RetVT == MVT::i64) && "Unexpected return value type.");
3617
3618   bool Is64Bit = (RetVT == MVT::i64);
3619   unsigned RegSize = Is64Bit ? 64 : 32;
3620   unsigned DstBits = RetVT.getSizeInBits();
3621   unsigned SrcBits = SrcVT.getSizeInBits();
3622
3623   // Don't deal with undefined shifts.
3624   if (Shift >= DstBits)
3625     return 0;
3626
3627   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3628   // {S|U}BFM Wd, Wn, #r, #s
3629   // Wd<s-r:0> = Wn<s:r> when r <= s
3630
3631   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3632   // %2 = lshr i16 %1, 4
3633   // Wd<7-4:0> = Wn<7:4>
3634   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
3635   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
3636   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
3637
3638   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3639   // %2 = lshr i16 %1, 8
3640   // Wd<7-7,0> = Wn<7:7>
3641   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
3642   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3643   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3644
3645   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3646   // %2 = lshr i16 %1, 12
3647   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
3648   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
3649   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3650   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3651
3652   if (Shift >= SrcBits && IsZExt)
3653     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
3654
3655   // It is not possible to fold a sign-extend into the LShr instruction. In this
3656   // case emit a sign-extend.
3657   if (!IsZExt) {
3658     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
3659     if (!Op0)
3660       return 0;
3661     Op0IsKill = true;
3662     SrcVT = RetVT;
3663     SrcBits = SrcVT.getSizeInBits();
3664     IsZExt = true;
3665   }
3666
3667   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
3668   unsigned ImmS = SrcBits - 1;
3669   static const unsigned OpcTable[2][2] = {
3670     {AArch64::SBFMWri, AArch64::SBFMXri},
3671     {AArch64::UBFMWri, AArch64::UBFMXri}
3672   };
3673   unsigned Opc = OpcTable[IsZExt][Is64Bit];
3674   const TargetRegisterClass *RC =
3675       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3676   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3677     unsigned TmpReg = MRI.createVirtualRegister(RC);
3678     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3679             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3680         .addImm(0)
3681         .addReg(Op0, getKillRegState(Op0IsKill))
3682         .addImm(AArch64::sub_32);
3683     Op0 = TmpReg;
3684     Op0IsKill = true;
3685   }
3686   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3687 }
3688
3689 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3690                                      unsigned Op1Reg, bool Op1IsKill) {
3691   unsigned Opc = 0;
3692   bool NeedTrunc = false;
3693   uint64_t Mask = 0;
3694   switch (RetVT.SimpleTy) {
3695   default: return 0;
3696   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
3697   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
3698   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
3699   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
3700   }
3701
3702   const TargetRegisterClass *RC =
3703       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3704   if (NeedTrunc) {
3705     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
3706     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3707     Op0IsKill = Op1IsKill = true;
3708   }
3709   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3710                                        Op1IsKill);
3711   if (NeedTrunc)
3712     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3713   return ResultReg;
3714 }
3715
3716 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3717                                      bool Op0IsKill, uint64_t Shift,
3718                                      bool IsZExt) {
3719   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3720          "Unexpected source/return type pair.");
3721   assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3722           SrcVT == MVT::i64) && "Unexpected source value type.");
3723   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3724           RetVT == MVT::i64) && "Unexpected return value type.");
3725
3726   bool Is64Bit = (RetVT == MVT::i64);
3727   unsigned RegSize = Is64Bit ? 64 : 32;
3728   unsigned DstBits = RetVT.getSizeInBits();
3729   unsigned SrcBits = SrcVT.getSizeInBits();
3730
3731   // Don't deal with undefined shifts.
3732   if (Shift >= DstBits)
3733     return 0;
3734
3735   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3736   // {S|U}BFM Wd, Wn, #r, #s
3737   // Wd<s-r:0> = Wn<s:r> when r <= s
3738
3739   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3740   // %2 = ashr i16 %1, 4
3741   // Wd<7-4:0> = Wn<7:4>
3742   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
3743   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
3744   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
3745
3746   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3747   // %2 = ashr i16 %1, 8
3748   // Wd<7-7,0> = Wn<7:7>
3749   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
3750   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3751   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3752
3753   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3754   // %2 = ashr i16 %1, 12
3755   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
3756   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
3757   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3758   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3759
3760   if (Shift >= SrcBits && IsZExt)
3761     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
3762
3763   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
3764   unsigned ImmS = SrcBits - 1;
3765   static const unsigned OpcTable[2][2] = {
3766     {AArch64::SBFMWri, AArch64::SBFMXri},
3767     {AArch64::UBFMWri, AArch64::UBFMXri}
3768   };
3769   unsigned Opc = OpcTable[IsZExt][Is64Bit];
3770   const TargetRegisterClass *RC =
3771       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3772   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3773     unsigned TmpReg = MRI.createVirtualRegister(RC);
3774     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3775             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3776         .addImm(0)
3777         .addReg(Op0, getKillRegState(Op0IsKill))
3778         .addImm(AArch64::sub_32);
3779     Op0 = TmpReg;
3780     Op0IsKill = true;
3781   }
3782   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3783 }
3784
3785 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
3786                                      bool IsZExt) {
3787   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
3788
3789   // FastISel does not have plumbing to deal with extensions where the SrcVT or
3790   // DestVT are odd things, so test to make sure that they are both types we can
3791   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
3792   // bail out to SelectionDAG.
3793   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
3794        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
3795       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
3796        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
3797     return 0;
3798
3799   unsigned Opc;
3800   unsigned Imm = 0;
3801
3802   switch (SrcVT.SimpleTy) {
3803   default:
3804     return 0;
3805   case MVT::i1:
3806     return emiti1Ext(SrcReg, DestVT, IsZExt);
3807   case MVT::i8:
3808     if (DestVT == MVT::i64)
3809       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3810     else
3811       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
3812     Imm = 7;
3813     break;
3814   case MVT::i16:
3815     if (DestVT == MVT::i64)
3816       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3817     else
3818       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
3819     Imm = 15;
3820     break;
3821   case MVT::i32:
3822     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
3823     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3824     Imm = 31;
3825     break;
3826   }
3827
3828   // Handle i8 and i16 as i32.
3829   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3830     DestVT = MVT::i32;
3831   else if (DestVT == MVT::i64) {
3832     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3833     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3834             TII.get(AArch64::SUBREG_TO_REG), Src64)
3835         .addImm(0)
3836         .addReg(SrcReg)
3837         .addImm(AArch64::sub_32);
3838     SrcReg = Src64;
3839   }
3840
3841   const TargetRegisterClass *RC =
3842       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3843   return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
3844 }
3845
3846 bool AArch64FastISel::selectIntExt(const Instruction *I) {
3847   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
3848          "Unexpected integer extend instruction.");
3849   MVT RetVT;
3850   MVT SrcVT;
3851   if (!isTypeSupported(I->getType(), RetVT))
3852     return false;
3853
3854   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
3855     return false;
3856
3857   if (isIntExtFree(I)) {
3858     unsigned SrcReg = getRegForValue(I->getOperand(0));
3859     if (!SrcReg)
3860       return false;
3861     bool SrcIsKill = hasTrivialKill(I->getOperand(0));
3862
3863     const TargetRegisterClass *RC = (RetVT == MVT::i64) ?
3864         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3865     unsigned ResultReg = createResultReg(RC);
3866     if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
3867       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3868               TII.get(AArch64::SUBREG_TO_REG), ResultReg)
3869           .addImm(0)
3870           .addReg(SrcReg, getKillRegState(SrcIsKill))
3871           .addImm(AArch64::sub_32);
3872     } else {
3873       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3874               TII.get(TargetOpcode::COPY), ResultReg)
3875           .addReg(SrcReg, getKillRegState(SrcIsKill));
3876     }
3877     updateValueMap(I, ResultReg);
3878     return true;
3879   }
3880
3881   unsigned SrcReg = getRegForValue(I->getOperand(0));
3882   if (!SrcReg)
3883     return false;
3884   bool SrcRegIsKill = hasTrivialKill(I->getOperand(0));
3885
3886   unsigned ResultReg = 0;
3887   if (isIntExtFree(I)) {
3888     if (RetVT == MVT::i64) {
3889       ResultReg = createResultReg(&AArch64::GPR64RegClass);
3890       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3891               TII.get(AArch64::SUBREG_TO_REG), ResultReg)
3892           .addImm(0)
3893           .addReg(SrcReg, getKillRegState(SrcRegIsKill))
3894           .addImm(AArch64::sub_32);
3895     } else
3896       ResultReg = SrcReg;
3897   }
3898
3899   if (!ResultReg)
3900     ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, isa<ZExtInst>(I));
3901
3902   if (!ResultReg)
3903     return false;
3904
3905   updateValueMap(I, ResultReg);
3906   return true;
3907 }
3908
3909 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
3910   EVT DestEVT = TLI.getValueType(I->getType(), true);
3911   if (!DestEVT.isSimple())
3912     return false;
3913
3914   MVT DestVT = DestEVT.getSimpleVT();
3915   if (DestVT != MVT::i64 && DestVT != MVT::i32)
3916     return false;
3917
3918   unsigned DivOpc;
3919   bool Is64bit = (DestVT == MVT::i64);
3920   switch (ISDOpcode) {
3921   default:
3922     return false;
3923   case ISD::SREM:
3924     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
3925     break;
3926   case ISD::UREM:
3927     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
3928     break;
3929   }
3930   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
3931   unsigned Src0Reg = getRegForValue(I->getOperand(0));
3932   if (!Src0Reg)
3933     return false;
3934   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
3935
3936   unsigned Src1Reg = getRegForValue(I->getOperand(1));
3937   if (!Src1Reg)
3938     return false;
3939   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
3940
3941   const TargetRegisterClass *RC =
3942       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3943   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
3944                                      Src1Reg, /*IsKill=*/false);
3945   assert(QuotReg && "Unexpected DIV instruction emission failure.");
3946   // The remainder is computed as numerator - (quotient * denominator) using the
3947   // MSUB instruction.
3948   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
3949                                         Src1Reg, Src1IsKill, Src0Reg,
3950                                         Src0IsKill);
3951   updateValueMap(I, ResultReg);
3952   return true;
3953 }
3954
3955 bool AArch64FastISel::selectMul(const Instruction *I) {
3956   MVT VT;
3957   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
3958     return false;
3959
3960   if (VT.isVector())
3961     return selectBinaryOp(I, ISD::MUL);
3962
3963   const Value *Src0 = I->getOperand(0);
3964   const Value *Src1 = I->getOperand(1);
3965   if (const auto *C = dyn_cast<ConstantInt>(Src0))
3966     if (C->getValue().isPowerOf2())
3967       std::swap(Src0, Src1);
3968
3969   // Try to simplify to a shift instruction.
3970   if (const auto *C = dyn_cast<ConstantInt>(Src1))
3971     if (C->getValue().isPowerOf2()) {
3972       uint64_t ShiftVal = C->getValue().logBase2();
3973       MVT SrcVT = VT;
3974       bool IsZExt = true;
3975       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
3976         if (!isIntExtFree(ZExt)) {
3977           MVT VT;
3978           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
3979             SrcVT = VT;
3980             IsZExt = true;
3981             Src0 = ZExt->getOperand(0);
3982           }
3983         }
3984       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
3985         if (!isIntExtFree(SExt)) {
3986           MVT VT;
3987           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
3988             SrcVT = VT;
3989             IsZExt = false;
3990             Src0 = SExt->getOperand(0);
3991           }
3992         }
3993       }
3994
3995       unsigned Src0Reg = getRegForValue(Src0);
3996       if (!Src0Reg)
3997         return false;
3998       bool Src0IsKill = hasTrivialKill(Src0);
3999
4000       unsigned ResultReg =
4001           emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4002
4003       if (ResultReg) {
4004         updateValueMap(I, ResultReg);
4005         return true;
4006       }
4007     }
4008
4009   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4010   if (!Src0Reg)
4011     return false;
4012   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4013
4014   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4015   if (!Src1Reg)
4016     return false;
4017   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4018
4019   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4020
4021   if (!ResultReg)
4022     return false;
4023
4024   updateValueMap(I, ResultReg);
4025   return true;
4026 }
4027
4028 bool AArch64FastISel::selectShift(const Instruction *I) {
4029   MVT RetVT;
4030   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4031     return false;
4032
4033   if (RetVT.isVector())
4034     return selectOperator(I, I->getOpcode());
4035
4036   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4037     unsigned ResultReg = 0;
4038     uint64_t ShiftVal = C->getZExtValue();
4039     MVT SrcVT = RetVT;
4040     bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true;
4041     const Value *Op0 = I->getOperand(0);
4042     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4043       if (!isIntExtFree(ZExt)) {
4044         MVT TmpVT;
4045         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4046           SrcVT = TmpVT;
4047           IsZExt = true;
4048           Op0 = ZExt->getOperand(0);
4049         }
4050       }
4051     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4052       if (!isIntExtFree(SExt)) {
4053         MVT TmpVT;
4054         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4055           SrcVT = TmpVT;
4056           IsZExt = false;
4057           Op0 = SExt->getOperand(0);
4058         }
4059       }
4060     }
4061
4062     unsigned Op0Reg = getRegForValue(Op0);
4063     if (!Op0Reg)
4064       return false;
4065     bool Op0IsKill = hasTrivialKill(Op0);
4066
4067     switch (I->getOpcode()) {
4068     default: llvm_unreachable("Unexpected instruction.");
4069     case Instruction::Shl:
4070       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4071       break;
4072     case Instruction::AShr:
4073       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4074       break;
4075     case Instruction::LShr:
4076       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4077       break;
4078     }
4079     if (!ResultReg)
4080       return false;
4081
4082     updateValueMap(I, ResultReg);
4083     return true;
4084   }
4085
4086   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4087   if (!Op0Reg)
4088     return false;
4089   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4090
4091   unsigned Op1Reg = getRegForValue(I->getOperand(1));
4092   if (!Op1Reg)
4093     return false;
4094   bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4095
4096   unsigned ResultReg = 0;
4097   switch (I->getOpcode()) {
4098   default: llvm_unreachable("Unexpected instruction.");
4099   case Instruction::Shl:
4100     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4101     break;
4102   case Instruction::AShr:
4103     ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4104     break;
4105   case Instruction::LShr:
4106     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4107     break;
4108   }
4109
4110   if (!ResultReg)
4111     return false;
4112
4113   updateValueMap(I, ResultReg);
4114   return true;
4115 }
4116
4117 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4118   MVT RetVT, SrcVT;
4119
4120   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4121     return false;
4122   if (!isTypeLegal(I->getType(), RetVT))
4123     return false;
4124
4125   unsigned Opc;
4126   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4127     Opc = AArch64::FMOVWSr;
4128   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4129     Opc = AArch64::FMOVXDr;
4130   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4131     Opc = AArch64::FMOVSWr;
4132   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4133     Opc = AArch64::FMOVDXr;
4134   else
4135     return false;
4136
4137   const TargetRegisterClass *RC = nullptr;
4138   switch (RetVT.SimpleTy) {
4139   default: llvm_unreachable("Unexpected value type.");
4140   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4141   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4142   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4143   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4144   }
4145   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4146   if (!Op0Reg)
4147     return false;
4148   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4149   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4150
4151   if (!ResultReg)
4152     return false;
4153
4154   updateValueMap(I, ResultReg);
4155   return true;
4156 }
4157
4158 bool AArch64FastISel::selectFRem(const Instruction *I) {
4159   MVT RetVT;
4160   if (!isTypeLegal(I->getType(), RetVT))
4161     return false;
4162
4163   RTLIB::Libcall LC;
4164   switch (RetVT.SimpleTy) {
4165   default:
4166     return false;
4167   case MVT::f32:
4168     LC = RTLIB::REM_F32;
4169     break;
4170   case MVT::f64:
4171     LC = RTLIB::REM_F64;
4172     break;
4173   }
4174
4175   ArgListTy Args;
4176   Args.reserve(I->getNumOperands());
4177
4178   // Populate the argument list.
4179   for (auto &Arg : I->operands()) {
4180     ArgListEntry Entry;
4181     Entry.Val = Arg;
4182     Entry.Ty = Arg->getType();
4183     Args.push_back(Entry);
4184   }
4185
4186   CallLoweringInfo CLI;
4187   CLI.setCallee(TLI.getLibcallCallingConv(LC), I->getType(),
4188                 TLI.getLibcallName(LC), std::move(Args));
4189   if (!lowerCallTo(CLI))
4190     return false;
4191   updateValueMap(I, CLI.ResultReg);
4192   return true;
4193 }
4194
4195 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4196   MVT VT;
4197   if (!isTypeLegal(I->getType(), VT))
4198     return false;
4199
4200   if (!isa<ConstantInt>(I->getOperand(1)))
4201     return selectBinaryOp(I, ISD::SDIV);
4202
4203   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4204   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4205       !(C.isPowerOf2() || (-C).isPowerOf2()))
4206     return selectBinaryOp(I, ISD::SDIV);
4207
4208   unsigned Lg2 = C.countTrailingZeros();
4209   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4210   if (!Src0Reg)
4211     return false;
4212   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4213
4214   if (cast<BinaryOperator>(I)->isExact()) {
4215     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4216     if (!ResultReg)
4217       return false;
4218     updateValueMap(I, ResultReg);
4219     return true;
4220   }
4221
4222   unsigned Pow2MinusOne = (1 << Lg2) - 1;
4223   unsigned AddReg = emitAddSub_ri(/*UseAdd=*/true, VT, Src0Reg,
4224                                   /*IsKill=*/false, Pow2MinusOne);
4225   if (!AddReg)
4226     return false;
4227
4228   // (Src0 < 0) ? Pow2 - 1 : 0;
4229   if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4230     return false;
4231
4232   unsigned SelectOpc;
4233   const TargetRegisterClass *RC;
4234   if (VT == MVT::i64) {
4235     SelectOpc = AArch64::CSELXr;
4236     RC = &AArch64::GPR64RegClass;
4237   } else {
4238     SelectOpc = AArch64::CSELWr;
4239     RC = &AArch64::GPR32RegClass;
4240   }
4241   unsigned SelectReg =
4242       fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4243                        Src0IsKill, AArch64CC::LT);
4244   if (!SelectReg)
4245     return false;
4246
4247   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4248   // negate the result.
4249   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4250   unsigned ResultReg;
4251   if (C.isNegative())
4252     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4253                               SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4254   else
4255     ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4256
4257   if (!ResultReg)
4258     return false;
4259
4260   updateValueMap(I, ResultReg);
4261   return true;
4262 }
4263
4264 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
4265   switch (I->getOpcode()) {
4266   default:
4267     break;
4268   case Instruction::Add:
4269   case Instruction::Sub:
4270     return selectAddSub(I);
4271   case Instruction::Mul:
4272     return selectMul(I);
4273   case Instruction::SDiv:
4274     return selectSDiv(I);
4275   case Instruction::SRem:
4276     if (!selectBinaryOp(I, ISD::SREM))
4277       return selectRem(I, ISD::SREM);
4278     return true;
4279   case Instruction::URem:
4280     if (!selectBinaryOp(I, ISD::UREM))
4281       return selectRem(I, ISD::UREM);
4282     return true;
4283   case Instruction::Shl:
4284   case Instruction::LShr:
4285   case Instruction::AShr:
4286     return selectShift(I);
4287   case Instruction::And:
4288   case Instruction::Or:
4289   case Instruction::Xor:
4290     return selectLogicalOp(I);
4291   case Instruction::Br:
4292     return selectBranch(I);
4293   case Instruction::IndirectBr:
4294     return selectIndirectBr(I);
4295   case Instruction::BitCast:
4296     if (!FastISel::selectBitCast(I))
4297       return selectBitCast(I);
4298     return true;
4299   case Instruction::FPToSI:
4300     if (!selectCast(I, ISD::FP_TO_SINT))
4301       return selectFPToInt(I, /*Signed=*/true);
4302     return true;
4303   case Instruction::FPToUI:
4304     return selectFPToInt(I, /*Signed=*/false);
4305   case Instruction::ZExt:
4306   case Instruction::SExt:
4307     return selectIntExt(I);
4308   case Instruction::Trunc:
4309     if (!selectCast(I, ISD::TRUNCATE))
4310       return selectTrunc(I);
4311     return true;
4312   case Instruction::FPExt:
4313     return selectFPExt(I);
4314   case Instruction::FPTrunc:
4315     return selectFPTrunc(I);
4316   case Instruction::SIToFP:
4317     if (!selectCast(I, ISD::SINT_TO_FP))
4318       return selectIntToFP(I, /*Signed=*/true);
4319     return true;
4320   case Instruction::UIToFP:
4321     return selectIntToFP(I, /*Signed=*/false);
4322   case Instruction::Load:
4323     return selectLoad(I);
4324   case Instruction::Store:
4325     return selectStore(I);
4326   case Instruction::FCmp:
4327   case Instruction::ICmp:
4328     return selectCmp(I);
4329   case Instruction::Select:
4330     return selectSelect(I);
4331   case Instruction::Ret:
4332     return selectRet(I);
4333   case Instruction::FRem:
4334     return selectFRem(I);
4335   }
4336
4337   // fall-back to target-independent instruction selection.
4338   return selectOperator(I, I->getOpcode());
4339   // Silence warnings.
4340   (void)&CC_AArch64_DarwinPCS_VarArg;
4341 }
4342
4343 namespace llvm {
4344 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
4345                                         const TargetLibraryInfo *LibInfo) {
4346   return new AArch64FastISel(FuncInfo, LibInfo);
4347 }
4348 }