1 //===-- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the AArch64-specific support for the FastISel class. Some
11 // of the target-specific code is generated by tablegen in the file
12 // AArch64GenFastISel.inc, which is #included here.
14 //===----------------------------------------------------------------------===//
17 #include "AArch64Subtarget.h"
18 #include "AArch64TargetMachine.h"
19 #include "MCTargetDesc/AArch64AddressingModes.h"
20 #include "llvm/Analysis/BranchProbabilityInfo.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/FastISel.h"
23 #include "llvm/CodeGen/FunctionLoweringInfo.h"
24 #include "llvm/CodeGen/MachineConstantPool.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/IR/CallingConv.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/DerivedTypes.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/GetElementPtrTypeIterator.h"
33 #include "llvm/IR/GlobalAlias.h"
34 #include "llvm/IR/GlobalVariable.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Operator.h"
38 #include "llvm/Support/CommandLine.h"
43 class AArch64FastISel : public FastISel {
53 AArch64_AM::ShiftExtendType ExtType;
61 const GlobalValue *GV;
64 Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
65 OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
66 void setKind(BaseKind K) { Kind = K; }
67 BaseKind getKind() const { return Kind; }
68 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
69 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
70 bool isRegBase() const { return Kind == RegBase; }
71 bool isFIBase() const { return Kind == FrameIndexBase; }
72 void setReg(unsigned Reg) {
73 assert(isRegBase() && "Invalid base register access!");
76 unsigned getReg() const {
77 assert(isRegBase() && "Invalid base register access!");
80 void setOffsetReg(unsigned Reg) {
81 assert(isRegBase() && "Invalid offset register access!");
84 unsigned getOffsetReg() const {
85 assert(isRegBase() && "Invalid offset register access!");
88 void setFI(unsigned FI) {
89 assert(isFIBase() && "Invalid base frame index access!");
92 unsigned getFI() const {
93 assert(isFIBase() && "Invalid base frame index access!");
96 void setOffset(int64_t O) { Offset = O; }
97 int64_t getOffset() { return Offset; }
98 void setShift(unsigned S) { Shift = S; }
99 unsigned getShift() { return Shift; }
101 void setGlobalValue(const GlobalValue *G) { GV = G; }
102 const GlobalValue *getGlobalValue() { return GV; }
105 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
106 /// make the right decision when generating code for different targets.
107 const AArch64Subtarget *Subtarget;
108 LLVMContext *Context;
110 bool FastLowerArguments() override;
111 bool FastLowerCall(CallLoweringInfo &CLI) override;
112 bool FastLowerIntrinsicCall(const IntrinsicInst *II) override;
115 // Selection routines.
116 bool SelectLoad(const Instruction *I);
117 bool SelectStore(const Instruction *I);
118 bool SelectBranch(const Instruction *I);
119 bool SelectIndirectBr(const Instruction *I);
120 bool SelectCmp(const Instruction *I);
121 bool SelectSelect(const Instruction *I);
122 bool SelectFPExt(const Instruction *I);
123 bool SelectFPTrunc(const Instruction *I);
124 bool SelectFPToInt(const Instruction *I, bool Signed);
125 bool SelectIntToFP(const Instruction *I, bool Signed);
126 bool SelectRem(const Instruction *I, unsigned ISDOpcode);
127 bool SelectRet(const Instruction *I);
128 bool SelectTrunc(const Instruction *I);
129 bool SelectIntExt(const Instruction *I);
130 bool SelectMul(const Instruction *I);
131 bool SelectShift(const Instruction *I);
132 bool SelectBitCast(const Instruction *I);
134 // Utility helper routines.
135 bool isTypeLegal(Type *Ty, MVT &VT);
136 bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
137 bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
138 bool ComputeCallAddress(const Value *V, Address &Addr);
139 bool SimplifyAddress(Address &Addr, MVT VT);
140 void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
141 unsigned Flags, unsigned ScaleFactor,
142 MachineMemOperand *MMO);
143 bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
144 bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
146 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
149 // Emit helper routines.
150 unsigned emitAddsSubs(bool UseAdds, MVT RetVT, const Value *LHS,
151 const Value *RHS, bool IsZExt = false,
152 bool WantResult = true);
153 unsigned emitAddsSubs_rr(bool UseAdds, MVT RetVT, unsigned LHSReg,
154 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
155 bool WantResult = true);
156 unsigned emitAddsSubs_ri(bool UseAdds, MVT RetVT, unsigned LHSReg,
157 bool LHSIsKill, uint64_t Imm,
158 bool WantResult = true);
159 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
160 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
161 AArch64_AM::ShiftExtendType ShiftType,
162 uint64_t ShiftImm, bool WantResult = true);
163 unsigned emitAddsSubs_rs(bool UseAdds, MVT RetVT, unsigned LHSReg,
164 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
165 AArch64_AM::ShiftExtendType ShiftType,
166 uint64_t ShiftImm, bool WantResult = true);
167 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
168 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
169 AArch64_AM::ShiftExtendType ExtType,
170 uint64_t ShiftImm, bool WantResult = true);
172 unsigned emitAddsSubs_rx(bool UseAdds, MVT RetVT, unsigned LHSReg,
173 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
174 AArch64_AM::ShiftExtendType ExtType,
175 uint64_t ShiftImm, bool WantResult = true);
178 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
179 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
180 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
181 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
182 bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
183 MachineMemOperand *MMO = nullptr);
184 bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
185 MachineMemOperand *MMO = nullptr);
186 unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
187 unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
188 unsigned emitAdds(MVT RetVT, const Value *LHS, const Value *RHS,
189 bool IsZExt = false, bool WantResult = true);
190 unsigned emitSubs(MVT RetVT, const Value *LHS, const Value *RHS,
191 bool IsZExt = false, bool WantResult = true);
192 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
193 unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
194 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
195 unsigned RHSReg, bool RHSIsKill,
196 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
197 bool WantResult = true);
198 unsigned emitAND_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
199 unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
200 unsigned Op1, bool Op1IsKill);
201 unsigned Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
202 unsigned Op1, bool Op1IsKill);
203 unsigned Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
204 unsigned Op1, bool Op1IsKill);
205 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
206 unsigned Op1Reg, bool Op1IsKill);
207 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
208 uint64_t Imm, bool IsZExt = true);
209 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
210 unsigned Op1Reg, bool Op1IsKill);
211 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
212 uint64_t Imm, bool IsZExt = true);
213 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
214 unsigned Op1Reg, bool Op1IsKill);
215 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
216 uint64_t Imm, bool IsZExt = false);
218 unsigned AArch64MaterializeInt(const ConstantInt *CI, MVT VT);
219 unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
220 unsigned AArch64MaterializeGV(const GlobalValue *GV);
222 // Call handling routines.
224 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
225 bool ProcessCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
227 bool FinishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
230 // Backend specific FastISel code.
231 unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
232 unsigned TargetMaterializeConstant(const Constant *C) override;
233 unsigned TargetMaterializeFloatZero(const ConstantFP* CF) override;
235 explicit AArch64FastISel(FunctionLoweringInfo &funcInfo,
236 const TargetLibraryInfo *libInfo)
237 : FastISel(funcInfo, libInfo) {
238 Subtarget = &TM.getSubtarget<AArch64Subtarget>();
239 Context = &funcInfo.Fn->getContext();
242 bool TargetSelectInstruction(const Instruction *I) override;
244 #include "AArch64GenFastISel.inc"
247 } // end anonymous namespace
249 #include "AArch64GenCallingConv.inc"
251 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
252 if (CC == CallingConv::WebKit_JS)
253 return CC_AArch64_WebKit_JS;
254 return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
257 unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
258 assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
259 "Alloca should always return a pointer.");
261 // Don't handle dynamic allocas.
262 if (!FuncInfo.StaticAllocaMap.count(AI))
265 DenseMap<const AllocaInst *, int>::iterator SI =
266 FuncInfo.StaticAllocaMap.find(AI);
268 if (SI != FuncInfo.StaticAllocaMap.end()) {
269 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
270 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
272 .addFrameIndex(SI->second)
281 unsigned AArch64FastISel::AArch64MaterializeInt(const ConstantInt *CI, MVT VT) {
286 return FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
288 // Create a copy from the zero register to materialize a "0" value.
289 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
290 : &AArch64::GPR32RegClass;
291 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
292 unsigned ResultReg = createResultReg(RC);
293 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
294 ResultReg).addReg(ZeroReg, getKillRegState(true));
298 unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
299 // Positive zero (+0.0) has to be materialized with a fmov from the zero
300 // register, because the immediate version of fmov cannot encode zero.
301 if (CFP->isNullValue())
302 return TargetMaterializeFloatZero(CFP);
304 if (VT != MVT::f32 && VT != MVT::f64)
307 const APFloat Val = CFP->getValueAPF();
308 bool Is64Bit = (VT == MVT::f64);
309 // This checks to see if we can use FMOV instructions to materialize
310 // a constant, otherwise we have to materialize via the constant pool.
311 if (TLI.isFPImmLegal(Val, VT)) {
313 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
314 assert((Imm != -1) && "Cannot encode floating-point constant.");
315 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
316 return FastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
319 // Materialize via constant pool. MachineConstantPool wants an explicit
321 unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
323 Align = DL.getTypeAllocSize(CFP->getType());
325 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
326 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
327 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
328 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
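// ADRP yields the 4 KiB-aligned page address of the constant-pool entry; the
// load below supplies the low 12 bits through its MO_PAGEOFF operand.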
330 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
331 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
332 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
334 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
338 unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
339 // We can't handle thread-local variables quickly yet.
340 if (GV->isThreadLocal())
343 // MachO still uses GOT for large code-model accesses, but ELF requires
344 // movz/movk sequences, which FastISel doesn't handle yet.
345 if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
348 unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
350 EVT DestEVT = TLI.getValueType(GV->getType(), true);
351 if (!DestEVT.isSimple())
354 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
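// Both paths below start with an ADRP of the target's 4 KiB page: the GOT
// case then loads the pointer from the GOT slot (MO_GOT | MO_PAGEOFF), while
// the direct case adds the low 12 bits of the symbol address (MO_PAGEOFF).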
357 if (OpFlags & AArch64II::MO_GOT) {
359 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
361 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
363 ResultReg = createResultReg(&AArch64::GPR64RegClass);
364 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
367 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
371 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
373 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
375 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
376 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
379 .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
385 unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
386 EVT CEVT = TLI.getValueType(C->getType(), true);
388 // Only handle simple types.
389 if (!CEVT.isSimple())
391 MVT VT = CEVT.getSimpleVT();
393 if (const auto *CI = dyn_cast<ConstantInt>(C))
394 return AArch64MaterializeInt(CI, VT);
395 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
396 return AArch64MaterializeFP(CFP, VT);
397 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
398 return AArch64MaterializeGV(GV);
403 unsigned AArch64FastISel::TargetMaterializeFloatZero(const ConstantFP* CFP) {
404 assert(CFP->isNullValue() &&
405 "Floating-point constant is not a positive zero.");
407 if (!isTypeLegal(CFP->getType(), VT))
410 if (VT != MVT::f32 && VT != MVT::f64)
413 bool Is64Bit = (VT == MVT::f64);
414 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
415 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
416 return FastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
419 // Computes the address to get to an object.
420 bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr, Type *Ty)
422 const User *U = nullptr;
423 unsigned Opcode = Instruction::UserOp1;
424 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
425 // Don't walk into other basic blocks unless the object is an alloca from
426 // another block, otherwise it may not have a virtual register assigned.
427 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
428 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
429 Opcode = I->getOpcode();
432 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
433 Opcode = C->getOpcode();
437 if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
438 if (Ty->getAddressSpace() > 255)
439 // Fast instruction selection doesn't support the special
446 case Instruction::BitCast: {
447 // Look through bitcasts.
448 return ComputeAddress(U->getOperand(0), Addr, Ty);
450 case Instruction::IntToPtr: {
451 // Look past no-op inttoptrs.
452 if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
453 return ComputeAddress(U->getOperand(0), Addr, Ty);
456 case Instruction::PtrToInt: {
457 // Look past no-op ptrtoints.
458 if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
459 return ComputeAddress(U->getOperand(0), Addr, Ty);
462 case Instruction::GetElementPtr: {
463 Address SavedAddr = Addr;
464 uint64_t TmpOffset = Addr.getOffset();
466 // Iterate through the GEP folding the constants into offsets where
468 gep_type_iterator GTI = gep_type_begin(U);
469 for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
471 const Value *Op = *i;
472 if (StructType *STy = dyn_cast<StructType>(*GTI)) {
473 const StructLayout *SL = DL.getStructLayout(STy);
474 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
475 TmpOffset += SL->getElementOffset(Idx);
477 uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
479 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
480 // Constant-offset addressing.
481 TmpOffset += CI->getSExtValue() * S;
484 if (canFoldAddIntoGEP(U, Op)) {
485 // A compatible add with a constant operand. Fold the constant.
487 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
488 TmpOffset += CI->getSExtValue() * S;
489 // Iterate on the other operand.
490 Op = cast<AddOperator>(Op)->getOperand(0);
494 goto unsupported_gep;
499 // Try to grab the base operand now.
500 Addr.setOffset(TmpOffset);
501 if (ComputeAddress(U->getOperand(0), Addr, Ty))
504 // We failed, restore everything and try the other options.
510 case Instruction::Alloca: {
511 const AllocaInst *AI = cast<AllocaInst>(Obj);
512 DenseMap<const AllocaInst *, int>::iterator SI =
513 FuncInfo.StaticAllocaMap.find(AI);
514 if (SI != FuncInfo.StaticAllocaMap.end()) {
515 Addr.setKind(Address::FrameIndexBase);
516 Addr.setFI(SI->second);
521 case Instruction::Add: {
522 // Adds of constants are common and easy enough.
523 const Value *LHS = U->getOperand(0);
524 const Value *RHS = U->getOperand(1);
526 if (isa<ConstantInt>(LHS))
529 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
530 Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
531 return ComputeAddress(LHS, Addr, Ty);
534 Address Backup = Addr;
535 if (ComputeAddress(LHS, Addr, Ty) && ComputeAddress(RHS, Addr, Ty))
541 case Instruction::Shl:
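// A shift feeding the address can be folded into the register-offset form
// (base + index << log2(size)) when the shift amount matches the access size.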
542 if (Addr.getOffsetReg())
545 if (const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
546 unsigned Val = CI->getZExtValue();
547 if (Val < 1 || Val > 3)
550 uint64_t NumBytes = 0;
551 if (Ty && Ty->isSized()) {
552 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
553 NumBytes = NumBits / 8;
554 if (!isPowerOf2_64(NumBits))
558 if (NumBytes != (1ULL << Val))
562 Addr.setExtendType(AArch64_AM::LSL);
564 if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
565 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
568 if (const auto *ZE = dyn_cast<ZExtInst>(U))
569 if (ZE->getOperand(0)->getType()->isIntegerTy(32))
570 Addr.setExtendType(AArch64_AM::UXTW);
572 if (const auto *SE = dyn_cast<SExtInst>(U))
573 if (SE->getOperand(0)->getType()->isIntegerTy(32))
574 Addr.setExtendType(AArch64_AM::SXTW);
576 unsigned Reg = getRegForValue(U->getOperand(0));
579 Addr.setOffsetReg(Reg);
586 if (!Addr.getOffsetReg()) {
587 unsigned Reg = getRegForValue(Obj);
590 Addr.setOffsetReg(Reg);
596 unsigned Reg = getRegForValue(Obj);
603 bool AArch64FastISel::ComputeCallAddress(const Value *V, Address &Addr) {
604 const User *U = nullptr;
605 unsigned Opcode = Instruction::UserOp1;
608 if (const auto *I = dyn_cast<Instruction>(V)) {
609 Opcode = I->getOpcode();
611 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
612 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
613 Opcode = C->getOpcode();
619 case Instruction::BitCast:
620 // Look past a bitcast if its operand is in the same BB.
622 return ComputeCallAddress(U->getOperand(0), Addr);
624 case Instruction::IntToPtr:
625 // Look past a no-op inttoptr if its operand is in the same BB.
627 TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
628 return ComputeCallAddress(U->getOperand(0), Addr);
630 case Instruction::PtrToInt:
631 // Look past a no-op ptrtoint if its operand is in the same BB.
633 TLI.getValueType(U->getType()) == TLI.getPointerTy())
634 return ComputeCallAddress(U->getOperand(0), Addr);
638 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
639 Addr.setGlobalValue(GV);
643 // If all else fails, try to materialize the value in a register.
644 if (!Addr.getGlobalValue()) {
645 Addr.setReg(getRegForValue(V));
646 return Addr.getReg() != 0;
653 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
654 EVT evt = TLI.getValueType(Ty, true);
656 // Only handle simple types.
657 if (evt == MVT::Other || !evt.isSimple())
659 VT = evt.getSimpleVT();
661 // This is a legal type, but it's not something we handle in fast-isel.
665 // Handle all other legal types, i.e. a register that will directly hold this
667 return TLI.isTypeLegal(VT);
670 bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
671 if (isTypeLegal(Ty, VT))
674 // If this is a type that can be sign- or zero-extended to a basic operation,
675 // go ahead and accept it now. For stores, this reflects truncation.
676 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
682 bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT) {
683 unsigned ScaleFactor;
684 switch (VT.SimpleTy) {
685 default: return false;
686 case MVT::i1: // fall-through
687 case MVT::i8: ScaleFactor = 1; break;
688 case MVT::i16: ScaleFactor = 2; break;
689 case MVT::i32: // fall-through
690 case MVT::f32: ScaleFactor = 4; break;
691 case MVT::i64: // fall-through
692 case MVT::f64: ScaleFactor = 8; break;
695 bool ImmediateOffsetNeedsLowering = false;
696 bool RegisterOffsetNeedsLowering = false;
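// Scaled loads/stores take an unsigned 12-bit immediate (in units of the
// access size); negative or unaligned offsets must fit the unscaled signed
// 9-bit form, otherwise the offset is materialized separately below.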
697 int64_t Offset = Addr.getOffset();
698 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
699 ImmediateOffsetNeedsLowering = true;
700 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
701 !isUInt<12>(Offset / ScaleFactor))
702 ImmediateOffsetNeedsLowering = true;
704 // Cannot encode an offset register and an immediate offset in the same
705 // instruction. Fold the immediate offset into the load/store instruction and
706 // emit an additional add to take care of the offset register.
707 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() &&
709 RegisterOffsetNeedsLowering = true;
711 // Cannot encode zero register as base.
712 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
713 RegisterOffsetNeedsLowering = true;
715 // If the base is a frame index and the offset needs to be simplified, put
716 // the alloca address into a register, switch the base kind back to register,
717 // and continue. This should almost never happen.
718 if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) {
719 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
720 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
722 .addFrameIndex(Addr.getFI())
725 Addr.setKind(Address::RegBase);
726 Addr.setReg(ResultReg);
729 if (RegisterOffsetNeedsLowering) {
730 unsigned ResultReg = 0;
732 if (Addr.getExtendType() == AArch64_AM::SXTW ||
733 Addr.getExtendType() == AArch64_AM::UXTW )
734 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
735 /*TODO:IsKill=*/false, Addr.getOffsetReg(),
736 /*TODO:IsKill=*/false, Addr.getExtendType(),
739 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
740 /*TODO:IsKill=*/false, Addr.getOffsetReg(),
741 /*TODO:IsKill=*/false, AArch64_AM::LSL,
744 if (Addr.getExtendType() == AArch64_AM::UXTW)
745 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
746 /*Op0IsKill=*/false, Addr.getShift(),
748 else if (Addr.getExtendType() == AArch64_AM::SXTW)
749 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
750 /*Op0IsKill=*/false, Addr.getShift(),
753 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
754 /*Op0IsKill=*/false, Addr.getShift());
759 Addr.setReg(ResultReg);
760 Addr.setOffsetReg(0);
762 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
765 // Since the offset is too large for the load/store instruction, get the
766 // reg+offset into a register.
767 if (ImmediateOffsetNeedsLowering) {
768 unsigned ResultReg = 0;
770 ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(),
771 /*IsKill=*/false, Offset, MVT::i64);
773 ResultReg = FastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
777 Addr.setReg(ResultReg);
783 void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
784 const MachineInstrBuilder &MIB,
786 unsigned ScaleFactor,
787 MachineMemOperand *MMO) {
788 int64_t Offset = Addr.getOffset() / ScaleFactor;
789 // Frame base works a bit differently. Handle it separately.
790 if (Addr.isFIBase()) {
791 int FI = Addr.getFI();
792 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
793 // and alignment should be based on the VT.
794 MMO = FuncInfo.MF->getMachineMemOperand(
795 MachinePointerInfo::getFixedStack(FI, Offset), Flags,
796 MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
797 // Now add the rest of the operands.
798 MIB.addFrameIndex(FI).addImm(Offset);
800 assert(Addr.isRegBase() && "Unexpected address kind.");
801 const MCInstrDesc &II = MIB->getDesc();
802 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
804 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
806 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
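// Register-offset forms take two extra immediate operands: whether the offset
// register is sign-extended (SXTW/SXTX rather than UXTW/LSL), and whether it
// is shifted by the access size.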
807 if (Addr.getOffsetReg()) {
808 assert(Addr.getOffset() == 0 && "Unexpected offset");
809 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
810 Addr.getExtendType() == AArch64_AM::SXTX;
811 MIB.addReg(Addr.getReg());
812 MIB.addReg(Addr.getOffsetReg());
813 MIB.addImm(IsSigned);
814 MIB.addImm(Addr.getShift() != 0);
816 MIB.addReg(Addr.getReg());
822 MIB.addMemOperand(MMO);
825 unsigned AArch64FastISel::emitAddsSubs(bool UseAdds, MVT RetVT,
826 const Value *LHS, const Value *RHS,
827 bool IsZExt, bool WantResult) {
828 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
829 bool NeedExtend = false;
830 switch (RetVT.SimpleTy) {
838 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
842 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
844 case MVT::i32: // fall-through
849 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
851 // Canonicalize immediates to the RHS first.
852 if (UseAdds && isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
855 // Canonicalize shift immediate to the RHS.
857 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
858 if (isa<ConstantInt>(SI->getOperand(1)))
859 if (SI->getOpcode() == Instruction::Shl ||
860 SI->getOpcode() == Instruction::LShr ||
861 SI->getOpcode() == Instruction::AShr )
864 unsigned LHSReg = getRegForValue(LHS);
867 bool LHSIsKill = hasTrivialKill(LHS);
870 LHSReg = EmitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
872 unsigned ResultReg = 0;
873 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
874 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
877 emitAddsSubs_ri(!UseAdds, RetVT, LHSReg, LHSIsKill, -Imm, WantResult);
880 emitAddsSubs_ri(UseAdds, RetVT, LHSReg, LHSIsKill, Imm, WantResult);
885 // Only extend the RHS within the instruction if there is a valid extend type.
886 if (ExtendType != AArch64_AM::InvalidShiftExtend) {
887 if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
888 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
889 if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
890 unsigned RHSReg = getRegForValue(SI->getOperand(0));
893 bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
894 return emitAddsSubs_rx(UseAdds, RetVT, LHSReg, LHSIsKill, RHSReg,
895 RHSIsKill, ExtendType, C->getZExtValue(),
898 unsigned RHSReg = getRegForValue(RHS);
901 bool RHSIsKill = hasTrivialKill(RHS);
902 return emitAddsSubs_rx(UseAdds, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
903 ExtendType, 0, WantResult);
906 // Check if the shift can be folded into the instruction.
907 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
908 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
909 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
910 switch (SI->getOpcode()) {
912 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
913 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
914 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
916 uint64_t ShiftVal = C->getZExtValue();
917 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
918 unsigned RHSReg = getRegForValue(SI->getOperand(0));
921 bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
922 return emitAddsSubs_rs(UseAdds, RetVT, LHSReg, LHSIsKill, RHSReg,
923 RHSIsKill, ShiftType, ShiftVal, WantResult);
928 unsigned RHSReg = getRegForValue(RHS);
931 bool RHSIsKill = hasTrivialKill(RHS);
934 RHSReg = EmitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
936 return emitAddsSubs_rr(UseAdds, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
940 unsigned AArch64FastISel::emitAddsSubs_rr(bool UseAdds, MVT RetVT,
941 unsigned LHSReg, bool LHSIsKill,
942 unsigned RHSReg, bool RHSIsKill,
944 assert(LHSReg && RHSReg && "Invalid register number.");
946 if (RetVT != MVT::i32 && RetVT != MVT::i64)
949 static const unsigned OpcTable[2][2] = {
950 { AArch64::ADDSWrr, AArch64::ADDSXrr },
951 { AArch64::SUBSWrr, AArch64::SUBSXrr }
953 unsigned Opc = OpcTable[!UseAdds][(RetVT == MVT::i64)];
956 const TargetRegisterClass *RC =
957 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
958 ResultReg = createResultReg(RC);
960 ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
962 const MCInstrDesc &II = TII.get(Opc);
963 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
964 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
965 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
966 .addReg(LHSReg, getKillRegState(LHSIsKill))
967 .addReg(RHSReg, getKillRegState(RHSIsKill));
972 unsigned AArch64FastISel::emitAddsSubs_ri(bool UseAdds, MVT RetVT,
973 unsigned LHSReg, bool LHSIsKill,
974 uint64_t Imm, bool WantResult) {
975 assert(LHSReg && "Invalid register number.");
977 if (RetVT != MVT::i32 && RetVT != MVT::i64)
983 else if ((Imm & 0xfff000) == Imm) {
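// ADD/SUB (immediate) encodes a 12-bit unsigned value with an optional
// LSL #12, so masks of the form 0xNNN000 use the shifted form.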
989 static const unsigned OpcTable[2][2] = {
990 { AArch64::ADDSWri, AArch64::ADDSXri },
991 { AArch64::SUBSWri, AArch64::SUBSXri }
993 unsigned Opc = OpcTable[!UseAdds][(RetVT == MVT::i64)];
996 const TargetRegisterClass *RC =
997 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
998 ResultReg = createResultReg(RC);
1000 ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1002 const MCInstrDesc &II = TII.get(Opc);
1003 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1004 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1005 .addReg(LHSReg, getKillRegState(LHSIsKill))
1007 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1012 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT,
1013 unsigned LHSReg, bool LHSIsKill,
1014 unsigned RHSReg, bool RHSIsKill,
1015 AArch64_AM::ShiftExtendType ShiftType,
1016 uint64_t ShiftImm, bool WantResult) {
1017 assert(LHSReg && RHSReg && "Invalid register number.");
1019 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1022 static const unsigned OpcTable[2][2] = {
1023 { AArch64::ADDWrs, AArch64::ADDXrs },
1024 { AArch64::SUBWrs, AArch64::SUBXrs }
1026 unsigned Opc = OpcTable[!UseAdd][(RetVT == MVT::i64)];
1029 const TargetRegisterClass *RC =
1030 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1031 ResultReg = createResultReg(RC);
1033 ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1035 const MCInstrDesc &II = TII.get(Opc);
1036 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1037 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1038 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1039 .addReg(LHSReg, getKillRegState(LHSIsKill))
1040 .addReg(RHSReg, getKillRegState(RHSIsKill))
1041 .addImm(getShifterImm(ShiftType, ShiftImm));
1046 unsigned AArch64FastISel::emitAddsSubs_rs(bool UseAdds, MVT RetVT,
1047 unsigned LHSReg, bool LHSIsKill,
1048 unsigned RHSReg, bool RHSIsKill,
1049 AArch64_AM::ShiftExtendType ShiftType,
1050 uint64_t ShiftImm, bool WantResult) {
1051 assert(LHSReg && RHSReg && "Invalid register number.");
1053 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1056 static const unsigned OpcTable[2][2] = {
1057 { AArch64::ADDSWrs, AArch64::ADDSXrs },
1058 { AArch64::SUBSWrs, AArch64::SUBSXrs }
1060 unsigned Opc = OpcTable[!UseAdds][(RetVT == MVT::i64)];
1063 const TargetRegisterClass *RC =
1064 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1065 ResultReg = createResultReg(RC);
1067 ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1069 const MCInstrDesc &II = TII.get(Opc);
1070 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1071 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1072 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1073 .addReg(LHSReg, getKillRegState(LHSIsKill))
1074 .addReg(RHSReg, getKillRegState(RHSIsKill))
1075 .addImm(getShifterImm(ShiftType, ShiftImm));
1080 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT,
1081 unsigned LHSReg, bool LHSIsKill,
1082 unsigned RHSReg, bool RHSIsKill,
1083 AArch64_AM::ShiftExtendType ExtType,
1084 uint64_t ShiftImm, bool WantResult) {
1085 assert(LHSReg && RHSReg && "Invalid register number.");
1087 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1090 static const unsigned OpcTable[2][2] = {
1091 { AArch64::ADDWrx, AArch64::ADDXrx },
1092 { AArch64::SUBWrx, AArch64::SUBXrx }
1094 unsigned Opc = OpcTable[!UseAdd][(RetVT == MVT::i64)];
1097 const TargetRegisterClass *RC =
1098 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1099 ResultReg = createResultReg(RC);
1101 ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1103 const MCInstrDesc &II = TII.get(Opc);
1104 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1105 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1106 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1107 .addReg(LHSReg, getKillRegState(LHSIsKill))
1108 .addReg(RHSReg, getKillRegState(RHSIsKill))
1109 .addImm(getArithExtendImm(ExtType, ShiftImm));
1114 unsigned AArch64FastISel::emitAddsSubs_rx(bool UseAdds, MVT RetVT,
1115 unsigned LHSReg, bool LHSIsKill,
1116 unsigned RHSReg, bool RHSIsKill,
1117 AArch64_AM::ShiftExtendType ExtType,
1118 uint64_t ShiftImm, bool WantResult) {
1119 assert(LHSReg && RHSReg && "Invalid register number.");
1121 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1124 static const unsigned OpcTable[2][2] = {
1125 { AArch64::ADDSWrx, AArch64::ADDSXrx },
1126 { AArch64::SUBSWrx, AArch64::SUBSXrx }
1128 unsigned Opc = OpcTable[!UseAdds][(RetVT == MVT::i64)];
1131 const TargetRegisterClass *RC =
1132 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1133 ResultReg = createResultReg(RC);
1135 ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1137 const MCInstrDesc &II = TII.get(Opc);
1138 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1139 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1140 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1141 .addReg(LHSReg, getKillRegState(LHSIsKill))
1142 .addReg(RHSReg, getKillRegState(RHSIsKill))
1143 .addImm(getArithExtendImm(ExtType, ShiftImm));
1148 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1149 Type *Ty = LHS->getType();
1150 EVT EVT = TLI.getValueType(Ty, true);
1151 if (!EVT.isSimple())
1153 MVT VT = EVT.getSimpleVT();
1155 switch (VT.SimpleTy) {
1163 return emitICmp(VT, LHS, RHS, IsZExt);
1166 return emitFCmp(VT, LHS, RHS);
1170 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1172 return emitSubs(RetVT, LHS, RHS, IsZExt, /*WantResult=*/false) != 0;
1175 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1177 return emitAddsSubs_ri(false, RetVT, LHSReg, LHSIsKill, Imm,
1178 /*WantResult=*/false) != 0;
1181 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1182 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1185 // Check to see if the 2nd operand is a constant that we can encode directly
1187 bool UseImm = false;
1188 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1189 if (CFP->isZero() && !CFP->isNegative())
1192 unsigned LHSReg = getRegForValue(LHS);
1195 bool LHSIsKill = hasTrivialKill(LHS);
1198 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1199 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1200 .addReg(LHSReg, getKillRegState(LHSIsKill));
1204 unsigned RHSReg = getRegForValue(RHS);
1207 bool RHSIsKill = hasTrivialKill(RHS);
1209 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1210 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1211 .addReg(LHSReg, getKillRegState(LHSIsKill))
1212 .addReg(RHSReg, getKillRegState(RHSIsKill));
1216 unsigned AArch64FastISel::emitAdds(MVT RetVT, const Value *LHS,
1217 const Value *RHS, bool IsZExt,
1219 return emitAddsSubs(true, RetVT, LHS, RHS, IsZExt, WantResult);
1222 unsigned AArch64FastISel::emitSubs(MVT RetVT, const Value *LHS,
1223 const Value *RHS, bool IsZExt,
1225 return emitAddsSubs(false, RetVT, LHS, RHS, IsZExt, WantResult);
1228 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1229 bool LHSIsKill, unsigned RHSReg,
1230 bool RHSIsKill, bool WantResult) {
1231 return emitAddsSubs_rr(false, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1235 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1236 bool LHSIsKill, unsigned RHSReg,
1238 AArch64_AM::ShiftExtendType ShiftType,
1239 uint64_t ShiftImm, bool WantResult) {
1240 return emitAddsSubs_rs(false, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1241 ShiftType, ShiftImm, WantResult);
1244 // FIXME: This should be eventually generated automatically by tblgen.
1245 unsigned AArch64FastISel::emitAND_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1247 const TargetRegisterClass *RC = nullptr;
1249 unsigned RegSize = 0;
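// ANDWri/ANDXri only accept AArch64 logical immediates (repeating bitmask
// patterns), so the constant is validated and encoded before emission.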
1250 switch (RetVT.SimpleTy) {
1254 Opc = AArch64::ANDWri;
1255 RC = &AArch64::GPR32spRegClass;
1259 Opc = AArch64::ANDXri;
1260 RC = &AArch64::GPR64spRegClass;
1265 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1268 return FastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1269 AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1272 bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
1273 MachineMemOperand *MMO) {
1274 // Simplify this down to something we can handle.
1275 if (!SimplifyAddress(Addr, VT))
1278 unsigned ScaleFactor;
1279 switch (VT.SimpleTy) {
1280 default: llvm_unreachable("Unexpected value type.");
1281 case MVT::i1: // fall-through
1282 case MVT::i8: ScaleFactor = 1; break;
1283 case MVT::i16: ScaleFactor = 2; break;
1284 case MVT::i32: // fall-through
1285 case MVT::f32: ScaleFactor = 4; break;
1286 case MVT::i64: // fall-through
1287 case MVT::f64: ScaleFactor = 8; break;
1290 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1291 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1292 bool UseScaled = true;
1293 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1298 static const unsigned OpcTable[4][6] = {
1299 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, AArch64::LDURXi,
1300 AArch64::LDURSi, AArch64::LDURDi },
1301 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, AArch64::LDRXui,
1302 AArch64::LDRSui, AArch64::LDRDui },
1303 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX,
1304 AArch64::LDRSroX, AArch64::LDRDroX },
1305 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW,
1306 AArch64::LDRSroW, AArch64::LDRDroW }
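// Rows of the table: unscaled signed 9-bit offset (LDUR*), scaled unsigned
// 12-bit offset (LDR*ui), register offset with 64-bit index (LDR*roX), and
// register offset with extended 32-bit index (LDR*roW). Columns are indexed
// by type: i8, i16, i32, i64, f32, f64.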
1310 const TargetRegisterClass *RC;
1311 bool VTIsi1 = false;
1312 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1313 Addr.getOffsetReg();
1314 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1315 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1316 Addr.getExtendType() == AArch64_AM::SXTW)
1319 switch (VT.SimpleTy) {
1320 default: llvm_unreachable("Unexpected value type.");
1321 case MVT::i1: VTIsi1 = true; // Intentional fall-through.
1322 case MVT::i8: Opc = OpcTable[Idx][0]; RC = &AArch64::GPR32RegClass; break;
1323 case MVT::i16: Opc = OpcTable[Idx][1]; RC = &AArch64::GPR32RegClass; break;
1324 case MVT::i32: Opc = OpcTable[Idx][2]; RC = &AArch64::GPR32RegClass; break;
1325 case MVT::i64: Opc = OpcTable[Idx][3]; RC = &AArch64::GPR64RegClass; break;
1326 case MVT::f32: Opc = OpcTable[Idx][4]; RC = &AArch64::FPR32RegClass; break;
1327 case MVT::f64: Opc = OpcTable[Idx][5]; RC = &AArch64::FPR64RegClass; break;
1330 // Create the base instruction, then add the operands.
1331 ResultReg = createResultReg(RC);
1332 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1333 TII.get(Opc), ResultReg);
1334 AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1336 // Loading an i1 requires special handling.
1338 unsigned ANDReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1339 assert(ANDReg && "Unexpected AND instruction emission failure.");
1345 bool AArch64FastISel::SelectLoad(const Instruction *I) {
1347 // Verify we have a legal type before going any further. Currently, we handle
1348 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1349 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1350 if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic())
1353 // See if we can handle this address.
1355 if (!ComputeAddress(I->getOperand(0), Addr, I->getType()))
1359 if (!EmitLoad(VT, ResultReg, Addr, createMachineMemOperandFor(I)))
1362 UpdateValueMap(I, ResultReg);
1366 bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
1367 MachineMemOperand *MMO) {
1368 // Simplify this down to something we can handle.
1369 if (!SimplifyAddress(Addr, VT))
1372 unsigned ScaleFactor;
1373 switch (VT.SimpleTy) {
1374 default: llvm_unreachable("Unexpected value type.");
1375 case MVT::i1: // fall-through
1376 case MVT::i8: ScaleFactor = 1; break;
1377 case MVT::i16: ScaleFactor = 2; break;
1378 case MVT::i32: // fall-through
1379 case MVT::f32: ScaleFactor = 4; break;
1380 case MVT::i64: // fall-through
1381 case MVT::f64: ScaleFactor = 8; break;
1384 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1385 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1386 bool UseScaled = true;
1387 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1393 static const unsigned OpcTable[4][6] = {
1394 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
1395 AArch64::STURSi, AArch64::STURDi },
1396 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
1397 AArch64::STRSui, AArch64::STRDui },
1398 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
1399 AArch64::STRSroX, AArch64::STRDroX },
1400 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
1401 AArch64::STRSroW, AArch64::STRDroW }
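// Same row/column layout as the load opcode table above.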
1406 bool VTIsi1 = false;
1407 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1408 Addr.getOffsetReg();
1409 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1410 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1411 Addr.getExtendType() == AArch64_AM::SXTW)
1414 switch (VT.SimpleTy) {
1415 default: llvm_unreachable("Unexpected value type.");
1416 case MVT::i1: VTIsi1 = true; // Intentional fall-through.
1417 case MVT::i8: Opc = OpcTable[Idx][0]; break;
1418 case MVT::i16: Opc = OpcTable[Idx][1]; break;
1419 case MVT::i32: Opc = OpcTable[Idx][2]; break;
1420 case MVT::i64: Opc = OpcTable[Idx][3]; break;
1421 case MVT::f32: Opc = OpcTable[Idx][4]; break;
1422 case MVT::f64: Opc = OpcTable[Idx][5]; break;
1425 // Storing an i1 requires special handling.
1426 if (VTIsi1 && SrcReg != AArch64::WZR) {
1427 unsigned ANDReg = emitAND_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
1428 assert(ANDReg && "Unexpected AND instruction emission failure.");
1431 // Create the base instruction, then add the operands.
1432 const MCInstrDesc &II = TII.get(Opc);
1433 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
1434 MachineInstrBuilder MIB =
1435 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
1436 AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
1441 bool AArch64FastISel::SelectStore(const Instruction *I) {
1443 const Value *Op0 = I->getOperand(0);
1444 // Verify we have a legal type before going any further. Currently, we handle
1445 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1446 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1447 if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
1448 cast<StoreInst>(I)->isAtomic())
1451 // Get the value to be stored into a register. Use the zero register directly
1452 // when possible to avoid an unnecessary copy and a wasted register.
1453 unsigned SrcReg = 0;
1454 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
1456 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1457 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
1458 if (CF->isZero() && !CF->isNegative()) {
1459 VT = MVT::getIntegerVT(VT.getSizeInBits());
1460 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1465 SrcReg = getRegForValue(Op0);
1470 // See if we can handle this address.
1472 if (!ComputeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
1475 if (!EmitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
1480 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
1482 case CmpInst::FCMP_ONE:
1483 case CmpInst::FCMP_UEQ:
1485 // AL is our "false" for now. The other two need more compares.
1486 return AArch64CC::AL;
1487 case CmpInst::ICMP_EQ:
1488 case CmpInst::FCMP_OEQ:
1489 return AArch64CC::EQ;
1490 case CmpInst::ICMP_SGT:
1491 case CmpInst::FCMP_OGT:
1492 return AArch64CC::GT;
1493 case CmpInst::ICMP_SGE:
1494 case CmpInst::FCMP_OGE:
1495 return AArch64CC::GE;
1496 case CmpInst::ICMP_UGT:
1497 case CmpInst::FCMP_UGT:
1498 return AArch64CC::HI;
1499 case CmpInst::FCMP_OLT:
1500 return AArch64CC::MI;
1501 case CmpInst::ICMP_ULE:
1502 case CmpInst::FCMP_OLE:
1503 return AArch64CC::LS;
1504 case CmpInst::FCMP_ORD:
1505 return AArch64CC::VC;
1506 case CmpInst::FCMP_UNO:
1507 return AArch64CC::VS;
1508 case CmpInst::FCMP_UGE:
1509 return AArch64CC::PL;
1510 case CmpInst::ICMP_SLT:
1511 case CmpInst::FCMP_ULT:
1512 return AArch64CC::LT;
1513 case CmpInst::ICMP_SLE:
1514 case CmpInst::FCMP_ULE:
1515 return AArch64CC::LE;
1516 case CmpInst::FCMP_UNE:
1517 case CmpInst::ICMP_NE:
1518 return AArch64CC::NE;
1519 case CmpInst::ICMP_UGE:
1520 return AArch64CC::HS;
1521 case CmpInst::ICMP_ULT:
1522 return AArch64CC::LO;
1526 bool AArch64FastISel::SelectBranch(const Instruction *I) {
1527 const BranchInst *BI = cast<BranchInst>(I);
1528 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1529 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1531 AArch64CC::CondCode CC = AArch64CC::NE;
1532 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1533 if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
1534 // We may not handle every CC for now.
1535 CC = getCompareCC(CI->getPredicate());
1536 if (CC == AArch64CC::AL)
1540 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1544 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1548 // Obtain the branch weight and add the TrueBB to the successor list.
1549 uint32_t BranchWeight = 0;
1551 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1552 TBB->getBasicBlock());
1553 FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1555 FastEmitBranch(FBB, DbgLoc);
1558 } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1560 if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1561 (isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) {
1562 unsigned CondReg = getRegForValue(TI->getOperand(0));
1565 bool CondIsKill = hasTrivialKill(TI->getOperand(0));
1567 // Issue an extract_subreg to get the lower 32-bits.
1568 if (SrcVT == MVT::i64) {
1569 CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
1574 unsigned ANDReg = emitAND_ri(MVT::i32, CondReg, CondIsKill, 1);
1575 assert(ANDReg && "Unexpected AND instruction emission failure.");
1576 emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
1578 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1579 std::swap(TBB, FBB);
1582 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1586 // Obtain the branch weight and add the TrueBB to the successor list.
1587 uint32_t BranchWeight = 0;
1589 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1590 TBB->getBasicBlock());
1591 FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1593 FastEmitBranch(FBB, DbgLoc);
1596 } else if (const ConstantInt *CI =
1597 dyn_cast<ConstantInt>(BI->getCondition())) {
1598 uint64_t Imm = CI->getZExtValue();
1599 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
1600 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
1603 // Obtain the branch weight and add the target to the successor list.
1604 uint32_t BranchWeight = 0;
1606 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1607 Target->getBasicBlock());
1608 FuncInfo.MBB->addSuccessor(Target, BranchWeight);
1610 } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
1611 // Fake request the condition, otherwise the intrinsic might be completely
1613 unsigned CondReg = getRegForValue(BI->getCondition());
1618 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1622 // Obtain the branch weight and add the TrueBB to the successor list.
1623 uint32_t BranchWeight = 0;
1625 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1626 TBB->getBasicBlock());
1627 FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1629 FastEmitBranch(FBB, DbgLoc);
1633 unsigned CondReg = getRegForValue(BI->getCondition());
1636 bool CondRegIsKill = hasTrivialKill(BI->getCondition());
1638 // We've been divorced from our compare! Our block was split, and
1639 // now our compare lives in a predecessor block. We mustn't
1640 // re-compare here, as the children of the compare aren't guaranteed
1641 // live across the block boundary (we *could* check for this).
1642 // Regardless, the compare has been done in the predecessor block,
1643 // and it left a value for us in a virtual register. Ergo, we test
1644 // the one-bit value left in the virtual register.
1645 emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0);
1647 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1648 std::swap(TBB, FBB);
1652 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1656 // Obtain the branch weight and add the TrueBB to the successor list.
1657 uint32_t BranchWeight = 0;
1659 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1660 TBB->getBasicBlock());
1661 FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1663 FastEmitBranch(FBB, DbgLoc);
1667 bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
1668 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
1669 unsigned AddrReg = getRegForValue(BI->getOperand(0));
1673 // Emit the indirect branch.
1674 const MCInstrDesc &II = TII.get(AArch64::BR);
1675 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
1676 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
1678 // Make sure the CFG is up-to-date.
1679 for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
1680 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
1685 bool AArch64FastISel::SelectCmp(const Instruction *I) {
1686 const CmpInst *CI = cast<CmpInst>(I);
1688 // We may not handle every CC for now.
1689 AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
1690 if (CC == AArch64CC::AL)
1694 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1697 // Now set a register based on the comparison.
1698 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
1699 unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
1700 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
1702 .addReg(AArch64::WZR)
1703 .addReg(AArch64::WZR)
1704 .addImm(invertedCC);
1706 UpdateValueMap(I, ResultReg);
1710 bool AArch64FastISel::SelectSelect(const Instruction *I) {
1711 const SelectInst *SI = cast<SelectInst>(I);
1713 EVT DestEVT = TLI.getValueType(SI->getType(), true);
1714 if (!DestEVT.isSimple())
1717 MVT DestVT = DestEVT.getSimpleVT();
1718 if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
1723 const TargetRegisterClass *RC = nullptr;
1724 switch (DestVT.SimpleTy) {
1725 default: return false;
1727 SelectOpc = AArch64::CSELWr; RC = &AArch64::GPR32RegClass; break;
1729 SelectOpc = AArch64::CSELXr; RC = &AArch64::GPR64RegClass; break;
1731 SelectOpc = AArch64::FCSELSrrr; RC = &AArch64::FPR32RegClass; break;
1733 SelectOpc = AArch64::FCSELDrrr; RC = &AArch64::FPR64RegClass; break;
1736 const Value *Cond = SI->getCondition();
1737 bool NeedTest = true;
1738 AArch64CC::CondCode CC = AArch64CC::NE;
1739 if (foldXALUIntrinsic(CC, I, Cond))
1742 unsigned CondReg = getRegForValue(Cond);
1745 bool CondIsKill = hasTrivialKill(Cond);
1748 unsigned ANDReg = emitAND_ri(MVT::i32, CondReg, CondIsKill, 1);
1749 assert(ANDReg && "Unexpected AND instruction emission failure.");
1750 emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
1753 unsigned TrueReg = getRegForValue(SI->getTrueValue());
1754 bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
1756 unsigned FalseReg = getRegForValue(SI->getFalseValue());
1757 bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
1759 if (!TrueReg || !FalseReg)
1762 unsigned ResultReg = FastEmitInst_rri(SelectOpc, RC, TrueReg, TrueIsKill,
1763 FalseReg, FalseIsKill, CC);
1764 UpdateValueMap(I, ResultReg);
1768 bool AArch64FastISel::SelectFPExt(const Instruction *I) {
1769 Value *V = I->getOperand(0);
1770 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
1773 unsigned Op = getRegForValue(V);
1777 unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
1778 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
1779 ResultReg).addReg(Op);
1780 UpdateValueMap(I, ResultReg);
1784 bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
1785 Value *V = I->getOperand(0);
1786 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
1789 unsigned Op = getRegForValue(V);
1793 unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
1794 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
1795 ResultReg).addReg(Op);
1796 UpdateValueMap(I, ResultReg);
1800 // FPToUI and FPToSI
1801 bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
1803 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1806 unsigned SrcReg = getRegForValue(I->getOperand(0));
1810 EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1811 if (SrcVT == MVT::f128)
1815 if (SrcVT == MVT::f64) {
1817 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
1819 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
1822 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
1824 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
1826 unsigned ResultReg = createResultReg(
1827 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
1828 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1830 UpdateValueMap(I, ResultReg);
bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
  MVT DestVT;
  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
    return false;
  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
         "Unexpected value type.");

  unsigned SrcReg = getRegForValue(I->getOperand(0));
  if (!SrcReg)
    return false;
  bool SrcIsKill = hasTrivialKill(I->getOperand(0));

  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);

  // Handle sign-extension.
  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
    SrcReg =
        EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
    if (!SrcReg)
      return false;
    SrcIsKill = true;
  }

  unsigned Opc;
  if (SrcVT == MVT::i64) {
    if (Signed)
      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
    else
      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
  } else {
    if (Signed)
      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
    else
      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
  }
  unsigned ResultReg = FastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
                                      SrcIsKill);
  UpdateValueMap(I, ResultReg);
  return true;
}
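// Try to lower the incoming formal arguments directly, bypassing SelectionDAG.
// Only the plain C calling convention with at most eight GPR and eight FPR
// arguments of simple integer or floating-point type is handled here.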
bool AArch64FastISel::FastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  CallingConv::ID CC = F->getCallingConv();
  if (CC != CallingConv::C)
    return false;
1888 // Only handle simple cases like i1/i8/i16/i32/i64/f32/f64 of up to 8 GPR and
1890 unsigned GPRCnt = 0;
1891 unsigned FPRCnt = 0;
  unsigned Idx = 0;
  for (auto const &Arg : F->args()) {
    // The first argument is at index 1.
    ++Idx;
    if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
        F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
        F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
        F->getAttributes().hasAttribute(Idx, Attribute::Nest))
      return false;
1902 Type *ArgTy = Arg.getType();
1903 if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
1906 EVT ArgVT = TLI.getValueType(ArgTy);
1907 if (!ArgVT.isSimple()) return false;
1908 switch (ArgVT.getSimpleVT().SimpleTy) {
1909 default: return false;
  if (GPRCnt > 8 || FPRCnt > 8)
    return false;
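  // Argument registers, one row per value type: W registers for i32 and
  // smaller, X for i64, and H/S/D for f16/f32/f64. Each row lists the first
  // eight AAPCS argument registers of that class.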
1928 static const MCPhysReg Registers[5][8] = {
1929 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
1930 AArch64::W5, AArch64::W6, AArch64::W7 },
1931 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
1932 AArch64::X5, AArch64::X6, AArch64::X7 },
1933 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
1934 AArch64::H5, AArch64::H6, AArch64::H7 },
1935 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
1936 AArch64::S5, AArch64::S6, AArch64::S7 },
1937 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
      AArch64::D5, AArch64::D6, AArch64::D7 }
  };
1941 unsigned GPRIdx = 0;
1942 unsigned FPRIdx = 0;
1943 for (auto const &Arg : F->args()) {
1944 MVT VT = TLI.getSimpleValueType(Arg.getType());
    unsigned SrcReg;
    const TargetRegisterClass *RC = nullptr;
    switch (VT.SimpleTy) {
    default: llvm_unreachable("Unexpected value type.");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16: VT = MVT::i32; // fall-through
    case MVT::i32:
      SrcReg = Registers[0][GPRIdx++]; RC = &AArch64::GPR32RegClass; break;
    case MVT::i64:
      SrcReg = Registers[1][GPRIdx++]; RC = &AArch64::GPR64RegClass; break;
    case MVT::f16:
      SrcReg = Registers[2][FPRIdx++]; RC = &AArch64::FPR16RegClass; break;
    case MVT::f32:
      SrcReg = Registers[3][FPRIdx++]; RC = &AArch64::FPR32RegClass; break;
    case MVT::f64:
      SrcReg = Registers[4][FPRIdx++]; RC = &AArch64::FPR64RegClass; break;
    }
1964 // Skip unused arguments.
    if (Arg.use_empty()) {
      UpdateValueMap(&Arg, 0);
      continue;
    }
1970 unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
1971 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
1972 // Without this, EmitLiveInCopies may eliminate the livein if its only
1973 // use is a bitcast (which isn't turned into an instruction).
1974 unsigned ResultReg = createResultReg(RC);
1975 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1976 TII.get(TargetOpcode::COPY), ResultReg)
1977 .addReg(DstReg, getKillRegState(true));
    UpdateValueMap(&Arg, ResultReg);
  }
  return true;
}
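// Lower the outgoing arguments of a call: assign locations with the calling
// convention, emit the CALLSEQ_START stack adjustment, extend values where
// the ABI requires it, and copy them into argument registers or store them to
// their stack slots.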
1983 bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI,
1984 SmallVectorImpl<MVT> &OutVTs,
1985 unsigned &NumBytes) {
1986 CallingConv::ID CC = CLI.CallConv;
1987 SmallVector<CCValAssign, 16> ArgLocs;
1988 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
1989 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
1991 // Get a count of how many bytes are to be pushed on the stack.
1992 NumBytes = CCInfo.getNextStackOffset();
1994 // Issue CALLSEQ_START
1995 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
    .addImm(NumBytes);
1999 // Process the args.
2000 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2001 CCValAssign &VA = ArgLocs[i];
2002 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
2003 MVT ArgVT = OutVTs[VA.getValNo()];
2005 unsigned ArgReg = getRegForValue(ArgVal);
    // Handle arg promotion: SExt, ZExt, AExt.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
      if (!ArgReg)
        return false;
      break;
    }
    case CCValAssign::AExt:
    // Intentional fall-through.
    case CCValAssign::ZExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
      if (!ArgReg)
        return false;
      break;
    }
    default:
      llvm_unreachable("Unknown arg promotion!");
    }
2035 // Now copy/store arg to correct locations.
2036 if (VA.isRegLoc() && !VA.needsCustom()) {
2037 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2038 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
2039 CLI.OutRegs.push_back(VA.getLocReg());
2040 } else if (VA.needsCustom()) {
      // FIXME: Handle custom args.
      return false;
    } else {
2044 assert(VA.isMemLoc() && "Assuming store on stack.");
2046 // Don't emit stores for undef values.
2047 if (isa<UndefValue>(ArgVal))
2050 // Need to store on the stack.
2051 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
2053 unsigned BEAlign = 0;
2054 if (ArgSize < 8 && !Subtarget->isLittleEndian())
2055 BEAlign = 8 - ArgSize;
      Address Addr;
      Addr.setKind(Address::RegBase);
2059 Addr.setReg(AArch64::SP);
2060 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
2062 unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
2063 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
2064 MachinePointerInfo::getStack(Addr.getOffset()),
2065 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
      if (!EmitStore(ArgVT, ArgReg, Addr, MMO))
        return false;
    }
  }
  return true;
}
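// Emit the CALLSEQ_END adjustment and, for non-void calls, copy the single
// supported return value out of its physical register into a fresh virtual
// register recorded in CLI.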
2074 bool AArch64FastISel::FinishCall(CallLoweringInfo &CLI, MVT RetVT,
2075 unsigned NumBytes) {
2076 CallingConv::ID CC = CLI.CallConv;
2078 // Issue CALLSEQ_END
2079 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
2080 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
2081 .addImm(NumBytes).addImm(0);
2083 // Now the return value.
2084 if (RetVT != MVT::isVoid) {
2085 SmallVector<CCValAssign, 16> RVLocs;
2086 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
2087 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
2089 // Only handle a single return value.
    if (RVLocs.size() != 1)
      return false;
2093 // Copy all of the result registers out of their specified physreg.
2094 MVT CopyVT = RVLocs[0].getValVT();
2095 unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
2096 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2097 TII.get(TargetOpcode::COPY), ResultReg)
2098 .addReg(RVLocs[0].getLocReg());
2099 CLI.InRegs.push_back(RVLocs[0].getLocReg());
2101 CLI.ResultReg = ResultReg;
    CLI.NumResultRegs = 1;
  }

  return true;
}
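// Fast-path lowering for direct and indirect calls. Tail calls, varargs, and
// aggregate or vector arguments are rejected and left to SelectionDAG.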
2108 bool AArch64FastISel::FastLowerCall(CallLoweringInfo &CLI) {
2109 CallingConv::ID CC = CLI.CallConv;
2110 bool IsTailCall = CLI.IsTailCall;
2111 bool IsVarArg = CLI.IsVarArg;
2112 const Value *Callee = CLI.Callee;
2113 const char *SymName = CLI.SymName;
  // Allow SelectionDAG isel to handle tail calls.
  if (IsTailCall)
    return false;
2119 CodeModel::Model CM = TM.getCodeModel();
2120 // Only support the small and large code model.
2121 if (CM != CodeModel::Small && CM != CodeModel::Large)
2124 // FIXME: Add large code model support for ELF.
2125 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
  // Let SDISel handle vararg functions.
  if (IsVarArg)
    return false;

  // FIXME: Only handle *simple* calls for now.
  MVT RetVT;
  if (CLI.RetTy->isVoidTy())
2135 RetVT = MVT::isVoid;
2136 else if (!isTypeLegal(CLI.RetTy, RetVT))
2139 for (auto Flag : CLI.OutFlags)
2140 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
2143 // Set up the argument vectors.
2144 SmallVector<MVT, 16> OutVTs;
2145 OutVTs.reserve(CLI.OutVals.size());
  for (auto *Val : CLI.OutVals) {
    MVT VT;
    if (!isTypeLegal(Val->getType(), VT) &&
        !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
      return false;
2153 // We don't handle vector parameters yet.
2154 if (VT.isVector() || VT.getSizeInBits() > 64)
    OutVTs.push_back(VT);
  }

  Address Addr;
  if (!ComputeCallAddress(Callee, Addr))
    return false;
2164 // Handle the arguments now that we've gotten them.
2166 if (!ProcessCallArgs(CLI, OutVTs, NumBytes))
2170 MachineInstrBuilder MIB;
2171 if (CM == CodeModel::Small) {
2172 unsigned CallOpc = Addr.getReg() ? AArch64::BLR : AArch64::BL;
2173 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
2175 MIB.addExternalSymbol(SymName, 0);
2176 else if (Addr.getGlobalValue())
2177 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
2178 else if (Addr.getReg())
      MIB.addReg(Addr.getReg());
    else
      return false;
  } else {
    unsigned CallReg = 0;
    if (SymName) {
      unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
2186 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
2188 .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
2190 CallReg = createResultReg(&AArch64::GPR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
              CallReg)
        .addReg(ADRPReg)
        .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                           AArch64II::MO_NC);
2196 } else if (Addr.getGlobalValue()) {
2197 CallReg = AArch64MaterializeGV(Addr.getGlobalValue());
2198 } else if (Addr.getReg())
      CallReg = Addr.getReg();

    if (!CallReg)
      return false;

    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                  TII.get(AArch64::BLR)).addReg(CallReg);
  }
2208 // Add implicit physical register uses to the call.
2209 for (auto Reg : CLI.OutRegs)
2210 MIB.addReg(Reg, RegState::Implicit);
2212 // Add a register mask with the call-preserved registers.
2213 // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(CC));

  CLI.Call = MIB;

  // Finish off the call including any return values.
  return FinishCall(CLI, RetVT, NumBytes);
}
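// Decide whether a constant-length memcpy is small enough to be expanded
// inline instead of being lowered to a libcall.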
2222 bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
2224 return Len / Alignment <= 4;
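// Expand a small memcpy as a sequence of load/store pairs, picking the widest
// chunk size that the remaining length and the known alignment allow.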
2229 bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
2230 uint64_t Len, unsigned Alignment) {
2231 // Make sure we don't bloat code by inlining very large memcpy's.
2232 if (!IsMemCpySmall(Len, Alignment))
2235 int64_t UnscaledOffset = 0;
2236 Address OrigDest = Dest;
2237 Address OrigSrc = Src;
2241 if (!Alignment || Alignment >= 8) {
2252 // Bound based on alignment.
2253 if (Len >= 4 && Alignment == 4)
2255 else if (Len >= 2 && Alignment == 2)
2264 RV = EmitLoad(VT, ResultReg, Src);
2268 RV = EmitStore(VT, ResultReg, Dest);
    int64_t Size = VT.getSizeInBits() / 8;
    Len -= Size;
    UnscaledOffset += Size;
2276 // We need to recompute the unscaled offset for each iteration.
2277 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
    Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
  }

  return true;
}
2284 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
2285 /// into the user. The condition code will only be updated on success.
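/// For example, in
///   %res  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
///   %obit = extractvalue { i32, i1 } %res, 1
///   %sel  = select i1 %obit, i32 %x, i32 %y
/// the ADDS emitted for the intrinsic already sets the flags, so the select
/// can use the VS condition directly instead of re-testing the i1 result.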
2286 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
2287 const Instruction *I,
2288 const Value *Cond) {
2289 if (!isa<ExtractValueInst>(Cond))
2292 const auto *EV = cast<ExtractValueInst>(Cond);
2293 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
2296 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
  const Function *Callee = II->getCalledFunction();
  Type *RetTy =
      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
  MVT RetVT;
  if (!isTypeLegal(RetTy, RetVT))
    return false;

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return false;
2307 AArch64CC::CondCode TmpCC;
2308 switch (II->getIntrinsicID()) {
2309 default: return false;
2310 case Intrinsic::sadd_with_overflow:
2311 case Intrinsic::ssub_with_overflow: TmpCC = AArch64CC::VS; break;
2312 case Intrinsic::uadd_with_overflow: TmpCC = AArch64CC::HS; break;
2313 case Intrinsic::usub_with_overflow: TmpCC = AArch64CC::LO; break;
2314 case Intrinsic::smul_with_overflow:
2315 case Intrinsic::umul_with_overflow: TmpCC = AArch64CC::NE; break;
2318 // Check if both instructions are in the same basic block.
2319 if (II->getParent() != I->getParent())
2322 // Make sure nothing is in the way
2323 BasicBlock::const_iterator Start = I;
2324 BasicBlock::const_iterator End = II;
2325 for (auto Itr = std::prev(Start); Itr != End; --Itr) {
2326 // We only expect extractvalue instructions between the intrinsic and the
2327 // instruction to be selected.
2328 if (!isa<ExtractValueInst>(Itr))
2331 // Check that the extractvalue operand comes from the intrinsic.
2332 const auto *EVI = cast<ExtractValueInst>(Itr);
    if (EVI->getAggregateOperand() != II)
      return false;
  }

  CC = TmpCC;
  return true;
}
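// Lower the handful of intrinsics with direct AArch64 support, returning
// false for everything else so SelectionDAG can handle it.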
2341 bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
2342 // FIXME: Handle more intrinsics.
2343 switch (II->getIntrinsicID()) {
2344 default: return false;
2345 case Intrinsic::frameaddress: {
2346 MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
2347 MFI->setFrameAddressIsTaken(true);
2349 const AArch64RegisterInfo *RegInfo =
2350 static_cast<const AArch64RegisterInfo *>(
2351 TM.getSubtargetImpl()->getRegisterInfo());
2352 unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
2353 unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2354 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2355 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
2356 // Recursively load frame address
    unsigned DestReg;
    unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
    while (Depth--) {
      DestReg = FastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
                                SrcReg, /*IsKill=*/true, 0);
      assert(DestReg && "Unexpected LDR instruction emission failure.");
      SrcReg = DestReg;
    }

    UpdateValueMap(II, SrcReg);
    return true;
  }
2373 case Intrinsic::memcpy:
2374 case Intrinsic::memmove: {
2375 const auto *MTI = cast<MemTransferInst>(II);
2376 // Don't handle volatile.
2377 if (MTI->isVolatile())
2380 // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
2381 // we would emit dead code because we don't currently handle memmoves.
2382 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
2383 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
2384 // Small memcpy's are common enough that we want to do them without a call
2386 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
2387 unsigned Alignment = MTI->getAlignment();
      if (IsMemCpySmall(Len, Alignment)) {
        Address Dest, Src;
        if (!ComputeAddress(MTI->getRawDest(), Dest) ||
            !ComputeAddress(MTI->getRawSource(), Src))
          return false;
        if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
          return true;
      }
    }
2398 if (!MTI->getLength()->getType()->isIntegerTy(64))
2401 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
2402 // Fast instruction selection doesn't support the special
2406 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
2407 return LowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
2409 case Intrinsic::memset: {
2410 const MemSetInst *MSI = cast<MemSetInst>(II);
    // Don't handle volatile.
    if (MSI->isVolatile())
      return false;

    if (!MSI->getLength()->getType()->isIntegerTy(64))
      return false;

    if (MSI->getDestAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;
2423 return LowerCallTo(II, "memset", II->getNumArgOperands() - 2);
2425 case Intrinsic::trap: {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
        .addImm(1);
    return true;
  }
2430 case Intrinsic::sqrt: {
    Type *RetTy = II->getCalledFunction()->getReturnType();

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    unsigned Op0Reg = getRegForValue(II->getOperand(0));
    if (!Op0Reg)
      return false;
    bool Op0IsKill = hasTrivialKill(II->getOperand(0));

    unsigned ResultReg = FastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
    if (!ResultReg)
      return false;

    UpdateValueMap(II, ResultReg);
    return true;
  }
2449 case Intrinsic::sadd_with_overflow:
2450 case Intrinsic::uadd_with_overflow:
2451 case Intrinsic::ssub_with_overflow:
2452 case Intrinsic::usub_with_overflow:
2453 case Intrinsic::smul_with_overflow:
2454 case Intrinsic::umul_with_overflow: {
2455 // This implements the basic lowering of the xalu with overflow intrinsics.
2456 const Function *Callee = II->getCalledFunction();
2457 auto *Ty = cast<StructType>(Callee->getReturnType());
2458 Type *RetTy = Ty->getTypeAtIndex(0U);
2461 if (!isTypeLegal(RetTy, VT))
2464 if (VT != MVT::i32 && VT != MVT::i64)
2467 const Value *LHS = II->getArgOperand(0);
2468 const Value *RHS = II->getArgOperand(1);
2469 // Canonicalize immediate to the RHS.
2470 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2471 isCommutativeIntrinsic(II))
2472 std::swap(LHS, RHS);
2474 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
2475 AArch64CC::CondCode CC = AArch64CC::Invalid;
2476 switch (II->getIntrinsicID()) {
2477 default: llvm_unreachable("Unexpected intrinsic!");
2478 case Intrinsic::sadd_with_overflow:
2479 ResultReg1 = emitAdds(VT, LHS, RHS); CC = AArch64CC::VS; break;
2480 case Intrinsic::uadd_with_overflow:
2481 ResultReg1 = emitAdds(VT, LHS, RHS); CC = AArch64CC::HS; break;
2482 case Intrinsic::ssub_with_overflow:
2483 ResultReg1 = emitSubs(VT, LHS, RHS); CC = AArch64CC::VS; break;
2484 case Intrinsic::usub_with_overflow:
2485 ResultReg1 = emitSubs(VT, LHS, RHS); CC = AArch64CC::LO; break;
2486 case Intrinsic::smul_with_overflow: {
2488 unsigned LHSReg = getRegForValue(LHS);
2491 bool LHSIsKill = hasTrivialKill(LHS);
2493 unsigned RHSReg = getRegForValue(RHS);
2496 bool RHSIsKill = hasTrivialKill(RHS);
2498 if (VT == MVT::i32) {
2499 MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2500 unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
2501 /*IsKill=*/false, 32);
        MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
                                            AArch64::sub_32);
        ShiftReg = FastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
                                              AArch64::sub_32);
        emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
                    AArch64_AM::ASR, 31, /*WantResult=*/false);
      } else {
        assert(VT == MVT::i64 && "Unexpected value type.");
2510 MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
        unsigned SMULHReg = FastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
                                        RHSReg, RHSIsKill);
2513 emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
2514 AArch64_AM::ASR, 63, /*WantResult=*/false);
2518 case Intrinsic::umul_with_overflow: {
2520 unsigned LHSReg = getRegForValue(LHS);
2523 bool LHSIsKill = hasTrivialKill(LHS);
2525 unsigned RHSReg = getRegForValue(RHS);
2528 bool RHSIsKill = hasTrivialKill(RHS);
2530 if (VT == MVT::i32) {
2531 MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2532 emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
2533 /*IsKill=*/false, AArch64_AM::LSR, 32,
2534 /*WantResult=*/false);
        MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
                                            AArch64::sub_32);
      } else {
        assert(VT == MVT::i64 && "Unexpected value type.");
2539 MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
        unsigned UMULHReg = FastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
                                        RHSReg, RHSIsKill);
2542 emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
2543 /*IsKill=*/false, /*WantResult=*/false);
2550 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
2551 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2552 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
2555 ResultReg2 = FastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
2556 AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
2557 /*IsKill=*/true, getInvertedCondCode(CC));
2558 assert((ResultReg1 + 1) == ResultReg2 &&
2559 "Nonconsecutive result registers.");
    UpdateValueMap(II, ResultReg1, 2);
    return true;
  }
  }
  return false;
}
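// Lower a return instruction. Only a single, register-based return value that
// needs no more than an integer extension is handled here.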
2567 bool AArch64FastISel::SelectRet(const Instruction *I) {
2568 const ReturnInst *Ret = cast<ReturnInst>(I);
2569 const Function &F = *I->getParent()->getParent();
2571 if (!FuncInfo.CanLowerReturn)
2577 // Build a list of return value registers.
2578 SmallVector<unsigned, 4> RetRegs;
2580 if (Ret->getNumOperands() > 0) {
2581 CallingConv::ID CC = F.getCallingConv();
2582 SmallVector<ISD::OutputArg, 4> Outs;
2583 GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
2585 // Analyze operands of the call, assigning locations to each operand.
2586 SmallVector<CCValAssign, 16> ValLocs;
2587 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
2588 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
2589 : RetCC_AArch64_AAPCS;
2590 CCInfo.AnalyzeReturn(Outs, RetCC);
2592 // Only handle a single return value for now.
2593 if (ValLocs.size() != 1)
2596 CCValAssign &VA = ValLocs[0];
2597 const Value *RV = Ret->getOperand(0);
    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;
2609 unsigned SrcReg = Reg + VA.getValNo();
2610 unsigned DestReg = VA.getLocReg();
2611 // Avoid a cross-class copy. This is very unlikely.
2612 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
2615 EVT RVEVT = TLI.getValueType(RV->getType());
2616 if (!RVEVT.isSimple())
2619 // Vectors (of > 1 lane) in big endian need tricky handling.
2620 if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1)
2623 MVT RVVT = RVEVT.getSimpleVT();
2624 if (RVVT == MVT::f128)
2626 MVT DestVT = VA.getValVT();
2627 // Special handling for extended integers.
2628 if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      bool isZExt = Outs[0].Flags.isZExt();
      SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
      if (SrcReg == 0)
        return false;
    }
2642 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2643 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
2645 // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }
2649 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2650 TII.get(AArch64::RET_ReallyLR));
  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
    MIB.addReg(RetRegs[i], RegState::Implicit);
  return true;
}
2656 bool AArch64FastISel::SelectTrunc(const Instruction *I) {
2657 Type *DestTy = I->getType();
2658 Value *Op = I->getOperand(0);
2659 Type *SrcTy = Op->getType();
2661 EVT SrcEVT = TLI.getValueType(SrcTy, true);
2662 EVT DestEVT = TLI.getValueType(DestTy, true);
2663 if (!SrcEVT.isSimple())
2665 if (!DestEVT.isSimple())
2668 MVT SrcVT = SrcEVT.getSimpleVT();
2669 MVT DestVT = DestEVT.getSimpleVT();
  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i8)
    return false;
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
      DestVT != MVT::i1)
    return false;
2678 unsigned SrcReg = getRegForValue(Op);
2681 bool SrcIsKill = hasTrivialKill(Op);
2683 // If we're truncating from i64 to a smaller non-legal type then generate an
2684 // AND. Otherwise, we know the high bits are undefined and a truncate doesn't
2685 // generate any code.
2686 if (SrcVT == MVT::i64) {
2688 switch (DestVT.SimpleTy) {
2690 // Trunc i64 to i32 is handled by the target-independent fast-isel.
2702 // Issue an extract_subreg to get the lower 32-bits.
    unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
                                                AArch64::sub_32);
2705 // Create the AND instruction which performs the actual truncation.
2706 unsigned ANDReg = emitAND_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
    assert(ANDReg && "Unexpected AND instruction emission failure.");
    SrcReg = ANDReg;
  }

  UpdateValueMap(I, SrcReg);
  return true;
}
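// Extend an i1 value: zero-extension is an AND with 1 (plus a SUBREG_TO_REG
// for i64 destinations); sign-extension to 32 bits uses an SBFM of the low
// bit (i1 to i64 sign-extension is not handled here).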
2715 unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
2716 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
2717 DestVT == MVT::i64) &&
2718 "Unexpected value type.");
2719 // Handle i8 and i16 as i32.
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
    DestVT = MVT::i32;

  if (isZExt) {
    unsigned ResultReg = emitAND_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2725 assert(ResultReg && "Unexpected AND instruction emission failure.");
2726 if (DestVT == MVT::i64) {
2727 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
2728 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
2729 unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2730 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2731 TII.get(AArch64::SUBREG_TO_REG), Reg64)
          .addImm(0)
          .addReg(ResultReg)
          .addImm(AArch64::sub_32);
      ResultReg = Reg64;
    }
    return ResultReg;
  } else {
    if (DestVT == MVT::i64) {
      // FIXME: We're SExt i1 to i64.
      return 0;
    }
    return FastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
                            /*TODO:IsKill=*/false, 0, 0);
  }
}
2748 unsigned AArch64FastISel::Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2749 unsigned Op1, bool Op1IsKill) {
2751 switch (RetVT.SimpleTy) {
2757 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
2759 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
2762 const TargetRegisterClass *RC =
2763 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  return FastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
                          ZReg, /*IsKill=*/true);
}
2768 unsigned AArch64FastISel::Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2769 unsigned Op1, bool Op1IsKill) {
2770 if (RetVT != MVT::i64)
2773 return FastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
2774 Op0, Op0IsKill, Op1, Op1IsKill,
2775 AArch64::XZR, /*IsKill=*/true);
2778 unsigned AArch64FastISel::Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2779 unsigned Op1, bool Op1IsKill) {
2780 if (RetVT != MVT::i64)
2783 return FastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
2784 Op0, Op0IsKill, Op1, Op1IsKill,
2785 AArch64::XZR, /*IsKill=*/true);
2788 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
2789 unsigned Op1Reg, bool Op1IsKill) {
2791 bool NeedTrunc = false;
2793 switch (RetVT.SimpleTy) {
2795 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
2796 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
2797 case MVT::i32: Opc = AArch64::LSLVWr; break;
2798 case MVT::i64: Opc = AArch64::LSLVXr; break;
2801 const TargetRegisterClass *RC =
2802 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
2804 Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
2807 unsigned ResultReg = FastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
2810 ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
2814 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     bool Op0IsKill, uint64_t Shift,
                                     bool IsZExt) {
2817 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
2818 "Unexpected source/return type pair.");
2819 assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
2820 SrcVT == MVT::i64) && "Unexpected source value type.");
2821 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
2822 RetVT == MVT::i64) && "Unexpected return value type.");
2824 bool Is64Bit = (RetVT == MVT::i64);
2825 unsigned RegSize = Is64Bit ? 64 : 32;
2826 unsigned DstBits = RetVT.getSizeInBits();
2827 unsigned SrcBits = SrcVT.getSizeInBits();
2829 // Don't deal with undefined shifts.
2830 if (Shift >= DstBits)
2833 // For immediate shifts we can fold the zero-/sign-extension into the shift.
2834 // {S|U}BFM Wd, Wn, #r, #s
2835 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
2837 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
2838 // %2 = shl i16 %1, 4
2839 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
2840 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
2841 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
2842 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
2844 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
2845 // %2 = shl i16 %1, 8
2846 // Wd<32+7-24,32-24> = Wn<7:0>
2847 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
2848 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
2849 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
2851 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
2852 // %2 = shl i16 %1, 12
2853 // Wd<32+3-20,32-20> = Wn<3:0>
2854 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
2855 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
2856 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
2858 unsigned ImmR = RegSize - Shift;
2859 // Limit the width to the length of the source type.
2860 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
2861 static const unsigned OpcTable[2][2] = {
2862 {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
2866 const TargetRegisterClass *RC =
2867 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
2868 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
2869 unsigned TmpReg = MRI.createVirtualRegister(RC);
2870 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2871 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0, getKillRegState(Op0IsKill))
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
    Op0IsKill = true;
  }
  return FastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
}
2881 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
2882 unsigned Op1Reg, bool Op1IsKill) {
2884 bool NeedTrunc = false;
2886 switch (RetVT.SimpleTy) {
2888 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
2889 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
2890 case MVT::i32: Opc = AArch64::LSRVWr; break;
2891 case MVT::i64: Opc = AArch64::LSRVXr; break;
2894 const TargetRegisterClass *RC =
2895 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
2897 Op0Reg = emitAND_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
2898 Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
2899 Op0IsKill = Op1IsKill = true;
2901 unsigned ResultReg = FastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
2904 ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
2908 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
2909 bool Op0IsKill, uint64_t Shift,
2911 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
2912 "Unexpected source/return type pair.");
2913 assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
2914 SrcVT == MVT::i64) && "Unexpected source value type.");
2915 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
2916 RetVT == MVT::i64) && "Unexpected return value type.");
2918 bool Is64Bit = (RetVT == MVT::i64);
2919 unsigned RegSize = Is64Bit ? 64 : 32;
2920 unsigned DstBits = RetVT.getSizeInBits();
2921 unsigned SrcBits = SrcVT.getSizeInBits();
2923 // Don't deal with undefined shifts.
2924 if (Shift >= DstBits)
2927 // For immediate shifts we can fold the zero-/sign-extension into the shift.
2928 // {S|U}BFM Wd, Wn, #r, #s
2929 // Wd<s-r:0> = Wn<s:r> when r <= s
2931 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
2932 // %2 = lshr i16 %1, 4
2933 // Wd<7-4:0> = Wn<7:4>
2934 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
2935 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
2936 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
2938 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
2939 // %2 = lshr i16 %1, 8
2940 // Wd<7-7,0> = Wn<7:7>
2941 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
2942 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
2943 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
2945 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
2946 // %2 = lshr i16 %1, 12
2947 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
2948 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
2949 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
2950 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
  if (Shift >= SrcBits && IsZExt)
    return AArch64MaterializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)),
                                 RetVT);
2956 // It is not possible to fold a sign-extend into the LShr instruction. In this
2957 // case emit a sign-extend.
2959 Op0 = EmitIntExt(SrcVT, Op0, RetVT, IsZExt);
2964 SrcBits = SrcVT.getSizeInBits();
2968 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
2969 unsigned ImmS = SrcBits - 1;
2970 static const unsigned OpcTable[2][2] = {
2971 {AArch64::SBFMWri, AArch64::SBFMXri},
2972 {AArch64::UBFMWri, AArch64::UBFMXri}
2974 unsigned Opc = OpcTable[IsZExt][Is64Bit];
2975 const TargetRegisterClass *RC =
2976 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
2977 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
2978 unsigned TmpReg = MRI.createVirtualRegister(RC);
2979 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2980 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
2982 .addReg(Op0, getKillRegState(Op0IsKill))
2983 .addImm(AArch64::sub_32);
2987 return FastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
2990 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
2991 unsigned Op1Reg, bool Op1IsKill) {
2993 bool NeedTrunc = false;
2995 switch (RetVT.SimpleTy) {
2997 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
2998 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
2999 case MVT::i32: Opc = AArch64::ASRVWr; break;
3000 case MVT::i64: Opc = AArch64::ASRVXr; break;
3003 const TargetRegisterClass *RC =
3004 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3006 Op0Reg = EmitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
3007 Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3008 Op0IsKill = Op1IsKill = true;
3010 unsigned ResultReg = FastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3013 ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3017 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3018 bool Op0IsKill, uint64_t Shift,
3020 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3021 "Unexpected source/return type pair.");
3022 assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3023 SrcVT == MVT::i64) && "Unexpected source value type.");
3024 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3025 RetVT == MVT::i64) && "Unexpected return value type.");
3027 bool Is64Bit = (RetVT == MVT::i64);
3028 unsigned RegSize = Is64Bit ? 64 : 32;
3029 unsigned DstBits = RetVT.getSizeInBits();
3030 unsigned SrcBits = SrcVT.getSizeInBits();
3032 // Don't deal with undefined shifts.
3033 if (Shift >= DstBits)
3036 // For immediate shifts we can fold the zero-/sign-extension into the shift.
3037 // {S|U}BFM Wd, Wn, #r, #s
3038 // Wd<s-r:0> = Wn<s:r> when r <= s
3040 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3041 // %2 = ashr i16 %1, 4
3042 // Wd<7-4:0> = Wn<7:4>
3043 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
3044 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
3045 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
3047 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3048 // %2 = ashr i16 %1, 8
3049 // Wd<7-7,0> = Wn<7:7>
3050 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
3051 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3052 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3054 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3055 // %2 = ashr i16 %1, 12
3056 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
3057 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
3058 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3059 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3061 if (Shift >= SrcBits && IsZExt)
3062 return AArch64MaterializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)),
3065 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
3066 unsigned ImmS = SrcBits - 1;
3067 static const unsigned OpcTable[2][2] = {
3068 {AArch64::SBFMWri, AArch64::SBFMXri},
3069 {AArch64::UBFMWri, AArch64::UBFMXri}
3071 unsigned Opc = OpcTable[IsZExt][Is64Bit];
3072 const TargetRegisterClass *RC =
3073 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3074 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3075 unsigned TmpReg = MRI.createVirtualRegister(RC);
3076 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3077 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3079 .addReg(Op0, getKillRegState(Op0IsKill))
3080 .addImm(AArch64::sub_32);
3084 return FastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                     bool isZExt) {
3089 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
3091 // FastISel does not have plumbing to deal with extensions where the SrcVT or
3092 // DestVT are odd things, so test to make sure that they are both types we can
3093 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
3094 // bail out to SelectionDAG.
3095 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
3096 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
3097 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
3098 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
  unsigned Opc;
  unsigned Imm = 0;

  switch (SrcVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    return Emiti1Ext(SrcReg, DestVT, isZExt);
  case MVT::i8:
    if (DestVT == MVT::i64)
      Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 7;
    break;
  case MVT::i16:
    if (DestVT == MVT::i64)
      Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 15;
    break;
  case MVT::i32:
    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
    Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    Imm = 31;
    break;
  }
3130 // Handle i8 and i16 as i32.
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
    DestVT = MVT::i32;
  else if (DestVT == MVT::i64) {
3134 unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3135 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Src64)
        .addImm(0)
        .addReg(SrcReg)
        .addImm(AArch64::sub_32);
    SrcReg = Src64;
  }
3143 const TargetRegisterClass *RC =
3144 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3145 return FastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
3148 bool AArch64FastISel::SelectIntExt(const Instruction *I) {
3149 // On ARM, in general, integer casts don't involve legal types; this code
3150 // handles promotable integers. The high bits for a type smaller than
3151 // the register size are assumed to be undefined.
3152 Type *DestTy = I->getType();
3153 Value *Src = I->getOperand(0);
3154 Type *SrcTy = Src->getType();
3156 bool isZExt = isa<ZExtInst>(I);
3157 unsigned SrcReg = getRegForValue(Src);
3161 EVT SrcEVT = TLI.getValueType(SrcTy, true);
3162 EVT DestEVT = TLI.getValueType(DestTy, true);
3163 if (!SrcEVT.isSimple())
3165 if (!DestEVT.isSimple())
3168 MVT SrcVT = SrcEVT.getSimpleVT();
3169 MVT DestVT = DestEVT.getSimpleVT();
3170 unsigned ResultReg = 0;
3172 // Check if it is an argument and if it is already zero/sign-extended.
3173 if (const auto *Arg = dyn_cast<Argument>(Src)) {
3174 if ((isZExt && Arg->hasZExtAttr()) || (!isZExt && Arg->hasSExtAttr())) {
3175 if (DestVT == MVT::i64) {
3176 ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
3177 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3178 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
3181 .addImm(AArch64::sub_32);
3188 ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
  if (!ResultReg)
    return false;

  UpdateValueMap(I, ResultReg);
  return true;
}
3197 bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
3198 EVT DestEVT = TLI.getValueType(I->getType(), true);
3199 if (!DestEVT.isSimple())
3202 MVT DestVT = DestEVT.getSimpleVT();
  if (DestVT != MVT::i64 && DestVT != MVT::i32)
    return false;

  unsigned DivOpc;
  bool is64bit = (DestVT == MVT::i64);
  switch (ISDOpcode) {
  default:
    return false;
  case ISD::SREM:
    DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
    break;
  case ISD::UREM:
    DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
    break;
  }
  unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
3219 unsigned Src0Reg = getRegForValue(I->getOperand(0));
3222 bool Src0IsKill = hasTrivialKill(I->getOperand(0));
3224 unsigned Src1Reg = getRegForValue(I->getOperand(1));
3227 bool Src1IsKill = hasTrivialKill(I->getOperand(1));
3229 const TargetRegisterClass *RC =
3230 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3231 unsigned QuotReg = FastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
3232 Src1Reg, /*IsKill=*/false);
3233 assert(QuotReg && "Unexpected DIV instruction emission failure.");
3234 // The remainder is computed as numerator - (quotient * denominator) using the
3235 // MSUB instruction.
  unsigned ResultReg = FastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
                                        Src1Reg, Src1IsKill, Src0Reg,
                                        Src0IsKill);
  UpdateValueMap(I, ResultReg);
  return true;
}
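// Lower an integer multiply as MADD with the zero register as the addend.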
3243 bool AArch64FastISel::SelectMul(const Instruction *I) {
3244 EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
3245 if (!SrcEVT.isSimple())
3247 MVT SrcVT = SrcEVT.getSimpleVT();
3249 // Must be simple value type. Don't handle vectors.
  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i8)
    return false;
3254 unsigned Src0Reg = getRegForValue(I->getOperand(0));
3257 bool Src0IsKill = hasTrivialKill(I->getOperand(0));
3259 unsigned Src1Reg = getRegForValue(I->getOperand(1));
3262 bool Src1IsKill = hasTrivialKill(I->getOperand(1));
3264 unsigned ResultReg =
3265 Emit_MUL_rr(SrcVT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
  if (!ResultReg)
    return false;

  UpdateValueMap(I, ResultReg);
  return true;
}
3274 bool AArch64FastISel::SelectShift(const Instruction *I) {
3276 if (!isLoadStoreTypeLegal(I->getType(), RetVT))
3279 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
3280 unsigned ResultReg = 0;
    uint64_t ShiftVal = C->getZExtValue();
    MVT SrcVT = RetVT;
    bool IsZExt = I->getOpcode() != Instruction::AShr;
3284 const Value * Op0 = I->getOperand(0);
3285 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
3287 if (isLoadStoreTypeLegal(ZExt->getSrcTy(), TmpVT)) {
3290 Op0 = ZExt->getOperand(0);
3292 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
3294 if (isLoadStoreTypeLegal(SExt->getSrcTy(), TmpVT)) {
3297 Op0 = SExt->getOperand(0);
3301 unsigned Op0Reg = getRegForValue(Op0);
3304 bool Op0IsKill = hasTrivialKill(Op0);
3306 switch (I->getOpcode()) {
3307 default: llvm_unreachable("Unexpected instruction.");
3308 case Instruction::Shl:
3309 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
3311 case Instruction::AShr:
3312 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
3314 case Instruction::LShr:
3315 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
    UpdateValueMap(I, ResultReg);
    return true;
  }
3325 unsigned Op0Reg = getRegForValue(I->getOperand(0));
3328 bool Op0IsKill = hasTrivialKill(I->getOperand(0));
3330 unsigned Op1Reg = getRegForValue(I->getOperand(1));
3333 bool Op1IsKill = hasTrivialKill(I->getOperand(1));
  unsigned ResultReg = 0;
  switch (I->getOpcode()) {
  default: llvm_unreachable("Unexpected instruction.");
  case Instruction::Shl:
    ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  case Instruction::AShr:
    ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  case Instruction::LShr:
    ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  }

  if (!ResultReg)
    return false;

  UpdateValueMap(I, ResultReg);
  return true;
}
bool AArch64FastISel::SelectBitCast(const Instruction *I) {
  MVT RetVT, SrcVT;

  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
    return false;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  unsigned Opc;
3365 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
3366 Opc = AArch64::FMOVWSr;
3367 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
3368 Opc = AArch64::FMOVXDr;
3369 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
3370 Opc = AArch64::FMOVSWr;
  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
    Opc = AArch64::FMOVDXr;
  else
    return false;
3376 const TargetRegisterClass *RC = nullptr;
3377 switch (RetVT.SimpleTy) {
3378 default: llvm_unreachable("Unexpected value type.");
3379 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
3380 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
3381 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
3382 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
3384 unsigned Op0Reg = getRegForValue(I->getOperand(0));
3387 bool Op0IsKill = hasTrivialKill(I->getOperand(0));
3388 unsigned ResultReg = FastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
  if (!ResultReg)
    return false;

  UpdateValueMap(I, ResultReg);
  return true;
}
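// Main FastISel entry point: dispatch on the IR opcode to the Select*
// routines above. Returning false punts the instruction to SelectionDAG.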
3397 bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
  default:
    break;
3401 case Instruction::Load:
3402 return SelectLoad(I);
3403 case Instruction::Store:
3404 return SelectStore(I);
3405 case Instruction::Br:
3406 return SelectBranch(I);
3407 case Instruction::IndirectBr:
3408 return SelectIndirectBr(I);
3409 case Instruction::FCmp:
3410 case Instruction::ICmp:
3411 return SelectCmp(I);
3412 case Instruction::Select:
3413 return SelectSelect(I);
3414 case Instruction::FPExt:
3415 return SelectFPExt(I);
3416 case Instruction::FPTrunc:
3417 return SelectFPTrunc(I);
3418 case Instruction::FPToSI:
3419 return SelectFPToInt(I, /*Signed=*/true);
3420 case Instruction::FPToUI:
3421 return SelectFPToInt(I, /*Signed=*/false);
3422 case Instruction::SIToFP:
3423 return SelectIntToFP(I, /*Signed=*/true);
3424 case Instruction::UIToFP:
3425 return SelectIntToFP(I, /*Signed=*/false);
3426 case Instruction::SRem:
3427 return SelectRem(I, ISD::SREM);
3428 case Instruction::URem:
3429 return SelectRem(I, ISD::UREM);
3430 case Instruction::Ret:
3431 return SelectRet(I);
3432 case Instruction::Trunc:
3433 return SelectTrunc(I);
3434 case Instruction::ZExt:
3435 case Instruction::SExt:
3436 return SelectIntExt(I);
3438 // FIXME: All of these should really be handled by the target-independent
3439 // selector -> improve FastISel tblgen.
3440 case Instruction::Mul:
3441 return SelectMul(I);
3442 case Instruction::Shl: // fall-through
3443 case Instruction::LShr: // fall-through
3444 case Instruction::AShr:
3445 return SelectShift(I);
  case Instruction::BitCast:
    return SelectBitCast(I);
  }
  return false;
  // Silence warnings.
  (void)&CC_AArch64_DarwinPCS_VarArg;
}
3455 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo,
3456 const TargetLibraryInfo *libInfo) {
  return new AArch64FastISel(funcInfo, libInfo);
}