//===-- AArch64FastISel.cpp - AArch64 FastISel implementation -----------===//
// The LLVM Compiler Infrastructure
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//===----------------------------------------------------------------------===//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//===----------------------------------------------------------------------===//
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
class AArch64FastISel final : public FastISel {
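  // Address - describes a memory operand for FastISel: a base (register or
  // frame index), an optional offset register with shift/extend information,
  // an immediate offset, and an optional GlobalValue.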
    AArch64_AM::ShiftExtendType ExtType;
    const GlobalValue *GV;

    Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
                OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }
    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }
    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }
    void setOffsetReg(unsigned Reg) {
      OffsetReg = Reg;
    }
    unsigned getOffsetReg() const {
      return OffsetReg;
    }
    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }
    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }
    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() { return Offset; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() { return Shift; }
    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() { return GV; }
  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
  // Selection routines.
  bool selectAddSub(const Instruction *I);
  bool selectLogicalOp(const Instruction *I);
  bool selectLoad(const Instruction *I);
  bool selectStore(const Instruction *I);
  bool selectBranch(const Instruction *I);
  bool selectIndirectBr(const Instruction *I);
  bool selectCmp(const Instruction *I);
  bool selectSelect(const Instruction *I);
  bool selectFPExt(const Instruction *I);
  bool selectFPTrunc(const Instruction *I);
  bool selectFPToInt(const Instruction *I, bool Signed);
  bool selectIntToFP(const Instruction *I, bool Signed);
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
  bool selectRet(const Instruction *I);
  bool selectTrunc(const Instruction *I);
  bool selectIntExt(const Instruction *I);
  bool selectMul(const Instruction *I);
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);
  bool selectGetElementPtr(const Instruction *I);
  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            unsigned Flags, unsigned ScaleFactor,
                            MachineMemOperand *MMO);
  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
  bool optimizeSelect(const SelectInst *SI);
  // Emit helper routines.
  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true, bool IsZExt = false);
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         bool SetFlags = false, bool WantResult = true);
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         AArch64_AM::ShiftExtendType ShiftType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         AArch64_AM::ShiftExtendType ExtType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  bool emitCompareAndBranch(const BranchInst *BI);
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
                    MachineMemOperand *MMO = nullptr);
  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                       unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                       unsigned RHSReg, bool RHSIsKill,
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
                       bool WantResult = true);
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, uint64_t Imm);
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                      unsigned Op1, bool Op1IsKill);
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = true);
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = true);
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = false);

  unsigned materializeInt(const ConstantInt *CI, MVT VT);
  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
  unsigned materializeGV(const GlobalValue *GV);
  // Call handling routines.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);

  // Backend specific FastISel code.
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;

  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
                           const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
    Subtarget = &TM.getSubtarget<AArch64Subtarget>();
    Context = &FuncInfo.Fn->getContext();

  bool fastSelectInstruction(const Instruction *I) override;

#include "AArch64GenFastISel.inc"

} // end anonymous namespace

#include "AArch64GenCallingConv.inc"
/// \brief Check if the sign-/zero-extend will be a noop.
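/// This is the case when the extended value comes from a load that will be
/// selected as an extending load, or from an argument that already carries a
/// matching zext/sext attribute.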
static bool isIntExtFree(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
         "Unexpected value type.");
  bool IsZExt = isa<ZExtInst>(I);

  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))

  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
/// \brief Determine the implicit scale factor that is applied by a memory
/// operation for a given value type.
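/// For example, a 32-bit load or store scales its unsigned immediate offset
/// by 4 bytes, and a 64-bit one by 8.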
static unsigned getImplicitScaleFactor(MVT VT) {
  switch (VT.SimpleTy) {
  case MVT::i1: // fall-through
  case MVT::i32: // fall-through
  case MVT::i64: // fall-through
CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
  if (CC == CallingConv::WebKit_JS)
    return CC_AArch64_WebKit_JS;
  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
         "Alloca should always return a pointer.");

  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI))

  DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);

  if (SI != FuncInfo.StaticAllocaMap.end()) {
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
        .addFrameIndex(SI->second)
unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  // Create a copy from the zero register to materialize a "0" value.
  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
                                                   : &AArch64::GPR32RegClass;
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
          ResultReg).addReg(ZeroReg, getKillRegState(true));
unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
  // Positive zero (+0.0) has to be materialized with a fmov from the zero
  // register, because the immediate version of fmov cannot encode zero.
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  if (VT != MVT::f32 && VT != MVT::f64)

  const APFloat Val = CFP->getValueAPF();
  bool Is64Bit = (VT == MVT::f64);
  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
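  // FMOV can only encode a small set of immediates (an 8-bit encoding of
  // sign, 3-bit exponent, and 4-bit fraction), which is what
  // TLI.isFPImmLegal checks for here.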
  if (TLI.isFPImmLegal(Val, VT)) {
        Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
    assert((Imm != -1) && "Cannot encode floating-point constant.");
    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);

  // Materialize via constant pool. MachineConstantPool wants an explicit
  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
    Align = DL.getTypeAllocSize(CFP->getType());

  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
  // We can't handle thread-local variables quickly yet.
  if (GV->isThreadLocal())

  // MachO still uses GOT for large code-model accesses, but ELF requires
  // movz/movk sequences, which FastISel doesn't handle yet.
  if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())

  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(GV->getType(), true);
  if (!DestEVT.isSimple())

  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);

  if (OpFlags & AArch64II::MO_GOT) {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);

    ResultReg = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
  } else if (OpFlags & AArch64II::MO_CONSTPOOL) {
    // We can't handle addresses loaded from a constant pool quickly yet.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);

    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
  MVT VT = CEVT.getSimpleVT();

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return materializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return materializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return materializeGV(GV);
unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
  assert(CFP->isNullValue() &&
         "Floating-point constant is not a positive zero.");
  if (!isTypeLegal(CFP->getType(), VT))

  if (VT != MVT::f32 && VT != MVT::f64)

  bool Is64Bit = (VT == MVT::f64);
  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
/// \brief Check if the multiply is by a power-of-2 constant.
static bool isMulPowOf2(const Value *I) {
  if (const auto *MI = dyn_cast<MulOperator>(I)) {
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
      if (C->getValue().isPowerOf2())
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
      if (C->getValue().isPowerOf2())

// Computes the address to get to an object.
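// It walks through bitcasts, no-op int<->ptr casts, GEPs, adds/subs of
// constants, and address-mode-compatible shifts and extends, folding whatever
// it can into the Address structure.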
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();

  if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
  case Instruction::BitCast: {
    // Look through bitcasts.
    return computeAddress(U->getOperand(0), Addr, Ty);
  case Instruction::IntToPtr: {
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return computeAddress(U->getOperand(0), Addr, Ty);
  case Instruction::PtrToInt: {
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return computeAddress(U->getOperand(0), Addr, Ty);
  case Instruction::GetElementPtr: {
    Address SavedAddr = Addr;
    uint64_t TmpOffset = Addr.getOffset();

    // Iterate through the GEP folding the constants into offsets where
    gep_type_iterator GTI = gep_type_begin(U);
    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
      const Value *Op = *i;
      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
        if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
          // Constant-offset addressing.
          TmpOffset += CI->getSExtValue() * S;
        if (canFoldAddIntoGEP(U, Op)) {
          // A compatible add with a constant operand. Fold the constant.
              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
          TmpOffset += CI->getSExtValue() * S;
          // Iterate on the other operand.
          Op = cast<AddOperator>(Op)->getOperand(0);
        goto unsupported_gep;

    // Try to grab the base operand now.
    Addr.setOffset(TmpOffset);
    if (computeAddress(U->getOperand(0), Addr, Ty))

    // We failed, restore everything and try the other options.
  case Instruction::Alloca: {
    const AllocaInst *AI = cast<AllocaInst>(Obj);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.setKind(Address::FrameIndexBase);
      Addr.setFI(SI->second);
  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (isa<ConstantInt>(LHS))

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);

    Address Backup = Addr;
    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
  case Instruction::Sub: {
    // Subs of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
  case Instruction::Shl: {
    if (Addr.getOffsetReg())

    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));

    unsigned Val = CI->getZExtValue();
    if (Val < 1 || Val > 3)

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))

    if (NumBytes != (1ULL << Val))

    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = U->getOperand(0);
    if (const auto *I = dyn_cast<Instruction>(Src))
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)

    // Fold the zext or sext when it won't become a noop.
    if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::UXTW);
        Src = ZE->getOperand(0);
    } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::SXTW);
        Src = SE->getOperand(0);

    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
      if (AI->getOpcode() == Instruction::And) {
        const Value *LHS = AI->getOperand(0);
        const Value *RHS = AI->getOperand(1);

        if (const auto *C = dyn_cast<ConstantInt>(LHS))
          if (C->getValue() == 0xffffffff)

        if (const auto *C = dyn_cast<ConstantInt>(RHS))
          if (C->getValue() == 0xffffffff) {
            Addr.setExtendType(AArch64_AM::UXTW);
            unsigned Reg = getRegForValue(LHS);
            bool RegIsKill = hasTrivialKill(LHS);
            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
            Addr.setOffsetReg(Reg);

    unsigned Reg = getRegForValue(Src);
    Addr.setOffsetReg(Reg);
  case Instruction::Mul: {
    if (Addr.getOffsetReg())

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    // Canonicalize power-of-2 value to the RHS.
    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue().isPowerOf2())

    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
    const auto *C = cast<ConstantInt>(RHS);
    unsigned Val = C->getValue().logBase2();
    if (Val < 1 || Val > 3)

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))

    if (NumBytes != (1ULL << Val))

    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = LHS;
    if (const auto *I = dyn_cast<Instruction>(Src))
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)

    // Fold the zext or sext when it won't become a noop.
    if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::UXTW);
        Src = ZE->getOperand(0);
    } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::SXTW);
        Src = SE->getOperand(0);

    unsigned Reg = getRegForValue(Src);
    Addr.setOffsetReg(Reg);
  case Instruction::And: {
    if (Addr.getOffsetReg())

    if (DL.getTypeSizeInBits(Ty) != 8)

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue() == 0xffffffff)

    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 0xffffffff) {
        Addr.setExtendType(AArch64_AM::LSL);
        Addr.setExtendType(AArch64_AM::UXTW);

        unsigned Reg = getRegForValue(LHS);
        bool RegIsKill = hasTrivialKill(LHS);
        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
        Addr.setOffsetReg(Reg);
  case Instruction::SExt:
  case Instruction::ZExt: {
    if (!Addr.getReg() || Addr.getOffsetReg())

    const Value *Src = nullptr;
    // Fold the zext or sext when it won't become a noop.
    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::UXTW);
        Src = ZE->getOperand(0);
    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::SXTW);
        Src = SE->getOperand(0);

    unsigned Reg = getRegForValue(Src);
    Addr.setOffsetReg(Reg);

  if (Addr.isRegBase() && !Addr.getReg()) {
    unsigned Reg = getRegForValue(Obj);

  if (!Addr.getOffsetReg()) {
    unsigned Reg = getRegForValue(Obj);
    Addr.setOffsetReg(Reg);
bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const auto *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();

  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    return computeCallAddress(U->getOperand(0), Addr);
  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
        TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return computeCallAddress(U->getOperand(0), Addr);
  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
        TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return computeCallAddress(U->getOperand(0), Addr);

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    Addr.setGlobalValue(GV);

  // If all else fails, try to materialize the value in a register.
  if (!Addr.getGlobalValue()) {
    Addr.setReg(getRegForValue(V));
    return Addr.getReg() != 0;
bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple())
  VT = evt.getSimpleVT();

  // This is a legal type, but it's not something we handle in fast-isel.

  // Handle all other legal types, i.e. a register that will directly hold this
  return TLI.isTypeLegal(VT);

/// \brief Determine if the value type is supported by FastISel.
/// FastISel for AArch64 can handle more value types than are legal. This adds
/// simple value types such as i1, i8, and i16.
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
  if (Ty->isVectorTy() && !IsVectorAllowed)

  if (isTypeLegal(Ty, VT))

  // If this is a type that can be sign- or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
bool AArch64FastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))

  const auto *I = cast<Instruction>(V);
  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  unsigned ScaleFactor = getImplicitScaleFactor(VT);

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
        .addFrameIndex(Addr.getFI())

    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  if (RegisterOffsetNeedsLowering) {
    unsigned ResultReg = 0;
    if (Addr.getExtendType() == AArch64_AM::SXTW ||
        Addr.getExtendType() == AArch64_AM::UXTW )
      ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                /*TODO:IsKill=*/false, Addr.getExtendType(),
      ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                /*TODO:IsKill=*/false, AArch64_AM::LSL,
      if (Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift());

    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (ImmediateOffsetNeedsLowering) {
    // Try to fold the immediate into the add instruction.
    ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
    ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);

    Addr.setReg(ResultReg);
void AArch64FastISel::addLoadStoreOperands(Address &Addr,
                                           const MachineInstrBuilder &MIB,
                                           unsigned ScaleFactor,
                                           MachineMemOperand *MMO) {
  int64_t Offset = Addr.getOffset() / ScaleFactor;
  // Frame base works a bit differently. Handle it separately.
  if (Addr.isFIBase()) {
    int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
    // and alignment should be based on the VT.
    MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI).addImm(Offset);
    assert(Addr.isRegBase() && "Unexpected address kind.");
    const MCInstrDesc &II = MIB->getDesc();
    unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
        constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
        constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
    if (Addr.getOffsetReg()) {
      assert(Addr.getOffset() == 0 && "Unexpected offset");
      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
                      Addr.getExtendType() == AArch64_AM::SXTX;
      MIB.addReg(Addr.getReg());
      MIB.addReg(Addr.getOffsetReg());
      MIB.addImm(IsSigned);
      MIB.addImm(Addr.getShift() != 0);
      MIB.addReg(Addr.getReg()).addImm(Offset);

  MIB.addMemOperand(MMO);
unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult, bool IsZExt) {
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
  bool NeedExtend = false;
  switch (RetVT.SimpleTy) {
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
  case MVT::i32: // fall-through
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);

  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr )
          std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  bool LHSIsKill = hasTrivialKill(LHS);

    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
                                SetFlags, WantResult);
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
  } else if (const auto *C = dyn_cast<Constant>(RHS))
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,

  // Only extend the RHS within the instruction if there is a valid extend type.
  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                               RHSIsKill, ExtendType, C->getZExtValue(),
                               SetFlags, WantResult);
    unsigned RHSReg = getRegForValue(RHS);
    bool RHSIsKill = hasTrivialKill(RHS);
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                         ExtendType, 0, SetFlags, WantResult);

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      unsigned RHSReg = getRegForValue(MulLHS);
      bool RHSIsKill = hasTrivialKill(MulLHS);
      return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                           AArch64_AM::LSL, ShiftVal, SetFlags, WantResult);

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
        switch (SI->getOpcode()) {
        case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
        uint64_t ShiftVal = C->getZExtValue();
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                               RHSIsKill, ShiftType, ShiftVal, SetFlags,

  unsigned RHSReg = getRegForValue(RHS);
  bool RHSIsKill = hasTrivialKill(RHS);

    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                       SetFlags, WantResult);
unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        bool RHSIsKill, bool SetFlags,
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrr,  AArch64::SUBXrr  },
      { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
      { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
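  // The table above is indexed as OpcTable[SetFlags][UseAdd][Is64Bit].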
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    ResultReg = createResultReg(RC);
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill));
unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, uint64_t Imm,
                                        bool SetFlags, bool WantResult) {
  assert(LHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)

  if (isUInt<12>(Imm))
  else if ((Imm & 0xfff000) == Imm) {

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWri,  AArch64::SUBXri  },
      { AArch64::ADDWri,  AArch64::ADDXri  }  },
    { { AArch64::SUBSWri, AArch64::SUBSXri },
      { AArch64::ADDSWri, AArch64::ADDSXri }  }
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC;
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
    ResultReg = createResultReg(RC);
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        AArch64_AM::ShiftExtendType ShiftType,
                                        uint64_t ShiftImm, bool SetFlags,
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrs,  AArch64::SUBXrs  },
      { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
      { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    ResultReg = createResultReg(RC);
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill))
      .addImm(getShifterImm(ShiftType, ShiftImm));
unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        AArch64_AM::ShiftExtendType ExtType,
                                        uint64_t ShiftImm, bool SetFlags,
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrx,  AArch64::SUBXrx  },
      { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
      { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC = nullptr;
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
    ResultReg = createResultReg(RC);
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill))
      .addImm(getArithExtendImm(ExtType, ShiftImm));
bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
  Type *Ty = LHS->getType();
  EVT EVT = TLI.getValueType(Ty, true);
  if (!EVT.isSimple())
  MVT VT = EVT.getSimpleVT();

  switch (VT.SimpleTy) {
    return emitICmp(VT, LHS, RHS, IsZExt);
    return emitFCmp(VT, LHS, RHS);

bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,

bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
                       /*SetFlags=*/true, /*WantResult=*/false) != 0;
bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
  if (RetVT != MVT::f32 && RetVT != MVT::f64)

  // Check to see if the 2nd operand is a constant that we can encode directly
  bool UseImm = false;
  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
    if (CFP->isZero() && !CFP->isNegative())

  unsigned LHSReg = getRegForValue(LHS);
  bool LHSIsKill = hasTrivialKill(LHS);

    unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
        .addReg(LHSReg, getKillRegState(LHSIsKill));

  unsigned RHSReg = getRegForValue(RHS);
  bool RHSIsKill = hasTrivialKill(RHS);

  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill));
unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,

/// \brief This method is a wrapper to simplify add emission.
/// First try to emit an add with an immediate operand using emitAddSub_ri. If
/// that fails, then try to materialize the immediate into a register and use
/// emitAddSub_rr instead.
unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
    ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
    ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);

  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);

unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,

unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
                                      bool LHSIsKill, unsigned RHSReg,
                                      bool RHSIsKill, bool WantResult) {
  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
                       RHSIsKill, /*SetFlags=*/true, WantResult);

unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
                                      bool LHSIsKill, unsigned RHSReg,
                                      AArch64_AM::ShiftExtendType ShiftType,
                                      uint64_t ShiftImm, bool WantResult) {
  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
                       RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  bool LHSIsKill = hasTrivialKill(LHS);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = C->getZExtValue();
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();

      unsigned RHSReg = getRegForValue(MulLHS);
      bool RHSIsKill = hasTrivialKill(MulLHS);
      return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                              RHSIsKill, ShiftVal);

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        uint64_t ShiftVal = C->getZExtValue();
        unsigned RHSReg = getRegForValue(SI->getOperand(0));
        bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
        return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                                RHSIsKill, ShiftVal);

  unsigned RHSReg = getRegForValue(RHS);
  bool RHSIsKill = hasTrivialKill(RHS);

  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
  assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
         "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWri, AArch64::ANDXri },
    { AArch64::ORRWri, AArch64::ORRXri },
    { AArch64::EORWri, AArch64::EORXri }
  const TargetRegisterClass *RC;
  switch (RetVT.SimpleTy) {
    unsigned Idx = ISDOpc - ISD::AND;
    Opc = OpcTable[Idx][0];
    RC = &AArch64::GPR32spRegClass;
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64spRegClass;

  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))

  unsigned ResultReg =
      fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
                      AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
                                           unsigned RHSReg, bool RHSIsKill,
                                           uint64_t ShiftImm) {
  assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
         "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWrs, AArch64::ANDXrs },
    { AArch64::ORRWrs, AArch64::ORRXrs },
    { AArch64::EORWrs, AArch64::EORXrs }
  const TargetRegisterClass *RC;
  switch (RetVT.SimpleTy) {
    Opc = OpcTable[ISDOpc - ISD::AND][0];
    RC = &AArch64::GPR32RegClass;
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64RegClass;
  unsigned ResultReg =
      fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                       AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);

unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
                                   bool WantZExt, MachineMemOperand *MMO) {
  // Simplify this down to something we can handle.
  if (!simplifyAddress(Addr, VT))

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
    llvm_unreachable("Unexpected value type.");

  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
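  // For example, an i64 load at offset -8 needs the unscaled LDURXi form,
  // while offset 8 fits the scaled LDRXui form with an immediate of 1.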
  bool UseScaled = true;
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
  static const unsigned GPOpcTable[2][8][4] = {
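    // Sign-extend.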
    { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
      { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
      { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
      { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
      { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
      { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
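    // Zero-extend.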
    { { AArch64::LDURBBi,  AArch64::LDURHHi,  AArch64::LDURWi,
      { AArch64::LDURBBi,  AArch64::LDURHHi,  AArch64::LDURWi,
      { AArch64::LDRBBui,  AArch64::LDRHHui,  AArch64::LDRWui,
      { AArch64::LDRBBui,  AArch64::LDRHHui,  AArch64::LDRWui,
      { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
      { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
      { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
      { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,

  static const unsigned FPOpcTable[4][2] = {
    { AArch64::LDURSi,  AArch64::LDURDi  },
    { AArch64::LDRSui,  AArch64::LDRDui  },
    { AArch64::LDRSroX, AArch64::LDRDroX },
    { AArch64::LDRSroW, AArch64::LDRDroW }
  const TargetRegisterClass *RC;
  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                      Addr.getOffsetReg();
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
      Addr.getExtendType() == AArch64_AM::SXTW)

  bool IsRet64Bit = RetVT == MVT::i64;
  switch (VT.SimpleTy) {
    llvm_unreachable("Unexpected value type.");
  case MVT::i1: // Intentional fall-through.
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
    RC = &AArch64::GPR64RegClass;
    Opc = FPOpcTable[Idx][0];
    RC = &AArch64::FPR32RegClass;
    Opc = FPOpcTable[Idx][1];
    RC = &AArch64::FPR64RegClass;

  // Create the base instruction, then add the operands.
  unsigned ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(Opc), ResultReg);
  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);

  // Loading an i1 requires special handling.
  if (VT == MVT::i1) {
    unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure.");

  // For zero-extending loads to 64bit we emit a 32bit load and then convert
  // the 32bit reg to a 64bit reg.
  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addReg(ResultReg, getKillRegState(true))
        .addImm(AArch64::sub_32);
bool AArch64FastISel::selectAddSub(const Instruction *I) {
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))

    return selectOperator(I, I->getOpcode());

  switch (I->getOpcode()) {
    llvm_unreachable("Unexpected instruction.");
  case Instruction::Add:
    ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
  case Instruction::Sub:
    ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));

  updateValueMap(I, ResultReg);

bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))

    return selectOperator(I, I->getOpcode());

  switch (I->getOpcode()) {
    llvm_unreachable("Unexpected instruction.");
  case Instruction::And:
    ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
  case Instruction::Or:
    ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
  case Instruction::Xor:
    ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));

  updateValueMap(I, ResultReg);
bool AArch64FastISel::selectLoad(const Instruction *I) {
  // Verify we have a legal type before going any further. Currently, we handle
  // simple types that will directly fit in a register (i32/f32/i64/f64) or
  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
      cast<LoadInst>(I)->isAtomic())

  // See if we can handle this address.
  if (!computeAddress(I->getOperand(0), Addr, I->getType()))

  // Fold the following sign-/zero-extend into the load instruction.
  bool WantZExt = true;
  const Value *IntExtVal = nullptr;
  if (I->hasOneUse()) {
    if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(ZE->getType(), RetVT))
    } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(SE->getType(), RetVT))

  unsigned ResultReg =
      emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));

  // There are a few different cases we have to handle, because the load or the
  // sign-/zero-extend might not be selected by FastISel if we fall-back to
  // SelectionDAG. There is also an ordering issue when both instructions are in
  // different basic blocks.
  // 1.) The load instruction is selected by FastISel, but the integer extend
  //     not. This usually happens when the integer extend is in a different
  //     basic block and SelectionDAG took over for that basic block.
  // 2.) The load instruction is selected before the integer extend. This only
  //     happens when the integer extend is in a different basic block.
  // 3.) The load instruction is selected by SelectionDAG and the integer extend
  //     by FastISel. This happens if there are instructions between the load
  //     and the integer extend that couldn't be selected by FastISel.
  //
  // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
  // could select it. Emit a copy to subreg if necessary. FastISel will remove
  // it when it selects the integer extend.
  unsigned Reg = lookUpRegForValue(IntExtVal);
  if (RetVT == MVT::i64 && VT <= MVT::i32) {
      // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
      std::prev(FuncInfo.InsertPt)->eraseFromParent();
      ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
      ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,

    updateValueMap(I, ResultReg);

  // The integer extend has already been emitted - delete all the instructions
  // that have been emitted by the integer extend lowering code and use the
  // result from the load instruction directly.
  auto *MI = MRI.getUniqueVRegDef(Reg);
  for (auto &Opnd : MI->uses()) {
      Reg = Opnd.getReg();
  MI->eraseFromParent();

  updateValueMap(IntExtVal, ResultReg);

  updateValueMap(I, ResultReg);
1936 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
1937 MachineMemOperand *MMO) {
1938 // Simplify this down to something we can handle.
1939 if (!simplifyAddress(Addr, VT))
1942 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1944 llvm_unreachable("Unexpected value type.");
1946 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1947 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
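// For example, a 32-bit store at offset -8 must use the unscaled STURWi form,
// whereas offset 16 is a multiple of the 4-byte scale and can use STRWui with
// an immediate of 4.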
1948 bool UseScaled = true;
1949 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1954 static const unsigned OpcTable[4][6] = {
1955 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
1956 AArch64::STURSi, AArch64::STURDi },
1957 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
1958 AArch64::STRSui, AArch64::STRDui },
1959 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
1960 AArch64::STRSroX, AArch64::STRDroX },
1961 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
1962 AArch64::STRSroW, AArch64::STRDroW }
1966 bool VTIsi1 = false;
1967 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1968 Addr.getOffsetReg();
1969 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
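// Rows of OpcTable: 0 = unscaled immediate (STUR*), 1 = scaled unsigned
// immediate (STR*ui), 2 = register offset with an X index register, and 3 =
// register offset with an extended W index (selected by the UXTW/SXTW check
// below).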
1970 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1971 Addr.getExtendType() == AArch64_AM::SXTW)
1974 switch (VT.SimpleTy) {
1975 default: llvm_unreachable("Unexpected value type.");
1976 case MVT::i1: VTIsi1 = true;
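// Intentional fall-through: an i1 value is masked to a single bit below and
// then stored with the i8 opcode.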
1977 case MVT::i8: Opc = OpcTable[Idx][0]; break;
1978 case MVT::i16: Opc = OpcTable[Idx][1]; break;
1979 case MVT::i32: Opc = OpcTable[Idx][2]; break;
1980 case MVT::i64: Opc = OpcTable[Idx][3]; break;
1981 case MVT::f32: Opc = OpcTable[Idx][4]; break;
1982 case MVT::f64: Opc = OpcTable[Idx][5]; break;
1985 // Storing an i1 requires special handling.
1986 if (VTIsi1 && SrcReg != AArch64::WZR) {
1987 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
1988 assert(ANDReg && "Unexpected AND instruction emission failure.");
1991 // Create the base instruction, then add the operands.
1992 const MCInstrDesc &II = TII.get(Opc);
1993 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
1994 MachineInstrBuilder MIB =
1995 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
1996 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2001 bool AArch64FastISel::selectStore(const Instruction *I) {
2003 const Value *Op0 = I->getOperand(0);
2004 // Verify we have a legal type before going any further. Currently, we handle
2005 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2006 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2007 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) ||
2008 cast<StoreInst>(I)->isAtomic())
2011 // Get the value to be stored into a register. Use the zero register directly
2012 // when possible to avoid an unnecessary copy and a wasted register.
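// For example, "store i32 0, i32* %p" stores WZR directly instead of first
// materializing the zero in a fresh register.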
2013 unsigned SrcReg = 0;
2014 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2016 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2017 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2018 if (CF->isZero() && !CF->isNegative()) {
2019 VT = MVT::getIntegerVT(VT.getSizeInBits());
2020 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2025 SrcReg = getRegForValue(Op0);
2030 // See if we can handle this address.
2032 if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
2035 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2040 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2042 case CmpInst::FCMP_ONE:
2043 case CmpInst::FCMP_UEQ:
2045 // AL is our "false" for now. The other two need more compares.
2046 return AArch64CC::AL;
2047 case CmpInst::ICMP_EQ:
2048 case CmpInst::FCMP_OEQ:
2049 return AArch64CC::EQ;
2050 case CmpInst::ICMP_SGT:
2051 case CmpInst::FCMP_OGT:
2052 return AArch64CC::GT;
2053 case CmpInst::ICMP_SGE:
2054 case CmpInst::FCMP_OGE:
2055 return AArch64CC::GE;
2056 case CmpInst::ICMP_UGT:
2057 case CmpInst::FCMP_UGT:
2058 return AArch64CC::HI;
2059 case CmpInst::FCMP_OLT:
2060 return AArch64CC::MI;
2061 case CmpInst::ICMP_ULE:
2062 case CmpInst::FCMP_OLE:
2063 return AArch64CC::LS;
2064 case CmpInst::FCMP_ORD:
2065 return AArch64CC::VC;
2066 case CmpInst::FCMP_UNO:
2067 return AArch64CC::VS;
2068 case CmpInst::FCMP_UGE:
2069 return AArch64CC::PL;
2070 case CmpInst::ICMP_SLT:
2071 case CmpInst::FCMP_ULT:
2072 return AArch64CC::LT;
2073 case CmpInst::ICMP_SLE:
2074 case CmpInst::FCMP_ULE:
2075 return AArch64CC::LE;
2076 case CmpInst::FCMP_UNE:
2077 case CmpInst::ICMP_NE:
2078 return AArch64CC::NE;
2079 case CmpInst::ICMP_UGE:
2080 return AArch64CC::HS;
2081 case CmpInst::ICMP_ULT:
2082 return AArch64CC::LO;
2086 /// \brief Try to emit a combined compare-and-branch instruction.
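/// For example, "icmp eq i64 %x, 0" feeding a conditional branch can be lowered
/// to a single CBZ, and an 'and' with a power of two compared against zero can
/// be lowered to TBZ/TBNZ (the IR shown is only illustrative).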
2087 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2088 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2089 const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2090 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2092 const Value *LHS = CI->getOperand(0);
2093 const Value *RHS = CI->getOperand(1);
2096 if (!isTypeSupported(LHS->getType(), VT))
2099 unsigned BW = VT.getSizeInBits();
2103 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2104 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2106 // Try to take advantage of fallthrough opportunities.
2107 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2108 std::swap(TBB, FBB);
2109 Predicate = CmpInst::getInversePredicate(Predicate);
2114 if ((Predicate == CmpInst::ICMP_EQ) || (Predicate == CmpInst::ICMP_NE)) {
2115 if (const auto *C = dyn_cast<Constant>(LHS))
2116 if (C->isNullValue())
2117 std::swap(LHS, RHS);
2119 if (!isa<Constant>(RHS))
2122 if (!cast<Constant>(RHS)->isNullValue())
2125 if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2126 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2127 const Value *AndLHS = AI->getOperand(0);
2128 const Value *AndRHS = AI->getOperand(1);
2130 if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2131 if (C->getValue().isPowerOf2())
2132 std::swap(AndLHS, AndRHS);
2134 if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2135 if (C->getValue().isPowerOf2()) {
2136 TestBit = C->getValue().logBase2();
2144 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2145 } else if (Predicate == CmpInst::ICMP_SLT) {
2146 if (!isa<Constant>(RHS))
2149 if (!cast<Constant>(RHS)->isNullValue())
2154 } else if (Predicate == CmpInst::ICMP_SGT) {
2155 if (!isa<ConstantInt>(RHS))
2158 if (cast<ConstantInt>(RHS)->getValue() != -1)
2166 static const unsigned OpcTable[2][2][2] = {
2167 { {AArch64::CBZW, AArch64::CBZX },
2168 {AArch64::CBNZW, AArch64::CBNZX} },
2169 { {AArch64::TBZW, AArch64::TBZX },
2170 {AArch64::TBNZW, AArch64::TBNZX} }
2173 bool IsBitTest = TestBit != -1;
2174 bool Is64Bit = BW == 64;
2175 if (TestBit < 32 && TestBit >= 0)
2178 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2179 const MCInstrDesc &II = TII.get(Opc);
2181 unsigned SrcReg = getRegForValue(LHS);
2184 bool SrcIsKill = hasTrivialKill(LHS);
2186 if (BW == 64 && !Is64Bit)
2187 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2190 if ((BW < 32) && !IsBitTest)
2191 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
2193 // Emit the combined compare and branch instruction.
2194 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2195 MachineInstrBuilder MIB =
2196 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2197 .addReg(SrcReg, getKillRegState(SrcIsKill));
2199 MIB.addImm(TestBit);
2202 // Obtain the branch weight and add the TrueBB to the successor list.
2203 uint32_t BranchWeight = 0;
2205 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2206 TBB->getBasicBlock());
2207 FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2208 fastEmitBranch(FBB, DbgLoc);
2213 bool AArch64FastISel::selectBranch(const Instruction *I) {
2214 const BranchInst *BI = cast<BranchInst>(I);
2215 if (BI->isUnconditional()) {
2216 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2217 fastEmitBranch(MSucc, BI->getDebugLoc());
2221 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2222 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2224 AArch64CC::CondCode CC = AArch64CC::NE;
2225 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2226 if (CI->hasOneUse() && isValueAvailable(CI)) {
2227 // Try to optimize or fold the cmp.
2228 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2229 switch (Predicate) {
2232 case CmpInst::FCMP_FALSE:
2233 fastEmitBranch(FBB, DbgLoc);
2235 case CmpInst::FCMP_TRUE:
2236 fastEmitBranch(TBB, DbgLoc);
2240 // Try to emit a combined compare-and-branch first.
2241 if (emitCompareAndBranch(BI))
2244 // Try to take advantage of fallthrough opportunities.
2245 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2246 std::swap(TBB, FBB);
2247 Predicate = CmpInst::getInversePredicate(Predicate);
2251 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2254 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch instruction.
2256 CC = getCompareCC(Predicate);
2257 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2258 switch (Predicate) {
2261 case CmpInst::FCMP_UEQ:
2262 ExtraCC = AArch64CC::EQ;
2265 case CmpInst::FCMP_ONE:
2266 ExtraCC = AArch64CC::MI;
2270 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2272 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2273 if (ExtraCC != AArch64CC::AL) {
2274 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2280 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2284 // Obtain the branch weight and add the TrueBB to the successor list.
2285 uint32_t BranchWeight = 0;
2287 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2288 TBB->getBasicBlock());
2289 FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2291 fastEmitBranch(FBB, DbgLoc);
2294 } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
2296 if (TI->hasOneUse() && isValueAvailable(TI) &&
2297 isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) {
2298 unsigned CondReg = getRegForValue(TI->getOperand(0));
2301 bool CondIsKill = hasTrivialKill(TI->getOperand(0));
2303 // Issue an extract_subreg to get the lower 32-bits.
2304 if (SrcVT == MVT::i64) {
2305 CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
2310 unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
2311 assert(ANDReg && "Unexpected AND instruction emission failure.");
2312 emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
2314 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2315 std::swap(TBB, FBB);
2318 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2322 // Obtain the branch weight and add the TrueBB to the successor list.
2323 uint32_t BranchWeight = 0;
2325 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2326 TBB->getBasicBlock());
2327 FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2329 fastEmitBranch(FBB, DbgLoc);
2332 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2333 uint64_t Imm = CI->getZExtValue();
2334 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2335 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2338 // Obtain the branch weight and add the target to the successor list.
2339 uint32_t BranchWeight = 0;
2341 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2342 Target->getBasicBlock());
2343 FuncInfo.MBB->addSuccessor(Target, BranchWeight);
2345 } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2346 // Fake request the condition; otherwise the intrinsic might be completely optimized away.
2348 unsigned CondReg = getRegForValue(BI->getCondition());
2353 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2357 // Obtain the branch weight and add the TrueBB to the successor list.
2358 uint32_t BranchWeight = 0;
2360 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2361 TBB->getBasicBlock());
2362 FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2364 fastEmitBranch(FBB, DbgLoc);
2368 unsigned CondReg = getRegForValue(BI->getCondition());
2371 bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2373 // We've been divorced from our compare! Our block was split, and
2374 // now our compare lives in a predecessor block. We mustn't
2375 // re-compare here, as the children of the compare aren't guaranteed
2376 // live across the block boundary (we *could* check for this).
2377 // Regardless, the compare has been done in the predecessor block,
2378 // and it left a value for us in a virtual register. Ergo, we test
2379 // the one-bit value left in the virtual register.
2380 emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0);
2382 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2383 std::swap(TBB, FBB);
2387 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2391 // Obtain the branch weight and add the TrueBB to the successor list.
2392 uint32_t BranchWeight = 0;
2394 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2395 TBB->getBasicBlock());
2396 FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2398 fastEmitBranch(FBB, DbgLoc);
2402 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2403 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2404 unsigned AddrReg = getRegForValue(BI->getOperand(0));
2408 // Emit the indirect branch.
2409 const MCInstrDesc &II = TII.get(AArch64::BR);
2410 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2411 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2413 // Make sure the CFG is up-to-date.
2414 for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
2415 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
2420 bool AArch64FastISel::selectCmp(const Instruction *I) {
2421 const CmpInst *CI = cast<CmpInst>(I);
2423 // Try to optimize or fold the cmp.
2424 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2425 unsigned ResultReg = 0;
2426 switch (Predicate) {
2429 case CmpInst::FCMP_FALSE:
2430 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2431 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2432 TII.get(TargetOpcode::COPY), ResultReg)
2433 .addReg(AArch64::WZR, getKillRegState(true));
2435 case CmpInst::FCMP_TRUE:
2436 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2441 updateValueMap(I, ResultReg);
2446 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2449 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2451 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2452 // condition codes are inverted, because they are used by CSINC.
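// For example, for FCMP_UEQ the first CSINC (WZR, WZR, NE) yields 1 only when
// EQ holds, and the second CSINC (tmp, WZR, VC) then yields 1 when either EQ
// or the unordered flag (VS) holds, which is exactly the UEQ predicate.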
2453 static unsigned CondCodeTable[2][2] = {
2454 { AArch64CC::NE, AArch64CC::VC },
2455 { AArch64CC::PL, AArch64CC::LE }
2457 unsigned *CondCodes = nullptr;
2458 switch (Predicate) {
2461 case CmpInst::FCMP_UEQ:
2462 CondCodes = &CondCodeTable[0][0];
2464 case CmpInst::FCMP_ONE:
2465 CondCodes = &CondCodeTable[1][0];
2470 unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2471 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2473 .addReg(AArch64::WZR, getKillRegState(true))
2474 .addReg(AArch64::WZR, getKillRegState(true))
2475 .addImm(CondCodes[0]);
2476 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2478 .addReg(TmpReg1, getKillRegState(true))
2479 .addReg(AArch64::WZR, getKillRegState(true))
2480 .addImm(CondCodes[1]);
2482 updateValueMap(I, ResultReg);
2486 // Now set a register based on the comparison.
2487 AArch64CC::CondCode CC = getCompareCC(Predicate);
2488 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2489 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2490 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2492 .addReg(AArch64::WZR, getKillRegState(true))
2493 .addReg(AArch64::WZR, getKillRegState(true))
2494 .addImm(invertedCC);
2496 updateValueMap(I, ResultReg);
2500 /// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false' constant.
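/// For example, "select i1 %c, i1 true, i1 %f" is equivalent to "or i1 %c, %f"
/// and can be emitted as a single ORRWrr (the IR names are only illustrative).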
2502 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2503 if (!SI->getType()->isIntegerTy(1))
2506 const Value *Src1Val, *Src2Val;
2508 bool NeedExtraOp = false;
2509 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2511 Src1Val = SI->getCondition();
2512 Src2Val = SI->getFalseValue();
2513 Opc = AArch64::ORRWrr;
2515 assert(CI->isZero());
2516 Src1Val = SI->getFalseValue();
2517 Src2Val = SI->getCondition();
2518 Opc = AArch64::BICWrr;
2520 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2522 Src1Val = SI->getCondition();
2523 Src2Val = SI->getTrueValue();
2524 Opc = AArch64::ORRWrr;
2527 assert(CI->isZero());
2528 Src1Val = SI->getCondition();
2529 Src2Val = SI->getTrueValue();
2530 Opc = AArch64::ANDWrr;
2537 unsigned Src1Reg = getRegForValue(Src1Val);
2540 bool Src1IsKill = hasTrivialKill(Src1Val);
2542 unsigned Src2Reg = getRegForValue(Src2Val);
2545 bool Src2IsKill = hasTrivialKill(Src2Val);
2548 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2551 unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32spRegClass, Src1Reg,
2552 Src1IsKill, Src2Reg, Src2IsKill);
2553 updateValueMap(SI, ResultReg);
2557 bool AArch64FastISel::selectSelect(const Instruction *I) {
2558 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2560 if (!isTypeSupported(I->getType(), VT))
2564 const TargetRegisterClass *RC;
2565 switch (VT.SimpleTy) {
2572 Opc = AArch64::CSELWr;
2573 RC = &AArch64::GPR32RegClass;
2576 Opc = AArch64::CSELXr;
2577 RC = &AArch64::GPR64RegClass;
2580 Opc = AArch64::FCSELSrrr;
2581 RC = &AArch64::FPR32RegClass;
2584 Opc = AArch64::FCSELDrrr;
2585 RC = &AArch64::FPR64RegClass;
2589 const SelectInst *SI = cast<SelectInst>(I);
2590 const Value *Cond = SI->getCondition();
2591 AArch64CC::CondCode CC = AArch64CC::NE;
2592 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2594 if (optimizeSelect(SI))
2597 // Try to pick up the flags, so we don't have to emit another compare.
2598 if (foldXALUIntrinsic(CC, I, Cond)) {
2599 // Fake request the condition to force emission of the XALU intrinsic.
2600 unsigned CondReg = getRegForValue(Cond);
2603 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2604 isValueAvailable(Cond)) {
2605 const auto *Cmp = cast<CmpInst>(Cond);
2606 // Try to optimize or fold the cmp.
2607 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2608 const Value *FoldSelect = nullptr;
2609 switch (Predicate) {
2612 case CmpInst::FCMP_FALSE:
2613 FoldSelect = SI->getFalseValue();
2615 case CmpInst::FCMP_TRUE:
2616 FoldSelect = SI->getTrueValue();
2621 unsigned SrcReg = getRegForValue(FoldSelect);
2624 unsigned UseReg = lookUpRegForValue(SI);
2626 MRI.clearKillFlags(UseReg);
2628 updateValueMap(I, SrcReg);
2633 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2636 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2637 CC = getCompareCC(Predicate);
2638 switch (Predicate) {
2641 case CmpInst::FCMP_UEQ:
2642 ExtraCC = AArch64CC::EQ;
2645 case CmpInst::FCMP_ONE:
2646 ExtraCC = AArch64CC::MI;
2650 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2652 unsigned CondReg = getRegForValue(Cond);
2655 bool CondIsKill = hasTrivialKill(Cond);
2657 // Emit a TST instruction (ANDS wzr, reg, #imm).
2658 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDSWri),
2660 .addReg(CondReg, getKillRegState(CondIsKill))
2661 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2664 unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2665 bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2667 unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2668 bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2670 if (!Src1Reg || !Src2Reg)
2673 if (ExtraCC != AArch64CC::AL) {
2674 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2675 Src2IsKill, ExtraCC);
2678 unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2680 updateValueMap(I, ResultReg);
2684 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2685 Value *V = I->getOperand(0);
2686 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2689 unsigned Op = getRegForValue(V);
2693 unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2694 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2695 ResultReg).addReg(Op);
2696 updateValueMap(I, ResultReg);
2700 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2701 Value *V = I->getOperand(0);
2702 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2705 unsigned Op = getRegForValue(V);
2709 unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2710 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2711 ResultReg).addReg(Op);
2712 updateValueMap(I, ResultReg);
2716 // FPToUI and FPToSI
2717 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2719 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2722 unsigned SrcReg = getRegForValue(I->getOperand(0));
2726 EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2727 if (SrcVT == MVT::f128)
2731 if (SrcVT == MVT::f64) {
2733 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2735 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2738 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2740 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2742 unsigned ResultReg = createResultReg(
2743 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2744 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2746 updateValueMap(I, ResultReg);
2750 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2752 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2754 assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2755 "Unexpected value type.");
2757 unsigned SrcReg = getRegForValue(I->getOperand(0));
2760 bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2762 EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2764 // Handle sign-extension.
2765 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2767 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2774 if (SrcVT == MVT::i64) {
2776 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2778 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2781 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2783 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2786 unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2788 updateValueMap(I, ResultReg);
2792 bool AArch64FastISel::fastLowerArguments() {
2793 if (!FuncInfo.CanLowerReturn)
2796 const Function *F = FuncInfo.Fn;
2800 CallingConv::ID CC = F->getCallingConv();
2801 if (CC != CallingConv::C)
2804 // Only handle simple cases of up to 8 GPR and FPR each.
2805 unsigned GPRCnt = 0;
2806 unsigned FPRCnt = 0;
2808 for (auto const &Arg : F->args()) {
2809 // The first argument is at index 1.
2811 if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
2812 F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
2813 F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
2814 F->getAttributes().hasAttribute(Idx, Attribute::Nest))
2817 Type *ArgTy = Arg.getType();
2818 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2821 EVT ArgVT = TLI.getValueType(ArgTy);
2822 if (!ArgVT.isSimple())
2825 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2826 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2829 if (VT.isVector() &&
2830 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2833 if (VT >= MVT::i1 && VT <= MVT::i64)
2835 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2836 VT.is128BitVector())
2841 if (GPRCnt > 8 || FPRCnt > 8)
2845 static const MCPhysReg Registers[6][8] = {
2846 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2847 AArch64::W5, AArch64::W6, AArch64::W7 },
2848 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2849 AArch64::X5, AArch64::X6, AArch64::X7 },
2850 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2851 AArch64::H5, AArch64::H6, AArch64::H7 },
2852 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2853 AArch64::S5, AArch64::S6, AArch64::S7 },
2854 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2855 AArch64::D5, AArch64::D6, AArch64::D7 },
2856 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2857 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2860 unsigned GPRIdx = 0;
2861 unsigned FPRIdx = 0;
2862 for (auto const &Arg : F->args()) {
2863 MVT VT = TLI.getSimpleValueType(Arg.getType());
2865 const TargetRegisterClass *RC;
2866 if (VT >= MVT::i1 && VT <= MVT::i32) {
2867 SrcReg = Registers[0][GPRIdx++];
2868 RC = &AArch64::GPR32RegClass;
2870 } else if (VT == MVT::i64) {
2871 SrcReg = Registers[1][GPRIdx++];
2872 RC = &AArch64::GPR64RegClass;
2873 } else if (VT == MVT::f16) {
2874 SrcReg = Registers[2][FPRIdx++];
2875 RC = &AArch64::FPR16RegClass;
2876 } else if (VT == MVT::f32) {
2877 SrcReg = Registers[3][FPRIdx++];
2878 RC = &AArch64::FPR32RegClass;
2879 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2880 SrcReg = Registers[4][FPRIdx++];
2881 RC = &AArch64::FPR64RegClass;
2882 } else if (VT.is128BitVector()) {
2883 SrcReg = Registers[5][FPRIdx++];
2884 RC = &AArch64::FPR128RegClass;
2886 llvm_unreachable("Unexpected value type.");
2888 unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2889 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2890 // Without this, EmitLiveInCopies may eliminate the livein if its only
2891 // use is a bitcast (which isn't turned into an instruction).
2892 unsigned ResultReg = createResultReg(RC);
2893 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2894 TII.get(TargetOpcode::COPY), ResultReg)
2895 .addReg(DstReg, getKillRegState(true));
2896 updateValueMap(&Arg, ResultReg);
2901 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
2902 SmallVectorImpl<MVT> &OutVTs,
2903 unsigned &NumBytes) {
2904 CallingConv::ID CC = CLI.CallConv;
2905 SmallVector<CCValAssign, 16> ArgLocs;
2906 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2907 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2909 // Get a count of how many bytes are to be pushed on the stack.
2910 NumBytes = CCInfo.getNextStackOffset();
2912 // Issue CALLSEQ_START
2913 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
2914 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
2917 // Process the args.
2918 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2919 CCValAssign &VA = ArgLocs[i];
2920 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
2921 MVT ArgVT = OutVTs[VA.getValNo()];
2923 unsigned ArgReg = getRegForValue(ArgVal);
2927 // Handle arg promotion: SExt, ZExt, AExt.
2928 switch (VA.getLocInfo()) {
2929 case CCValAssign::Full:
2931 case CCValAssign::SExt: {
2932 MVT DestVT = VA.getLocVT();
2934 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
2939 case CCValAssign::AExt:
2940 // Intentional fall-through.
2941 case CCValAssign::ZExt: {
2942 MVT DestVT = VA.getLocVT();
2944 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
2950 llvm_unreachable("Unknown arg promotion!");
2953 // Now copy/store arg to correct locations.
2954 if (VA.isRegLoc() && !VA.needsCustom()) {
2955 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2956 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
2957 CLI.OutRegs.push_back(VA.getLocReg());
2958 } else if (VA.needsCustom()) {
2959 // FIXME: Handle custom args.
2962 assert(VA.isMemLoc() && "Assuming store on stack.");
2964 // Don't emit stores for undef values.
2965 if (isa<UndefValue>(ArgVal))
2968 // Need to store on the stack.
2969 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
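// On big-endian targets, an argument smaller than a slot is stored at the
// high-address end of its 8-byte slot so that it occupies the least
// significant bytes, matching its in-register representation.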
2971 unsigned BEAlign = 0;
2972 if (ArgSize < 8 && !Subtarget->isLittleEndian())
2973 BEAlign = 8 - ArgSize;
2976 Addr.setKind(Address::RegBase);
2977 Addr.setReg(AArch64::SP);
2978 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
2980 unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
2981 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
2982 MachinePointerInfo::getStack(Addr.getOffset()),
2983 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
2985 if (!emitStore(ArgVT, ArgReg, Addr, MMO))
2992 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
2993 unsigned NumBytes) {
2994 CallingConv::ID CC = CLI.CallConv;
2996 // Issue CALLSEQ_END
2997 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
2998 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
2999 .addImm(NumBytes).addImm(0);
3001 // Now the return value.
3002 if (RetVT != MVT::isVoid) {
3003 SmallVector<CCValAssign, 16> RVLocs;
3004 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3005 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3007 // Only handle a single return value.
3008 if (RVLocs.size() != 1)
3011 // Copy all of the result registers out of their specified physreg.
3012 MVT CopyVT = RVLocs[0].getValVT();
3013 unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3014 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3015 TII.get(TargetOpcode::COPY), ResultReg)
3016 .addReg(RVLocs[0].getLocReg());
3017 CLI.InRegs.push_back(RVLocs[0].getLocReg());
3019 CLI.ResultReg = ResultReg;
3020 CLI.NumResultRegs = 1;
3026 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3027 CallingConv::ID CC = CLI.CallConv;
3028 bool IsTailCall = CLI.IsTailCall;
3029 bool IsVarArg = CLI.IsVarArg;
3030 const Value *Callee = CLI.Callee;
3031 const char *SymName = CLI.SymName;
3033 if (!Callee && !SymName)
3036 // Allow SelectionDAG isel to handle tail calls.
3040 CodeModel::Model CM = TM.getCodeModel();
3041 // Only support the small and large code model.
3042 if (CM != CodeModel::Small && CM != CodeModel::Large)
3045 // FIXME: Add large code model support for ELF.
3046 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3049 // Let SDISel handle vararg functions.
3053 // FIXME: Only handle *simple* calls for now.
3055 if (CLI.RetTy->isVoidTy())
3056 RetVT = MVT::isVoid;
3057 else if (!isTypeLegal(CLI.RetTy, RetVT))
3060 for (auto Flag : CLI.OutFlags)
3061 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
3064 // Set up the argument vectors.
3065 SmallVector<MVT, 16> OutVTs;
3066 OutVTs.reserve(CLI.OutVals.size());
3068 for (auto *Val : CLI.OutVals) {
3070 if (!isTypeLegal(Val->getType(), VT) &&
3071 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3074 // We don't handle vector parameters yet.
3075 if (VT.isVector() || VT.getSizeInBits() > 64)
3078 OutVTs.push_back(VT);
3082 if (Callee && !computeCallAddress(Callee, Addr))
3085 // Handle the arguments now that we've gotten them.
3087 if (!processCallArgs(CLI, OutVTs, NumBytes))
3091 MachineInstrBuilder MIB;
3092 if (CM == CodeModel::Small) {
3093 const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3094 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3096 MIB.addExternalSymbol(SymName, 0);
3097 else if (Addr.getGlobalValue())
3098 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3099 else if (Addr.getReg()) {
3100 unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3105 unsigned CallReg = 0;
3107 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3108 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3110 .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3112 CallReg = createResultReg(&AArch64::GPR64RegClass);
3113 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
3116 .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
3118 } else if (Addr.getGlobalValue())
3119 CallReg = materializeGV(Addr.getGlobalValue());
3120 else if (Addr.getReg())
3121 CallReg = Addr.getReg();
3126 const MCInstrDesc &II = TII.get(AArch64::BLR);
3127 CallReg = constrainOperandRegClass(II, CallReg, 0);
3128 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3131 // Add implicit physical register uses to the call.
3132 for (auto Reg : CLI.OutRegs)
3133 MIB.addReg(Reg, RegState::Implicit);
3135 // Add a register mask with the call-preserved registers.
3136 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3137 MIB.addRegMask(TRI.getCallPreservedMask(CC));
3141 // Finish off the call including any return values.
3142 return finishCall(CLI, RetVT, NumBytes);
3145 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
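// With a known alignment, limit the inline expansion to at most four naturally
// aligned load/store pairs.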
3147 return Len / Alignment <= 4;
3152 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3153 uint64_t Len, unsigned Alignment) {
3154 // Make sure we don't bloat code by inlining very large memcpys.
3155 if (!isMemCpySmall(Len, Alignment))
3158 int64_t UnscaledOffset = 0;
3159 Address OrigDest = Dest;
3160 Address OrigSrc = Src;
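// For example, a 16-byte copy between 8-byte aligned pointers is expanded into
// two i64 load/store pairs with increasing offsets.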
3164 if (!Alignment || Alignment >= 8) {
3175 // Bound based on alignment.
3176 if (Len >= 4 && Alignment == 4)
3178 else if (Len >= 2 && Alignment == 2)
3185 unsigned ResultReg = emitLoad(VT, VT, Src);
3189 if (!emitStore(VT, ResultReg, Dest))
3192 int64_t Size = VT.getSizeInBits() / 8;
3194 UnscaledOffset += Size;
3196 // We need to recompute the unscaled offset for each iteration.
3197 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3198 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3204 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
3205 /// into the user. The condition code will only be updated on success.
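/// A typical foldable pattern looks like this (IR names are only illustrative):
///   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
///   %ovf = extractvalue { i32, i1 } %res, 1
///   br i1 %ovf, label %overflow, label %normal
/// The branch can then reuse the flags set by the ADDS emitted for the
/// intrinsic instead of re-testing the extracted i1 value.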
3206 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3207 const Instruction *I,
3208 const Value *Cond) {
3209 if (!isa<ExtractValueInst>(Cond))
3212 const auto *EV = cast<ExtractValueInst>(Cond);
3213 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3216 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3218 const Function *Callee = II->getCalledFunction();
3220 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3221 if (!isTypeLegal(RetTy, RetVT))
3224 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3227 const Value *LHS = II->getArgOperand(0);
3228 const Value *RHS = II->getArgOperand(1);
3230 // Canonicalize immediate to the RHS.
3231 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3232 isCommutativeIntrinsic(II))
3233 std::swap(LHS, RHS);
3235 // Simplify multiplies.
3236 unsigned IID = II->getIntrinsicID();
3240 case Intrinsic::smul_with_overflow:
3241 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3242 if (C->getValue() == 2)
3243 IID = Intrinsic::sadd_with_overflow;
3245 case Intrinsic::umul_with_overflow:
3246 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3247 if (C->getValue() == 2)
3248 IID = Intrinsic::uadd_with_overflow;
3252 AArch64CC::CondCode TmpCC;
3256 case Intrinsic::sadd_with_overflow:
3257 case Intrinsic::ssub_with_overflow:
3258 TmpCC = AArch64CC::VS;
3260 case Intrinsic::uadd_with_overflow:
3261 TmpCC = AArch64CC::HS;
3263 case Intrinsic::usub_with_overflow:
3264 TmpCC = AArch64CC::LO;
3266 case Intrinsic::smul_with_overflow:
3267 case Intrinsic::umul_with_overflow:
3268 TmpCC = AArch64CC::NE;
3272 // Check if both instructions are in the same basic block.
3273 if (!isValueAvailable(II))
3276 // Make sure nothing is in the way.
3277 BasicBlock::const_iterator Start = I;
3278 BasicBlock::const_iterator End = II;
3279 for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3280 // We only expect extractvalue instructions between the intrinsic and the
3281 // instruction to be selected.
3282 if (!isa<ExtractValueInst>(Itr))
3285 // Check that the extractvalue operand comes from the intrinsic.
3286 const auto *EVI = cast<ExtractValueInst>(Itr);
3287 if (EVI->getAggregateOperand() != II)
3295 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3296 // FIXME: Handle more intrinsics.
3297 switch (II->getIntrinsicID()) {
3298 default: return false;
3299 case Intrinsic::frameaddress: {
3300 MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
3301 MFI->setFrameAddressIsTaken(true);
3303 const AArch64RegisterInfo *RegInfo =
3304 static_cast<const AArch64RegisterInfo *>(
3305 TM.getSubtargetImpl()->getRegisterInfo());
3306 unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3307 unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3308 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3309 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3310 // Recursively load frame address
3316 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3318 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3319 SrcReg, /*IsKill=*/true, 0);
3320 assert(DestReg && "Unexpected LDR instruction emission failure.");
3324 updateValueMap(II, SrcReg);
3327 case Intrinsic::memcpy:
3328 case Intrinsic::memmove: {
3329 const auto *MTI = cast<MemTransferInst>(II);
3330 // Don't handle volatile.
3331 if (MTI->isVolatile())
3334 // Disable inlining for memmove before calls to computeAddress. Otherwise,
3335 // we would emit dead code because we don't currently handle memmoves.
3336 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3337 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3338 // Small memcpys are common enough that we want to do them without a call if possible.
3340 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3341 unsigned Alignment = MTI->getAlignment();
3342 if (isMemCpySmall(Len, Alignment)) {
3344 if (!computeAddress(MTI->getRawDest(), Dest) ||
3345 !computeAddress(MTI->getRawSource(), Src))
3347 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3352 if (!MTI->getLength()->getType()->isIntegerTy(64))
3355 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3356 // Fast instruction selection doesn't support the special address spaces.
3360 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3361 return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
3363 case Intrinsic::memset: {
3364 const MemSetInst *MSI = cast<MemSetInst>(II);
3365 // Don't handle volatile.
3366 if (MSI->isVolatile())
3369 if (!MSI->getLength()->getType()->isIntegerTy(64))
3372 if (MSI->getDestAddressSpace() > 255)
3373 // Fast instruction selection doesn't support the special address spaces.
3377 return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
3379 case Intrinsic::sin:
3380 case Intrinsic::cos:
3381 case Intrinsic::pow: {
3383 if (!isTypeLegal(II->getType(), RetVT))
3386 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3389 static const RTLIB::Libcall LibCallTable[3][2] = {
3390 { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3391 { RTLIB::COS_F32, RTLIB::COS_F64 },
3392 { RTLIB::POW_F32, RTLIB::POW_F64 }
3395 bool Is64Bit = RetVT == MVT::f64;
3396 switch (II->getIntrinsicID()) {
3398 llvm_unreachable("Unexpected intrinsic.");
3399 case Intrinsic::sin:
3400 LC = LibCallTable[0][Is64Bit];
3402 case Intrinsic::cos:
3403 LC = LibCallTable[1][Is64Bit];
3405 case Intrinsic::pow:
3406 LC = LibCallTable[2][Is64Bit];
3411 Args.reserve(II->getNumArgOperands());
3413 // Populate the argument list.
3414 for (auto &Arg : II->arg_operands()) {
3417 Entry.Ty = Arg->getType();
3418 Args.push_back(Entry);
3421 CallLoweringInfo CLI;
3422 CLI.setCallee(TLI.getLibcallCallingConv(LC), II->getType(),
3423 TLI.getLibcallName(LC), std::move(Args));
3424 if (!lowerCallTo(CLI))
3426 updateValueMap(II, CLI.ResultReg);
3429 case Intrinsic::fabs: {
3431 if (!isTypeLegal(II->getType(), VT))
3435 switch (VT.SimpleTy) {
3439 Opc = AArch64::FABSSr;
3442 Opc = AArch64::FABSDr;
3445 unsigned SrcReg = getRegForValue(II->getOperand(0));
3448 bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3449 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3450 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3451 .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3452 updateValueMap(II, ResultReg);
3455 case Intrinsic::trap: {
3456 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3460 case Intrinsic::sqrt: {
3461 Type *RetTy = II->getCalledFunction()->getReturnType();
3464 if (!isTypeLegal(RetTy, VT))
3467 unsigned Op0Reg = getRegForValue(II->getOperand(0));
3470 bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3472 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3476 updateValueMap(II, ResultReg);
3479 case Intrinsic::sadd_with_overflow:
3480 case Intrinsic::uadd_with_overflow:
3481 case Intrinsic::ssub_with_overflow:
3482 case Intrinsic::usub_with_overflow:
3483 case Intrinsic::smul_with_overflow:
3484 case Intrinsic::umul_with_overflow: {
3485 // This implements the basic lowering of the xalu with overflow intrinsics.
3486 const Function *Callee = II->getCalledFunction();
3487 auto *Ty = cast<StructType>(Callee->getReturnType());
3488 Type *RetTy = Ty->getTypeAtIndex(0U);
3491 if (!isTypeLegal(RetTy, VT))
3494 if (VT != MVT::i32 && VT != MVT::i64)
3497 const Value *LHS = II->getArgOperand(0);
3498 const Value *RHS = II->getArgOperand(1);
3499 // Canonicalize immediate to the RHS.
3500 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3501 isCommutativeIntrinsic(II))
3502 std::swap(LHS, RHS);
3504 // Simplify multiplies.
3505 unsigned IID = II->getIntrinsicID();
3509 case Intrinsic::smul_with_overflow:
3510 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3511 if (C->getValue() == 2) {
3512 IID = Intrinsic::sadd_with_overflow;
3516 case Intrinsic::umul_with_overflow:
3517 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3518 if (C->getValue() == 2) {
3519 IID = Intrinsic::uadd_with_overflow;
3525 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3526 AArch64CC::CondCode CC = AArch64CC::Invalid;
3528 default: llvm_unreachable("Unexpected intrinsic!");
3529 case Intrinsic::sadd_with_overflow:
3530 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3533 case Intrinsic::uadd_with_overflow:
3534 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3537 case Intrinsic::ssub_with_overflow:
3538 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3541 case Intrinsic::usub_with_overflow:
3542 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3545 case Intrinsic::smul_with_overflow: {
3547 unsigned LHSReg = getRegForValue(LHS);
3550 bool LHSIsKill = hasTrivialKill(LHS);
3552 unsigned RHSReg = getRegForValue(RHS);
3555 bool RHSIsKill = hasTrivialKill(RHS);
3557 if (VT == MVT::i32) {
3558 MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3559 unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3560 /*IsKill=*/false, 32);
3561 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3563 ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3565 emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3566 AArch64_AM::ASR, 31, /*WantResult=*/false);
3568 assert(VT == MVT::i64 && "Unexpected value type.");
3569 MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3570 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3572 emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3573 AArch64_AM::ASR, 63, /*WantResult=*/false);
3577 case Intrinsic::umul_with_overflow: {
3579 unsigned LHSReg = getRegForValue(LHS);
3582 bool LHSIsKill = hasTrivialKill(LHS);
3584 unsigned RHSReg = getRegForValue(RHS);
3587 bool RHSIsKill = hasTrivialKill(RHS);
3589 if (VT == MVT::i32) {
3590 MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3591 emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3592 /*IsKill=*/false, AArch64_AM::LSR, 32,
3593 /*WantResult=*/false);
3594 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3597 assert(VT == MVT::i64 && "Unexpected value type.");
3598 MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3599 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3601 emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3602 /*IsKill=*/false, /*WantResult=*/false);
3609 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3610 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3611 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3614 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3615 AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3616 /*IsKill=*/true, getInvertedCondCode(CC));
3618 assert((ResultReg1 + 1) == ResultReg2 &&
3619 "Nonconsecutive result registers.");
3620 updateValueMap(II, ResultReg1, 2);
3627 bool AArch64FastISel::selectRet(const Instruction *I) {
3628 const ReturnInst *Ret = cast<ReturnInst>(I);
3629 const Function &F = *I->getParent()->getParent();
3631 if (!FuncInfo.CanLowerReturn)
3637 // Build a list of return value registers.
3638 SmallVector<unsigned, 4> RetRegs;
3640 if (Ret->getNumOperands() > 0) {
3641 CallingConv::ID CC = F.getCallingConv();
3642 SmallVector<ISD::OutputArg, 4> Outs;
3643 GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
3645 // Analyze operands of the call, assigning locations to each operand.
3646 SmallVector<CCValAssign, 16> ValLocs;
3647 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3648 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3649 : RetCC_AArch64_AAPCS;
3650 CCInfo.AnalyzeReturn(Outs, RetCC);
3652 // Only handle a single return value for now.
3653 if (ValLocs.size() != 1)
3656 CCValAssign &VA = ValLocs[0];
3657 const Value *RV = Ret->getOperand(0);
3659 // Don't bother handling odd stuff for now.
3660 if ((VA.getLocInfo() != CCValAssign::Full) &&
3661 (VA.getLocInfo() != CCValAssign::BCvt))
3664 // Only handle register returns for now.
3668 unsigned Reg = getRegForValue(RV);
3672 unsigned SrcReg = Reg + VA.getValNo();
3673 unsigned DestReg = VA.getLocReg();
3674 // Avoid a cross-class copy. This is very unlikely.
3675 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3678 EVT RVEVT = TLI.getValueType(RV->getType());
3679 if (!RVEVT.isSimple())
3682 // Vectors (of > 1 lane) in big endian need tricky handling.
3683 if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3684 !Subtarget->isLittleEndian())
3687 MVT RVVT = RVEVT.getSimpleVT();
3688 if (RVVT == MVT::f128)
3691 MVT DestVT = VA.getValVT();
3692 // Special handling for extended integers.
3693 if (RVVT != DestVT) {
3694 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3697 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3700 bool IsZExt = Outs[0].Flags.isZExt();
3701 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3707 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3708 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3710 // Add register to return instruction.
3711 RetRegs.push_back(VA.getLocReg());
3714 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3715 TII.get(AArch64::RET_ReallyLR));
3716 for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
3717 MIB.addReg(RetRegs[i], RegState::Implicit);
3721 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3722 Type *DestTy = I->getType();
3723 Value *Op = I->getOperand(0);
3724 Type *SrcTy = Op->getType();
3726 EVT SrcEVT = TLI.getValueType(SrcTy, true);
3727 EVT DestEVT = TLI.getValueType(DestTy, true);
3728 if (!SrcEVT.isSimple())
3730 if (!DestEVT.isSimple())
3733 MVT SrcVT = SrcEVT.getSimpleVT();
3734 MVT DestVT = DestEVT.getSimpleVT();
3736 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3739 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3743 unsigned SrcReg = getRegForValue(Op);
3746 bool SrcIsKill = hasTrivialKill(Op);
3748 // If we're truncating from i64 to a smaller non-legal type then generate an
3749 // AND. Otherwise, we know the high bits are undefined and a truncate only
3750 // generates a COPY. We cannot mark the source register also as the result
3751 // register, because this can incorrectly transfer the kill flag onto the source register.
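// For example, truncating i64 to i8 extracts the sub_32 register and masks it
// with 0xff, while truncating i32 to i16 only needs a COPY because the upper
// bits of the result are already known to be undefined.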
3754 if (SrcVT == MVT::i64) {
3756 switch (DestVT.SimpleTy) {
3758 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3770 // Issue an extract_subreg to get the lower 32-bits.
3771 unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3773 // Create the AND instruction which performs the actual truncation.
3774 ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3775 assert(ResultReg && "Unexpected AND instruction emission failure.");
3777 ResultReg = createResultReg(&AArch64::GPR32RegClass);
3778 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3779 TII.get(TargetOpcode::COPY), ResultReg)
3780 .addReg(SrcReg, getKillRegState(SrcIsKill));
3783 updateValueMap(I, ResultReg);
3787 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3788 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3789 DestVT == MVT::i64) &&
3790 "Unexpected value type.");
3791 // Handle i8 and i16 as i32.
3792 if (DestVT == MVT::i8 || DestVT == MVT::i16)
3796 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3797 assert(ResultReg && "Unexpected AND instruction emission failure.");
3798 if (DestVT == MVT::i64) {
3799 // We're zero-extending i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
3800 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
3801 unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3802 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3803 TII.get(AArch64::SUBREG_TO_REG), Reg64)
3806 .addImm(AArch64::sub_32);
3811 if (DestVT == MVT::i64) {
3812 // FIXME: Sign-extending i1 to i64 is not handled yet.
3815 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3816 /*TODO:IsKill=*/false, 0, 0);
3820 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3821 unsigned Op1, bool Op1IsKill) {
3823 switch (RetVT.SimpleTy) {
3829 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3831 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3834 const TargetRegisterClass *RC =
3835 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3836 return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3837 ZReg, /*IsKill=*/true);
3840 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3841 unsigned Op1, bool Op1IsKill) {
3842 if (RetVT != MVT::i64)
3845 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3846 Op0, Op0IsKill, Op1, Op1IsKill,
3847 AArch64::XZR, /*IsKill=*/true);
3850 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3851 unsigned Op1, bool Op1IsKill) {
3852 if (RetVT != MVT::i64)
3855 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3856 Op0, Op0IsKill, Op1, Op1IsKill,
3857 AArch64::XZR, /*IsKill=*/true);
3860 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3861 unsigned Op1Reg, bool Op1IsKill) {
3863 bool NeedTrunc = false;
3865 switch (RetVT.SimpleTy) {
3867 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
3868 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
3869 case MVT::i32: Opc = AArch64::LSLVWr; break;
3870 case MVT::i64: Opc = AArch64::LSLVXr; break;
3873 const TargetRegisterClass *RC =
3874 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3876 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3879 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3882 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3886 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3887 bool Op0IsKill, uint64_t Shift,
3889 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3890 "Unexpected source/return type pair.");
3891 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
3892 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
3893 "Unexpected source value type.");
3894 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3895 RetVT == MVT::i64) && "Unexpected return value type.");
3897 bool Is64Bit = (RetVT == MVT::i64);
3898 unsigned RegSize = Is64Bit ? 64 : 32;
3899 unsigned DstBits = RetVT.getSizeInBits();
3900 unsigned SrcBits = SrcVT.getSizeInBits();
3902 // Don't deal with undefined shifts.
3903 if (Shift >= DstBits)
3906 // For immediate shifts we can fold the zero-/sign-extension into the shift.
3907 // {S|U}BFM Wd, Wn, #r, #s
3908 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
3910 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3911 // %2 = shl i16 %1, 4
3912 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
3913 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
3914 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
3915 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
3917 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3918 // %2 = shl i16 %1, 8
3919 // Wd<32+7-24,32-24> = Wn<7:0>
3920 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
3921 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
3922 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
3924 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3925 // %2 = shl i16 %1, 12
3926 // Wd<32+3-20,32-20> = Wn<3:0>
3927 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
3928 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
3929 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
3931 unsigned ImmR = RegSize - Shift;
3932 // Limit the width to the length of the source type.
3933 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
3934 static const unsigned OpcTable[2][2] = {
3935 {AArch64::SBFMWri, AArch64::SBFMXri},
3936 {AArch64::UBFMWri, AArch64::UBFMXri}
3938 unsigned Opc = OpcTable[IsZext][Is64Bit];
3939 const TargetRegisterClass *RC =
3940 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3941 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3942 unsigned TmpReg = MRI.createVirtualRegister(RC);
3943 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3944 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3946 .addReg(Op0, getKillRegState(Op0IsKill))
3947 .addImm(AArch64::sub_32);
3951 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3954 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3955 unsigned Op1Reg, bool Op1IsKill) {
3957 bool NeedTrunc = false;
3959 switch (RetVT.SimpleTy) {
3961 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
3962 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
3963 case MVT::i32: Opc = AArch64::LSRVWr; break;
3964 case MVT::i64: Opc = AArch64::LSRVXr; break;
3967 const TargetRegisterClass *RC =
3968 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3970 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
3971 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3972 Op0IsKill = Op1IsKill = true;
3974 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3977 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3981 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3982 bool Op0IsKill, uint64_t Shift,
3984 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3985 "Unexpected source/return type pair.");
3986 assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3987 SrcVT == MVT::i64) && "Unexpected source value type.");
3988 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3989 RetVT == MVT::i64) && "Unexpected return value type.");
3991 bool Is64Bit = (RetVT == MVT::i64);
3992 unsigned RegSize = Is64Bit ? 64 : 32;
3993 unsigned DstBits = RetVT.getSizeInBits();
3994 unsigned SrcBits = SrcVT.getSizeInBits();
3996 // Don't deal with undefined shifts.
3997 if (Shift >= DstBits)
4000 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4001 // {S|U}BFM Wd, Wn, #r, #s
4002 // Wd<s-r:0> = Wn<s:r> when r <= s
4004 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4005 // %2 = lshr i16 %1, 4
4006 // Wd<7-4:0> = Wn<7:4>
4007 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4008 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4009 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4011 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4012 // %2 = lshr i16 %1, 8
4013 // Wd<7-7:0> = Wn<7:7>
4014 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4015 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4016 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4018 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4019 // %2 = lshr i16 %1, 12
4020 // Wd<7-7:0> = Wn<7:7> <- clamp r to 7
4021 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4022 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4023 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
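// Here ImmR = min(SrcBits - 1, Shift) and ImmS = SrcBits - 1, so the first
// example above (lshr by 4 of an extended i8) is emitted as UBFM Wd, Wn, #4,
// #7 (SBFM for the sign-extending variant).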
4025 if (Shift >= SrcBits && IsZExt)
4026 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4028 // When the shift consumes all of the source bits, the sign-extend cannot be
4029 // folded into the shift, so emit the sign-extend explicitly first.
4031 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4036 SrcBits = SrcVT.getSizeInBits();
4040 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4041 unsigned ImmS = SrcBits - 1;
4042 static const unsigned OpcTable[2][2] = {
4043 {AArch64::SBFMWri, AArch64::SBFMXri},
4044 {AArch64::UBFMWri, AArch64::UBFMXri}
4046 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4047 const TargetRegisterClass *RC =
4048 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4049 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4050 unsigned TmpReg = MRI.createVirtualRegister(RC);
4051 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4052 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4054 .addReg(Op0, getKillRegState(Op0IsKill))
4055 .addImm(AArch64::sub_32);
4059 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4062 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4063 unsigned Op1Reg, bool Op1IsKill) {
4065 bool NeedTrunc = false;
4067 switch (RetVT.SimpleTy) {
4069 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4070 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4071 case MVT::i32: Opc = AArch64::ASRVWr; break;
4072 case MVT::i64: Opc = AArch64::ASRVXr; break;
4075 const TargetRegisterClass *RC =
4076 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
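// For an arithmetic shift the narrow value is first sign-extended to 32 bits
// so its sign bits take part in the shift; the shift amount is masked with the
// narrow-type mask and the result is truncated back down with an AND.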
4078 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
4079 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4080 Op0IsKill = Op1IsKill = true;
4082 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4085 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4089 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4090 bool Op0IsKill, uint64_t Shift,
4092 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4093 "Unexpected source/return type pair.");
4094 assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
4095 SrcVT == MVT::i64) && "Unexpected source value type.");
4096 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4097 RetVT == MVT::i64) && "Unexpected return value type.");
4099 bool Is64Bit = (RetVT == MVT::i64);
4100 unsigned RegSize = Is64Bit ? 64 : 32;
4101 unsigned DstBits = RetVT.getSizeInBits();
4102 unsigned SrcBits = SrcVT.getSizeInBits();
4104 // Don't deal with undefined shifts.
4105 if (Shift >= DstBits)
4108 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4109 // {S|U}BFM Wd, Wn, #r, #s
4110 // Wd<s-r:0> = Wn<s:r> when r <= s
4112 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4113 // %2 = ashr i16 %1, 4
4114 // Wd<7-4:0> = Wn<7:4>
4115 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4116 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4117 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4119 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4120 // %2 = ashr i16 %1, 8
4121 // Wd<7-7:0> = Wn<7:7>
4122 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4123 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4124 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4126 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4127 // %2 = ashr i16 %1, 12
4128 // Wd<7-7:0> = Wn<7:7> <- clamp r to 7
4129 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4130 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4131 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
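// As above, ImmR = min(SrcBits - 1, Shift) and ImmS = SrcBits - 1, so the
// first example (ashr by 4 of a sign-extended i8) is emitted as
// SBFM Wd, Wn, #4, #7.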
4133 if (Shift >= SrcBits && IsZExt)
4134 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4136 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4137 unsigned ImmS = SrcBits - 1;
4138 static const unsigned OpcTable[2][2] = {
4139 {AArch64::SBFMWri, AArch64::SBFMXri},
4140 {AArch64::UBFMWri, AArch64::UBFMXri}
4142 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4143 const TargetRegisterClass *RC =
4144 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4145 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4146 unsigned TmpReg = MRI.createVirtualRegister(RC);
4147 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4148 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4150 .addReg(Op0, getKillRegState(Op0IsKill))
4151 .addImm(AArch64::sub_32);
4155 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4158 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4160 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
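// The extension itself is emitted as a single bitfield move,
// {S|U}BFM Rd, Rn, #0, #(source width - 1) (the SXTB/UXTB, SXTH/UXTH and SXTW
// forms); i1 sources are handled separately by emiti1Ext below.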
4162 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4163 // DestVT are odd things, so test to make sure that they are both types we can
4164 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4165 // bail out to SelectionDAG.
4166 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4167 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4168 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4169 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4175 switch (SrcVT.SimpleTy) {
4179 return emiti1Ext(SrcReg, DestVT, IsZExt);
4181 if (DestVT == MVT::i64)
4182 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4184 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4188 if (DestVT == MVT::i64)
4189 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4191 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4195 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4196 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4201 // Handle i8 and i16 as i32.
4202 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4204 else if (DestVT == MVT::i64) {
4205 unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4206 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4207 TII.get(AArch64::SUBREG_TO_REG), Src64)
4210 .addImm(AArch64::sub_32);
4214 const TargetRegisterClass *RC =
4215 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4216 return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4219 static bool isZExtLoad(const MachineInstr *LI) {
4220 switch (LI->getOpcode()) {
4223 case AArch64::LDURBBi:
4224 case AArch64::LDURHHi:
4225 case AArch64::LDURWi:
4226 case AArch64::LDRBBui:
4227 case AArch64::LDRHHui:
4228 case AArch64::LDRWui:
4229 case AArch64::LDRBBroX:
4230 case AArch64::LDRHHroX:
4231 case AArch64::LDRWroX:
4232 case AArch64::LDRBBroW:
4233 case AArch64::LDRHHroW:
4234 case AArch64::LDRWroW:
4239 static bool isSExtLoad(const MachineInstr *LI) {
4240 switch (LI->getOpcode()) {
4243 case AArch64::LDURSBWi:
4244 case AArch64::LDURSHWi:
4245 case AArch64::LDURSBXi:
4246 case AArch64::LDURSHXi:
4247 case AArch64::LDURSWi:
4248 case AArch64::LDRSBWui:
4249 case AArch64::LDRSHWui:
4250 case AArch64::LDRSBXui:
4251 case AArch64::LDRSHXui:
4252 case AArch64::LDRSWui:
4253 case AArch64::LDRSBWroX:
4254 case AArch64::LDRSHWroX:
4255 case AArch64::LDRSBXroX:
4256 case AArch64::LDRSHXroX:
4257 case AArch64::LDRSWroX:
4258 case AArch64::LDRSBWroW:
4259 case AArch64::LDRSHWroW:
4260 case AArch64::LDRSBXroW:
4261 case AArch64::LDRSHXroW:
4262 case AArch64::LDRSWroW:
4267 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4269 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4270 if (!LI || !LI->hasOneUse())
4273 // Check if the load instruction has already been selected.
4274 unsigned Reg = lookUpRegForValue(LI);
4278 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4282 // Check if the correct load instruction has been emitted - SelectionDAG might
4283 // have emitted a zero-extending load, but we need a sign-extending load.
4284 bool IsZExt = isa<ZExtInst>(I);
4285 const auto *LoadMI = MI;
4286 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4287 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4288 unsigned LoadReg = MI->getOperand(1).getReg();
4289 LoadMI = MRI.getUniqueVRegDef(LoadReg);
4290 assert(LoadMI && "Expected valid instruction");
4292 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4295 // Nothing to be done.
4296 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4297 updateValueMap(I, Reg);
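// Otherwise the load produced a 32-bit value but the extend yields an i64.
// For a zero-extend the W-register load has already cleared bits 63:32, so a
// SUBREG_TO_REG wrapper is sufficient; for a sign-extend the X-register result
// of the sign-extending load is reused and the now-redundant sub-register copy
// is erased.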
4302 unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4303 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4304 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4306 .addReg(Reg, getKillRegState(true))
4307 .addImm(AArch64::sub_32);
4310 assert((MI->getOpcode() == TargetOpcode::COPY &&
4311 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4312 "Expected copy instruction");
4313 Reg = MI->getOperand(1).getReg();
4314 MI->eraseFromParent();
4316 updateValueMap(I, Reg);
4320 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4321 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4322 "Unexpected integer extend instruction.");
4325 if (!isTypeSupported(I->getType(), RetVT))
4328 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4331 // Try to optimize already sign-/zero-extended values from load instructions.
4332 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4335 unsigned SrcReg = getRegForValue(I->getOperand(0));
4338 bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4340 // Try to optimize already sign-/zero-extended values from function arguments.
4341 bool IsZExt = isa<ZExtInst>(I);
4342 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4343 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4344 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4345 unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4346 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4347 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4349 .addReg(SrcReg, getKillRegState(SrcIsKill))
4350 .addImm(AArch64::sub_32);
4353 // Conservatively clear all kill flags from all uses, because we are
4354 // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4355 // level. The result of the instruction at IR level might have been
4356 // trivially dead, which is no longer true.
4357 unsigned UseReg = lookUpRegForValue(I);
4359 MRI.clearKillFlags(UseReg);
4361 updateValueMap(I, SrcReg);
4366 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4370 updateValueMap(I, ResultReg);
4374 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4375 EVT DestEVT = TLI.getValueType(I->getType(), true);
4376 if (!DestEVT.isSimple())
4379 MVT DestVT = DestEVT.getSimpleVT();
4380 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4384 bool Is64bit = (DestVT == MVT::i64);
4385 switch (ISDOpcode) {
4389 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4392 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4395 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4396 unsigned Src0Reg = getRegForValue(I->getOperand(0));
4399 bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4401 unsigned Src1Reg = getRegForValue(I->getOperand(1));
4404 bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4406 const TargetRegisterClass *RC =
4407 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4408 unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4409 Src1Reg, /*IsKill=*/false);
4410 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4411 // The remainder is computed as numerator - (quotient * denominator) using the
4412 // MSUB instruction.
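// For example, for an i32 srem this emits SDIV Wq, Wn, Wm followed by
// MSUB Wr, Wq, Wm, Wn, which computes Wr = Wn - Wq * Wm.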
4413 unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4414 Src1Reg, Src1IsKill, Src0Reg,
4416 updateValueMap(I, ResultReg);
4420 bool AArch64FastISel::selectMul(const Instruction *I) {
4422 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4426 return selectBinaryOp(I, ISD::MUL);
4428 const Value *Src0 = I->getOperand(0);
4429 const Value *Src1 = I->getOperand(1);
4430 if (const auto *C = dyn_cast<ConstantInt>(Src0))
4431 if (C->getValue().isPowerOf2())
4432 std::swap(Src0, Src1);
4434 // Try to simplify to a shift instruction.
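// (A multiply by 2^n is selected as a left shift by n; when the operand is a
// zext/sext that is not otherwise free, the extension is folded into the shift
// via emitLSL_ri below.)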
4435 if (const auto *C = dyn_cast<ConstantInt>(Src1))
4436 if (C->getValue().isPowerOf2()) {
4437 uint64_t ShiftVal = C->getValue().logBase2();
4440 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4441 if (!isIntExtFree(ZExt)) {
4443 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4446 Src0 = ZExt->getOperand(0);
4449 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4450 if (!isIntExtFree(SExt)) {
4452 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4455 Src0 = SExt->getOperand(0);
4460 unsigned Src0Reg = getRegForValue(Src0);
4463 bool Src0IsKill = hasTrivialKill(Src0);
4465 unsigned ResultReg =
4466 emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4469 updateValueMap(I, ResultReg);
4474 unsigned Src0Reg = getRegForValue(I->getOperand(0));
4477 bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4479 unsigned Src1Reg = getRegForValue(I->getOperand(1));
4482 bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4484 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4489 updateValueMap(I, ResultReg);
4493 bool AArch64FastISel::selectShift(const Instruction *I) {
4495 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4498 if (RetVT.isVector())
4499 return selectOperator(I, I->getOpcode());
4501 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4502 unsigned ResultReg = 0;
4503 uint64_t ShiftVal = C->getZExtValue();
4505 bool IsZExt = (I->getOpcode() != Instruction::AShr);
4506 const Value *Op0 = I->getOperand(0);
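// If the shifted value is a zext/sext that is not otherwise free, fold the
// extension into the immediate shift; the emit*_ri helpers below encode both
// in a single bitfield move.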
4507 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4508 if (!isIntExtFree(ZExt)) {
4510 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4513 Op0 = ZExt->getOperand(0);
4516 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4517 if (!isIntExtFree(SExt)) {
4519 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4522 Op0 = SExt->getOperand(0);
4527 unsigned Op0Reg = getRegForValue(Op0);
4530 bool Op0IsKill = hasTrivialKill(Op0);
4532 switch (I->getOpcode()) {
4533 default: llvm_unreachable("Unexpected instruction.");
4534 case Instruction::Shl:
4535 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4537 case Instruction::AShr:
4538 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4540 case Instruction::LShr:
4541 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4547 updateValueMap(I, ResultReg);
4551 unsigned Op0Reg = getRegForValue(I->getOperand(0));
4554 bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4556 unsigned Op1Reg = getRegForValue(I->getOperand(1));
4559 bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4561 unsigned ResultReg = 0;
4562 switch (I->getOpcode()) {
4563 default: llvm_unreachable("Unexpected instruction.");
4564 case Instruction::Shl:
4565 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4567 case Instruction::AShr:
4568 ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4570 case Instruction::LShr:
4571 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4578 updateValueMap(I, ResultReg);
4582 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4585 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4587 if (!isTypeLegal(I->getType(), RetVT))
4591 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4592 Opc = AArch64::FMOVWSr;
4593 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4594 Opc = AArch64::FMOVXDr;
4595 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4596 Opc = AArch64::FMOVSWr;
4597 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4598 Opc = AArch64::FMOVDXr;
4602 const TargetRegisterClass *RC = nullptr;
4603 switch (RetVT.SimpleTy) {
4604 default: llvm_unreachable("Unexpected value type.");
4605 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4606 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4607 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4608 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4610 unsigned Op0Reg = getRegForValue(I->getOperand(0));
4613 bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4614 unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4619 updateValueMap(I, ResultReg);
4623 bool AArch64FastISel::selectFRem(const Instruction *I) {
4625 if (!isTypeLegal(I->getType(), RetVT))
4629 switch (RetVT.SimpleTy) {
4633 LC = RTLIB::REM_F32;
4636 LC = RTLIB::REM_F64;
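// There is no AArch64 instruction for frem, so it is lowered to a runtime
// library call (fmodf for f32, fmod for f64, per the RTLIB entries above).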
4641 Args.reserve(I->getNumOperands());
4643 // Populate the argument list.
4644 for (auto &Arg : I->operands()) {
4647 Entry.Ty = Arg->getType();
4648 Args.push_back(Entry);
4651 CallLoweringInfo CLI;
4652 CLI.setCallee(TLI.getLibcallCallingConv(LC), I->getType(),
4653 TLI.getLibcallName(LC), std::move(Args));
4654 if (!lowerCallTo(CLI))
4656 updateValueMap(I, CLI.ResultReg);
4660 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4662 if (!isTypeLegal(I->getType(), VT))
4665 if (!isa<ConstantInt>(I->getOperand(1)))
4666 return selectBinaryOp(I, ISD::SDIV);
4668 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4669 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4670 !(C.isPowerOf2() || (-C).isPowerOf2()))
4671 return selectBinaryOp(I, ISD::SDIV);
4673 unsigned Lg2 = C.countTrailingZeros();
4674 unsigned Src0Reg = getRegForValue(I->getOperand(0));
4677 bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4679 if (cast<BinaryOperator>(I)->isExact()) {
4680 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4683 updateValueMap(I, ResultReg);
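// Otherwise round toward zero: add 2^Lg2 - 1 to the dividend only when it is
// negative (compare + CSEL below), arithmetic-shift the selected value right
// by Lg2, and negate the result when the divisor is negative.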
4687 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4688 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4692 // (Src0 < 0) ? Pow2 - 1 : 0;
4693 if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4697 const TargetRegisterClass *RC;
4698 if (VT == MVT::i64) {
4699 SelectOpc = AArch64::CSELXr;
4700 RC = &AArch64::GPR64RegClass;
4702 SelectOpc = AArch64::CSELWr;
4703 RC = &AArch64::GPR32RegClass;
4705 unsigned SelectReg =
4706 fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4707 Src0IsKill, AArch64CC::LT);
4711 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4712 // negate the result.
4713 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4716 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4717 SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4719 ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4724 updateValueMap(I, ResultReg);
4728 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4729 /// duplicate it for AArch64, because otherwise we would bail out even for
4730 /// simple cases. This is because the standard fastEmit functions don't cover
4731 /// MUL at all and ADD is lowered very inefficiently.
4732 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4733 unsigned N = getRegForValue(I->getOperand(0));
4736 bool NIsKill = hasTrivialKill(I->getOperand(0));
4738 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4739 // into a single N = N + TotalOffset.
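// For example, a GEP whose indices are all constants folds into a single
// accumulated byte offset that is added to the base pointer once at the end.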
4740 uint64_t TotalOffs = 0;
4741 Type *Ty = I->getOperand(0)->getType();
4742 MVT VT = TLI.getPointerTy();
4743 for (auto OI = std::next(I->op_begin()), E = I->op_end(); OI != E; ++OI) {
4744 const Value *Idx = *OI;
4745 if (auto *StTy = dyn_cast<StructType>(Ty)) {
4746 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4749 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4750 Ty = StTy->getElementType(Field);
4752 Ty = cast<SequentialType>(Ty)->getElementType();
4753 // If this is a constant subscript, handle it quickly.
4754 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4759 DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4763 N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4770 // N = N + Idx * ElementSize;
4771 uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4772 std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
4773 unsigned IdxN = Pair.first;
4774 bool IdxNIsKill = Pair.second;
4778 if (ElementSize != 1) {
4779 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4782 IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
4787 N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
4793 N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4797 updateValueMap(I, N);
4801 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
4802 switch (I->getOpcode()) {
4805 case Instruction::Add:
4806 case Instruction::Sub:
4807 return selectAddSub(I);
4808 case Instruction::Mul:
4809 return selectMul(I);
4810 case Instruction::SDiv:
4811 return selectSDiv(I);
4812 case Instruction::SRem:
4813 if (!selectBinaryOp(I, ISD::SREM))
4814 return selectRem(I, ISD::SREM);
4816 case Instruction::URem:
4817 if (!selectBinaryOp(I, ISD::UREM))
4818 return selectRem(I, ISD::UREM);
4820 case Instruction::Shl:
4821 case Instruction::LShr:
4822 case Instruction::AShr:
4823 return selectShift(I);
4824 case Instruction::And:
4825 case Instruction::Or:
4826 case Instruction::Xor:
4827 return selectLogicalOp(I);
4828 case Instruction::Br:
4829 return selectBranch(I);
4830 case Instruction::IndirectBr:
4831 return selectIndirectBr(I);
4832 case Instruction::BitCast:
4833 if (!FastISel::selectBitCast(I))
4834 return selectBitCast(I);
4836 case Instruction::FPToSI:
4837 if (!selectCast(I, ISD::FP_TO_SINT))
4838 return selectFPToInt(I, /*Signed=*/true);
4840 case Instruction::FPToUI:
4841 return selectFPToInt(I, /*Signed=*/false);
4842 case Instruction::ZExt:
4843 case Instruction::SExt:
4844 return selectIntExt(I);
4845 case Instruction::Trunc:
4846 if (!selectCast(I, ISD::TRUNCATE))
4847 return selectTrunc(I);
4849 case Instruction::FPExt:
4850 return selectFPExt(I);
4851 case Instruction::FPTrunc:
4852 return selectFPTrunc(I);
4853 case Instruction::SIToFP:
4854 if (!selectCast(I, ISD::SINT_TO_FP))
4855 return selectIntToFP(I, /*Signed=*/true);
4857 case Instruction::UIToFP:
4858 return selectIntToFP(I, /*Signed=*/false);
4859 case Instruction::Load:
4860 return selectLoad(I);
4861 case Instruction::Store:
4862 return selectStore(I);
4863 case Instruction::FCmp:
4864 case Instruction::ICmp:
4865 return selectCmp(I);
4866 case Instruction::Select:
4867 return selectSelect(I);
4868 case Instruction::Ret:
4869 return selectRet(I);
4870 case Instruction::FRem:
4871 return selectFRem(I);
4872 case Instruction::GetElementPtr:
4873 return selectGetElementPtr(I);
4876 // Fall back to target-independent instruction selection.
4877 return selectOperator(I, I->getOpcode());
4878 // Silence warnings.
4879 (void)&CC_AArch64_DarwinPCS_VarArg;
4883 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
4884 const TargetLibraryInfo *LibInfo) {
4885 return new AArch64FastISel(FuncInfo, LibInfo);