//===-- AArch64FastISel.cpp - AArch64 FastISel implementation ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
17 #include "AArch64Subtarget.h"
18 #include "AArch64TargetMachine.h"
19 #include "MCTargetDesc/AArch64AddressingModes.h"
20 #include "llvm/Analysis/BranchProbabilityInfo.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/FastISel.h"
23 #include "llvm/CodeGen/FunctionLoweringInfo.h"
24 #include "llvm/CodeGen/MachineConstantPool.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/IR/CallingConv.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/DerivedTypes.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/GetElementPtrTypeIterator.h"
33 #include "llvm/IR/GlobalAlias.h"
34 #include "llvm/IR/GlobalVariable.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Operator.h"
38 #include "llvm/Support/CommandLine.h"
namespace {

class AArch64FastISel : public FastISel {
  class Address {
  public:
    typedef enum { RegBase, FrameIndexBase } BaseKind;

  private:
    BaseKind Kind;
    AArch64_AM::ShiftExtendType ExtType;
    union { unsigned Reg; int FI; } Base;
    unsigned OffsetReg;
    unsigned Shift;
    int64_t Offset;
    const GlobalValue *GV;

  public:
    Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
      OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
66 void setKind(BaseKind K) { Kind = K; }
67 BaseKind getKind() const { return Kind; }
68 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
69 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
70 bool isRegBase() const { return Kind == RegBase; }
71 bool isFIBase() const { return Kind == FrameIndexBase; }
    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }
    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }
    void setOffsetReg(unsigned Reg) {
      assert(isRegBase() && "Invalid offset register access!");
      OffsetReg = Reg;
    }
    unsigned getOffsetReg() const {
      assert(isRegBase() && "Invalid offset register access!");
      return OffsetReg;
    }
    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }
    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }
96 void setOffset(int64_t O) { Offset = O; }
97 int64_t getOffset() { return Offset; }
98 void setShift(unsigned S) { Shift = S; }
99 unsigned getShift() { return Shift; }
101 void setGlobalValue(const GlobalValue *G) { GV = G; }
102 const GlobalValue *getGlobalValue() { return GV; }
105 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
106 /// make the right decision when generating code for different targets.
107 const AArch64Subtarget *Subtarget;
108 LLVMContext *Context;
110 bool FastLowerArguments() override;
111 bool FastLowerCall(CallLoweringInfo &CLI) override;
112 bool FastLowerIntrinsicCall(const IntrinsicInst *II) override;
115 // Selection routines.
116 bool SelectLoad(const Instruction *I);
117 bool SelectStore(const Instruction *I);
118 bool SelectBranch(const Instruction *I);
119 bool SelectIndirectBr(const Instruction *I);
120 bool SelectCmp(const Instruction *I);
121 bool SelectSelect(const Instruction *I);
122 bool SelectFPExt(const Instruction *I);
123 bool SelectFPTrunc(const Instruction *I);
124 bool SelectFPToInt(const Instruction *I, bool Signed);
125 bool SelectIntToFP(const Instruction *I, bool Signed);
126 bool SelectRem(const Instruction *I, unsigned ISDOpcode);
127 bool SelectRet(const Instruction *I);
128 bool SelectTrunc(const Instruction *I);
129 bool SelectIntExt(const Instruction *I);
130 bool SelectMul(const Instruction *I);
131 bool SelectShift(const Instruction *I, bool IsLeftShift, bool IsArithmetic);
132 bool SelectBitCast(const Instruction *I);
134 // Utility helper routines.
135 bool isTypeLegal(Type *Ty, MVT &VT);
136 bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
137 bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
138 bool ComputeCallAddress(const Value *V, Address &Addr);
139 bool SimplifyAddress(Address &Addr, MVT VT);
140 void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
141 unsigned Flags, unsigned ScaleFactor,
142 MachineMemOperand *MMO);
143 bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
144 bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
146 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
150 bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
151 bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
152 MachineMemOperand *MMO = nullptr);
153 bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
154 MachineMemOperand *MMO = nullptr);
155 unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
156 unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
157 unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
158 unsigned Op1, bool Op1IsKill);
159 unsigned Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
160 unsigned Op1, bool Op1IsKill);
161 unsigned Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
162 unsigned Op1, bool Op1IsKill);
163 unsigned Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
164 unsigned Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
165 unsigned Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
167 unsigned AArch64MaterializeInt(const ConstantInt *CI, MVT VT);
168 unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
169 unsigned AArch64MaterializeGV(const GlobalValue *GV);
171 // Call handling routines.
173 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
174 bool ProcessCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
176 bool FinishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
179 // Backend specific FastISel code.
180 unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
181 unsigned TargetMaterializeConstant(const Constant *C) override;
183 explicit AArch64FastISel(FunctionLoweringInfo &funcInfo,
184 const TargetLibraryInfo *libInfo)
185 : FastISel(funcInfo, libInfo) {
186 Subtarget = &TM.getSubtarget<AArch64Subtarget>();
187 Context = &funcInfo.Fn->getContext();
  bool TargetSelectInstruction(const Instruction *I) override;

#include "AArch64GenFastISel.inc"
};
195 } // end anonymous namespace
197 #include "AArch64GenCallingConv.inc"
199 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
200 if (CC == CallingConv::WebKit_JS)
201 return CC_AArch64_WebKit_JS;
  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
}
unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
  assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
         "Alloca should always return a pointer.");
  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI))
    return 0;
  DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);
  if (SI != FuncInfo.StaticAllocaMap.end()) {
    unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(SI->second)
        .addImm(0)
        .addImm(0);
    return ResultReg;
  }
  return 0;
}
unsigned AArch64FastISel::AArch64MaterializeInt(const ConstantInt *CI, MVT VT) {
  if (!CI->isZero())
    return FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  // Create a copy from the zero register to materialize a "0" value.
  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
                                                   : &AArch64::GPR32RegClass;
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
          TII.get(TargetOpcode::COPY), ResultReg)
      .addReg(ZeroReg, getKillRegState(true));
  return ResultReg;
}
unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  const APFloat Val = CFP->getValueAPF();
  bool Is64Bit = (VT == MVT::f64);

  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
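  // For example, values such as 1.0, -1.0 or 2.0 fit the 8-bit FMOV immediate
  // encoding, whereas a constant like 0.1 does not and is loaded from the
  // constant pool below.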
  if (TLI.isFPImmLegal(Val, VT)) {
    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    // Positive zero (+0.0) has to be materialized with a fmov from the zero
    // register, because the immediate version of fmov cannot encode zero.
    if (Val.isPosZero()) {
      unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
      unsigned Opc = Is64Bit ? AArch64::FMOVDr : AArch64::FMOVSr;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
          .addReg(ZReg, getKillRegState(true));
      return ResultReg;
    }
    int Imm = Is64Bit ? AArch64_AM::getFP64Imm(Val)
                      : AArch64_AM::getFP32Imm(Val);
    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
        .addImm(Imm);
    return ResultReg;
  }

  // Materialize via constant pool. MachineConstantPool wants an explicit
  // alignment.
  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
  if (Align == 0)
    Align = DL.getTypeAllocSize(CFP->getType());

  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
          ADRPReg)
      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(ADRPReg)
      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return ResultReg;
}
295 unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
296 // We can't handle thread-local variables quickly yet.
297 if (GV->isThreadLocal())
300 // MachO still uses GOT for large code-model accesses, but ELF requires
301 // movz/movk sequences, which FastISel doesn't handle yet.
302 if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
305 unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
307 EVT DestEVT = TLI.getValueType(GV->getType(), true);
308 if (!DestEVT.isSimple())
311 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
314 if (OpFlags & AArch64II::MO_GOT) {
316 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
318 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
320 ResultReg = createResultReg(&AArch64::GPR64RegClass);
321 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
324 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
328 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
330 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
332 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
333 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
336 .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
342 unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
343 EVT CEVT = TLI.getValueType(C->getType(), true);
345 // Only handle simple types.
346 if (!CEVT.isSimple())
348 MVT VT = CEVT.getSimpleVT();
350 if (const auto *CI = dyn_cast<ConstantInt>(C))
351 return AArch64MaterializeInt(CI, VT);
352 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
353 return AArch64MaterializeFP(CFP, VT);
354 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
355 return AArch64MaterializeGV(GV);
360 // Computes the address to get to an object.
361 bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr, Type *Ty)
363 const User *U = nullptr;
364 unsigned Opcode = Instruction::UserOp1;
365 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
366 // Don't walk into other basic blocks unless the object is an alloca from
367 // another block, otherwise it may not have a virtual register assigned.
368 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
369 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
370 Opcode = I->getOpcode();
373 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
374 Opcode = C->getOpcode();
378 if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
379 if (Ty->getAddressSpace() > 255)
380 // Fast instruction selection doesn't support the special
387 case Instruction::BitCast: {
388 // Look through bitcasts.
389 return ComputeAddress(U->getOperand(0), Addr, Ty);
391 case Instruction::IntToPtr: {
392 // Look past no-op inttoptrs.
393 if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
394 return ComputeAddress(U->getOperand(0), Addr, Ty);
397 case Instruction::PtrToInt: {
398 // Look past no-op ptrtoints.
399 if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
400 return ComputeAddress(U->getOperand(0), Addr, Ty);
403 case Instruction::GetElementPtr: {
404 Address SavedAddr = Addr;
405 uint64_t TmpOffset = Addr.getOffset();
  // Iterate through the GEP folding the constants into offsets where
  // possible.
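  // For example, a GEP into a struct whose second field sits at byte offset 8
  // simply adds 8 to TmpOffset here instead of emitting any address arithmetic.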
409 gep_type_iterator GTI = gep_type_begin(U);
410 for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
412 const Value *Op = *i;
413 if (StructType *STy = dyn_cast<StructType>(*GTI)) {
414 const StructLayout *SL = DL.getStructLayout(STy);
415 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
416 TmpOffset += SL->getElementOffset(Idx);
418 uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
420 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
421 // Constant-offset addressing.
422 TmpOffset += CI->getSExtValue() * S;
425 if (canFoldAddIntoGEP(U, Op)) {
426 // A compatible add with a constant operand. Fold the constant.
428 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
429 TmpOffset += CI->getSExtValue() * S;
430 // Iterate on the other operand.
431 Op = cast<AddOperator>(Op)->getOperand(0);
435 goto unsupported_gep;
440 // Try to grab the base operand now.
441 Addr.setOffset(TmpOffset);
442 if (ComputeAddress(U->getOperand(0), Addr, Ty))
445 // We failed, restore everything and try the other options.
451 case Instruction::Alloca: {
452 const AllocaInst *AI = cast<AllocaInst>(Obj);
453 DenseMap<const AllocaInst *, int>::iterator SI =
454 FuncInfo.StaticAllocaMap.find(AI);
455 if (SI != FuncInfo.StaticAllocaMap.end()) {
456 Addr.setKind(Address::FrameIndexBase);
457 Addr.setFI(SI->second);
462 case Instruction::Add: {
463 // Adds of constants are common and easy enough.
464 const Value *LHS = U->getOperand(0);
465 const Value *RHS = U->getOperand(1);
467 if (isa<ConstantInt>(LHS))
470 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
471 Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
472 return ComputeAddress(LHS, Addr, Ty);
475 Address Backup = Addr;
476 if (ComputeAddress(LHS, Addr, Ty) && ComputeAddress(RHS, Addr, Ty))
482 case Instruction::Shl:
483 if (Addr.getOffsetReg())
486 if (const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
487 unsigned Val = CI->getZExtValue();
488 if (Val < 1 || Val > 3)
491 uint64_t NumBytes = 0;
492 if (Ty && Ty->isSized()) {
493 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
494 NumBytes = NumBits / 8;
495 if (!isPowerOf2_64(NumBits))
499 if (NumBytes != (1UL << Val))
    Addr.setExtendType(AArch64_AM::LSL);
    Addr.setShift(Val);
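    // e.g. an i64 load from "base + (idx << 3)" can fold the shift into the
    // register-offset addressing mode (LDRXroX with an LSL #3 extend).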
505 if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
506 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
509 if (const auto *ZE = dyn_cast<ZExtInst>(U))
510 if (ZE->getOperand(0)->getType()->isIntegerTy(32))
511 Addr.setExtendType(AArch64_AM::UXTW);
513 if (const auto *SE = dyn_cast<SExtInst>(U))
514 if (SE->getOperand(0)->getType()->isIntegerTy(32))
515 Addr.setExtendType(AArch64_AM::SXTW);
517 unsigned Reg = getRegForValue(U->getOperand(0));
520 Addr.setOffsetReg(Reg);
527 if (!Addr.getOffsetReg()) {
528 unsigned Reg = getRegForValue(Obj);
531 Addr.setOffsetReg(Reg);
537 unsigned Reg = getRegForValue(Obj);
544 bool AArch64FastISel::ComputeCallAddress(const Value *V, Address &Addr) {
545 const User *U = nullptr;
546 unsigned Opcode = Instruction::UserOp1;
549 if (const auto *I = dyn_cast<Instruction>(V)) {
550 Opcode = I->getOpcode();
552 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
553 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
554 Opcode = C->getOpcode();
560 case Instruction::BitCast:
561 // Look past bitcasts if its operand is in the same BB.
563 return ComputeCallAddress(U->getOperand(0), Addr);
565 case Instruction::IntToPtr:
566 // Look past no-op inttoptrs if its operand is in the same BB.
568 TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
569 return ComputeCallAddress(U->getOperand(0), Addr);
571 case Instruction::PtrToInt:
572 // Look past no-op ptrtoints if its operand is in the same BB.
574 TLI.getValueType(U->getType()) == TLI.getPointerTy())
575 return ComputeCallAddress(U->getOperand(0), Addr);
579 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
580 Addr.setGlobalValue(GV);
584 // If all else fails, try to materialize the value in a register.
585 if (!Addr.getGlobalValue()) {
586 Addr.setReg(getRegForValue(V));
587 return Addr.getReg() != 0;
594 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
595 EVT evt = TLI.getValueType(Ty, true);
597 // Only handle simple types.
598 if (evt == MVT::Other || !evt.isSimple())
600 VT = evt.getSimpleVT();
602 // This is a legal type, but it's not something we handle in fast-isel.
606 // Handle all other legal types, i.e. a register that will directly hold this
608 return TLI.isTypeLegal(VT);
611 bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
612 if (isTypeLegal(Ty, VT))
  // If this is a type that can be sign or zero-extended to a basic operation
  // go ahead and accept it now. For stores, this reflects truncation.
617 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
623 bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT) {
624 unsigned ScaleFactor;
625 switch (VT.SimpleTy) {
626 default: return false;
627 case MVT::i1: // fall-through
628 case MVT::i8: ScaleFactor = 1; break;
629 case MVT::i16: ScaleFactor = 2; break;
630 case MVT::i32: // fall-through
631 case MVT::f32: ScaleFactor = 4; break;
632 case MVT::i64: // fall-through
633 case MVT::f64: ScaleFactor = 8; break;
636 bool ImmediateOffsetNeedsLowering = false;
637 bool RegisterOffsetNeedsLowering = false;
638 int64_t Offset = Addr.getOffset();
639 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
640 ImmediateOffsetNeedsLowering = true;
641 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
642 !isUInt<12>(Offset / ScaleFactor))
643 ImmediateOffsetNeedsLowering = true;
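  // For example, an i64 access at offset 32760 still fits (32760 / 8 == 4095),
  // but one at offset 32768 does not and must be lowered into a separate add.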
  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
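  // e.g. "ldr x0, [x1, x2, lsl #3]" has no immediate field, so the base and
  // offset registers are combined with an ADD first and the immediate is kept
  // for the load/store itself.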
648 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() &&
650 RegisterOffsetNeedsLowering = true;
652 // If this is a stack pointer and the offset needs to be simplified then put
653 // the alloca address into a register, set the base type back to register and
654 // continue. This should almost never happen.
655 if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) {
656 unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
657 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
659 .addFrameIndex(Addr.getFI())
662 Addr.setKind(Address::RegBase);
663 Addr.setReg(ResultReg);
666 if (RegisterOffsetNeedsLowering) {
667 unsigned ResultReg = 0;
669 ResultReg = createResultReg(&AArch64::GPR64RegClass);
670 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
671 TII.get(AArch64::ADDXrs), ResultReg)
672 .addReg(Addr.getReg())
673 .addReg(Addr.getOffsetReg())
674 .addImm(Addr.getShift());
676 ResultReg = Emit_LSL_ri(MVT::i64, Addr.getOffsetReg(),
677 /*Op0IsKill=*/false, Addr.getShift());
681 Addr.setReg(ResultReg);
682 Addr.setOffsetReg(0);
686 // Since the offset is too large for the load/store instruction get the
687 // reg+offset into a register.
688 if (ImmediateOffsetNeedsLowering) {
689 unsigned ResultReg = 0;
691 ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(),
692 /*IsKill=*/false, Offset, MVT::i64);
694 ResultReg = FastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
698 Addr.setReg(ResultReg);
704 void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
705 const MachineInstrBuilder &MIB,
707 unsigned ScaleFactor,
708 MachineMemOperand *MMO) {
709 int64_t Offset = Addr.getOffset() / ScaleFactor;
710 // Frame base works a bit differently. Handle it separately.
711 if (Addr.isFIBase()) {
712 int FI = Addr.getFI();
713 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
714 // and alignment should be based on the VT.
715 MMO = FuncInfo.MF->getMachineMemOperand(
716 MachinePointerInfo::getFixedStack(FI, Offset), Flags,
717 MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
718 // Now add the rest of the operands.
719 MIB.addFrameIndex(FI).addImm(Offset);
721 assert(Addr.isRegBase() && "Unexpected address kind.");
722 if (Addr.getOffsetReg()) {
723 assert(Addr.getOffset() == 0 && "Unexpected offset");
724 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
725 Addr.getExtendType() == AArch64_AM::SXTX;
726 MIB.addReg(Addr.getReg());
727 MIB.addReg(Addr.getOffsetReg());
728 MIB.addImm(IsSigned);
729 MIB.addImm(Addr.getShift() != 0);
731 MIB.addReg(Addr.getReg());
737 MIB.addMemOperand(MMO);
740 bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
741 MachineMemOperand *MMO) {
742 // Simplify this down to something we can handle.
743 if (!SimplifyAddress(Addr, VT))
746 unsigned ScaleFactor;
747 switch (VT.SimpleTy) {
748 default: llvm_unreachable("Unexpected value type.");
749 case MVT::i1: // fall-through
750 case MVT::i8: ScaleFactor = 1; break;
751 case MVT::i16: ScaleFactor = 2; break;
752 case MVT::i32: // fall-through
753 case MVT::f32: ScaleFactor = 4; break;
754 case MVT::i64: // fall-through
755 case MVT::f64: ScaleFactor = 8; break;
758 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
759 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
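  // For example, an i32 load at offset -4 must use LDURWi, whereas one at
  // offset 8 can use LDRWui with a scaled immediate of 2.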
760 bool UseScaled = true;
761 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
766 static const unsigned OpcTable[4][6] = {
767 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, AArch64::LDURXi,
768 AArch64::LDURSi, AArch64::LDURDi },
769 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, AArch64::LDRXui,
770 AArch64::LDRSui, AArch64::LDRDui },
771 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX,
772 AArch64::LDRSroX, AArch64::LDRDroX },
773 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW,
774 AArch64::LDRSroW, AArch64::LDRDroW }
778 const TargetRegisterClass *RC;
780 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
782 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
783 if (Addr.getExtendType() == AArch64_AM::UXTW ||
784 Addr.getExtendType() == AArch64_AM::SXTW)
787 switch (VT.SimpleTy) {
788 default: llvm_unreachable("Unexpected value type.");
789 case MVT::i1: VTIsi1 = true; // Intentional fall-through.
790 case MVT::i8: Opc = OpcTable[Idx][0]; RC = &AArch64::GPR32RegClass; break;
791 case MVT::i16: Opc = OpcTable[Idx][1]; RC = &AArch64::GPR32RegClass; break;
792 case MVT::i32: Opc = OpcTable[Idx][2]; RC = &AArch64::GPR32RegClass; break;
793 case MVT::i64: Opc = OpcTable[Idx][3]; RC = &AArch64::GPR64RegClass; break;
794 case MVT::f32: Opc = OpcTable[Idx][4]; RC = &AArch64::FPR32RegClass; break;
795 case MVT::f64: Opc = OpcTable[Idx][5]; RC = &AArch64::FPR64RegClass; break;
798 // Create the base instruction, then add the operands.
799 ResultReg = createResultReg(RC);
800 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
801 TII.get(Opc), ResultReg);
802 AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
804 // Loading an i1 requires special handling.
806 MRI.constrainRegClass(ResultReg, &AArch64::GPR32RegClass);
807 unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
808 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
811 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
817 bool AArch64FastISel::SelectLoad(const Instruction *I) {
819 // Verify we have a legal type before going any further. Currently, we handle
820 // simple types that will directly fit in a register (i32/f32/i64/f64) or
821 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
822 if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic())
825 // See if we can handle this address.
827 if (!ComputeAddress(I->getOperand(0), Addr, I->getType()))
831 if (!EmitLoad(VT, ResultReg, Addr, createMachineMemOperandFor(I)))
834 UpdateValueMap(I, ResultReg);
838 bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
839 MachineMemOperand *MMO) {
840 // Simplify this down to something we can handle.
841 if (!SimplifyAddress(Addr, VT))
844 unsigned ScaleFactor;
845 switch (VT.SimpleTy) {
846 default: llvm_unreachable("Unexpected value type.");
847 case MVT::i1: // fall-through
848 case MVT::i8: ScaleFactor = 1; break;
849 case MVT::i16: ScaleFactor = 2; break;
850 case MVT::i32: // fall-through
851 case MVT::f32: ScaleFactor = 4; break;
852 case MVT::i64: // fall-through
853 case MVT::f64: ScaleFactor = 8; break;
856 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
857 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
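  // For example, an i32 store at offset -4 must use STURWi, whereas one at
  // offset 8 can use STRWui with a scaled immediate of 2.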
858 bool UseScaled = true;
859 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
865 static const unsigned OpcTable[4][6] = {
866 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
867 AArch64::STURSi, AArch64::STURDi },
868 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
869 AArch64::STRSui, AArch64::STRDui },
870 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
871 AArch64::STRSroX, AArch64::STRDroX },
872 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
873 AArch64::STRSroW, AArch64::STRDroW }
879 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
881 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
882 if (Addr.getExtendType() == AArch64_AM::UXTW ||
883 Addr.getExtendType() == AArch64_AM::SXTW)
886 switch (VT.SimpleTy) {
887 default: llvm_unreachable("Unexpected value type.");
888 case MVT::i1: VTIsi1 = true;
889 case MVT::i8: Opc = OpcTable[Idx][0]; break;
890 case MVT::i16: Opc = OpcTable[Idx][1]; break;
891 case MVT::i32: Opc = OpcTable[Idx][2]; break;
892 case MVT::i64: Opc = OpcTable[Idx][3]; break;
893 case MVT::f32: Opc = OpcTable[Idx][4]; break;
894 case MVT::f64: Opc = OpcTable[Idx][5]; break;
897 // Storing an i1 requires special handling.
899 MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
900 unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
901 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
904 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
907 // Create the base instruction, then add the operands.
908 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
911 AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
916 bool AArch64FastISel::SelectStore(const Instruction *I) {
918 Value *Op0 = I->getOperand(0);
919 // Verify we have a legal type before going any further. Currently, we handle
920 // simple types that will directly fit in a register (i32/f32/i64/f64) or
921 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
922 if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
923 cast<StoreInst>(I)->isAtomic())
926 // Get the value to be stored into a register.
927 unsigned SrcReg = getRegForValue(Op0);
931 // See if we can handle this address.
933 if (!ComputeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
936 if (!EmitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
941 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
943 case CmpInst::FCMP_ONE:
944 case CmpInst::FCMP_UEQ:
946 // AL is our "false" for now. The other two need more compares.
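    // (FCMP_ONE would need e.g. an MI-or-GT check and FCMP_UEQ an EQ-or-VS
    // check, i.e. two branches, which this single-condition mapping cannot
    // express.)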
947 return AArch64CC::AL;
948 case CmpInst::ICMP_EQ:
949 case CmpInst::FCMP_OEQ:
950 return AArch64CC::EQ;
951 case CmpInst::ICMP_SGT:
952 case CmpInst::FCMP_OGT:
953 return AArch64CC::GT;
954 case CmpInst::ICMP_SGE:
955 case CmpInst::FCMP_OGE:
956 return AArch64CC::GE;
957 case CmpInst::ICMP_UGT:
958 case CmpInst::FCMP_UGT:
959 return AArch64CC::HI;
960 case CmpInst::FCMP_OLT:
961 return AArch64CC::MI;
962 case CmpInst::ICMP_ULE:
963 case CmpInst::FCMP_OLE:
964 return AArch64CC::LS;
965 case CmpInst::FCMP_ORD:
966 return AArch64CC::VC;
967 case CmpInst::FCMP_UNO:
968 return AArch64CC::VS;
969 case CmpInst::FCMP_UGE:
970 return AArch64CC::PL;
971 case CmpInst::ICMP_SLT:
972 case CmpInst::FCMP_ULT:
973 return AArch64CC::LT;
974 case CmpInst::ICMP_SLE:
975 case CmpInst::FCMP_ULE:
976 return AArch64CC::LE;
977 case CmpInst::FCMP_UNE:
978 case CmpInst::ICMP_NE:
979 return AArch64CC::NE;
980 case CmpInst::ICMP_UGE:
981 return AArch64CC::HS;
982 case CmpInst::ICMP_ULT:
983 return AArch64CC::LO;
987 bool AArch64FastISel::SelectBranch(const Instruction *I) {
988 const BranchInst *BI = cast<BranchInst>(I);
989 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
990 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
992 AArch64CC::CondCode CC = AArch64CC::NE;
993 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
994 if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
995 // We may not handle every CC for now.
996 CC = getCompareCC(CI->getPredicate());
997 if (CC == AArch64CC::AL)
1001 if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1005 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1009 // Obtain the branch weight and add the TrueBB to the successor list.
1010 uint32_t BranchWeight = 0;
1012 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1013 TBB->getBasicBlock());
1014 FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1016 FastEmitBranch(FBB, DbgLoc);
1019 } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1021 if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1022 (isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) {
1023 unsigned CondReg = getRegForValue(TI->getOperand(0));
1027 // Issue an extract_subreg to get the lower 32-bits.
1028 if (SrcVT == MVT::i64)
1029 CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true,
1032 MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
1033 unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
1034 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1035 TII.get(AArch64::ANDWri), ANDReg)
1037 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1038 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1039 TII.get(AArch64::SUBSWri), AArch64::WZR)
1044 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1045 std::swap(TBB, FBB);
1048 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1052 // Obtain the branch weight and add the TrueBB to the successor list.
1053 uint32_t BranchWeight = 0;
1055 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1056 TBB->getBasicBlock());
1057 FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1059 FastEmitBranch(FBB, DbgLoc);
1062 } else if (const ConstantInt *CI =
1063 dyn_cast<ConstantInt>(BI->getCondition())) {
1064 uint64_t Imm = CI->getZExtValue();
1065 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
1066 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
1069 // Obtain the branch weight and add the target to the successor list.
1070 uint32_t BranchWeight = 0;
1072 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1073 Target->getBasicBlock());
1074 FuncInfo.MBB->addSuccessor(Target, BranchWeight);
1076 } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
    // Fake request the condition, otherwise the intrinsic might be completely
    // optimized away.
1079 unsigned CondReg = getRegForValue(BI->getCondition());
1084 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1088 // Obtain the branch weight and add the TrueBB to the successor list.
1089 uint32_t BranchWeight = 0;
1091 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1092 TBB->getBasicBlock());
1093 FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1095 FastEmitBranch(FBB, DbgLoc);
1099 unsigned CondReg = getRegForValue(BI->getCondition());
  // We've been divorced from our compare! Our block was split, and
  // now our compare lives in a predecessor block. We mustn't
  // re-compare here, as the children of the compare aren't guaranteed
  // live across the block boundary (we *could* check for this).
  // Regardless, the compare has been done in the predecessor block,
  // and it left a value for us in a virtual register. Ergo, we test
  // the one-bit value left in the virtual register.
1110 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri),
1116 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1117 std::swap(TBB, FBB);
1121 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1125 // Obtain the branch weight and add the TrueBB to the successor list.
1126 uint32_t BranchWeight = 0;
1128 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1129 TBB->getBasicBlock());
1130 FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1132 FastEmitBranch(FBB, DbgLoc);
1136 bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
1137 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
1138 unsigned AddrReg = getRegForValue(BI->getOperand(0));
1142 // Emit the indirect branch.
1143 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BR))
1146 // Make sure the CFG is up-to-date.
1147 for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
1148 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
1153 bool AArch64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) {
1154 Type *Ty = Src1Value->getType();
1155 EVT SrcEVT = TLI.getValueType(Ty, true);
1156 if (!SrcEVT.isSimple())
1158 MVT SrcVT = SrcEVT.getSimpleVT();
  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  uint64_t Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
1165 if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
1166 if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
1167 SrcVT == MVT::i8 || SrcVT == MVT::i1) {
1168 const APInt &CIVal = ConstInt->getValue();
1170 Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue();
1171 if (CIVal.isNegative()) {
1172 isNegativeImm = true;
1175 // FIXME: We can handle more immediates using shifts.
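      // e.g. #4095 is encodable directly, while #4096 would need the shifted
      // "LSL #12" immediate form.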
1176 UseImm = ((Imm & 0xfff) == Imm);
1178 } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
1179 if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
1180 if (ConstFP->isZero() && !ConstFP->isNegative())
1187 bool needsExt = false;
1188 switch (SrcVT.SimpleTy) {
1195 // Intentional fall-through.
1197 ZReg = AArch64::WZR;
1199 CmpOpc = isNegativeImm ? AArch64::ADDSWri : AArch64::SUBSWri;
1201 CmpOpc = AArch64::SUBSWrr;
1204 ZReg = AArch64::XZR;
1206 CmpOpc = isNegativeImm ? AArch64::ADDSXri : AArch64::SUBSXri;
1208 CmpOpc = AArch64::SUBSXrr;
1212 CmpOpc = UseImm ? AArch64::FCMPSri : AArch64::FCMPSrr;
1216 CmpOpc = UseImm ? AArch64::FCMPDri : AArch64::FCMPDrr;
1220 unsigned SrcReg1 = getRegForValue(Src1Value);
1226 SrcReg2 = getRegForValue(Src2Value);
1231 // We have i1, i8, or i16, we need to either zero extend or sign extend.
1233 SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
1237 SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
1245 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), ZReg)
1250 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), ZReg)
1255 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1258 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1265 bool AArch64FastISel::SelectCmp(const Instruction *I) {
1266 const CmpInst *CI = cast<CmpInst>(I);
1268 // We may not handle every CC for now.
1269 AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
1270 if (CC == AArch64CC::AL)
1274 if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1277 // Now set a register based on the comparison.
1278 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
1279 unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
1280 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
1282 .addReg(AArch64::WZR)
1283 .addReg(AArch64::WZR)
1284 .addImm(invertedCC);
1286 UpdateValueMap(I, ResultReg);
1290 bool AArch64FastISel::SelectSelect(const Instruction *I) {
1291 const SelectInst *SI = cast<SelectInst>(I);
1293 EVT DestEVT = TLI.getValueType(SI->getType(), true);
1294 if (!DestEVT.isSimple())
1297 MVT DestVT = DestEVT.getSimpleVT();
1298 if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
1303 switch (DestVT.SimpleTy) {
1304 default: return false;
1305 case MVT::i32: SelectOpc = AArch64::CSELWr; break;
1306 case MVT::i64: SelectOpc = AArch64::CSELXr; break;
1307 case MVT::f32: SelectOpc = AArch64::FCSELSrrr; break;
1308 case MVT::f64: SelectOpc = AArch64::FCSELDrrr; break;
1311 const Value *Cond = SI->getCondition();
1312 bool NeedTest = true;
1313 AArch64CC::CondCode CC = AArch64CC::NE;
1314 if (foldXALUIntrinsic(CC, I, Cond))
1317 unsigned CondReg = getRegForValue(Cond);
1320 bool CondIsKill = hasTrivialKill(Cond);
1323 MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
1324 unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
1325 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
1327 .addReg(CondReg, getKillRegState(CondIsKill))
1328 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1330 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri),
1337 unsigned TrueReg = getRegForValue(SI->getTrueValue());
1338 bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
1340 unsigned FalseReg = getRegForValue(SI->getFalseValue());
1341 bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
1343 if (!TrueReg || !FalseReg)
1346 unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
1347 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc),
1349 .addReg(TrueReg, getKillRegState(TrueIsKill))
1350 .addReg(FalseReg, getKillRegState(FalseIsKill))
1353 UpdateValueMap(I, ResultReg);
1357 bool AArch64FastISel::SelectFPExt(const Instruction *I) {
1358 Value *V = I->getOperand(0);
1359 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
1362 unsigned Op = getRegForValue(V);
1366 unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
1367 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
1368 ResultReg).addReg(Op);
1369 UpdateValueMap(I, ResultReg);
1373 bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
1374 Value *V = I->getOperand(0);
1375 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
1378 unsigned Op = getRegForValue(V);
1382 unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
1383 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
1384 ResultReg).addReg(Op);
1385 UpdateValueMap(I, ResultReg);
1389 // FPToUI and FPToSI
1390 bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
1392 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1395 unsigned SrcReg = getRegForValue(I->getOperand(0));
1399 EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1400 if (SrcVT == MVT::f128)
1404 if (SrcVT == MVT::f64) {
1406 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
1408 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
1411 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
1413 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
1415 unsigned ResultReg = createResultReg(
1416 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
1417 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1419 UpdateValueMap(I, ResultReg);
1423 bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
1425 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1427 assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
1428 "Unexpected value type.");
1430 unsigned SrcReg = getRegForValue(I->getOperand(0));
1434 EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1436 // Handle sign-extension.
1437 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
1439 EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
1444 MRI.constrainRegClass(SrcReg, SrcVT == MVT::i64 ? &AArch64::GPR64RegClass
1445 : &AArch64::GPR32RegClass);
1448 if (SrcVT == MVT::i64) {
1450 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
1452 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
1455 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
1457 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
1460 unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
1461 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1463 UpdateValueMap(I, ResultReg);
1467 bool AArch64FastISel::FastLowerArguments() {
1468 if (!FuncInfo.CanLowerReturn)
1471 const Function *F = FuncInfo.Fn;
1475 CallingConv::ID CC = F->getCallingConv();
1476 if (CC != CallingConv::C)
1479 // Only handle simple cases like i1/i8/i16/i32/i64/f32/f64 of up to 8 GPR and
1481 unsigned GPRCnt = 0;
1482 unsigned FPRCnt = 0;
1484 for (auto const &Arg : F->args()) {
1485 // The first argument is at index 1.
1487 if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
1488 F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
1489 F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
1490 F->getAttributes().hasAttribute(Idx, Attribute::Nest))
1493 Type *ArgTy = Arg.getType();
1494 if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
1497 EVT ArgVT = TLI.getValueType(ArgTy);
1498 if (!ArgVT.isSimple()) return false;
1499 switch (ArgVT.getSimpleVT().SimpleTy) {
1500 default: return false;
1515 if (GPRCnt > 8 || FPRCnt > 8)
1519 static const MCPhysReg Registers[5][8] = {
1520 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
1521 AArch64::W5, AArch64::W6, AArch64::W7 },
1522 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
1523 AArch64::X5, AArch64::X6, AArch64::X7 },
1524 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
1525 AArch64::H5, AArch64::H6, AArch64::H7 },
1526 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
1527 AArch64::S5, AArch64::S6, AArch64::S7 },
1528 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
1529 AArch64::D5, AArch64::D6, AArch64::D7 }
1532 unsigned GPRIdx = 0;
1533 unsigned FPRIdx = 0;
1534 for (auto const &Arg : F->args()) {
1535 MVT VT = TLI.getSimpleValueType(Arg.getType());
1537 switch (VT.SimpleTy) {
1538 default: llvm_unreachable("Unexpected value type.");
1541 case MVT::i16: VT = MVT::i32; // fall-through
1542 case MVT::i32: SrcReg = Registers[0][GPRIdx++]; break;
1543 case MVT::i64: SrcReg = Registers[1][GPRIdx++]; break;
1544 case MVT::f16: SrcReg = Registers[2][FPRIdx++]; break;
1545 case MVT::f32: SrcReg = Registers[3][FPRIdx++]; break;
1546 case MVT::f64: SrcReg = Registers[4][FPRIdx++]; break;
1549 // Skip unused arguments.
1550 if (Arg.use_empty()) {
1551 UpdateValueMap(&Arg, 0);
1555 const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
1556 unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
1557 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
1558 // Without this, EmitLiveInCopies may eliminate the livein if its only
1559 // use is a bitcast (which isn't turned into an instruction).
1560 unsigned ResultReg = createResultReg(RC);
1561 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1562 TII.get(TargetOpcode::COPY), ResultReg)
1563 .addReg(DstReg, getKillRegState(true));
1564 UpdateValueMap(&Arg, ResultReg);
1569 bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI,
1570 SmallVectorImpl<MVT> &OutVTs,
1571 unsigned &NumBytes) {
1572 CallingConv::ID CC = CLI.CallConv;
1573 SmallVector<CCValAssign, 16> ArgLocs;
1574 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
1575 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
1577 // Get a count of how many bytes are to be pushed on the stack.
1578 NumBytes = CCInfo.getNextStackOffset();
1580 // Issue CALLSEQ_START
1581 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
1582 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
1585 // Process the args.
1586 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1587 CCValAssign &VA = ArgLocs[i];
1588 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
1589 MVT ArgVT = OutVTs[VA.getValNo()];
1591 unsigned ArgReg = getRegForValue(ArgVal);
1595 // Handle arg promotion: SExt, ZExt, AExt.
1596 switch (VA.getLocInfo()) {
1597 case CCValAssign::Full:
1599 case CCValAssign::SExt: {
1600 MVT DestVT = VA.getLocVT();
1602 ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
1607 case CCValAssign::AExt:
1608 // Intentional fall-through.
1609 case CCValAssign::ZExt: {
1610 MVT DestVT = VA.getLocVT();
1612 ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
1618 llvm_unreachable("Unknown arg promotion!");
1621 // Now copy/store arg to correct locations.
1622 if (VA.isRegLoc() && !VA.needsCustom()) {
1623 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1624 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
1625 CLI.OutRegs.push_back(VA.getLocReg());
1626 } else if (VA.needsCustom()) {
1627 // FIXME: Handle custom args.
1630 assert(VA.isMemLoc() && "Assuming store on stack.");
1632 // Don't emit stores for undef values.
1633 if (isa<UndefValue>(ArgVal))
1636 // Need to store on the stack.
1637 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
1639 unsigned BEAlign = 0;
1640 if (ArgSize < 8 && !Subtarget->isLittleEndian())
1641 BEAlign = 8 - ArgSize;
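      // e.g. a 4-byte argument gets BEAlign = 4 and is therefore stored into
      // the higher-addressed half of its 8-byte slot on big-endian targets.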
      Address Addr;
      Addr.setKind(Address::RegBase);
1645 Addr.setReg(AArch64::SP);
1646 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
1648 unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
1649 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
1650 MachinePointerInfo::getStack(Addr.getOffset()),
1651 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
1653 if (!EmitStore(ArgVT, ArgReg, Addr, MMO))
1660 bool AArch64FastISel::FinishCall(CallLoweringInfo &CLI, MVT RetVT,
1661 unsigned NumBytes) {
1662 CallingConv::ID CC = CLI.CallConv;
1664 // Issue CALLSEQ_END
1665 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
1666 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
1667 .addImm(NumBytes).addImm(0);
1669 // Now the return value.
1670 if (RetVT != MVT::isVoid) {
1671 SmallVector<CCValAssign, 16> RVLocs;
1672 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
1673 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
1675 // Only handle a single return value.
1676 if (RVLocs.size() != 1)
1679 // Copy all of the result registers out of their specified physreg.
1680 MVT CopyVT = RVLocs[0].getValVT();
1681 unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
1682 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1683 TII.get(TargetOpcode::COPY), ResultReg)
1684 .addReg(RVLocs[0].getLocReg());
1685 CLI.InRegs.push_back(RVLocs[0].getLocReg());
1687 CLI.ResultReg = ResultReg;
1688 CLI.NumResultRegs = 1;
1694 bool AArch64FastISel::FastLowerCall(CallLoweringInfo &CLI) {
1695 CallingConv::ID CC = CLI.CallConv;
1696 bool IsTailCall = CLI.IsTailCall;
1697 bool IsVarArg = CLI.IsVarArg;
1698 const Value *Callee = CLI.Callee;
1699 const char *SymName = CLI.SymName;
1701 // Allow SelectionDAG isel to handle tail calls.
1705 CodeModel::Model CM = TM.getCodeModel();
1706 // Only support the small and large code model.
1707 if (CM != CodeModel::Small && CM != CodeModel::Large)
1710 // FIXME: Add large code model support for ELF.
1711 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
1714 // Let SDISel handle vararg functions.
1718 // FIXME: Only handle *simple* calls for now.
1720 if (CLI.RetTy->isVoidTy())
1721 RetVT = MVT::isVoid;
1722 else if (!isTypeLegal(CLI.RetTy, RetVT))
1725 for (auto Flag : CLI.OutFlags)
1726 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
1729 // Set up the argument vectors.
1730 SmallVector<MVT, 16> OutVTs;
1731 OutVTs.reserve(CLI.OutVals.size());
1733 for (auto *Val : CLI.OutVals) {
1735 if (!isTypeLegal(Val->getType(), VT) &&
1736 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
1739 // We don't handle vector parameters yet.
1740 if (VT.isVector() || VT.getSizeInBits() > 64)
1743 OutVTs.push_back(VT);
1747 if (!ComputeCallAddress(Callee, Addr))
1750 // Handle the arguments now that we've gotten them.
1752 if (!ProcessCallArgs(CLI, OutVTs, NumBytes))
1756 MachineInstrBuilder MIB;
1757 if (CM == CodeModel::Small) {
1758 unsigned CallOpc = Addr.getReg() ? AArch64::BLR : AArch64::BL;
1759 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
1761 MIB.addExternalSymbol(SymName, 0);
1762 else if (Addr.getGlobalValue())
1763 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
1764 else if (Addr.getReg())
1765 MIB.addReg(Addr.getReg());
1769 unsigned CallReg = 0;
1771 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
1772 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
1774 .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
1776 CallReg = createResultReg(&AArch64::GPR64RegClass);
1777 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
1780 .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
1782 } else if (Addr.getGlobalValue()) {
1783 CallReg = AArch64MaterializeGV(Addr.getGlobalValue());
1784 } else if (Addr.getReg())
1785 CallReg = Addr.getReg();
1790 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1791 TII.get(AArch64::BLR)).addReg(CallReg);
1794 // Add implicit physical register uses to the call.
1795 for (auto Reg : CLI.OutRegs)
1796 MIB.addReg(Reg, RegState::Implicit);
1798 // Add a register mask with the call-preserved registers.
1799 // Proper defs for return values will be added by setPhysRegsDeadExcept().
1800 MIB.addRegMask(TRI.getCallPreservedMask(CC));
1804 // Finish off the call including any return values.
1805 return FinishCall(CLI, RetVT, NumBytes);
bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
  // Guard against an unknown (zero) alignment before dividing by it.
  if (!Alignment)
    return false;
  return Len / Alignment <= 4;
}
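// TryEmitSmallMemCpy expands a small, constant-length memcpy inline; e.g. a
// 32-byte copy with 8-byte alignment becomes four 8-byte load/store pairs.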
1815 bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
1816 uint64_t Len, unsigned Alignment) {
1817 // Make sure we don't bloat code by inlining very large memcpy's.
1818 if (!IsMemCpySmall(Len, Alignment))
1821 int64_t UnscaledOffset = 0;
1822 Address OrigDest = Dest;
1823 Address OrigSrc = Src;
1827 if (!Alignment || Alignment >= 8) {
1838 // Bound based on alignment.
1839 if (Len >= 4 && Alignment == 4)
1841 else if (Len >= 2 && Alignment == 2)
1850 RV = EmitLoad(VT, ResultReg, Src);
1854 RV = EmitStore(VT, ResultReg, Dest);
1858 int64_t Size = VT.getSizeInBits() / 8;
1860 UnscaledOffset += Size;
1862 // We need to recompute the unscaled offset for each iteration.
1863 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
1864 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
1870 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
1871 /// into the user. The condition code will only be updated on success.
1872 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
1873 const Instruction *I,
1874 const Value *Cond) {
1875 if (!isa<ExtractValueInst>(Cond))
1878 const auto *EV = cast<ExtractValueInst>(Cond);
1879 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
1882 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
1884 const Function *Callee = II->getCalledFunction();
1886 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
1887 if (!isTypeLegal(RetTy, RetVT))
1890 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1893 AArch64CC::CondCode TmpCC;
1894 switch (II->getIntrinsicID()) {
1895 default: return false;
1896 case Intrinsic::sadd_with_overflow:
1897 case Intrinsic::ssub_with_overflow: TmpCC = AArch64CC::VS; break;
1898 case Intrinsic::uadd_with_overflow: TmpCC = AArch64CC::HS; break;
1899 case Intrinsic::usub_with_overflow: TmpCC = AArch64CC::LO; break;
1900 case Intrinsic::smul_with_overflow:
1901 case Intrinsic::umul_with_overflow: TmpCC = AArch64CC::NE; break;
1904 // Check if both instructions are in the same basic block.
1905 if (II->getParent() != I->getParent())
1908 // Make sure nothing is in the way
1909 BasicBlock::const_iterator Start = I;
1910 BasicBlock::const_iterator End = II;
1911 for (auto Itr = std::prev(Start); Itr != End; --Itr) {
1912 // We only expect extractvalue instructions between the intrinsic and the
1913 // instruction to be selected.
1914 if (!isa<ExtractValueInst>(Itr))
1917 // Check that the extractvalue operand comes from the intrinsic.
1918 const auto *EVI = cast<ExtractValueInst>(Itr);
1919 if (EVI->getAggregateOperand() != II)
1927 bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
1928 // FIXME: Handle more intrinsics.
1929 switch (II->getIntrinsicID()) {
1930 default: return false;
1931 case Intrinsic::frameaddress: {
1932 MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
1933 MFI->setFrameAddressIsTaken(true);
1935 const AArch64RegisterInfo *RegInfo =
1936 static_cast<const AArch64RegisterInfo *>(
1937 TM.getSubtargetImpl()->getRegisterInfo());
1938 unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
1939 unsigned SrcReg = FramePtr;
1941 // Recursively load frame address
1947 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
1949 DestReg = createResultReg(&AArch64::GPR64RegClass);
1950 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1951 TII.get(AArch64::LDRXui), DestReg)
1952 .addReg(SrcReg).addImm(0);
1956 UpdateValueMap(II, SrcReg);
1959 case Intrinsic::memcpy:
1960 case Intrinsic::memmove: {
1961 const auto *MTI = cast<MemTransferInst>(II);
1962 // Don't handle volatile.
1963 if (MTI->isVolatile())
1966 // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
1967 // we would emit dead code because we don't currently handle memmoves.
1968 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
1969 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
      // Small memcpy's are common enough that we want to do them without a call
      // if possible.
1972 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
1973 unsigned Alignment = MTI->getAlignment();
1974 if (IsMemCpySmall(Len, Alignment)) {
1976 if (!ComputeAddress(MTI->getRawDest(), Dest) ||
1977 !ComputeAddress(MTI->getRawSource(), Src))
1979 if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
1984 if (!MTI->getLength()->getType()->isIntegerTy(64))
1987 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
1988 // Fast instruction selection doesn't support the special
1992 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
1993 return LowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
1995 case Intrinsic::memset: {
1996 const MemSetInst *MSI = cast<MemSetInst>(II);
1997 // Don't handle volatile.
1998 if (MSI->isVolatile())
2001 if (!MSI->getLength()->getType()->isIntegerTy(64))
2004 if (MSI->getDestAddressSpace() > 255)
2005 // Fast instruction selection doesn't support the special
2009 return LowerCallTo(II, "memset", II->getNumArgOperands() - 2);
2011 case Intrinsic::trap: {
2012 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
2016 case Intrinsic::sqrt: {
2017 Type *RetTy = II->getCalledFunction()->getReturnType();
2019 MVT VT;
2020 if (!isTypeLegal(RetTy, VT))
2021 return false;
2023 unsigned Op0Reg = getRegForValue(II->getOperand(0));
2024 if (!Op0Reg)
2025 return false;
2026 bool Op0IsKill = hasTrivialKill(II->getOperand(0));
2028 unsigned ResultReg = FastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
2029 if (!ResultReg)
2030 return false;
2032 UpdateValueMap(II, ResultReg);
2033 return true;
2034 }
2035 case Intrinsic::sadd_with_overflow:
2036 case Intrinsic::uadd_with_overflow:
2037 case Intrinsic::ssub_with_overflow:
2038 case Intrinsic::usub_with_overflow:
2039 case Intrinsic::smul_with_overflow:
2040 case Intrinsic::umul_with_overflow: {
2041 // This implements the basic lowering of the xalu with overflow intrinsics.
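// The value result is produced either by a flag-setting ADDS/SUBS or, for the
// multiplies, by a multiply followed by an explicit compare; the overflow bit
// is then materialized with CSINC into the register that follows the value
// result, which is what UpdateValueMap(II, ResultReg, 2) below expects.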
2042 const Function *Callee = II->getCalledFunction();
2043 auto *Ty = cast<StructType>(Callee->getReturnType());
2044 Type *RetTy = Ty->getTypeAtIndex(0U);
2045 Type *CondTy = Ty->getTypeAtIndex(1);
2047 MVT VT;
2048 if (!isTypeLegal(RetTy, VT))
2049 return false;
2051 if (VT != MVT::i32 && VT != MVT::i64)
2052 return false;
2054 const Value *LHS = II->getArgOperand(0);
2055 const Value *RHS = II->getArgOperand(1);
2056 // Canonicalize immediate to the RHS.
2057 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2058 isCommutativeIntrinsic(II))
2059 std::swap(LHS, RHS);
2061 unsigned LHSReg = getRegForValue(LHS);
2062 if (!LHSReg)
2063 return false;
2064 bool LHSIsKill = hasTrivialKill(LHS);
2066 // Check if the immediate can be encoded in the instruction and if we should
2067 // invert the instruction (adds -> subs) to handle negative immediates.
2068 bool UseImm = false;
2069 bool UseInverse = false;
2070 uint64_t Imm = 0;
2071 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
2072 if (C->isNegative()) {
2073 UseInverse = true;
2074 Imm = -(C->getSExtValue());
2075 } else
2076 Imm = C->getZExtValue();
2078 if (isUInt<12>(Imm))
2079 UseImm = true;
2081 UseInverse = UseImm && UseInverse;
2082 }
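// OpcTable below is indexed [subtract][immediate][64-bit]: row 0 holds the
// ADDS forms and row 1 the SUBS forms; the add intrinsics index it with
// UseInverse and the subs with !UseInverse, so a negated immediate flips
// ADDS<->SUBS. The second index picks the rr vs. ri form, the third the W vs.
// X register variant.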
2084 static const unsigned OpcTable[2][2][2] = {
2085 { {AArch64::ADDSWrr, AArch64::ADDSXrr},
2086 {AArch64::ADDSWri, AArch64::ADDSXri} },
2087 { {AArch64::SUBSWrr, AArch64::SUBSXrr},
2088 {AArch64::SUBSWri, AArch64::SUBSXri} }
2089 };
2090 unsigned Opc = 0;
2091 unsigned MulReg = 0;
2092 unsigned RHSReg = 0;
2093 bool RHSIsKill = false;
2094 AArch64CC::CondCode CC = AArch64CC::Invalid;
2095 bool Is64Bit = VT == MVT::i64;
2096 switch (II->getIntrinsicID()) {
2097 default: llvm_unreachable("Unexpected intrinsic!");
2098 case Intrinsic::sadd_with_overflow:
2099 Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
2100 case Intrinsic::uadd_with_overflow:
2101 Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::HS; break;
2102 case Intrinsic::ssub_with_overflow:
2103 Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
2104 case Intrinsic::usub_with_overflow:
2105 Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::LO; break;
2106 case Intrinsic::smul_with_overflow: {
2107 CC = AArch64CC::NE;
2108 RHSReg = getRegForValue(RHS);
2109 if (!RHSReg)
2110 return false;
2111 RHSIsKill = hasTrivialKill(RHS);
2113 if (VT == MVT::i32) {
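// 32-bit signed multiply: SMULL yields the full 64-bit product. There is no
// overflow iff the high word equals the low word arithmetically shifted right
// by 31 (i.e. the product sign-extends from bit 31), so compare the two with a
// SUBS using an ASR #31 shifted operand and test NE.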
2114 MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2115 unsigned ShiftReg = Emit_LSR_ri(MVT::i64, MulReg, false, 32);
2116 MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
2117 AArch64::sub_32);
2118 ShiftReg = FastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
2119 AArch64::sub_32);
2120 unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
2121 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2122 TII.get(AArch64::SUBSWrs), CmpReg)
2123 .addReg(ShiftReg, getKillRegState(true))
2124 .addReg(MulReg, getKillRegState(false))
2125 .addImm(159); // 159 <-> asr #31
2126 } else {
2127 assert(VT == MVT::i64 && "Unexpected value type.");
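// 64-bit signed multiply: MUL gives the low 64 bits and SMULH (MULHS) the high
// 64 bits. No overflow iff the high half equals the low half shifted right
// arithmetically by 63, so compare them with SUBS ... asr #63 and test NE.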
2128 MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2129 unsigned SMULHReg = FastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
2130 RHSReg, RHSIsKill);
2131 unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
2132 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2133 TII.get(AArch64::SUBSXrs), CmpReg)
2134 .addReg(SMULHReg, getKillRegState(true))
2135 .addReg(MulReg, getKillRegState(false))
2136 .addImm(191); // 191 <-> asr #63
2137 }
2138 break;
2139 }
2140 case Intrinsic::umul_with_overflow: {
2141 CC = AArch64CC::NE;
2142 RHSReg = getRegForValue(RHS);
2143 if (!RHSReg)
2144 return false;
2145 RHSIsKill = hasTrivialKill(RHS);
2147 if (VT == MVT::i32) {
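// 32-bit unsigned multiply: UMULL yields the full 64-bit product; it overflows
// iff the upper 32 bits are non-zero, so compare them against XZR with a
// LSR #32 shifted operand and test NE.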
2148 MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2149 unsigned CmpReg = createResultReg(TLI.getRegClassFor(MVT::i64));
2150 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2151 TII.get(AArch64::SUBSXrs), CmpReg)
2152 .addReg(AArch64::XZR, getKillRegState(true))
2153 .addReg(MulReg, getKillRegState(false))
2154 .addImm(96); // 96 <-> lsr #32
2155 MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
2156 AArch64::sub_32);
2157 } else {
2158 assert(VT == MVT::i64 && "Unexpected value type.");
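// 64-bit unsigned multiply: the product overflows iff UMULH (MULHU), the high
// 64 bits, is non-zero, so compare it against XZR and test NE.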
2159 MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2160 unsigned UMULHReg = FastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
2161 RHSReg, RHSIsKill);
2162 unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
2163 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2164 TII.get(AArch64::SUBSXrr), CmpReg)
2165 .addReg(AArch64::XZR, getKillRegState(true))
2166 .addReg(UMULHReg, getKillRegState(false));
2167 }
2168 break;
2169 }
2170 }
2172 if (!UseImm) {
2173 RHSReg = getRegForValue(RHS);
2174 if (!RHSReg)
2175 return false;
2176 RHSIsKill = hasTrivialKill(RHS);
2177 }
2179 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
2180 if (Opc) {
2181 MachineInstrBuilder MIB;
2182 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
2183 ResultReg)
2184 .addReg(LHSReg, getKillRegState(LHSIsKill));
2185 if (UseImm) {
2186 MIB.addImm(Imm);
2187 MIB.addImm(0); // Shift amount (LSL #0) for the 12-bit immediate form.
2188 } else
2189 MIB.addReg(RHSReg, getKillRegState(RHSIsKill));
2190 } else
2192 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2193 TII.get(TargetOpcode::COPY), ResultReg)
2194 .addReg(MulReg);
2196 unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy);
2197 assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
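// Materialize the overflow flag into the second result register. CSINC with
// both sources WZR and the inverted condition is the CSET alias: it writes 1
// when CC holds and 0 otherwise.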
2198 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2199 ResultReg2)
2200 .addReg(AArch64::WZR, getKillRegState(true))
2201 .addReg(AArch64::WZR, getKillRegState(true))
2202 .addImm(getInvertedCondCode(CC));
2204 UpdateValueMap(II, ResultReg, 2);
2205 return true;
2206 }
2207 }
2208 return false;
2209 }
2211 bool AArch64FastISel::SelectRet(const Instruction *I) {
2212 const ReturnInst *Ret = cast<ReturnInst>(I);
2213 const Function &F = *I->getParent()->getParent();
2215 if (!FuncInfo.CanLowerReturn)
2216 return false;
2218 if (F.isVarArg())
2219 return false;
2221 // Build a list of return value registers.
2222 SmallVector<unsigned, 4> RetRegs;
2224 if (Ret->getNumOperands() > 0) {
2225 CallingConv::ID CC = F.getCallingConv();
2226 SmallVector<ISD::OutputArg, 4> Outs;
2227 GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
2229 // Analyze operands of the call, assigning locations to each operand.
2230 SmallVector<CCValAssign, 16> ValLocs;
2231 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
2232 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
2233 : RetCC_AArch64_AAPCS;
2234 CCInfo.AnalyzeReturn(Outs, RetCC);
2236 // Only handle a single return value for now.
2237 if (ValLocs.size() != 1)
2238 return false;
2240 CCValAssign &VA = ValLocs[0];
2241 const Value *RV = Ret->getOperand(0);
2243 // Don't bother handling odd stuff for now.
2244 if (VA.getLocInfo() != CCValAssign::Full)
2245 return false;
2246 // Only handle register returns for now.
2247 if (!VA.isRegLoc())
2248 return false;
2249 unsigned Reg = getRegForValue(RV);
2250 if (Reg == 0)
2251 return false;
2253 unsigned SrcReg = Reg + VA.getValNo();
2254 unsigned DestReg = VA.getLocReg();
2255 // Avoid a cross-class copy. This is very unlikely.
2256 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
2257 return false;
2259 EVT RVEVT = TLI.getValueType(RV->getType());
2260 if (!RVEVT.isSimple())
2261 return false;
2263 // Vectors (of > 1 lane) in big endian need tricky handling.
2264 if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1)
2265 return false;
2267 MVT RVVT = RVEVT.getSimpleVT();
2268 if (RVVT == MVT::f128)
2269 return false;
2270 MVT DestVT = VA.getValVT();
2271 // Special handling for extended integers.
2272 if (RVVT != DestVT) {
2273 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
2274 return false;
2276 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
2277 return false;
2279 bool isZExt = Outs[0].Flags.isZExt();
2280 SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
2281 if (!SrcReg)
2282 return false;
2283 }
2286 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2287 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
2289 // Add register to return instruction.
2290 RetRegs.push_back(VA.getLocReg());
2291 }
2293 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2294 TII.get(AArch64::RET_ReallyLR));
2295 for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
2296 MIB.addReg(RetRegs[i], RegState::Implicit);
2297 return true;
2298 }
2300 bool AArch64FastISel::SelectTrunc(const Instruction *I) {
2301 Type *DestTy = I->getType();
2302 Value *Op = I->getOperand(0);
2303 Type *SrcTy = Op->getType();
2305 EVT SrcEVT = TLI.getValueType(SrcTy, true);
2306 EVT DestEVT = TLI.getValueType(DestTy, true);
2307 if (!SrcEVT.isSimple())
2308 return false;
2309 if (!DestEVT.isSimple())
2310 return false;
2312 MVT SrcVT = SrcEVT.getSimpleVT();
2313 MVT DestVT = DestEVT.getSimpleVT();
2315 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
2316 SrcVT != MVT::i8)
2317 return false;
2318 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
2319 DestVT != MVT::i1)
2320 return false;
2322 unsigned SrcReg = getRegForValue(Op);
2323 if (!SrcReg)
2324 return false;
2326 // If we're truncating from i64 to a smaller non-legal type then generate an
2327 // AND. Otherwise, we know the high bits are undefined and a truncate doesn't
2328 // generate any code.
2329 if (SrcVT == MVT::i64) {
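// For example, trunc i64 %x to i8 becomes an extract of the sub_32 subregister
// followed by "and wN, wN, #0xff"; the AND both truncates and clears the high
// bits, which are otherwise undefined.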
2330 uint64_t Mask = 0;
2331 switch (DestVT.SimpleTy) {
2332 default:
2333 // Trunc i64 to i32 is handled by the target-independent fast-isel.
2334 return false;
2335 case MVT::i1:
2336 Mask = 0x1;
2337 break;
2338 case MVT::i8:
2339 Mask = 0xff;
2340 break;
2341 case MVT::i16:
2342 Mask = 0xffff;
2343 break;
2344 }
2345 // Issue an extract_subreg to get the lower 32-bits.
2346 unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true,
2347 AArch64::sub_32);
2348 MRI.constrainRegClass(Reg32, &AArch64::GPR32RegClass);
2349 // Create the AND instruction which performs the actual truncation.
2350 unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
2351 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
2352 ANDReg)
2353 .addReg(Reg32, getKillRegState(true))
2354 .addImm(AArch64_AM::encodeLogicalImmediate(Mask, 32));
2355 SrcReg = ANDReg;
2356 }
2358 UpdateValueMap(I, SrcReg);
2359 return true;
2360 }
2362 unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
2363 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
2364 DestVT == MVT::i64) &&
2365 "Unexpected value type.");
2366 // Handle i8 and i16 as i32.
2367 if (DestVT == MVT::i8 || DestVT == MVT::i16)
2368 DestVT = MVT::i32;
2370 if (isZExt) {
2371 MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2372 unsigned ResultReg = createResultReg(&AArch64::GPR32spRegClass);
2373 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
2374 ResultReg)
2375 .addReg(SrcReg)
2376 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2378 if (DestVT == MVT::i64) {
2379 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
2380 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
2381 unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2382 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2383 TII.get(AArch64::SUBREG_TO_REG), Reg64)
2384 .addImm(0)
2385 .addReg(ResultReg)
2386 .addImm(AArch64::sub_32);
2387 ResultReg = Reg64;
2388 }
2389 return ResultReg;
2390 } else {
2391 if (DestVT == MVT::i64) {
2392 // FIXME: We're SExt i1 to i64.
2393 return 0;
2394 }
2395 unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
2396 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SBFMWri),
2397 ResultReg)
2398 .addReg(SrcReg)
2399 .addImm(0)
2400 .addImm(0);
2401 return ResultReg;
2402 }
2403 }
2405 unsigned AArch64FastISel::Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2406 unsigned Op1, bool Op1IsKill) {
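// MUL is an alias of MADD with the zero register as the addend, so the plain
// multiply is emitted as MADD Rd, Rn, Rm, WZR/XZR.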
2407 unsigned Opc, ZReg;
2408 switch (RetVT.SimpleTy) {
2409 default: return 0;
2410 case MVT::i8:
2411 case MVT::i16:
2412 case MVT::i32:
2413 RetVT = MVT::i32;
2414 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
2415 case MVT::i64:
2416 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
2417 }
2419 // Create the base instruction, then add the operands.
2420 unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
2421 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2422 .addReg(Op0, getKillRegState(Op0IsKill))
2423 .addReg(Op1, getKillRegState(Op1IsKill))
2424 .addReg(ZReg, getKillRegState(true));
2426 return ResultReg;
2427 }
2429 unsigned AArch64FastISel::Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2430 unsigned Op1, bool Op1IsKill) {
2431 if (RetVT != MVT::i64)
2432 return 0;
2434 // Create the base instruction, then add the operands.
2435 unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
2436 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SMADDLrrr),
2437 ResultReg)
2438 .addReg(Op0, getKillRegState(Op0IsKill))
2439 .addReg(Op1, getKillRegState(Op1IsKill))
2440 .addReg(AArch64::XZR, getKillRegState(true));
2442 return ResultReg;
2443 }
2445 unsigned AArch64FastISel::Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2446 unsigned Op1, bool Op1IsKill) {
2447 if (RetVT != MVT::i64)
2448 return 0;
2450 // Create the base instruction, then add the operands.
2451 unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
2452 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::UMADDLrrr),
2453 ResultReg)
2454 .addReg(Op0, getKillRegState(Op0IsKill))
2455 .addReg(Op1, getKillRegState(Op1IsKill))
2456 .addReg(AArch64::XZR, getKillRegState(true));
2458 return ResultReg;
2459 }
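// The constant-shift emitters below lower shifts to bitfield-move
// instructions: e.g. a 32-bit "lsl wd, wn, #c" is
// "ubfm wd, wn, #(-c mod 32), #(31 - c)", and "lsr"/"asr" are UBFM/SBFM with
// immr = c and imms = width - 1. For the i8/i16 cases the narrow width
// (7 or 15) is used so the bits above the result are cleared.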
2461 unsigned AArch64FastISel::Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2462 uint64_t Shift) {
2463 unsigned Opc, ImmR, ImmS;
2464 switch (RetVT.SimpleTy) {
2465 default: return 0;
2466 case MVT::i8:
2467 Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 7 - Shift; break;
2468 case MVT::i16:
2469 Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 15 - Shift; break;
2470 case MVT::i32:
2471 Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 31 - Shift; break;
2472 case MVT::i64:
2473 Opc = AArch64::UBFMXri; ImmR = -Shift % 64; ImmS = 63 - Shift; break;
2474 }
2476 RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
2477 return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, ImmR,
2478 ImmS);
2479 }
2481 unsigned AArch64FastISel::Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2482 uint64_t Shift) {
2483 unsigned Opc, ImmS;
2484 switch (RetVT.SimpleTy) {
2485 default: return 0;
2486 case MVT::i8: Opc = AArch64::UBFMWri; ImmS = 7; break;
2487 case MVT::i16: Opc = AArch64::UBFMWri; ImmS = 15; break;
2488 case MVT::i32: Opc = AArch64::UBFMWri; ImmS = 31; break;
2489 case MVT::i64: Opc = AArch64::UBFMXri; ImmS = 63; break;
2490 }
2492 RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
2493 return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
2494 ImmS);
2495 }
2497 unsigned AArch64FastISel::Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2498 uint64_t Shift) {
2499 unsigned Opc, ImmS;
2500 switch (RetVT.SimpleTy) {
2501 default: return 0;
2502 case MVT::i8: Opc = AArch64::SBFMWri; ImmS = 7; break;
2503 case MVT::i16: Opc = AArch64::SBFMWri; ImmS = 15; break;
2504 case MVT::i32: Opc = AArch64::SBFMWri; ImmS = 31; break;
2505 case MVT::i64: Opc = AArch64::SBFMXri; ImmS = 63; break;
2506 }
2508 RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
2509 return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
2510 ImmS);
2511 }
2513 unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
2514 bool isZExt) {
2515 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
2517 // FastISel does not have plumbing to deal with extensions where the SrcVT or
2518 // DestVT are odd things, so test to make sure that they are both types we can
2519 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
2520 // bail out to SelectionDAG.
2521 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
2522 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
2523 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
2524 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
2525 return 0;
2527 unsigned Opc;
2528 unsigned Imm = 0;
2530 switch (SrcVT.SimpleTy) {
2531 default:
2532 return 0;
2533 case MVT::i1:
2534 return Emiti1Ext(SrcReg, DestVT, isZExt);
2535 case MVT::i8:
2536 if (DestVT == MVT::i64)
2537 Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2538 else
2539 Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
2540 Imm = 7;
2541 break;
2542 case MVT::i16:
2543 if (DestVT == MVT::i64)
2544 Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2545 else
2546 Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
2547 Imm = 15;
2548 break;
2549 case MVT::i32:
2550 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
2551 Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2552 Imm = 31;
2553 break;
2554 }
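// With immr = 0 and imms = Imm (the source width minus one), UBFM/SBFM are the
// canonical uxtb/uxth/sxtb/sxth/sxtw forms (a plain bit-field extract for the
// i32 zero-extend), so a single bitfield-move performs the whole extension.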
2556 // Handle i8 and i16 as i32.
2557 if (DestVT == MVT::i8 || DestVT == MVT::i16)
2558 DestVT = MVT::i32;
2559 else if (DestVT == MVT::i64) {
2560 unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2561 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2562 TII.get(AArch64::SUBREG_TO_REG), Src64)
2563 .addImm(0)
2564 .addReg(SrcReg)
2565 .addImm(AArch64::sub_32);
2566 SrcReg = Src64;
2567 }
2569 unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
2570 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2571 .addReg(SrcReg)
2572 .addImm(0)
2573 .addImm(Imm);
2575 return ResultReg;
2576 }
2578 bool AArch64FastISel::SelectIntExt(const Instruction *I) {
2579 // On ARM, in general, integer casts don't involve legal types; this code
2580 // handles promotable integers. The high bits for a type smaller than
2581 // the register size are assumed to be undefined.
2582 Type *DestTy = I->getType();
2583 Value *Src = I->getOperand(0);
2584 Type *SrcTy = Src->getType();
2586 bool isZExt = isa<ZExtInst>(I);
2587 unsigned SrcReg = getRegForValue(Src);
2588 if (!SrcReg)
2589 return false;
2591 EVT SrcEVT = TLI.getValueType(SrcTy, true);
2592 EVT DestEVT = TLI.getValueType(DestTy, true);
2593 if (!SrcEVT.isSimple())
2594 return false;
2595 if (!DestEVT.isSimple())
2596 return false;
2598 MVT SrcVT = SrcEVT.getSimpleVT();
2599 MVT DestVT = DestEVT.getSimpleVT();
2600 unsigned ResultReg = 0;
2602 // Check if it is an argument and if it is already zero/sign-extended.
2603 if (const auto *Arg = dyn_cast<Argument>(Src)) {
2604 if ((isZExt && Arg->hasZExtAttr()) || (!isZExt && Arg->hasSExtAttr())) {
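// The ABI already produced a correctly extended value, so no extend
// instruction is needed: an i64 destination only needs SUBREG_TO_REG to place
// the 32-bit vreg in a 64-bit one, and anything narrower can reuse SrcReg
// directly.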
2605 if (DestVT == MVT::i64) {
2606 ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
2607 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2608 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
2609 .addImm(0)
2610 .addReg(SrcReg)
2611 .addImm(AArch64::sub_32);
2612 } else
2613 ResultReg = SrcReg;
2614 }
2615 }
2617 if (!ResultReg)
2618 ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
2620 if (!ResultReg)
2621 return false;
2623 UpdateValueMap(I, ResultReg);
2624 return true;
2625 }
2627 bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
2628 EVT DestEVT = TLI.getValueType(I->getType(), true);
2629 if (!DestEVT.isSimple())
2630 return false;
2632 MVT DestVT = DestEVT.getSimpleVT();
2633 if (DestVT != MVT::i64 && DestVT != MVT::i32)
2634 return false;
2636 unsigned DivOpc;
2637 bool is64bit = (DestVT == MVT::i64);
2638 switch (ISDOpcode) {
2639 default:
2640 return false;
2641 case ISD::SREM:
2642 DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
2643 break;
2644 case ISD::UREM:
2645 DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
2646 break;
2647 }
2648 unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
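// The emitted sequence is roughly "sdiv/udiv q, n, d" followed by
// "msub r, q, d, n", i.e. r = n - q * d, which is the remainder.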
2649 unsigned Src0Reg = getRegForValue(I->getOperand(0));
2650 if (!Src0Reg)
2651 return false;
2653 unsigned Src1Reg = getRegForValue(I->getOperand(1));
2654 if (!Src1Reg)
2655 return false;
2657 unsigned QuotReg = createResultReg(TLI.getRegClassFor(DestVT));
2658 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), QuotReg)
2659 .addReg(Src0Reg)
2660 .addReg(Src1Reg);
2661 // The remainder is computed as numerator - (quotient * denominator) using the
2662 // MSUB instruction.
2663 unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
2664 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), ResultReg)
2665 .addReg(QuotReg)
2666 .addReg(Src1Reg)
2667 .addReg(Src0Reg);
2668 UpdateValueMap(I, ResultReg);
2669 return true;
2670 }
2672 bool AArch64FastISel::SelectMul(const Instruction *I) {
2673 EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2674 if (!SrcEVT.isSimple())
2675 return false;
2676 MVT SrcVT = SrcEVT.getSimpleVT();
2678 // Must be simple value type. Don't handle vectors.
2679 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
2680 SrcVT != MVT::i8)
2681 return false;
2683 unsigned Src0Reg = getRegForValue(I->getOperand(0));
2684 if (!Src0Reg)
2685 return false;
2686 bool Src0IsKill = hasTrivialKill(I->getOperand(0));
2688 unsigned Src1Reg = getRegForValue(I->getOperand(1));
2689 if (!Src1Reg)
2690 return false;
2691 bool Src1IsKill = hasTrivialKill(I->getOperand(1));
2693 unsigned ResultReg =
2694 Emit_MUL_rr(SrcVT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
2696 if (!ResultReg)
2697 return false;
2699 UpdateValueMap(I, ResultReg);
2700 return true;
2701 }
2703 bool AArch64FastISel::SelectShift(const Instruction *I, bool IsLeftShift,
2704 bool IsArithmetic) {
2705 EVT RetEVT = TLI.getValueType(I->getType(), true);
2706 if (!RetEVT.isSimple())
2707 return false;
2708 MVT RetVT = RetEVT.getSimpleVT();
2710 if (!isa<ConstantInt>(I->getOperand(1)))
2711 return false;
2713 unsigned Op0Reg = getRegForValue(I->getOperand(0));
2714 if (!Op0Reg)
2715 return false;
2716 bool Op0IsKill = hasTrivialKill(I->getOperand(0));
2718 uint64_t ShiftVal = cast<ConstantInt>(I->getOperand(1))->getZExtValue();
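// Only constant shift amounts are handled here (checked above); they map onto
// the UBFM/SBFM-based Emit_LSL/LSR/ASR helpers. Variable shifts are left to
// the generic selector / SelectionDAG.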
2720 unsigned ResultReg;
2721 if (IsLeftShift)
2722 ResultReg = Emit_LSL_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2723 else {
2724 if (IsArithmetic)
2725 ResultReg = Emit_ASR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2726 else
2727 ResultReg = Emit_LSR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2728 }
2730 if (!ResultReg)
2731 return false;
2733 UpdateValueMap(I, ResultReg);
2734 return true;
2735 }
2737 bool AArch64FastISel::SelectBitCast(const Instruction *I) {
2738 MVT RetVT, SrcVT;
2740 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
2741 return false;
2742 if (!isTypeLegal(I->getType(), RetVT))
2743 return false;
2745 unsigned Opc;
2746 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
2747 Opc = AArch64::FMOVWSr;
2748 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
2749 Opc = AArch64::FMOVXDr;
2750 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
2751 Opc = AArch64::FMOVSWr;
2752 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
2753 Opc = AArch64::FMOVDXr;
2754 else
2755 return false;
2757 unsigned Op0Reg = getRegForValue(I->getOperand(0));
2758 if (!Op0Reg)
2759 return false;
2760 bool Op0IsKill = hasTrivialKill(I->getOperand(0));
2761 unsigned ResultReg = FastEmitInst_r(Opc, TLI.getRegClassFor(RetVT),
2762 Op0Reg, Op0IsKill);
2764 if (!ResultReg)
2765 return false;
2767 UpdateValueMap(I, ResultReg);
2768 return true;
2769 }
2771 bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) {
2772 switch (I->getOpcode()) {
2773 default:
2774 break;
2775 case Instruction::Load:
2776 return SelectLoad(I);
2777 case Instruction::Store:
2778 return SelectStore(I);
2779 case Instruction::Br:
2780 return SelectBranch(I);
2781 case Instruction::IndirectBr:
2782 return SelectIndirectBr(I);
2783 case Instruction::FCmp:
2784 case Instruction::ICmp:
2785 return SelectCmp(I);
2786 case Instruction::Select:
2787 return SelectSelect(I);
2788 case Instruction::FPExt:
2789 return SelectFPExt(I);
2790 case Instruction::FPTrunc:
2791 return SelectFPTrunc(I);
2792 case Instruction::FPToSI:
2793 return SelectFPToInt(I, /*Signed=*/true);
2794 case Instruction::FPToUI:
2795 return SelectFPToInt(I, /*Signed=*/false);
2796 case Instruction::SIToFP:
2797 return SelectIntToFP(I, /*Signed=*/true);
2798 case Instruction::UIToFP:
2799 return SelectIntToFP(I, /*Signed=*/false);
2800 case Instruction::SRem:
2801 return SelectRem(I, ISD::SREM);
2802 case Instruction::URem:
2803 return SelectRem(I, ISD::UREM);
2804 case Instruction::Ret:
2805 return SelectRet(I);
2806 case Instruction::Trunc:
2807 return SelectTrunc(I);
2808 case Instruction::ZExt:
2809 case Instruction::SExt:
2810 return SelectIntExt(I);
2812 // FIXME: All of these should really be handled by the target-independent
2813 // selector -> improve FastISel tblgen.
2814 case Instruction::Mul:
2815 return SelectMul(I);
2816 case Instruction::Shl:
2817 return SelectShift(I, /*IsLeftShift=*/true, /*IsArithmetic=*/false);
2818 case Instruction::LShr:
2819 return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/false);
2820 case Instruction::AShr:
2821 return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/true);
2822 case Instruction::BitCast:
2823 return SelectBitCast(I);
2824 }
2825 return false;
2826 // Silence warnings.
2827 (void)&CC_AArch64_DarwinPCS_VarArg;
2828 }
2831 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo,
2832 const TargetLibraryInfo *libInfo) {
2833 return new AArch64FastISel(funcInfo, libInfo);
2834 }