1 //===-- ARM64ISelDAGToDAG.cpp - A dag to dag inst selector for ARM64 ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the ARM64 target.
12 //===----------------------------------------------------------------------===//
14 #define DEBUG_TYPE "arm64-isel"
15 #include "ARM64TargetMachine.h"
16 #include "MCTargetDesc/ARM64AddressingModes.h"
17 #include "llvm/ADT/APSInt.h"
18 #include "llvm/CodeGen/SelectionDAGISel.h"
19 #include "llvm/IR/Function.h" // To access function attributes.
20 #include "llvm/IR/GlobalValue.h"
21 #include "llvm/IR/Intrinsics.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
29 //===--------------------------------------------------------------------===//
30 /// ARM64DAGToDAGISel - ARM64 specific code to select ARM64 machine
31 /// instructions for SelectionDAG operations.
35 class ARM64DAGToDAGISel : public SelectionDAGISel {
36 ARM64TargetMachine &TM;
38 /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can
39 /// make the right decision when generating code for different targets.
40 const ARM64Subtarget *Subtarget;
45 explicit ARM64DAGToDAGISel(ARM64TargetMachine &tm, CodeGenOpt::Level OptLevel)
46 : SelectionDAGISel(tm, OptLevel), TM(tm),
47 Subtarget(&TM.getSubtarget<ARM64Subtarget>()), ForCodeSize(false) {}
49 virtual const char *getPassName() const {
50 return "ARM64 Instruction Selection";
53 virtual bool runOnMachineFunction(MachineFunction &MF) {
54 AttributeSet FnAttrs = MF.getFunction()->getAttributes();
56 FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
57 Attribute::OptimizeForSize) ||
58 FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
59 return SelectionDAGISel::runOnMachineFunction(MF);
62 SDNode *Select(SDNode *Node);
64 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
65 /// inline asm expressions.
66 virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
68 std::vector<SDValue> &OutOps);
70 SDNode *SelectMLAV64LaneV128(SDNode *N);
71 SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N);
72 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
73 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
74 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
75 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
76 return SelectShiftedRegister(N, false, Reg, Shift);
78 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
79 return SelectShiftedRegister(N, true, Reg, Shift);
81 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
82 return SelectAddrModeIndexed(N, 1, Base, OffImm);
84 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
85 return SelectAddrModeIndexed(N, 2, Base, OffImm);
87 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
88 return SelectAddrModeIndexed(N, 4, Base, OffImm);
90 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
91 return SelectAddrModeIndexed(N, 8, Base, OffImm);
93 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
94 return SelectAddrModeIndexed(N, 16, Base, OffImm);
96 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
97 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
99 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
100 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
102 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
103 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
105 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
106 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
108 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
109 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
112 bool SelectAddrModeRO8(SDValue N, SDValue &Base, SDValue &Offset,
114 return SelectAddrModeRO(N, 1, Base, Offset, Imm);
116 bool SelectAddrModeRO16(SDValue N, SDValue &Base, SDValue &Offset,
118 return SelectAddrModeRO(N, 2, Base, Offset, Imm);
120 bool SelectAddrModeRO32(SDValue N, SDValue &Base, SDValue &Offset,
122 return SelectAddrModeRO(N, 4, Base, Offset, Imm);
124 bool SelectAddrModeRO64(SDValue N, SDValue &Base, SDValue &Offset,
126 return SelectAddrModeRO(N, 8, Base, Offset, Imm);
128 bool SelectAddrModeRO128(SDValue N, SDValue &Base, SDValue &Offset,
130 return SelectAddrModeRO(N, 16, Base, Offset, Imm);
132 bool SelectAddrModeNoIndex(SDValue N, SDValue &Val);
134 /// Form sequences of consecutive 64/128-bit registers for use in NEON
135 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
136 /// between 1 and 4 elements. If it contains a single element, that element is
137 /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
138 SDValue createDTuple(ArrayRef<SDValue> Vecs);
139 SDValue createQTuple(ArrayRef<SDValue> Vecs);
141 /// Generic helper for the createDTuple/createQTuple
142 /// functions. Those should almost always be called instead.
143 SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
146 SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
148 SDNode *SelectIndexedLoad(SDNode *N, bool &Done);
150 SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
152 SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
154 SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
155 SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
157 SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node);
158 SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node);
160 SDNode *SelectBitfieldExtractOp(SDNode *N);
161 SDNode *SelectBitfieldInsertOp(SDNode *N);
163 SDNode *SelectLIBM(SDNode *N);
165 // Include the pieces autogenerated from the target description.
166 #include "ARM64GenDAGISel.inc"
169 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
171 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
173 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
175 bool SelectAddrModeRO(SDValue N, unsigned Size, SDValue &Base,
176 SDValue &Offset, SDValue &Imm);
177 bool isWorthFolding(SDValue V) const;
178 bool SelectExtendedSHL(SDValue N, unsigned Size, SDValue &Offset,
181 template<unsigned RegWidth>
182 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
183 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
186 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
188 } // end anonymous namespace
190 /// isIntImmediate - This method tests to see if the node is a constant
191 /// operand. If so, Imm will receive the value.
192 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
193 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
194 Imm = C->getZExtValue();
200 // isIntImmediate - This method tests to see if the operand is a constant.
201 // If so, Imm will receive the value.
202 static bool isIntImmediate(SDValue N, uint64_t &Imm) {
203 return isIntImmediate(N.getNode(), Imm);
206 // isOpcWithIntImmediate - This method tests to see if the node is a specific
207 // opcode and that it has an immediate integer right operand.
208 // If so, Imm will receive the value.
209 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
211 return N->getOpcode() == Opc &&
212 isIntImmediate(N->getOperand(1).getNode(), Imm);
215 bool ARM64DAGToDAGISel::SelectAddrModeNoIndex(SDValue N, SDValue &Val) {
216 EVT ValTy = N.getValueType();
217 if (ValTy != MVT::i64)
223 bool ARM64DAGToDAGISel::SelectInlineAsmMemoryOperand(
224 const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
225 assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
226 // Require the address to be in a register. That is safe for all ARM64
227 // variants and it is hard to do anything much smarter without knowing
228 // how the operand is used.
229 OutOps.push_back(Op);
233 /// SelectArithImmed - Select an immediate value that can be represented as
234 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
235 /// Val set to the 12-bit value and Shift set to the shifter operand.
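// Illustrative worked example (added for clarity, not from the original
// source): the constant 0x123 is encodable directly (Val = 0x123, LSL #0),
// and 0x123000 == 0x123 << 12 is encodable as Val = 0x123 with LSL #12,
// whereas 0x1230000 still has significant bits above bit 23 after accounting
// for the shift, so the match fails.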
236 bool ARM64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
238 // This function is called from the addsub_shifted_imm ComplexPattern,
239 // which lists [imm] as the list of opcodes it is interested in; however,
240 // we still need to check whether the operand is actually an immediate
241 // here because the ComplexPattern opcode list is only used in
242 // root-level opcode matching.
243 if (!isa<ConstantSDNode>(N.getNode()))
246 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
249 if (Immed >> 12 == 0) {
251 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
257 unsigned ShVal = ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt);
258 Val = CurDAG->getTargetConstant(Immed, MVT::i32);
259 Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
263 /// SelectNegArithImmed - As above, but negates the value before trying to select it.
265 bool ARM64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
267 // This function is called from the addsub_shifted_imm ComplexPattern,
268 // which lists [imm] as the list of opcodes it is interested in; however,
269 // we still need to check whether the operand is actually an immediate
270 // here because the ComplexPattern opcode list is only used in
271 // root-level opcode matching.
272 if (!isa<ConstantSDNode>(N.getNode()))
275 // The immediate operand must be a 24-bit zero-extended immediate.
276 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
278 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
279 // have the opposite effect on the C flag, so this pattern mustn't match under
280 // those circumstances.
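// Hedged example (illustrative only): for an i32 add with immediate -16, the
// immediate is not directly encodable, but after negation Immed becomes 16,
// which fits in 12 bits, so the node can be selected using the matching
// SUB-immediate form instead.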
284 if (N.getValueType() == MVT::i32)
285 Immed = ~((uint32_t)Immed) + 1;
287 Immed = ~Immed + 1ULL;
288 if (Immed & 0xFFFFFFFFFF000000ULL)
291 Immed &= 0xFFFFFFULL;
292 return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift);
295 /// getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
297 static ARM64_AM::ShiftType getShiftTypeForNode(SDValue N) {
298 switch (N.getOpcode()) {
300 return ARM64_AM::InvalidShift;
302 return ARM64_AM::LSL;
304 return ARM64_AM::LSR;
306 return ARM64_AM::ASR;
308 return ARM64_AM::ROR;
312 /// \brief Determine whether it is worth it to fold V into an extended register.
313 bool ARM64DAGToDAGISel::isWorthFolding(SDValue V) const {
314 // It hurts if a value is used at least twice, unless we are optimizing for code size.
316 if (ForCodeSize || V.hasOneUse())
321 /// SelectShiftedRegister - Select a "shifted register" operand. If the value
322 /// is not shifted, set the Shift operand to the default of "LSL 0". The logical
323 /// instructions allow the shifted register to be rotated, but the arithmetic
324 /// instructions do not. The AllowROR parameter specifies whether ROR is allowed.
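// Sketch of the intended folding (assumed example, not from the source):
// (add x0, (shl x1, 4)) can use the shifted-register form
//   add x0, x0, x1, lsl #4
// while a rotated operand such as (orr w0, (ror w1, 3)) is only accepted
// when AllowROR is true, i.e. for the logical instructions.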
326 bool ARM64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
327 SDValue &Reg, SDValue &Shift) {
328 ARM64_AM::ShiftType ShType = getShiftTypeForNode(N);
329 if (ShType == ARM64_AM::InvalidShift)
331 if (!AllowROR && ShType == ARM64_AM::ROR)
334 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
335 unsigned BitSize = N.getValueType().getSizeInBits();
336 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
337 unsigned ShVal = ARM64_AM::getShifterImm(ShType, Val);
339 Reg = N.getOperand(0);
340 Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
341 return isWorthFolding(N);
347 /// getExtendTypeForNode - Translate an extend node to the corresponding
348 /// ExtendType value.
349 static ARM64_AM::ExtendType getExtendTypeForNode(SDValue N,
350 bool IsLoadStore = false) {
351 if (N.getOpcode() == ISD::SIGN_EXTEND ||
352 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
354 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
355 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
357 SrcVT = N.getOperand(0).getValueType();
359 if (!IsLoadStore && SrcVT == MVT::i8)
360 return ARM64_AM::SXTB;
361 else if (!IsLoadStore && SrcVT == MVT::i16)
362 return ARM64_AM::SXTH;
363 else if (SrcVT == MVT::i32)
364 return ARM64_AM::SXTW;
365 else if (SrcVT == MVT::i64)
366 return ARM64_AM::SXTX;
368 return ARM64_AM::InvalidExtend;
369 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
370 N.getOpcode() == ISD::ANY_EXTEND) {
371 EVT SrcVT = N.getOperand(0).getValueType();
372 if (!IsLoadStore && SrcVT == MVT::i8)
373 return ARM64_AM::UXTB;
374 else if (!IsLoadStore && SrcVT == MVT::i16)
375 return ARM64_AM::UXTH;
376 else if (SrcVT == MVT::i32)
377 return ARM64_AM::UXTW;
378 else if (SrcVT == MVT::i64)
379 return ARM64_AM::UXTX;
381 return ARM64_AM::InvalidExtend;
382 } else if (N.getOpcode() == ISD::AND) {
383 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
385 return ARM64_AM::InvalidExtend;
386 uint64_t AndMask = CSD->getZExtValue();
390 return ARM64_AM::InvalidExtend;
392 return !IsLoadStore ? ARM64_AM::UXTB : ARM64_AM::InvalidExtend;
394 return !IsLoadStore ? ARM64_AM::UXTH : ARM64_AM::InvalidExtend;
396 return ARM64_AM::UXTW;
400 return ARM64_AM::InvalidExtend;
403 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
404 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
405 if (DL->getOpcode() != ARM64ISD::DUPLANE16 &&
406 DL->getOpcode() != ARM64ISD::DUPLANE32)
409 SDValue SV = DL->getOperand(0);
410 if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
413 SDValue EV = SV.getOperand(1);
414 if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
417 ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
418 ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
419 LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
420 LaneOp = EV.getOperand(0);
425 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
426 // high lane extract.
427 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
428 SDValue &LaneOp, int &LaneIdx) {
430 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
432 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
439 /// SelectMLAV64LaneV128 - ARM64 supports vector MLAs where one multiplicand is
440 /// a lane in the upper half of a 128-bit vector. Recognize and select this so
441 /// that we don't emit unnecessary lane extracts.
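// Illustrative example (an assumption added for clarity, not from the
// original source): when one multiplicand of the MLA is a lane taken from
// the high half of a 128-bit register, this selects the indexed form
// directly, e.g. "mla v0.4s, v1.4s, v2.s[3]", instead of first extracting
// the upper 64 bits into a separate register and multiplying by that.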
442 SDNode *ARM64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {
443 SDValue Op0 = N->getOperand(0);
444 SDValue Op1 = N->getOperand(1);
445 SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
446 SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
447 int LaneIdx = -1; // Will hold the lane index.
449 if (Op1.getOpcode() != ISD::MUL ||
450 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
453 if (Op1.getOpcode() != ISD::MUL ||
454 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
459 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
461 SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
463 unsigned MLAOpc = ~0U;
465 switch (N->getSimpleValueType(0).SimpleTy) {
467 llvm_unreachable("Unrecognized MLA.");
469 MLAOpc = ARM64::MLAv4i16_indexed;
472 MLAOpc = ARM64::MLAv8i16_indexed;
475 MLAOpc = ARM64::MLAv2i32_indexed;
478 MLAOpc = ARM64::MLAv4i32_indexed;
482 return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops);
485 SDNode *ARM64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) {
490 if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
494 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
496 SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
498 unsigned SMULLOpc = ~0U;
500 if (IntNo == Intrinsic::arm64_neon_smull) {
501 switch (N->getSimpleValueType(0).SimpleTy) {
503 llvm_unreachable("Unrecognized SMULL.");
505 SMULLOpc = ARM64::SMULLv4i16_indexed;
508 SMULLOpc = ARM64::SMULLv2i32_indexed;
511 } else if (IntNo == Intrinsic::arm64_neon_umull) {
512 switch (N->getSimpleValueType(0).SimpleTy) {
514 llvm_unreachable("Unrecognized UMULL.");
516 SMULLOpc = ARM64::UMULLv4i16_indexed;
519 SMULLOpc = ARM64::UMULLv2i32_indexed;
523 llvm_unreachable("Unrecognized intrinsic.");
525 return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops);
528 /// SelectArithExtendedRegister - Select an "extended register" operand. This
529 /// operand folds in an extend followed by an optional left shift.
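// Example of the operand being matched (illustrative): in
// (add x0, (shl (sext w1 to i64), 2)) the extend and the shift can be folded
// into the arithmetic instruction as "add x0, x0, w1, sxtw #2".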
530 bool ARM64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
532 unsigned ShiftVal = 0;
533 ARM64_AM::ExtendType Ext;
535 if (N.getOpcode() == ISD::SHL) {
536 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
539 ShiftVal = CSD->getZExtValue();
543 Ext = getExtendTypeForNode(N.getOperand(0));
544 if (Ext == ARM64_AM::InvalidExtend)
547 Reg = N.getOperand(0).getOperand(0);
549 Ext = getExtendTypeForNode(N);
550 if (Ext == ARM64_AM::InvalidExtend)
553 Reg = N.getOperand(0);
556 // ARM64 mandates that the RHS of the operation must use the smallest
557 // register class that could contain the size being extended from. Thus,
558 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
559 // there might not be an actual 32-bit value in the program. We can
560 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
561 if (Reg.getValueType() == MVT::i64 && Ext != ARM64_AM::UXTX &&
562 Ext != ARM64_AM::SXTX) {
563 SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32);
564 MachineSDNode *Node = CurDAG->getMachineNode(
565 TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32, Reg, SubReg);
566 Reg = SDValue(Node, 0);
569 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32);
570 return isWorthFolding(N);
573 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
574 /// immediate" address. The "Size" argument is the size in bytes of the memory
575 /// reference, which determines the scale.
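// Worked example (assumed, for illustration): for an 8-byte access with
// Size == 8, an offset of 24 is accepted because 24 is a multiple of 8 and
// 24 >> 3 == 3 fits in the unsigned 12-bit field, giving "ldr x0, [x1, #24]";
// an offset of 20 is rejected here and left for the unscaled form.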
576 bool ARM64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
577 SDValue &Base, SDValue &OffImm) {
578 const TargetLowering *TLI = getTargetLowering();
579 if (N.getOpcode() == ISD::FrameIndex) {
580 int FI = cast<FrameIndexSDNode>(N)->getIndex();
581 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
582 OffImm = CurDAG->getTargetConstant(0, MVT::i64);
586 if (N.getOpcode() == ARM64ISD::ADDlow) {
587 GlobalAddressSDNode *GAN =
588 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
589 Base = N.getOperand(0);
590 OffImm = N.getOperand(1);
594 const GlobalValue *GV = GAN->getGlobal();
595 unsigned Alignment = GV->getAlignment();
596 const DataLayout *DL = TLI->getDataLayout();
597 if (Alignment == 0 && !Subtarget->isTargetDarwin())
598 Alignment = DL->getABITypeAlignment(GV->getType()->getElementType());
600 if (Alignment >= Size)
604 if (CurDAG->isBaseWithConstantOffset(N)) {
605 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
606 int64_t RHSC = (int64_t)RHS->getZExtValue();
607 unsigned Scale = Log2_32(Size);
608 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
609 Base = N.getOperand(0);
610 if (Base.getOpcode() == ISD::FrameIndex) {
611 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
612 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
614 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64);
620 // Before falling back to our general case, check if the unscaled
621 // instructions can handle this. If so, that's preferable.
622 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
625 // Base only. The address will be materialized into a register before
626 // the memory is accessed.
627 // add x0, Xbase, #offset
630 OffImm = CurDAG->getTargetConstant(0, MVT::i64);
634 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
635 /// immediate" address. This should only match when there is an offset that
636 /// is not valid for a scaled immediate addressing mode. The "Size" argument
637 /// is the size in bytes of the memory reference, which is needed here to know
638 /// what is valid for a scaled immediate.
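// Illustrative example (a sketch, not from the source): with Size == 8, an
// offset of -8 or of 20 cannot use the scaled form, but both lie in
// [-256, 255] and so match here, e.g. "ldur x0, [x1, #-8]".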
639 bool ARM64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
640 SDValue &Base, SDValue &OffImm) {
641 if (!CurDAG->isBaseWithConstantOffset(N))
643 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
644 int64_t RHSC = RHS->getSExtValue();
645 // If the offset is valid as a scaled immediate, don't match here.
646 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
647 RHSC < (0x1000 << Log2_32(Size)))
649 if (RHSC >= -256 && RHSC < 256) {
650 Base = N.getOperand(0);
651 if (Base.getOpcode() == ISD::FrameIndex) {
652 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
653 const TargetLowering *TLI = getTargetLowering();
654 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
656 OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64);
663 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
664 SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32);
665 SDValue ImpDef = SDValue(
666 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64),
668 MachineSDNode *Node = CurDAG->getMachineNode(
669 TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg);
670 return SDValue(Node, 0);
673 static SDValue WidenIfNeeded(SelectionDAG *CurDAG, SDValue N) {
674 if (N.getValueType() == MVT::i32) {
675 return Widen(CurDAG, N);
681 /// \brief Check if the given SHL node (\p N) can be used to form an
682 /// extended register for an addressing mode.
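// Example of the shape being matched (illustrative, not from the source):
// (shl (zext w2 to i64), 3) used as the offset of an 8-byte access can be
// folded into the addressing mode as "ldr x0, [x1, w2, uxtw #3]".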
683 bool ARM64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
684 SDValue &Offset, SDValue &Imm) {
685 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
686 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
687 if (CSD && (CSD->getZExtValue() & 0x7) == CSD->getZExtValue()) {
689 ARM64_AM::ExtendType Ext = getExtendTypeForNode(N.getOperand(0), true);
690 if (Ext == ARM64_AM::InvalidExtend) {
691 Ext = ARM64_AM::UXTX;
692 Offset = WidenIfNeeded(CurDAG, N.getOperand(0));
694 Offset = WidenIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
697 unsigned LegalShiftVal = Log2_32(Size);
698 unsigned ShiftVal = CSD->getZExtValue();
700 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
703 Imm = CurDAG->getTargetConstant(
704 ARM64_AM::getMemExtendImm(Ext, ShiftVal != 0), MVT::i32);
705 if (isWorthFolding(N))
711 bool ARM64DAGToDAGISel::SelectAddrModeRO(SDValue N, unsigned Size,
712 SDValue &Base, SDValue &Offset,
714 if (N.getOpcode() != ISD::ADD)
716 SDValue LHS = N.getOperand(0);
717 SDValue RHS = N.getOperand(1);
719 // We don't want to match immediate adds here, because they are better lowered
720 // to the register-immediate addressing modes.
721 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
724 // Check if this particular node is reused in any non-memory related
725 // operation. If yes, do not try to fold this node into the address
726 // computation, since the computation will be kept.
727 const SDNode *Node = N.getNode();
728 for (SDNode *UI : Node->uses()) {
729 if (!isa<MemSDNode>(*UI))
733 // Remember if it is worth folding N when it produces an extended register.
734 bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
736 // Try to match a shifted extend on the RHS.
737 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
738 SelectExtendedSHL(RHS, Size, Offset, Imm)) {
743 // Try to match a shifted extend on the LHS.
744 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
745 SelectExtendedSHL(LHS, Size, Offset, Imm)) {
750 ARM64_AM::ExtendType Ext = ARM64_AM::UXTX;
751 // Try to match an unshifted extend on the LHS.
752 if (IsExtendedRegisterWorthFolding &&
753 (Ext = getExtendTypeForNode(LHS, true)) != ARM64_AM::InvalidExtend) {
755 Offset = WidenIfNeeded(CurDAG, LHS.getOperand(0));
756 Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false),
758 if (isWorthFolding(LHS))
762 // Try to match an unshifted extend on the RHS.
763 if (IsExtendedRegisterWorthFolding &&
764 (Ext = getExtendTypeForNode(RHS, true)) != ARM64_AM::InvalidExtend) {
766 Offset = WidenIfNeeded(CurDAG, RHS.getOperand(0));
767 Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false),
769 if (isWorthFolding(RHS))
773 // Match any non-shifted, non-extend, non-immediate add expression.
775 Offset = WidenIfNeeded(CurDAG, RHS);
776 Ext = ARM64_AM::UXTX;
777 Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false),
779 // Reg1 + Reg2 is free: no check needed.
783 SDValue ARM64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
784 static unsigned RegClassIDs[] = { ARM64::DDRegClassID, ARM64::DDDRegClassID,
785 ARM64::DDDDRegClassID };
786 static unsigned SubRegs[] = { ARM64::dsub0, ARM64::dsub1,
787 ARM64::dsub2, ARM64::dsub3 };
789 return createTuple(Regs, RegClassIDs, SubRegs);
792 SDValue ARM64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
793 static unsigned RegClassIDs[] = { ARM64::QQRegClassID, ARM64::QQQRegClassID,
794 ARM64::QQQQRegClassID };
795 static unsigned SubRegs[] = { ARM64::qsub0, ARM64::qsub1,
796 ARM64::qsub2, ARM64::qsub3 };
798 return createTuple(Regs, RegClassIDs, SubRegs);
801 SDValue ARM64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
802 unsigned RegClassIDs[],
803 unsigned SubRegs[]) {
804 // There's no special register-class for a vector-list of 1 element: it's just the vector itself.
806 if (Regs.size() == 1)
809 assert(Regs.size() >= 2 && Regs.size() <= 4);
811 SDLoc DL(Regs[0].getNode());
813 SmallVector<SDValue, 4> Ops;
815 // First operand of REG_SEQUENCE is the desired RegClass.
817 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32));
819 // Then we get pairs of source & subregister-position for the components.
820 for (unsigned i = 0; i < Regs.size(); ++i) {
821 Ops.push_back(Regs[i]);
822 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32));
826 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
827 return SDValue(N, 0);
830 SDNode *ARM64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs,
831 unsigned Opc, bool isExt) {
833 EVT VT = N->getValueType(0);
835 unsigned ExtOff = isExt;
837 // Form a REG_SEQUENCE to force register allocation.
838 unsigned Vec0Off = ExtOff + 1;
839 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
840 N->op_begin() + Vec0Off + NumVecs);
841 SDValue RegSeq = createQTuple(Regs);
843 SmallVector<SDValue, 6> Ops;
845 Ops.push_back(N->getOperand(1));
846 Ops.push_back(RegSeq);
847 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
848 return CurDAG->getMachineNode(Opc, dl, VT, Ops);
851 SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) {
852 LoadSDNode *LD = cast<LoadSDNode>(N);
853 if (LD->isUnindexed())
855 EVT VT = LD->getMemoryVT();
856 EVT DstVT = N->getValueType(0);
857 ISD::MemIndexedMode AM = LD->getAddressingMode();
858 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
860 // We're not doing validity checking here. That was done when checking
861 // if we should mark the load as indexed or not. We're just selecting
862 // the right instruction.
865 ISD::LoadExtType ExtType = LD->getExtensionType();
866 bool InsertTo64 = false;
868 Opcode = IsPre ? ARM64::LDRXpre_isel : ARM64::LDRXpost_isel;
869 else if (VT == MVT::i32) {
870 if (ExtType == ISD::NON_EXTLOAD)
871 Opcode = IsPre ? ARM64::LDRWpre_isel : ARM64::LDRWpost_isel;
872 else if (ExtType == ISD::SEXTLOAD)
873 Opcode = IsPre ? ARM64::LDRSWpre_isel : ARM64::LDRSWpost_isel;
875 Opcode = IsPre ? ARM64::LDRWpre_isel : ARM64::LDRWpost_isel;
877 // The result of the load is only i32. It's the subreg_to_reg that makes it into an i64.
881 } else if (VT == MVT::i16) {
882 if (ExtType == ISD::SEXTLOAD) {
883 if (DstVT == MVT::i64)
884 Opcode = IsPre ? ARM64::LDRSHXpre_isel : ARM64::LDRSHXpost_isel;
886 Opcode = IsPre ? ARM64::LDRSHWpre_isel : ARM64::LDRSHWpost_isel;
888 Opcode = IsPre ? ARM64::LDRHHpre_isel : ARM64::LDRHHpost_isel;
889 InsertTo64 = DstVT == MVT::i64;
890 // The result of the load is only i32. It's the subreg_to_reg that makes it into an i64.
894 } else if (VT == MVT::i8) {
895 if (ExtType == ISD::SEXTLOAD) {
896 if (DstVT == MVT::i64)
897 Opcode = IsPre ? ARM64::LDRSBXpre_isel : ARM64::LDRSBXpost_isel;
899 Opcode = IsPre ? ARM64::LDRSBWpre_isel : ARM64::LDRSBWpost_isel;
901 Opcode = IsPre ? ARM64::LDRBBpre_isel : ARM64::LDRBBpost_isel;
902 InsertTo64 = DstVT == MVT::i64;
903 // The result of the load is only i32. It's the subreg_to_reg that makes it into an i64.
907 } else if (VT == MVT::f32) {
908 Opcode = IsPre ? ARM64::LDRSpre_isel : ARM64::LDRSpost_isel;
909 } else if (VT == MVT::f64) {
910 Opcode = IsPre ? ARM64::LDRDpre_isel : ARM64::LDRDpost_isel;
913 SDValue Chain = LD->getChain();
914 SDValue Base = LD->getBasePtr();
915 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
916 int OffsetVal = (int)OffsetOp->getZExtValue();
917 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64);
918 SDValue Ops[] = { Base, Offset, Chain };
919 SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), DstVT, MVT::i64,
921 // Either way, we're replacing the node, so tell the caller that.
924 SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32);
925 SDNode *Sub = CurDAG->getMachineNode(
926 ARM64::SUBREG_TO_REG, SDLoc(N), MVT::i64,
927 CurDAG->getTargetConstant(0, MVT::i64), SDValue(Res, 0), SubReg);
928 ReplaceUses(SDValue(N, 0), SDValue(Sub, 0));
929 ReplaceUses(SDValue(N, 1), SDValue(Res, 1));
930 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
936 SDNode *ARM64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
937 unsigned SubRegIdx) {
939 EVT VT = N->getValueType(0);
940 SDValue Chain = N->getOperand(0);
942 SmallVector<SDValue, 6> Ops;
943 Ops.push_back(N->getOperand(2)); // Mem operand;
944 Ops.push_back(Chain);
946 std::vector<EVT> ResTys;
947 ResTys.push_back(MVT::Untyped);
948 ResTys.push_back(MVT::Other);
950 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
951 SDValue SuperReg = SDValue(Ld, 0);
953 // MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
954 // MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
955 // cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
959 ReplaceUses(SDValue(N, 3), CurDAG->getTargetExtractSubreg(SubRegIdx + 3, dl,
963 ReplaceUses(SDValue(N, 2), CurDAG->getTargetExtractSubreg(SubRegIdx + 2, dl,
967 ReplaceUses(SDValue(N, 1), CurDAG->getTargetExtractSubreg(SubRegIdx + 1, dl,
969 ReplaceUses(SDValue(N, 0),
970 CurDAG->getTargetExtractSubreg(SubRegIdx, dl, VT, SuperReg));
973 ReplaceUses(SDValue(N, 0), SuperReg);
977 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
982 SDNode *ARM64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
985 EVT VT = N->getOperand(2)->getValueType(0);
987 // Form a REG_SEQUENCE to force register allocation.
988 bool Is128Bit = VT.getSizeInBits() == 128;
989 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
990 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
992 SmallVector<SDValue, 6> Ops;
993 Ops.push_back(RegSeq);
994 Ops.push_back(N->getOperand(NumVecs + 2));
995 Ops.push_back(N->getOperand(0));
996 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1001 /// WidenVector - Given a value in the V64 register class, produce the
1002 /// equivalent value in the V128 register class.
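// Sketch of the effect (illustrative): a v2i32 value living in a D register
// is re-described as the dsub subregister of an otherwise undefined v4i32
// Q register via IMPLICIT_DEF + INSERT_SUBREG; no data is actually moved.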
1007 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1009 SDValue operator()(SDValue V64Reg) {
1010 EVT VT = V64Reg.getValueType();
1011 unsigned NarrowSize = VT.getVectorNumElements();
1012 MVT EltTy = VT.getVectorElementType().getSimpleVT();
1013 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1017 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1018 return DAG.getTargetInsertSubreg(ARM64::dsub, DL, WideTy, Undef, V64Reg);
1022 /// NarrowVector - Given a value in the V128 register class, produce the
1023 /// equivalent value in the V64 register class.
1024 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1025 EVT VT = V128Reg.getValueType();
1026 unsigned WideSize = VT.getVectorNumElements();
1027 MVT EltTy = VT.getVectorElementType().getSimpleVT();
1028 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1030 return DAG.getTargetExtractSubreg(ARM64::dsub, SDLoc(V128Reg), NarrowTy,
1034 SDNode *ARM64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1037 EVT VT = N->getValueType(0);
1038 bool Narrow = VT.getSizeInBits() == 64;
1040 // Form a REG_SEQUENCE to force register allocation.
1041 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1044 std::transform(Regs.begin(), Regs.end(), Regs.begin(),
1045 WidenVector(*CurDAG));
1047 SDValue RegSeq = createQTuple(Regs);
1049 std::vector<EVT> ResTys;
1050 ResTys.push_back(MVT::Untyped);
1051 ResTys.push_back(MVT::Other);
1054 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1056 SmallVector<SDValue, 6> Ops;
1057 Ops.push_back(RegSeq);
1058 Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
1059 Ops.push_back(N->getOperand(NumVecs + 3));
1060 Ops.push_back(N->getOperand(0));
1061 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1062 SDValue SuperReg = SDValue(Ld, 0);
1064 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1068 CurDAG->getTargetExtractSubreg(ARM64::qsub3, dl, WideVT, SuperReg);
1070 ReplaceUses(SDValue(N, 3), NarrowVector(NV3, *CurDAG));
1072 ReplaceUses(SDValue(N, 3), NV3);
1077 CurDAG->getTargetExtractSubreg(ARM64::qsub2, dl, WideVT, SuperReg);
1079 ReplaceUses(SDValue(N, 2), NarrowVector(NV2, *CurDAG));
1081 ReplaceUses(SDValue(N, 2), NV2);
1086 CurDAG->getTargetExtractSubreg(ARM64::qsub1, dl, WideVT, SuperReg);
1088 CurDAG->getTargetExtractSubreg(ARM64::qsub0, dl, WideVT, SuperReg);
1090 ReplaceUses(SDValue(N, 1), NarrowVector(NV1, *CurDAG));
1091 ReplaceUses(SDValue(N, 0), NarrowVector(NV0, *CurDAG));
1093 ReplaceUses(SDValue(N, 1), NV1);
1094 ReplaceUses(SDValue(N, 0), NV0);
1100 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1105 SDNode *ARM64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
1108 EVT VT = N->getOperand(2)->getValueType(0);
1109 bool Narrow = VT.getSizeInBits() == 64;
1111 // Form a REG_SEQUENCE to force register allocation.
1112 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1115 std::transform(Regs.begin(), Regs.end(), Regs.begin(),
1116 WidenVector(*CurDAG));
1118 SDValue RegSeq = createQTuple(Regs);
1121 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1123 SmallVector<SDValue, 6> Ops;
1124 Ops.push_back(RegSeq);
1125 Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
1126 Ops.push_back(N->getOperand(NumVecs + 3));
1127 Ops.push_back(N->getOperand(0));
1128 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
1130 // Transfer memoperands.
1131 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1132 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1133 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1138 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
1139 unsigned &Opc, SDValue &Opd0,
1140 unsigned &LSB, unsigned &MSB,
1141 unsigned NumberOfIgnoredLowBits,
1142 bool BiggerPattern) {
1143 assert(N->getOpcode() == ISD::AND &&
1144 "N must be a AND operation to call this function");
1146 EVT VT = N->getValueType(0);
1148 // We could test the type of VT and return false when the type does not
1149 // match, but since that check is done prior to this call in the current
1150 // context, we turned it into an assert to avoid redundant code.
1151 assert((VT == MVT::i32 || VT == MVT::i64) &&
1152 "Type checking must have been done before calling this function");
1154 // FIXME: simplify-demanded-bits in DAGCombine will probably have
1155 // changed the AND node to a 32-bit mask operation. We'll have to
1156 // undo that as part of the transform here if we want to catch all
1157 // the opportunities.
1158 // Currently the NumberOfIgnoredLowBits argument helps to recover
1159 // from these situations when matching the bigger pattern (bitfield insert).
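// Worked example (added for illustration): (and (srl x, 4), 0xff) on i32 is
// an unsigned extract of bits [11:4]; it becomes UBFMWri with LSB = 4 and
// MSB = 4 + trailing-ones(0xff) - 1 = 11.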
1161 // For unsigned extracts, check for a shift right and mask
1162 uint64_t And_imm = 0;
1163 if (!isOpcWithIntImmediate(N, ISD::AND, And_imm))
1166 const SDNode *Op0 = N->getOperand(0).getNode();
1168 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
1169 // simplified. Try to undo that
1170 And_imm |= (1 << NumberOfIgnoredLowBits) - 1;
1172 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
1173 if (And_imm & (And_imm + 1))
1176 bool ClampMSB = false;
1177 uint64_t Srl_imm = 0;
1178 // Handle the SRL + ANY_EXTEND case.
1179 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
1180 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) {
1181 // Extend the incoming operand of the SRL to 64-bit.
1182 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
1183 // Make sure to clamp the MSB so that we preserve the semantics of the
1184 // original operations.
1186 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) {
1187 Opd0 = Op0->getOperand(0);
1188 } else if (BiggerPattern) {
1189 // Let's pretend a 0 shift right has been performed.
1190 // The resulting code will be at least as good as the original one
1191 // plus it may expose more opportunities for bitfield insert pattern.
1192 // FIXME: Currently we limit this to the bigger pattern, because
1193 // some optimizations expect AND and not UBFM
1194 Opd0 = N->getOperand(0);
1198 assert((BiggerPattern || (Srl_imm > 0 && Srl_imm < VT.getSizeInBits())) &&
1199 "bad amount in shift node!");
1202 MSB = Srl_imm + (VT == MVT::i32 ? CountTrailingOnes_32(And_imm)
1203 : CountTrailingOnes_64(And_imm)) -
1206 // Since we're moving the extend before the right shift operation, we need
1207 // to clamp the MSB to make sure we don't shift in undefined bits instead of
1208 // the zeros which would get shifted in with the original right shift operation.
1210 MSB = MSB > 31 ? 31 : MSB;
1212 Opc = VT == MVT::i32 ? ARM64::UBFMWri : ARM64::UBFMXri;
1216 static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1217 unsigned &LSB, unsigned &MSB) {
1218 // We are looking for the following pattern which basically extracts a single
1219 // bit from the source value and places it in the LSB of the destination
1220 // value; all other bits of the destination value are set to zero:
1222 // Value2 = AND Value, MaskImm
1223 // SRL Value2, ShiftImm
1225 // with MaskImm >> ShiftImm == 1.
1227 // This gets selected into a single UBFM:
1229 // UBFM Value, ShiftImm, ShiftImm
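// Concrete instance (illustrative): (srl (and x, 0x8), 3) extracts bit 3 of
// x into bit 0 of the result; MaskImm >> ShiftImm == 0x8 >> 3 == 1, so it is
// selected as UBFM x, 3, 3.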
1232 if (N->getOpcode() != ISD::SRL)
1235 uint64_t And_mask = 0;
1236 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask))
1239 Opd0 = N->getOperand(0).getOperand(0);
1241 uint64_t Srl_imm = 0;
1242 if (!isIntImmediate(N->getOperand(1), Srl_imm))
1245 // Check whether we really have a one bit extract here.
1246 if (And_mask >> Srl_imm == 0x1) {
1247 if (N->getValueType(0) == MVT::i32)
1248 Opc = ARM64::UBFMWri;
1250 Opc = ARM64::UBFMXri;
1252 LSB = MSB = Srl_imm;
1260 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1261 unsigned &LSB, unsigned &MSB,
1262 bool BiggerPattern) {
1263 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
1264 "N must be a SHR/SRA operation to call this function");
1266 EVT VT = N->getValueType(0);
1268 // We could test the type of VT and return false when the type does not
1269 // match, but since that check is done prior to this call in the current
1270 // context, we turned it into an assert to avoid redundant code.
1271 assert((VT == MVT::i32 || VT == MVT::i64) &&
1272 "Type checking must have been done before calling this function");
1274 // Check for AND + SRL doing a one bit extract.
1275 if (isOneBitExtractOpFromShr(N, Opc, Opd0, LSB, MSB))
1278 // We're looking for a shift of a shift.
1279 uint64_t Shl_imm = 0;
1280 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
1281 Opd0 = N->getOperand(0).getOperand(0);
1282 } else if (BiggerPattern) {
1283 // Let's pretend a 0 shift left has been performed.
1284 // FIXME: Currently we limit this to the bigger pattern case,
1285 // because some optimizations expect AND and not UBFM
1286 Opd0 = N->getOperand(0);
1290 assert(Shl_imm < VT.getSizeInBits() && "bad amount in shift node!");
1291 uint64_t Srl_imm = 0;
1292 if (!isIntImmediate(N->getOperand(1), Srl_imm))
1295 assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&
1296 "bad amount in shift node!");
1297 // Note: The width operand is encoded as width-1.
1298 unsigned Width = VT.getSizeInBits() - Srl_imm - 1;
1299 int sLSB = Srl_imm - Shl_imm;
1304 // SRA requires a signed extraction
1306 Opc = N->getOpcode() == ISD::SRA ? ARM64::SBFMWri : ARM64::UBFMWri;
1308 Opc = N->getOpcode() == ISD::SRA ? ARM64::SBFMXri : ARM64::UBFMXri;
1312 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
1313 SDValue &Opd0, unsigned &LSB, unsigned &MSB,
1314 unsigned NumberOfIgnoredLowBits = 0,
1315 bool BiggerPattern = false) {
1316 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
1319 switch (N->getOpcode()) {
1321 if (!N->isMachineOpcode())
1325 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB,
1326 NumberOfIgnoredLowBits, BiggerPattern);
1329 return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern);
1332 unsigned NOpc = N->getMachineOpcode();
1336 case ARM64::SBFMWri:
1337 case ARM64::UBFMWri:
1338 case ARM64::SBFMXri:
1339 case ARM64::UBFMXri:
1341 Opd0 = N->getOperand(0);
1342 LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
1343 MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
1350 SDNode *ARM64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
1351 unsigned Opc, LSB, MSB;
1353 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB))
1356 EVT VT = N->getValueType(0);
1357 SDValue Ops[] = { Opd0, CurDAG->getTargetConstant(LSB, VT),
1358 CurDAG->getTargetConstant(MSB, VT) };
1359 return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 3);
1362 // Is mask an i32 or i64 binary sequence 1..10..0 and
1363 // CountTrailingZeros(mask) == ExpectedTrailingZeros
1364 static bool isHighMask(uint64_t Mask, unsigned ExpectedTrailingZeros,
1365 unsigned NumberOfIgnoredHighBits, EVT VT) {
1366 assert((VT == MVT::i32 || VT == MVT::i64) &&
1367 "i32 or i64 mask type expected!");
1369 uint64_t ExpectedMask;
1370 if (VT == MVT::i32) {
1371 uint32_t ExpectedMaski32 = ~0 << ExpectedTrailingZeros;
1372 ExpectedMask = ExpectedMaski32;
1373 if (NumberOfIgnoredHighBits) {
1374 uint32_t highMask = ~0 << (32 - NumberOfIgnoredHighBits);
1378 ExpectedMask = ((uint64_t) ~0) << ExpectedTrailingZeros;
1379 if (NumberOfIgnoredHighBits)
1380 Mask |= ((uint64_t) ~0) << (64 - NumberOfIgnoredHighBits);
1383 return Mask == ExpectedMask;
1386 // Look for bits that will be useful for later uses.
1387 // A bit is considered useless as soon as it is dropped and never used
1388 // before it has been dropped.
1389 // E.g., looking for the useful bits of x:
1390 // 1. y = x & 0x7
1391 // 2. z = y >> 2
1392 // After #1, the useful bits of x are 0x7; the useful bits of x live through
1393 // y are 0x7.
1394 // After #2, the useful bits of x are 0x4.
1395 // However, if x is used in an unpredictable instruction, then all its bits
1396 // are useful.
1401 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
1403 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
1406 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1407 Imm = ARM64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
1408 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
1409 getUsefulBits(Op, UsefulBits, Depth + 1);
1412 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
1413 uint64_t Imm, uint64_t MSB,
1415 // inherit the bitwidth value
1416 APInt OpUsefulBits(UsefulBits);
1420 OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
1422 // The interesting part will be in the lower part of the result
1423 getUsefulBits(Op, OpUsefulBits, Depth + 1);
1424 // The interesting part was starting at Imm in the argument
1425 OpUsefulBits = OpUsefulBits.shl(Imm);
1427 OpUsefulBits = OpUsefulBits.shl(MSB + 1);
1429 // The interesting part will be shifted in the result
1430 OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm);
1431 getUsefulBits(Op, OpUsefulBits, Depth + 1);
1432 // The interesting part was at zero in the argument
1433 OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm);
1436 UsefulBits &= OpUsefulBits;
1439 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
1442 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1444 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1446 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1449 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
1451 uint64_t ShiftTypeAndValue =
1452 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1453 APInt Mask(UsefulBits);
1454 Mask.clearAllBits();
1457 if (ARM64_AM::getShiftType(ShiftTypeAndValue) == ARM64_AM::LSL) {
1459 uint64_t ShiftAmt = ARM64_AM::getShiftValue(ShiftTypeAndValue);
1460 Mask = Mask.shl(ShiftAmt);
1461 getUsefulBits(Op, Mask, Depth + 1);
1462 Mask = Mask.lshr(ShiftAmt);
1463 } else if (ARM64_AM::getShiftType(ShiftTypeAndValue) == ARM64_AM::LSR) {
1465 // We do not handle ARM64_AM::ASR, because the sign will change the
1466 // number of useful bits
1467 uint64_t ShiftAmt = ARM64_AM::getShiftValue(ShiftTypeAndValue);
1468 Mask = Mask.lshr(ShiftAmt);
1469 getUsefulBits(Op, Mask, Depth + 1);
1470 Mask = Mask.shl(ShiftAmt);
1477 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
1480 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1482 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
1484 if (Op.getOperand(1) == Orig)
1485 return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1487 APInt OpUsefulBits(UsefulBits);
1491 OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
1493 UsefulBits &= ~OpUsefulBits;
1494 getUsefulBits(Op, UsefulBits, Depth + 1);
1496 OpUsefulBits = OpUsefulBits.shl(MSB + 1);
1498 UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm));
1499 getUsefulBits(Op, UsefulBits, Depth + 1);
1503 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
1504 SDValue Orig, unsigned Depth) {
1506 // Users of this node should have already been instruction selected
1507 // FIXME: Can we turn that into an assert?
1508 if (!UserNode->isMachineOpcode())
1511 switch (UserNode->getMachineOpcode()) {
1514 case ARM64::ANDSWri:
1515 case ARM64::ANDSXri:
1518 // We increment Depth only when we call getUsefulBits.
1519 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
1521 case ARM64::UBFMWri:
1522 case ARM64::UBFMXri:
1523 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
1527 if (UserNode->getOperand(1) != Orig)
1529 return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
1533 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
1537 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
1540 // Initialize UsefulBits
1542 unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits();
1543 // At the beginning, assume every produced bit is useful.
1544 UsefulBits = APInt(Bitwidth, 0);
1545 UsefulBits.flipAllBits();
1547 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
1549 for (SDNode *Node : Op.getNode()->uses()) {
1550 // A use cannot produce useful bits
1551 APInt UsefulBitsForUse = APInt(UsefulBits);
1552 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
1553 UsersUsefulBits |= UsefulBitsForUse;
1555 // UsefulBits contains the produced bits that are meaningful for the
1556 // current definition; thus a user cannot make a bit meaningful at this point.
1558 UsefulBits &= UsersUsefulBits;
1561 // Given an OR operation, check if we have the following pattern:
1562 // ubfm c, b, imm, imm2 (or something that does the same job, see
1563 // isBitfieldExtractOp)
1564 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
1565 // countTrailingZeros(mask2) == imm2 - imm + 1
1567 // if yes, the given reference arguments will be updated so that one can replace
1568 // the OR instruction with:
1569 // f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2
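// Worked example (assumed, for illustration): for i32,
//   (or (and e, 0xffffff00), (ubfm b, 0, 7))
// has mask2 = 0xffffff00 with countTrailingZeros(mask2) == 8 == imm2 - imm + 1,
// so the OR can be replaced by BFMWri e, b, 0, 7, i.e. "bfxil w0, w1, #0, #8"
// with illustrative register choices.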
1570 static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1571 SDValue &Opd1, unsigned &LSB,
1572 unsigned &MSB, SelectionDAG *CurDAG) {
1573 assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
1576 EVT VT = N->getValueType(0);
1578 Opc = ARM64::BFMWri;
1579 else if (VT == MVT::i64)
1580 Opc = ARM64::BFMXri;
1584 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
1585 // have the expected shape. Try to undo that.
1587 getUsefulBits(SDValue(N, 0), UsefulBits);
1589 unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
1590 unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
1592 // OR is commutative, check both possibilities (does llvm provide a
1593 // way to do that directly, e.g., via code matcher?)
1594 SDValue OrOpd1Val = N->getOperand(1);
1595 SDNode *OrOpd0 = N->getOperand(0).getNode();
1596 SDNode *OrOpd1 = N->getOperand(1).getNode();
1597 for (int i = 0; i < 2;
1598 ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) {
1600 // Set Opd1, LSB and MSB arguments by looking for
1601 // c = ubfm b, imm, imm2
1602 if (!isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Opd1, LSB, MSB,
1603 NumberOfIgnoredLowBits, true))
1606 // Check that the returned opcode is compatible with the pattern,
1607 // i.e., same type and zero extended (U and not S)
1608 if ((BFXOpc != ARM64::UBFMXri && VT == MVT::i64) ||
1609 (BFXOpc != ARM64::UBFMWri && VT == MVT::i32))
1612 // Compute the width of the bitfield insertion
1613 int sMSB = MSB - LSB + 1;
1614 // FIXME: This constraint is to catch bitfield insertion; we may
1615 // want to widen the pattern if we want to grab general bitfield insertion.
1620 // Check the second part of the pattern
1621 EVT VT = OrOpd1->getValueType(0);
1622 if (VT != MVT::i32 && VT != MVT::i64)
1625 // Compute the Known Zero for the candidate of the first operand.
1626 // This allows us to catch more general cases than just looking for
1627 // an AND with imm. Indeed, simplify-demanded-bits may have removed
1628 // the AND instruction because it proves it was useless.
1629 APInt KnownZero, KnownOne;
1630 CurDAG->ComputeMaskedBits(OrOpd1Val, KnownZero, KnownOne);
1632 // Check if there is enough room for the second operand to appear in the first one.
1634 if (KnownZero.countTrailingOnes() < (unsigned)sMSB)
1637 // Set the first operand
1639 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
1640 isHighMask(Imm, sMSB, NumberOfIgnoredHighBits, VT))
1641 // In that case, we can eliminate the AND
1642 Opd0 = OrOpd1->getOperand(0);
1644 // Maybe the AND has been removed by simplify-demanded-bits
1645 // or is useful because it discards more bits
1655 SDNode *ARM64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
1656 if (N->getOpcode() != ISD::OR)
1663 if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG))
1666 EVT VT = N->getValueType(0);
1667 SDValue Ops[] = { Opd0,
1669 CurDAG->getTargetConstant(LSB, VT),
1670 CurDAG->getTargetConstant(MSB, VT) };
1671 return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 4);
1674 SDNode *ARM64DAGToDAGISel::SelectLIBM(SDNode *N) {
1675 EVT VT = N->getValueType(0);
1678 unsigned FRINTXOpcs[] = { ARM64::FRINTXSr, ARM64::FRINTXDr };
1680 if (VT == MVT::f32) {
1682 } else if (VT == MVT::f64) {
1685 return 0; // Unrecognized argument type. Fall back on default codegen.
1687 // Pick the FRINTX variant needed to set the flags.
1688 unsigned FRINTXOpc = FRINTXOpcs[Variant];
1690 switch (N->getOpcode()) {
1692 return 0; // Unrecognized libm ISD node. Fall back on default codegen.
1694 unsigned FRINTPOpcs[] = { ARM64::FRINTPSr, ARM64::FRINTPDr };
1695 Opc = FRINTPOpcs[Variant];
1699 unsigned FRINTMOpcs[] = { ARM64::FRINTMSr, ARM64::FRINTMDr };
1700 Opc = FRINTMOpcs[Variant];
1704 unsigned FRINTZOpcs[] = { ARM64::FRINTZSr, ARM64::FRINTZDr };
1705 Opc = FRINTZOpcs[Variant];
1709 unsigned FRINTAOpcs[] = { ARM64::FRINTASr, ARM64::FRINTADr };
1710 Opc = FRINTAOpcs[Variant];
1716 SDValue In = N->getOperand(0);
1717 SmallVector<SDValue, 2> Ops;
1720 if (!TM.Options.UnsafeFPMath) {
1721 SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In);
1722 Ops.push_back(SDValue(FRINTX, 1));
1725 return CurDAG->getMachineNode(Opc, dl, VT, Ops);
1729 ARM64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
1730 unsigned RegWidth) {
1732 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
1733 FVal = CN->getValueAPF();
1734 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
1735 // Some otherwise illegal constants are allowed in this case.
1736 if (LN->getOperand(1).getOpcode() != ARM64ISD::ADDlow ||
1737 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
1740 ConstantPoolSDNode *CN =
1741 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
1742 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
1746 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
1747 // is between 1 and 32 for a destination w-register, or 1 and 64 for an x-register.
1750 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
1751 // want THIS_NODE to be 2^fbits. This is much easier to deal with using an APSInt.
1755 // fbits is between 1 and 64 in the worst-case, which means the fmul
1756 // could have 2^64 as an actual operand. Need 65 bits of precision.
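// Illustrative example (not from the original source): if the multiplier is
// 65536.0 == 2^16, IntVal converts exactly and logBase2() gives FBits = 16,
// so a pattern like (fp_to_sint (fmul f, 65536.0)) can use the fixed-point
// form "fcvtzs w0, s0, #16".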
1757 APSInt IntVal(65, true);
1758 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
1760 // N.b. isPowerOf2 also checks for > 0.
1761 if (!IsExact || !IntVal.isPowerOf2()) return false;
1762 unsigned FBits = IntVal.logBase2();
1764 // Checks above should have guaranteed that we haven't lost information in
1765 // finding FBits, but it must still be in range.
1766 if (FBits == 0 || FBits > RegWidth) return false;
1768 FixedPos = CurDAG->getTargetConstant(FBits, MVT::i32);
1772 SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) {
1773 // Dump information about the Node being selected
1774 DEBUG(errs() << "Selecting: ");
1775 DEBUG(Node->dump(CurDAG));
1776 DEBUG(errs() << "\n");
1778 // If we have a custom node, we already have selected!
1779 if (Node->isMachineOpcode()) {
1780 DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
1781 Node->setNodeId(-1);
1785 // A few custom selection cases.
1786 SDNode *ResNode = 0;
1787 EVT VT = Node->getValueType(0);
1789 switch (Node->getOpcode()) {
1794 if (SDNode *I = SelectMLAV64LaneV128(Node))
1799 // Try to select as an indexed load. Fall through to normal processing if we can't.
1802 SDNode *I = SelectIndexedLoad(Node, Done);
1811 if (SDNode *I = SelectBitfieldExtractOp(Node))
1816 if (SDNode *I = SelectBitfieldInsertOp(Node))
1820 case ISD::EXTRACT_VECTOR_ELT: {
1821 // Extracting lane zero is a special case where we can just use a plain
1822 // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
1823 // the rest of the compiler, especially the register allocator and copy
1824 // propagation, to reason about, so is preferred when it's possible to do so.
1826 ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
1827 // Bail and use the default Select() for non-zero lanes.
1828 if (LaneNode->getZExtValue() != 0)
1830 // If the element type is not the same as the result type, likewise
1831 // bail and use the default Select(), as there's more to do than just
1832 // a cross-class COPY. This catches extracts of i8 and i16 elements
1833 // since they will need an explicit zext.
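// Example of the lane-zero special case (illustrative): extracting element 0
// of a v2f64 as an f64 is just an EXTRACT_SUBREG of the dsub subregister,
// which, as noted above, later lowers to a plain FMOV/copy rather than a
// lane-extract instruction.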
1834 if (VT != Node->getOperand(0).getValueType().getVectorElementType())
1837 switch (Node->getOperand(0)
1839 .getVectorElementType()
1842 assert(0 && "Unexpected vector element type!");
1844 SubReg = ARM64::dsub;
1847 SubReg = ARM64::ssub;
1849 case 16: // FALLTHROUGH
1851 llvm_unreachable("unexpected zext-requiring extract element!");
1853 SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
1854 Node->getOperand(0));
1855 DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
1856 DEBUG(Extract->dumpr(CurDAG));
1857 DEBUG(dbgs() << "\n");
1858 return Extract.getNode();
  case ISD::Constant: {
    // Materialize zero constants as copies from WZR/XZR. This allows
    // the coalescer to propagate these into other instructions.
    ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
    if (ConstNode->isNullValue()) {
      if (VT == MVT::i32)
        return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
                                      ARM64::WZR, MVT::i32).getNode();
      else if (VT == MVT::i64)
        return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
                                      ARM64::XZR, MVT::i64).getNode();
    }
    break;
  }

  case ISD::FrameIndex: {
    // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
    unsigned Shifter = ARM64_AM::getShifterImm(ARM64_AM::LSL, 0);
    const TargetLowering *TLI = getTargetLowering();
    SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
    SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
                      CurDAG->getTargetConstant(Shifter, MVT::i32) };
    return CurDAG->SelectNodeTo(Node, ARM64::ADDXri, MVT::i64, Ops, 3);
  }
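  // The chained intrinsics below (the exclusive load/store pairs and the
  // NEON structured loads) are expanded to explicit machine nodes by hand so
  // that their multi-register results, and for the exclusive pairs their
  // memory operands, are wired up directly.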
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    switch (IntNo) {
    default:
      break;
    case Intrinsic::arm64_ldaxp:
    case Intrinsic::arm64_ldxp: {
      unsigned Op =
          IntNo == Intrinsic::arm64_ldaxp ? ARM64::LDAXPX : ARM64::LDXPX;
      SDValue MemAddr = Node->getOperand(2);
      SDLoc DL(Node);
      SDValue Chain = Node->getOperand(0);

      SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
                                          MVT::Other, MemAddr, Chain);

      // Transfer memoperands.
      MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
      MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
      cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
      return Ld;
    }
    case Intrinsic::arm64_stlxp:
    case Intrinsic::arm64_stxp: {
      unsigned Op =
          IntNo == Intrinsic::arm64_stlxp ? ARM64::STLXPX : ARM64::STXPX;
      SDLoc DL(Node);
      SDValue Chain = Node->getOperand(0);
      SDValue ValLo = Node->getOperand(2);
      SDValue ValHi = Node->getOperand(3);
      SDValue MemAddr = Node->getOperand(4);

      // Place arguments in the right order.
      SmallVector<SDValue, 7> Ops;
      Ops.push_back(ValLo);
      Ops.push_back(ValHi);
      Ops.push_back(MemAddr);
      Ops.push_back(Chain);

      SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
      // Transfer memoperands.
      MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
      MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
      cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);

      return St;
    }
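    // For the NEON structured loads that follow, the intrinsic's result type
    // picks both the instruction variant (.8b, .4h, .2s, ...) and the
    // sub-register index (dsub0 for 64-bit, qsub0 for 128-bit vectors) used
    // to split the register-tuple result back into individual vectors.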
    case Intrinsic::arm64_neon_ld1x2:
      if (VT == MVT::v8i8)
        return SelectLoad(Node, 2, ARM64::LD1Twov8b, ARM64::dsub0);
      else if (VT == MVT::v16i8)
        return SelectLoad(Node, 2, ARM64::LD1Twov16b, ARM64::qsub0);
      else if (VT == MVT::v4i16)
        return SelectLoad(Node, 2, ARM64::LD1Twov4h, ARM64::dsub0);
      else if (VT == MVT::v8i16)
        return SelectLoad(Node, 2, ARM64::LD1Twov8h, ARM64::qsub0);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectLoad(Node, 2, ARM64::LD1Twov2s, ARM64::dsub0);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectLoad(Node, 2, ARM64::LD1Twov4s, ARM64::qsub0);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectLoad(Node, 2, ARM64::LD1Twov1d, ARM64::dsub0);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectLoad(Node, 2, ARM64::LD1Twov2d, ARM64::qsub0);
      break;
    case Intrinsic::arm64_neon_ld1x3:
      if (VT == MVT::v8i8)
        return SelectLoad(Node, 3, ARM64::LD1Threev8b, ARM64::dsub0);
      else if (VT == MVT::v16i8)
        return SelectLoad(Node, 3, ARM64::LD1Threev16b, ARM64::qsub0);
      else if (VT == MVT::v4i16)
        return SelectLoad(Node, 3, ARM64::LD1Threev4h, ARM64::dsub0);
      else if (VT == MVT::v8i16)
        return SelectLoad(Node, 3, ARM64::LD1Threev8h, ARM64::qsub0);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectLoad(Node, 3, ARM64::LD1Threev2s, ARM64::dsub0);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectLoad(Node, 3, ARM64::LD1Threev4s, ARM64::qsub0);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectLoad(Node, 3, ARM64::LD1Threev1d, ARM64::dsub0);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectLoad(Node, 3, ARM64::LD1Threev2d, ARM64::qsub0);
      break;
    case Intrinsic::arm64_neon_ld1x4:
      if (VT == MVT::v8i8)
        return SelectLoad(Node, 4, ARM64::LD1Fourv8b, ARM64::dsub0);
      else if (VT == MVT::v16i8)
        return SelectLoad(Node, 4, ARM64::LD1Fourv16b, ARM64::qsub0);
      else if (VT == MVT::v4i16)
        return SelectLoad(Node, 4, ARM64::LD1Fourv4h, ARM64::dsub0);
      else if (VT == MVT::v8i16)
        return SelectLoad(Node, 4, ARM64::LD1Fourv8h, ARM64::qsub0);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectLoad(Node, 4, ARM64::LD1Fourv2s, ARM64::dsub0);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectLoad(Node, 4, ARM64::LD1Fourv4s, ARM64::qsub0);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectLoad(Node, 4, ARM64::LD1Fourv1d, ARM64::dsub0);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectLoad(Node, 4, ARM64::LD1Fourv2d, ARM64::qsub0);
      break;
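    // There is no LD2/LD3/LD4 with a .1d arrangement, so the v1i64/v1f64
    // cases of the de-interleaving loads below fall back to the contiguous
    // LD1 multi-register opcodes instead.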
    case Intrinsic::arm64_neon_ld2:
      if (VT == MVT::v8i8)
        return SelectLoad(Node, 2, ARM64::LD2Twov8b, ARM64::dsub0);
      else if (VT == MVT::v16i8)
        return SelectLoad(Node, 2, ARM64::LD2Twov16b, ARM64::qsub0);
      else if (VT == MVT::v4i16)
        return SelectLoad(Node, 2, ARM64::LD2Twov4h, ARM64::dsub0);
      else if (VT == MVT::v8i16)
        return SelectLoad(Node, 2, ARM64::LD2Twov8h, ARM64::qsub0);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectLoad(Node, 2, ARM64::LD2Twov2s, ARM64::dsub0);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectLoad(Node, 2, ARM64::LD2Twov4s, ARM64::qsub0);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectLoad(Node, 2, ARM64::LD1Twov1d, ARM64::dsub0);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectLoad(Node, 2, ARM64::LD2Twov2d, ARM64::qsub0);
      break;
    case Intrinsic::arm64_neon_ld3:
      if (VT == MVT::v8i8)
        return SelectLoad(Node, 3, ARM64::LD3Threev8b, ARM64::dsub0);
      else if (VT == MVT::v16i8)
        return SelectLoad(Node, 3, ARM64::LD3Threev16b, ARM64::qsub0);
      else if (VT == MVT::v4i16)
        return SelectLoad(Node, 3, ARM64::LD3Threev4h, ARM64::dsub0);
      else if (VT == MVT::v8i16)
        return SelectLoad(Node, 3, ARM64::LD3Threev8h, ARM64::qsub0);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectLoad(Node, 3, ARM64::LD3Threev2s, ARM64::dsub0);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectLoad(Node, 3, ARM64::LD3Threev4s, ARM64::qsub0);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectLoad(Node, 3, ARM64::LD1Threev1d, ARM64::dsub0);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectLoad(Node, 3, ARM64::LD3Threev2d, ARM64::qsub0);
      break;
    case Intrinsic::arm64_neon_ld4:
      if (VT == MVT::v8i8)
        return SelectLoad(Node, 4, ARM64::LD4Fourv8b, ARM64::dsub0);
      else if (VT == MVT::v16i8)
        return SelectLoad(Node, 4, ARM64::LD4Fourv16b, ARM64::qsub0);
      else if (VT == MVT::v4i16)
        return SelectLoad(Node, 4, ARM64::LD4Fourv4h, ARM64::dsub0);
      else if (VT == MVT::v8i16)
        return SelectLoad(Node, 4, ARM64::LD4Fourv8h, ARM64::qsub0);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectLoad(Node, 4, ARM64::LD4Fourv2s, ARM64::dsub0);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectLoad(Node, 4, ARM64::LD4Fourv4s, ARM64::qsub0);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectLoad(Node, 4, ARM64::LD1Fourv1d, ARM64::dsub0);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectLoad(Node, 4, ARM64::LD4Fourv2d, ARM64::qsub0);
      break;
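    // The ldNr intrinsics load one structure and replicate it to every lane,
    // so they map onto LD2R/LD3R/LD4R for all arrangements, including .1d.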
    case Intrinsic::arm64_neon_ld2r:
      if (VT == MVT::v8i8)
        return SelectLoad(Node, 2, ARM64::LD2Rv8b, ARM64::dsub0);
      else if (VT == MVT::v16i8)
        return SelectLoad(Node, 2, ARM64::LD2Rv16b, ARM64::qsub0);
      else if (VT == MVT::v4i16)
        return SelectLoad(Node, 2, ARM64::LD2Rv4h, ARM64::dsub0);
      else if (VT == MVT::v8i16)
        return SelectLoad(Node, 2, ARM64::LD2Rv8h, ARM64::qsub0);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectLoad(Node, 2, ARM64::LD2Rv2s, ARM64::dsub0);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectLoad(Node, 2, ARM64::LD2Rv4s, ARM64::qsub0);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectLoad(Node, 2, ARM64::LD2Rv1d, ARM64::dsub0);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectLoad(Node, 2, ARM64::LD2Rv2d, ARM64::qsub0);
      break;
    case Intrinsic::arm64_neon_ld3r:
      if (VT == MVT::v8i8)
        return SelectLoad(Node, 3, ARM64::LD3Rv8b, ARM64::dsub0);
      else if (VT == MVT::v16i8)
        return SelectLoad(Node, 3, ARM64::LD3Rv16b, ARM64::qsub0);
      else if (VT == MVT::v4i16)
        return SelectLoad(Node, 3, ARM64::LD3Rv4h, ARM64::dsub0);
      else if (VT == MVT::v8i16)
        return SelectLoad(Node, 3, ARM64::LD3Rv8h, ARM64::qsub0);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectLoad(Node, 3, ARM64::LD3Rv2s, ARM64::dsub0);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectLoad(Node, 3, ARM64::LD3Rv4s, ARM64::qsub0);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectLoad(Node, 3, ARM64::LD3Rv1d, ARM64::dsub0);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectLoad(Node, 3, ARM64::LD3Rv2d, ARM64::qsub0);
      break;
    case Intrinsic::arm64_neon_ld4r:
      if (VT == MVT::v8i8)
        return SelectLoad(Node, 4, ARM64::LD4Rv8b, ARM64::dsub0);
      else if (VT == MVT::v16i8)
        return SelectLoad(Node, 4, ARM64::LD4Rv16b, ARM64::qsub0);
      else if (VT == MVT::v4i16)
        return SelectLoad(Node, 4, ARM64::LD4Rv4h, ARM64::dsub0);
      else if (VT == MVT::v8i16)
        return SelectLoad(Node, 4, ARM64::LD4Rv8h, ARM64::qsub0);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectLoad(Node, 4, ARM64::LD4Rv2s, ARM64::dsub0);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectLoad(Node, 4, ARM64::LD4Rv4s, ARM64::qsub0);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectLoad(Node, 4, ARM64::LD4Rv1d, ARM64::dsub0);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectLoad(Node, 4, ARM64::LD4Rv2d, ARM64::qsub0);
      break;
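    // Lane loads are keyed on the element size alone, so 64-bit and 128-bit
    // vectors with the same element type share a single LDNi* opcode.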
    case Intrinsic::arm64_neon_ld2lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8)
        return SelectLoadLane(Node, 2, ARM64::LD2i8);
      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
        return SelectLoadLane(Node, 2, ARM64::LD2i16);
      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32)
        return SelectLoadLane(Node, 2, ARM64::LD2i32);
      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64)
        return SelectLoadLane(Node, 2, ARM64::LD2i64);
      break;
    case Intrinsic::arm64_neon_ld3lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8)
        return SelectLoadLane(Node, 3, ARM64::LD3i8);
      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
        return SelectLoadLane(Node, 3, ARM64::LD3i16);
      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32)
        return SelectLoadLane(Node, 3, ARM64::LD3i32);
      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64)
        return SelectLoadLane(Node, 3, ARM64::LD3i64);
      break;
    case Intrinsic::arm64_neon_ld4lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8)
        return SelectLoadLane(Node, 4, ARM64::LD4i8);
      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
        return SelectLoadLane(Node, 4, ARM64::LD4i16);
      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32)
        return SelectLoadLane(Node, 4, ARM64::LD4i32);
      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64)
        return SelectLoadLane(Node, 4, ARM64::LD4i64);
      break;
    }
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
    switch (IntNo) {
    default:
      break;
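    // The tbl/tbx table lookups take two to four table registers; the result
    // type (v8i8 vs. v16i8) picks the 64-bit or 128-bit form, and tbx merges
    // with the destination for out-of-range indices instead of zeroing them.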
    case Intrinsic::arm64_neon_tbl2:
      return SelectTable(Node, 2, VT == MVT::v8i8 ? ARM64::TBLv8i8Two
                                                  : ARM64::TBLv16i8Two,
                         false);
    case Intrinsic::arm64_neon_tbl3:
      return SelectTable(Node, 3, VT == MVT::v8i8 ? ARM64::TBLv8i8Three
                                                  : ARM64::TBLv16i8Three,
                         false);
    case Intrinsic::arm64_neon_tbl4:
      return SelectTable(Node, 4, VT == MVT::v8i8 ? ARM64::TBLv8i8Four
                                                  : ARM64::TBLv16i8Four,
                         false);
    case Intrinsic::arm64_neon_tbx2:
      return SelectTable(Node, 2, VT == MVT::v8i8 ? ARM64::TBXv8i8Two
                                                  : ARM64::TBXv16i8Two,
                         true);
    case Intrinsic::arm64_neon_tbx3:
      return SelectTable(Node, 3, VT == MVT::v8i8 ? ARM64::TBXv8i8Three
                                                  : ARM64::TBXv16i8Three,
                         true);
    case Intrinsic::arm64_neon_tbx4:
      return SelectTable(Node, 4, VT == MVT::v8i8 ? ARM64::TBXv8i8Four
                                                  : ARM64::TBXv16i8Four,
                         true);
    case Intrinsic::arm64_neon_smull:
    case Intrinsic::arm64_neon_umull:
      if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node))
        return N;
      break;
    }
    break;
  }
  case ISD::INTRINSIC_VOID: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    if (Node->getNumOperands() >= 3)
      VT = Node->getOperand(2)->getValueType(0);
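    // Store intrinsics produce no values, so the vector type being stored is
    // taken from the first value operand rather than from the node's result.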
    switch (IntNo) {
    default:
      break;
    case Intrinsic::arm64_neon_st1x2: {
      if (VT == MVT::v8i8)
        return SelectStore(Node, 2, ARM64::ST1Twov8b);
      else if (VT == MVT::v16i8)
        return SelectStore(Node, 2, ARM64::ST1Twov16b);
      else if (VT == MVT::v4i16)
        return SelectStore(Node, 2, ARM64::ST1Twov4h);
      else if (VT == MVT::v8i16)
        return SelectStore(Node, 2, ARM64::ST1Twov8h);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectStore(Node, 2, ARM64::ST1Twov2s);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectStore(Node, 2, ARM64::ST1Twov4s);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectStore(Node, 2, ARM64::ST1Twov2d);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectStore(Node, 2, ARM64::ST1Twov1d);
      break;
    }
    case Intrinsic::arm64_neon_st1x3: {
      if (VT == MVT::v8i8)
        return SelectStore(Node, 3, ARM64::ST1Threev8b);
      else if (VT == MVT::v16i8)
        return SelectStore(Node, 3, ARM64::ST1Threev16b);
      else if (VT == MVT::v4i16)
        return SelectStore(Node, 3, ARM64::ST1Threev4h);
      else if (VT == MVT::v8i16)
        return SelectStore(Node, 3, ARM64::ST1Threev8h);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectStore(Node, 3, ARM64::ST1Threev2s);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectStore(Node, 3, ARM64::ST1Threev4s);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectStore(Node, 3, ARM64::ST1Threev2d);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectStore(Node, 3, ARM64::ST1Threev1d);
      break;
    }
    case Intrinsic::arm64_neon_st1x4: {
      if (VT == MVT::v8i8)
        return SelectStore(Node, 4, ARM64::ST1Fourv8b);
      else if (VT == MVT::v16i8)
        return SelectStore(Node, 4, ARM64::ST1Fourv16b);
      else if (VT == MVT::v4i16)
        return SelectStore(Node, 4, ARM64::ST1Fourv4h);
      else if (VT == MVT::v8i16)
        return SelectStore(Node, 4, ARM64::ST1Fourv8h);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectStore(Node, 4, ARM64::ST1Fourv2s);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectStore(Node, 4, ARM64::ST1Fourv4s);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectStore(Node, 4, ARM64::ST1Fourv2d);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectStore(Node, 4, ARM64::ST1Fourv1d);
      break;
    }
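    // As with the loads above, there is no .1d form of ST2/ST3/ST4, so the
    // v1i64/v1f64 interleaving stores fall back to the ST1 multi-register
    // opcodes.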
    case Intrinsic::arm64_neon_st2: {
      if (VT == MVT::v8i8)
        return SelectStore(Node, 2, ARM64::ST2Twov8b);
      else if (VT == MVT::v16i8)
        return SelectStore(Node, 2, ARM64::ST2Twov16b);
      else if (VT == MVT::v4i16)
        return SelectStore(Node, 2, ARM64::ST2Twov4h);
      else if (VT == MVT::v8i16)
        return SelectStore(Node, 2, ARM64::ST2Twov8h);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectStore(Node, 2, ARM64::ST2Twov2s);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectStore(Node, 2, ARM64::ST2Twov4s);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectStore(Node, 2, ARM64::ST2Twov2d);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectStore(Node, 2, ARM64::ST1Twov1d);
      break;
    }
    case Intrinsic::arm64_neon_st3: {
      if (VT == MVT::v8i8)
        return SelectStore(Node, 3, ARM64::ST3Threev8b);
      else if (VT == MVT::v16i8)
        return SelectStore(Node, 3, ARM64::ST3Threev16b);
      else if (VT == MVT::v4i16)
        return SelectStore(Node, 3, ARM64::ST3Threev4h);
      else if (VT == MVT::v8i16)
        return SelectStore(Node, 3, ARM64::ST3Threev8h);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectStore(Node, 3, ARM64::ST3Threev2s);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectStore(Node, 3, ARM64::ST3Threev4s);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectStore(Node, 3, ARM64::ST3Threev2d);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectStore(Node, 3, ARM64::ST1Threev1d);
      break;
    }
    case Intrinsic::arm64_neon_st4: {
      if (VT == MVT::v8i8)
        return SelectStore(Node, 4, ARM64::ST4Fourv8b);
      else if (VT == MVT::v16i8)
        return SelectStore(Node, 4, ARM64::ST4Fourv16b);
      else if (VT == MVT::v4i16)
        return SelectStore(Node, 4, ARM64::ST4Fourv4h);
      else if (VT == MVT::v8i16)
        return SelectStore(Node, 4, ARM64::ST4Fourv8h);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectStore(Node, 4, ARM64::ST4Fourv2s);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectStore(Node, 4, ARM64::ST4Fourv4s);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectStore(Node, 4, ARM64::ST4Fourv2d);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectStore(Node, 4, ARM64::ST1Fourv1d);
      break;
    }
    case Intrinsic::arm64_neon_st2lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8)
        return SelectStoreLane(Node, 2, ARM64::ST2i8);
      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
        return SelectStoreLane(Node, 2, ARM64::ST2i16);
      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32)
        return SelectStoreLane(Node, 2, ARM64::ST2i32);
      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64)
        return SelectStoreLane(Node, 2, ARM64::ST2i64);
      break;
    }
    case Intrinsic::arm64_neon_st3lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8)
        return SelectStoreLane(Node, 3, ARM64::ST3i8);
      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
        return SelectStoreLane(Node, 3, ARM64::ST3i16);
      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32)
        return SelectStoreLane(Node, 3, ARM64::ST3i32);
      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64)
        return SelectStoreLane(Node, 3, ARM64::ST3i64);
      break;
    }
    case Intrinsic::arm64_neon_st4lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8)
        return SelectStoreLane(Node, 4, ARM64::ST4i8);
      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
        return SelectStoreLane(Node, 4, ARM64::ST4i16);
      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32)
        return SelectStoreLane(Node, 4, ARM64::ST4i32);
      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64)
        return SelectStoreLane(Node, 4, ARM64::ST4i64);
      break;
    }
    }
    break;
  }
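  // FCEIL/FFLOOR/FTRUNC/FROUND can often be selected directly to the FRINT
  // family of rounding instructions via SelectLIBM; anything it rejects falls
  // through to the generated matcher below.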

  case ISD::FCEIL:
  case ISD::FFLOOR:
  case ISD::FTRUNC:
  case ISD::FROUND:
    if (SDNode *I = SelectLIBM(Node))
      return I;
    break;
  }

  // Select the default instruction.
  ResNode = SelectCode(Node);

  DEBUG(errs() << "=> ");
  if (ResNode == NULL || ResNode == Node)
    DEBUG(Node->dump(CurDAG));
  else
    DEBUG(ResNode->dump(CurDAG));
  DEBUG(errs() << "\n");

  return ResNode;
}

/// createARM64ISelDag - This pass converts a legalized DAG into an
/// ARM64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createARM64ISelDag(ARM64TargetMachine &TM,
                                       CodeGenOpt::Level OptLevel) {
  return new ARM64DAGToDAGISel(TM, OptLevel);
}