+/// PairSRegs - Combine two S-register values into one D-register value by
+/// emitting a REG_SEQUENCE node of type VT with V0 at ssub_0 and V1 at ssub_1.
+/// The debug location is taken from V0's node.
+///
+SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
+  SDValue LoIdx = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
+  SDValue HiIdx = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
+  // The operand list is a flat sequence of (value, subregister index) pairs.
+  const SDValue SeqOps[] = { V0, LoIdx, V1, HiIdx };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+                                V0.getNode()->getDebugLoc(), VT, SeqOps, 4);
+}
+
+/// PairDRegs - Combine two D-register values into one quad-register value by
+/// emitting a REG_SEQUENCE node of type VT with V0 at dsub_0 and V1 at dsub_1.
+/// The debug location is taken from V0's node.
+///
+SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
+  SDValue LoIdx = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+  SDValue HiIdx = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+  // The operand list is a flat sequence of (value, subregister index) pairs.
+  const SDValue SeqOps[] = { V0, LoIdx, V1, HiIdx };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+                                V0.getNode()->getDebugLoc(), VT, SeqOps, 4);
+}
+
+/// PairQRegs - Combine two Q-register values into a value spanning four
+/// consecutive D registers by emitting a REG_SEQUENCE node of type VT with V0
+/// at qsub_0 and V1 at qsub_1. The debug location is taken from V0's node.
+///
+SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
+  SDValue LoIdx = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+  SDValue HiIdx = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+  // The operand list is a flat sequence of (value, subregister index) pairs.
+  const SDValue SeqOps[] = { V0, LoIdx, V1, HiIdx };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+                                V0.getNode()->getDebugLoc(), VT, SeqOps, 4);
+}
+
+/// QuadSRegs - Emit a REG_SEQUENCE node of type VT that places V0..V3 at
+/// ssub_0..ssub_3 respectively, i.e. four consecutive S registers. The debug
+/// location is taken from V0's node.
+///
+SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
+                                   SDValue V2, SDValue V3) {
+  const unsigned SubIdx[4] = { ARM::ssub_0, ARM::ssub_1,
+                               ARM::ssub_2, ARM::ssub_3 };
+  const SDValue Parts[4] = { V0, V1, V2, V3 };
+
+  // Interleave each value with the index of the subregister it is placed in.
+  SDValue SeqOps[8];
+  for (unsigned i = 0; i != 4; ++i) {
+    SeqOps[2 * i] = Parts[i];
+    SeqOps[2 * i + 1] = CurDAG->getTargetConstant(SubIdx[i], MVT::i32);
+  }
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+                                V0.getNode()->getDebugLoc(), VT, SeqOps, 8);
+}
+
+/// QuadDRegs - Emit a REG_SEQUENCE node of type VT that places V0..V3 at
+/// dsub_0..dsub_3 respectively, i.e. four consecutive D registers. The debug
+/// location is taken from V0's node.
+///
+SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
+                                   SDValue V2, SDValue V3) {
+  const unsigned SubIdx[4] = { ARM::dsub_0, ARM::dsub_1,
+                               ARM::dsub_2, ARM::dsub_3 };
+  const SDValue Parts[4] = { V0, V1, V2, V3 };
+
+  // Interleave each value with the index of the subregister it is placed in.
+  SDValue SeqOps[8];
+  for (unsigned i = 0; i != 4; ++i) {
+    SeqOps[2 * i] = Parts[i];
+    SeqOps[2 * i + 1] = CurDAG->getTargetConstant(SubIdx[i], MVT::i32);
+  }
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+                                V0.getNode()->getDebugLoc(), VT, SeqOps, 8);
+}
+
+/// QuadQRegs - Emit a REG_SEQUENCE node of type VT that places V0..V3 at
+/// qsub_0..qsub_3 respectively, i.e. four consecutive Q registers. The debug
+/// location is taken from V0's node.
+///
+SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
+                                   SDValue V2, SDValue V3) {
+  const unsigned SubIdx[4] = { ARM::qsub_0, ARM::qsub_1,
+                               ARM::qsub_2, ARM::qsub_3 };
+  const SDValue Parts[4] = { V0, V1, V2, V3 };
+
+  // Interleave each value with the index of the subregister it is placed in.
+  SDValue SeqOps[8];
+  for (unsigned i = 0; i != 4; ++i) {
+    SeqOps[2 * i] = Parts[i];
+    SeqOps[2 * i + 1] = CurDAG->getTargetConstant(SubIdx[i], MVT::i32);
+  }
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+                                V0.getNode()->getDebugLoc(), VT, SeqOps, 8);
+}
+
+/// OctoDRegs - Emit a REG_SEQUENCE node of type VT that places V0..V7 at
+/// dsub_0..dsub_7 respectively, i.e. eight consecutive D registers. The debug
+/// location is taken from V0's node.
+///
+SDNode *ARMDAGToDAGISel::OctoDRegs(EVT VT, SDValue V0, SDValue V1,
+                                   SDValue V2, SDValue V3,
+                                   SDValue V4, SDValue V5,
+                                   SDValue V6, SDValue V7) {
+  const unsigned SubIdx[8] = { ARM::dsub_0, ARM::dsub_1,
+                               ARM::dsub_2, ARM::dsub_3,
+                               ARM::dsub_4, ARM::dsub_5,
+                               ARM::dsub_6, ARM::dsub_7 };
+  const SDValue Parts[8] = { V0, V1, V2, V3, V4, V5, V6, V7 };
+
+  // Interleave each value with the index of the subregister it is placed in.
+  SDValue SeqOps[16];
+  for (unsigned i = 0; i != 8; ++i) {
+    SeqOps[2 * i] = Parts[i];
+    SeqOps[2 * i + 1] = CurDAG->getTargetConstant(SubIdx[i], MVT::i32);
+  }
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+                                V0.getNode()->getDebugLoc(), VT, SeqOps, 16);
+}
+
+/// GetNEONSubregVT - Map the type of a 128-bit NEON vector to the type of one
+/// of its 64-bit subregisters: same element type, half as many elements.
+/// Types outside the handled set hit llvm_unreachable.
+static EVT GetNEONSubregVT(EVT VT) {
+  const MVT QuadVT = VT.getSimpleVT();
+  switch (QuadVT.SimpleTy) {
+  case MVT::v16i8:
+    return MVT::v8i8;
+  case MVT::v8i16:
+    return MVT::v4i16;
+  case MVT::v4i32:
+    return MVT::v2i32;
+  case MVT::v4f32:
+    return MVT::v2f32;
+  case MVT::v2i64:
+    return MVT::v1i64;
+  default:
+    llvm_unreachable("unhandled NEON type");
+  }
+}
+
+/// SelectVLD - Select machine nodes for a NEON multi-vector load node N that
+/// produces NumVecs (1-4) vector results followed by a chain result.
+///
+/// Operand 0 of N is used as the incoming chain and operand 2 as the address
+/// handed to SelectAddrMode6. The opcode tables are indexed by a value in
+/// [0,3] derived from the result vector type (see the switch below):
+///  - DOpcodes:  used when the result type is a 64-bit vector.
+///  - QOpcodes0: used for 128-bit vectors; for NumVecs > 2 it is the first of
+///               two loads (the one whose results fill the even D slots).
+///  - QOpcodes1: used only for 128-bit vectors with NumVecs > 2, as the second
+///               load (the one whose results fill the odd D slots).
+///
+/// Returns the new load node only for a single 64-bit vector. In every other
+/// successful case the uses of N's results are rewritten here via ReplaceUses
+/// and NULL is returned. NULL is also returned when SelectAddrMode6 fails.
+SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
+ unsigned *DOpcodes, unsigned *QOpcodes0,
+ unsigned *QOpcodes1) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
+ return NULL;
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ bool is64BitVector = VT.is64BitVector();
+
+ // Pick the index into the opcode tables. A 64-bit type and the 128-bit type
+ // with the same element width map to the same index; they are looked up in
+ // different tables (DOpcodes vs. QOpcodes0/QOpcodes1).
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ case MVT::v1i64: OpcodeIndex = 3; break;
+ // Quad-register operations:
+ case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v8i16: OpcodeIndex = 1; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
+ break;
+ }
+
+ // Predicate operands shared by every load built below: the value returned
+ // by getAL() and register 0 as the accompanying predicate register
+ // (presumably "always execute" -- confirm against getAL's definition).
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ if (is64BitVector) {
+ unsigned Opc = DOpcodes[OpcodeIndex];
+ const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
+ // One result of type VT per loaded vector, then the chain.
+ std::vector<EVT> ResTys(NumVecs, VT);
+ ResTys.push_back(MVT::Other);
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
+ // A single vector needs no register sequence; return the node itself.
+ if (NumVecs < 2)
+ return VLd;
+
+ SDValue RegSeq;
+ SDValue V0 = SDValue(VLd, 0);
+ SDValue V1 = SDValue(VLd, 1);
+
+ // Form a REG_SEQUENCE to force register allocation.
+ if (NumVecs == 2)
+ RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ else {
+ SDValue V2 = SDValue(VLd, 2);
+ // If it's a vld3, form a quad D-register but discard the last part.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : SDValue(VLd, 3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+
+ // Redirect each vector result of N to a subregister extract from RegSeq.
+ // The dsub_0+Vec arithmetic relies on the numbering checked by the assert.
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec,
+ dl, VT, RegSeq);
+ ReplaceUses(SDValue(N, Vec), D);
+ }
+ // N's chain result is replaced by the chain result of the new load.
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs));
+ return NULL;
+ }
+
+ // 128-bit vectors from here on. The load nodes produce results of the
+ // 64-bit half type, which are then combined into the 128-bit results.
+ EVT RegVT = GetNEONSubregVT(VT);
+ if (NumVecs <= 2) {
+ // Quad registers are directly supported for VLD1 and VLD2,
+ // loading pairs of D regs.
+ unsigned Opc = QOpcodes0[OpcodeIndex];
+ const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
+ // Two RegVT results per requested vector, then the chain.
+ std::vector<EVT> ResTys(2 * NumVecs, RegVT);
+ ResTys.push_back(MVT::Other);
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
+ Chain = SDValue(VLd, 2 * NumVecs);
+
+ // Combine the even and odd subregs to produce the result.
+ if (NumVecs == 1) {
+ SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1));
+ ReplaceUses(SDValue(N, 0), SDValue(Q, 0));
+ } else {
+ // Build one sequence from the four D results, then extract the two
+ // Q-sized subregisters as the results of N.
+ SDValue QQ = SDValue(QuadDRegs(MVT::v4i64,
+ SDValue(VLd, 0), SDValue(VLd, 1),
+ SDValue(VLd, 2), SDValue(VLd, 3)), 0);
+ SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
+ SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
+ ReplaceUses(SDValue(N, 0), Q0);
+ ReplaceUses(SDValue(N, 1), Q1);
+ }
+ } else {
+ // Otherwise, quad registers are loaded with two separate instructions,
+ // where one loads the even registers and the other loads the odd registers.
+
+ // Result list of each load: NumVecs values of RegVT, then a value of the
+ // address type (result NumVecs, fed to the second load as its address),
+ // then the chain (result NumVecs+1).
+ std::vector<EVT> ResTys(NumVecs, RegVT);
+ ResTys.push_back(MemAddr.getValueType());
+ ResTys.push_back(MVT::Other);
+
+ // Load the even subregs.
+ // NOTE(review): the extra Reg0 after Align is presumably the post-access
+ // address offset operand -- confirm against the instruction definitions.
+ unsigned Opc = QOpcodes0[OpcodeIndex];
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, Pred, Reg0, Chain };
+ SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 6);
+ Chain = SDValue(VLdA, NumVecs+1);
+
+ // Load the odd subregs.
+ // The address operand is the address-typed result of the first load, and
+ // the chain is threaded through so the second load follows the first.
+ Opc = QOpcodes1[OpcodeIndex];
+ const SDValue OpsB[] = { SDValue(VLdA, NumVecs),
+ Align, Reg0, Pred, Reg0, Chain };
+ SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6);
+ Chain = SDValue(VLdB, NumVecs+1);
+
+ // Interleave the results: even slots come from VLdA, odd slots from VLdB.
+ // For NumVecs == 3 the last two slots are filled with IMPLICIT_DEF values
+ // so that an eight-element sequence can still be formed.
+ SDValue V0 = SDValue(VLdA, 0);
+ SDValue V1 = SDValue(VLdB, 0);
+ SDValue V2 = SDValue(VLdA, 1);
+ SDValue V3 = SDValue(VLdB, 1);
+ SDValue V4 = SDValue(VLdA, 2);
+ SDValue V5 = SDValue(VLdB, 2);
+ SDValue V6 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0)
+ : SDValue(VLdA, 3);
+ SDValue V7 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0)
+ : SDValue(VLdB, 3);
+ SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3,
+ V4, V5, V6, V7), 0);
+
+ // Extract out the 3 / 4 Q registers.
+ assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
+ dl, VT, RegSeq);
+ ReplaceUses(SDValue(N, Vec), Q);
+ }
+ }
+ // N's chain result is replaced by the chain of the last load emitted above.
+ ReplaceUses(SDValue(N, NumVecs), Chain);
+ return NULL;
+}
+
+/// SelectVST - Select machine nodes for a NEON multi-vector store node N that
+/// stores NumVecs (1-4) vectors.
+///
+/// Operand 0 of N is used as the incoming chain, operand 2 as the address
+/// handed to SelectAddrMode6, and operands 3 .. 3+NumVecs-1 as the vectors to
+/// store. The opcode tables are indexed by a value in [0,3] derived from the
+/// type of operand 3 (see the switch below):
+///  - DOpcodes:  used when the vectors are 64-bit.
+///  - QOpcodes0: used for 128-bit vectors; for NumVecs > 2 it is the first of
+///               two stores (the one storing the even D subregisters).
+///  - QOpcodes1: used only for 128-bit vectors with NumVecs > 2, as the second
+///               store (the one storing the odd D subregisters).
+///
+/// Returns the new store node for 64-bit vectors and for 128-bit vectors with
+/// NumVecs <= 2. For 128-bit vectors with NumVecs > 2, two stores are emitted,
+/// result 0 of N is replaced with the chain of the second store, and NULL is
+/// returned. NULL is also returned when SelectAddrMode6 fails.
+SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
+ unsigned *DOpcodes, unsigned *QOpcodes0,
+ unsigned *QOpcodes1) {
+ assert(NumVecs >=1 && NumVecs <= 4 && "VST NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
+ return NULL;
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getOperand(3).getValueType();
+ bool is64BitVector = VT.is64BitVector();
+
+ // Pick the index into the opcode tables. A 64-bit type and the 128-bit type
+ // with the same element width map to the same index; they are looked up in
+ // different tables (DOpcodes vs. QOpcodes0/QOpcodes1).
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vst type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ case MVT::v1i64: OpcodeIndex = 3; break;
+ // Quad-register operations:
+ case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v8i16: OpcodeIndex = 1; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VST1");
+ break;
+ }
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+
+ // Every store built below starts its operand list with the address and
+ // alignment produced by SelectAddrMode6.
+ SmallVector<SDValue, 10> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+
+ if (is64BitVector) {
+ if (NumVecs >= 2) {
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+
+ // Form a REG_SEQUENCE to force register allocation.
+ if (NumVecs == 2)
+ RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(2+3);
+ // If it's a vst3, form a quad D-register and leave the last part as
+ // an undef.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(3+3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+
+ // Now extract the D registers back out.
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
+ RegSeq));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT,
+ RegSeq));
+ if (NumVecs > 2)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,
+ RegSeq));
+ if (NumVecs > 3)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
+ RegSeq));
+ } else {
+ // Single vector: pass the operand through without a register sequence.
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(N->getOperand(Vec+3));
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ unsigned Opc = DOpcodes[OpcodeIndex];
+ // Operand count: address, alignment, NumVecs vectors, two predicate
+ // operands and the chain.
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
+ }
+
+ // 128-bit vectors from here on. The stores take operands of the 64-bit
+ // half type, extracted from the 128-bit inputs.
+ EVT RegVT = GetNEONSubregVT(VT);
+ if (NumVecs <= 2) {
+ // Quad registers are directly supported for VST1 and VST2,
+ // storing pairs of D regs.
+ unsigned Opc = QOpcodes0[OpcodeIndex];
+ if (NumVecs == 2) {
+ // First extract the pair of Q registers.
+ SDValue Q0 = N->getOperand(3);
+ SDValue Q1 = N->getOperand(4);
+
+ // Form a QQ register.
+ SDValue QQ = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
+
+ // Now extract the D registers back out.
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+ QQ));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+ QQ));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, RegVT,
+ QQ));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, RegVT,
+ QQ));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ // 5 fixed operands (address, alignment, two predicate operands, chain)
+ // plus the 4 D subregisters.
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 5 + 4);
+ } else {
+ // Single Q vector: split it directly into its two D halves.
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+ N->getOperand(Vec+3)));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+ N->getOperand(Vec+3)));
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
+ 5 + 2 * NumVecs);
+ }
+ }
+
+ // Otherwise, quad registers are stored with two separate instructions,
+ // where one stores the even registers and the other stores the odd registers.
+
+ // Form the QQQQ REG_SEQUENCE.
+ // V[2*k] and V[2*k+1] hold the dsub_0 and dsub_1 halves of input vector k.
+ SDValue V[8];
+ for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
+ V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+ N->getOperand(Vec+3));
+ V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+ N->getOperand(Vec+3));
+ }
+ // With only three vectors the last two slots are filled with one shared
+ // IMPLICIT_DEF value so that an eight-element sequence can still be formed.
+ if (NumVecs == 3)
+ V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
+
+ SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
+ V[4], V[5], V[6], V[7]), 0);
+
+ // Store the even D registers.
+ // Operand layout from here: [0] address, [1] alignment, [2] offset,
+ // [3 .. NumVecs+2] D subregisters, then Pred, Reg0 and the chain at
+ // index NumVecs+5.
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ Ops.push_back(Reg0); // post-access address offset
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
+ RegVT, RegSeq));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ unsigned Opc = QOpcodes0[OpcodeIndex];
+ // The node has two results: an address-typed value (result 0) and the
+ // chain (result 1).
+ SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+ MVT::Other, Ops.data(), NumVecs+6);
+ Chain = SDValue(VStA, 1);
+
+ // Store the odd D registers.
+ // The operand list is reused in place: the address becomes result 0 of the
+ // first store, the D subregister slots are overwritten with the odd
+ // subregisters, and the chain slot is updated. Alignment, offset and the
+ // predicate operands are left as they were.
+ Ops[0] = SDValue(VStA, 0); // MemAddr
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
+ RegVT, RegSeq);
+ Ops[NumVecs+5] = Chain;
+ Opc = QOpcodes1[OpcodeIndex];
+ SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+ MVT::Other, Ops.data(), NumVecs+6);
+ Chain = SDValue(VStB, 1);
+ // Result 0 of N is replaced by the chain of the second store.
+ ReplaceUses(SDValue(N, 0), Chain);
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
+ unsigned NumVecs, unsigned *DOpcodes,
+ unsigned *QOpcodes0,
+ unsigned *QOpcodes1) {
+ assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");