//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMAddressingModes.h"
-#include "ARMISelLowering.h"
#include "ARMTargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
+static cl::opt<bool>
+DisableShifterOp("disable-shifter-op", cl::Hidden,
+ cl::desc("Disable isel of shifter-op"),
+ cl::init(false));
+
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
}
SDNode *Select(SDNode *N);
- virtual void InstructionSelect();
+
bool SelectShifterOperandReg(SDNode *Op, SDValue N, SDValue &A,
SDValue &B, SDValue &C);
bool SelectAddrMode2(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Mode);
bool SelectAddrMode5(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Offset);
- bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Update,
- SDValue &Opc, SDValue &Align);
+ bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Align);
bool SelectAddrModePC(SDNode *Op, SDValue N, SDValue &Offset,
SDValue &Label);
SDValue &OffImm);
bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
SDValue &OffImm);
- bool SelectT2AddrModeImm8s4(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &OffImm);
bool SelectT2AddrModeSoReg(SDNode *Op, SDValue N, SDValue &Base,
SDValue &OffReg, SDValue &ShImm);
+ inline bool Pred_so_imm(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ return ARM_AM::getSOImmVal(N->getZExtValue()) != -1;
+ }
+
+ inline bool Pred_t2_so_imm(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ return ARM_AM::getT2SOImmVal(N->getZExtValue()) != -1;
+ }
+
// Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"
SDNode *SelectARMIndexedLoad(SDNode *N);
SDNode *SelectT2IndexedLoad(SDNode *N);
- /// SelectDYN_ALLOC - Select dynamic alloc for Thumb.
- SDNode *SelectDYN_ALLOC(SDNode *N);
-
- /// SelectVLD - Select NEON load intrinsics. NumVecs should
- /// be 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// SelectVLD - Select NEON load intrinsics. NumVecs should be
+ /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// loads of D registers and even subregs and odd subregs of Q registers.
- /// For NumVecs == 2, QOpcodes1 is not used.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
SDNode *SelectVLD(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
unsigned *QOpcodes0, unsigned *QOpcodes1);
/// SelectVST - Select NEON store intrinsics. NumVecs should
- /// be 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// stores of D registers and even subregs and odd subregs of Q registers.
- /// For NumVecs == 2, QOpcodes1 is not used.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
SDNode *SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
unsigned *QOpcodes0, unsigned *QOpcodes1);
/// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
/// be 2, 3 or 4. The opcode arrays specify the instructions used for
- /// load/store of D registers and even subregs and odd subregs of Q registers.
+ /// load/store of D registers and Q registers.
SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned NumVecs,
- unsigned *DOpcodes, unsigned *QOpcodes0,
- unsigned *QOpcodes1);
+ unsigned *DOpcodes, unsigned *QOpcodes);
+
+ /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
+ /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
+ /// generated to force the table registers to be consecutive.
+ SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
/// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
- SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, unsigned Opc);
+ SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
/// SelectCMOVOp - Select CMOV instructions for ARM.
SDNode *SelectCMOVOp(SDNode *N);
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
+ SDNode *SelectConcatVector(SDNode *N);
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
char ConstraintCode,
std::vector<SDValue> &OutOps);
- /// PairDRegs - Insert a pair of double registers into an implicit def to
- /// form a quad register.
+ // Form pairs of consecutive S, D, or Q registers.
+ SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1);
SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
+ SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1);
+
+ // Form sequences of 4 consecutive S, D, or Q registers.
+ SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
};
}
}
-void ARMDAGToDAGISel::InstructionSelect() {
- SelectRoot(*CurDAG);
- CurDAG->RemoveDeadNodes();
-}
-
bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op,
SDValue N,
SDValue &BaseReg,
SDValue &ShReg,
SDValue &Opc) {
+ if (DisableShifterOp)
+ return false;
+
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
// Don't match base register only case. That is matched to a separate
bool ARMDAGToDAGISel::SelectAddrMode4(SDNode *Op, SDValue N,
SDValue &Addr, SDValue &Mode) {
Addr = N;
- Mode = CurDAG->getTargetConstant(0, MVT::i32);
+ Mode = CurDAG->getTargetConstant(ARM_AM::getAM4ModeImm(ARM_AM::ia), MVT::i32);
return true;
}
}
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Op, SDValue N,
- SDValue &Addr, SDValue &Update,
- SDValue &Opc, SDValue &Align) {
+ SDValue &Addr, SDValue &Align) {
Addr = N;
- // Default to no writeback.
- Update = CurDAG->getRegister(0, MVT::i32);
- Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32);
// Default to no alignment.
Align = CurDAG->getTargetConstant(0, MVT::i32);
return true;
bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDNode *Op, SDValue N,
SDValue &Base, SDValue &Offset){
// FIXME dl should come from the parent load or store, not the address
- DebugLoc dl = Op->getDebugLoc();
if (N.getOpcode() != ISD::ADD) {
ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
- if (!NC || NC->getZExtValue() != 0)
+ if (!NC || !NC->isNullValue())
return false;
Base = Offset = N;
bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDNode *Op, SDValue N,
SDValue &BaseReg,
SDValue &Opc) {
+ if (DisableShifterOp)
+ return false;
+
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
// Don't match base register only case. That is matched to a separate
return false;
}
-bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDNode *Op, SDValue N,
- SDValue &Base, SDValue &OffImm) {
- if (N.getOpcode() == ISD::ADD) {
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int RHSC = (int)RHS->getZExtValue();
- if (((RHSC & 0x3) == 0) &&
- ((RHSC >= 0 && RHSC < 0x400) || (RHSC < 0 && RHSC > -0x400))) { // 8 bits.
- Base = N.getOperand(0);
- OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
- return true;
- }
- }
- } else if (N.getOpcode() == ISD::SUB) {
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int RHSC = (int)RHS->getZExtValue();
- if (((RHSC & 0x3) == 0) && (RHSC >= 0 && RHSC < 0x400)) { // 8 bits.
- Base = N.getOperand(0);
- OffImm = CurDAG->getTargetConstant(-RHSC, MVT::i32);
- return true;
- }
- }
- }
-
- return false;
-}
-
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDNode *Op, SDValue N,
SDValue &Base,
SDValue &OffReg, SDValue &ShImm) {
return NULL;
}
-SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDNode *N) {
- DebugLoc dl = N->getDebugLoc();
- EVT VT = N->getValueType(0);
- SDValue Chain = N->getOperand(0);
- SDValue Size = N->getOperand(1);
- SDValue Align = N->getOperand(2);
- SDValue SP = CurDAG->getRegister(ARM::SP, MVT::i32);
- int32_t AlignVal = cast<ConstantSDNode>(Align)->getSExtValue();
- if (AlignVal < 0)
- // We need to align the stack. Use Thumb1 tAND which is the only thumb
- // instruction that can read and write SP. This matches to a pseudo
- // instruction that has a chain to ensure the result is written back to
- // the stack pointer.
- SP = SDValue(CurDAG->getMachineNode(ARM::tANDsp, dl, VT, SP, Align), 0);
-
- bool isC = isa<ConstantSDNode>(Size);
- uint32_t C = isC ? cast<ConstantSDNode>(Size)->getZExtValue() : ~0UL;
- // Handle the most common case for both Thumb1 and Thumb2:
- // tSUBspi - immediate is between 0 ... 508 inclusive.
- if (C <= 508 && ((C & 3) == 0))
- // FIXME: tSUBspi encode scale 4 implicitly.
- return CurDAG->SelectNodeTo(N, ARM::tSUBspi_, VT, MVT::Other, SP,
- CurDAG->getTargetConstant(C/4, MVT::i32),
- Chain);
-
- if (Subtarget->isThumb1Only()) {
- // Use tADDspr since Thumb1 does not have a sub r, sp, r. ARMISelLowering
- // should have negated the size operand already. FIXME: We can't insert
- // new target independent node at this stage so we are forced to negate
- // it earlier. Is there a better solution?
- return CurDAG->SelectNodeTo(N, ARM::tADDspr_, VT, MVT::Other, SP, Size,
- Chain);
- } else if (Subtarget->isThumb2()) {
- if (isC && Predicate_t2_so_imm(Size.getNode())) {
- // t2SUBrSPi
- SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain };
- return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi_, VT, MVT::Other, Ops, 3);
- } else if (isC && Predicate_imm0_4095(Size.getNode())) {
- // t2SUBrSPi12
- SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain };
- return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi12_, VT, MVT::Other, Ops, 3);
- } else {
- // t2SUBrSPs
- SDValue Ops[] = { SP, Size,
- getI32Imm(ARM_AM::getSORegOpc(ARM_AM::lsl,0)), Chain };
- return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPs_, VT, MVT::Other, Ops, 4);
- }
- }
-
- // FIXME: Add ADD / SUB sp instructions for ARM.
- return 0;
+/// PairSRegs - Form a D register from a pair of S registers.
+///
+SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
}
-/// PairDRegs - Insert a pair of double registers into an implicit def to
-/// form a quad register.
+/// PairDRegs - Form a quad register from a pair of D registers.
+///
SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
- SDValue Undef =
- SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, dl, VT), 0);
- SDValue SubReg0 = CurDAG->getTargetConstant(ARM::DSUBREG_0, MVT::i32);
- SDValue SubReg1 = CurDAG->getTargetConstant(ARM::DSUBREG_1, MVT::i32);
- SDNode *Pair = CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl,
- VT, Undef, V0, SubReg0);
- return CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl,
- VT, SDValue(Pair, 0), V1, SubReg1);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
}
-/// GetNEONSubregVT - Given a type for a 128-bit NEON vector, return the type
-/// for a 64-bit subregister of the vector.
-static EVT GetNEONSubregVT(EVT VT) {
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("unhandled NEON type");
- case MVT::v16i8: return MVT::v8i8;
- case MVT::v8i16: return MVT::v4i16;
- case MVT::v4f32: return MVT::v2f32;
- case MVT::v4i32: return MVT::v2i32;
- case MVT::v2i64: return MVT::v1i64;
- }
+/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers.
+///
+SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+}
+
+/// QuadSRegs - Form 4 consecutive S registers.
+///
+SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+}
+
+/// QuadDRegs - Form 4 consecutive D registers.
+///
+SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+}
+
+/// QuadQRegs - Form 4 consecutive Q registers.
+///
+SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
}
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
- assert(NumVecs >=2 && NumVecs <= 4 && "VLD NumVecs out-of-range");
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
- SDValue MemAddr, MemUpdate, MemOpc, Align;
- if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
+ SDValue MemAddr, Align;
+ if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
return NULL;
SDValue Chain = N->getOperand(0);
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
+ break;
+ }
+
+ EVT ResTy;
+ if (NumVecs == 1)
+ ResTy = VT;
+ else {
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
}
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue SuperReg;
if (is64BitVector) {
unsigned Opc = DOpcodes[OpcodeIndex];
- const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align,
- Pred, PredReg, Chain };
- std::vector<EVT> ResTys(NumVecs, VT);
- ResTys.push_back(MVT::Other);
- return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7);
- }
+ const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
+ if (NumVecs == 1)
+ return VLd;
- EVT RegVT = GetNEONSubregVT(VT);
- if (NumVecs == 2) {
- // Quad registers are directly supported for VLD2,
- // loading 2 pairs of D regs.
- unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align,
- Pred, PredReg, Chain };
- std::vector<EVT> ResTys(4, VT);
- ResTys.push_back(MVT::Other);
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7);
- Chain = SDValue(VLd, 4);
-
- // Combine the even and odd subregs to produce the result.
+ SuperReg = SDValue(VLd, 0);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1));
- ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
+ SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec,
+ dl, VT, SuperReg);
+ ReplaceUses(SDValue(N, Vec), D);
}
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
+ return NULL;
+ }
+
+ if (NumVecs <= 2) {
+ // Quad registers are directly supported for VLD1 and VLD2,
+ // loading pairs of D regs.
+ unsigned Opc = QOpcodes0[OpcodeIndex];
+ const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
+ if (NumVecs == 1)
+ return VLd;
+
+ SuperReg = SDValue(VLd, 0);
+ Chain = SDValue(VLd, 1);
+
} else {
// Otherwise, quad registers are loaded with two separate instructions,
// where one loads the even registers and the other loads the odd registers.
-
- // Enable writeback to the address register.
- MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
-
- std::vector<EVT> ResTys(NumVecs, RegVT);
- ResTys.push_back(MemAddr.getValueType());
- ResTys.push_back(MVT::Other);
+ EVT AddrTy = MemAddr.getValueType();
// Load the even subregs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align,
- Pred, PredReg, Chain };
- SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 7);
- Chain = SDValue(VLdA, NumVecs+1);
+ SDValue ImplDef =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
+ SDNode *VLdA =
+ CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsA, 7);
+ Chain = SDValue(VLdA, 2);
// Load the odd subregs.
Opc = QOpcodes1[OpcodeIndex];
- const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc,
- Align, Pred, PredReg, Chain };
- SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 7);
- Chain = SDValue(VLdB, NumVecs+1);
+ const SDValue OpsB[] = { SDValue(VLdA, 1), Align, Reg0, SDValue(VLdA, 0),
+ Pred, Reg0, Chain };
+ SDNode *VLdB =
+ CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsB, 7);
+ SuperReg = SDValue(VLdB, 0);
+ Chain = SDValue(VLdB, 2);
+ }
- // Combine the even and odd subregs to produce the result.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec));
- ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
- }
+ // Extract out the Q registers.
+ assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
+ dl, VT, SuperReg);
+ ReplaceUses(SDValue(N, Vec), Q);
}
ReplaceUses(SDValue(N, NumVecs), Chain);
return NULL;
SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
- assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range");
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
- SDValue MemAddr, MemUpdate, MemOpc, Align;
- if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
+ SDValue MemAddr, Align;
+ if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
return NULL;
SDValue Chain = N->getOperand(0);
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VST1");
+ break;
}
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SmallVector<SDValue, 8> Ops;
+ SmallVector<SDValue, 7> Ops;
Ops.push_back(MemAddr);
- Ops.push_back(MemUpdate);
- Ops.push_back(MemOpc);
Ops.push_back(Align);
if (is64BitVector) {
- unsigned Opc = DOpcodes[OpcodeIndex];
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(N->getOperand(Vec+3));
+ if (NumVecs == 1) {
+ Ops.push_back(N->getOperand(3));
+ } else {
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+
+ // Form a REG_SEQUENCE to force register allocation.
+ if (NumVecs == 2)
+ RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(2+3);
+ // If it's a vld3, form a quad D-register and leave the last part as
+ // an undef.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(3+3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+ Ops.push_back(RegSeq);
+ }
Ops.push_back(Pred);
- Ops.push_back(PredReg);
+ Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+7);
+ unsigned Opc = DOpcodes[OpcodeIndex];
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
}
- EVT RegVT = GetNEONSubregVT(VT);
- if (NumVecs == 2) {
- // Quad registers are directly supported for VST2,
- // storing 2 pairs of D regs.
+ if (NumVecs <= 2) {
+ // Quad registers are directly supported for VST1 and VST2.
unsigned Opc = QOpcodes0[OpcodeIndex];
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
- N->getOperand(Vec+3)));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
- N->getOperand(Vec+3)));
+ if (NumVecs == 1) {
+ Ops.push_back(N->getOperand(3));
+ } else {
+ // Form a QQ register.
+ SDValue Q0 = N->getOperand(3);
+ SDValue Q1 = N->getOperand(4);
+ Ops.push_back(SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0));
}
Ops.push_back(Pred);
- Ops.push_back(PredReg);
+ Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 11);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
}
// Otherwise, quad registers are stored with two separate instructions,
// where one stores the even registers and the other stores the odd registers.
- // Enable writeback to the address register.
- MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
-
- // Store the even subregs.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
- N->getOperand(Vec+3)));
+ // Form the QQQQ REG_SEQUENCE.
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+ SDValue V2 = N->getOperand(2+3);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(3+3);
+ SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
+
+ // Store the even D registers.
+ Ops.push_back(Reg0); // post-access address offset
+ Ops.push_back(RegSeq);
Ops.push_back(Pred);
- Ops.push_back(PredReg);
+ Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
unsigned Opc = QOpcodes0[OpcodeIndex];
SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+7);
+ MVT::Other, Ops.data(), 7);
Chain = SDValue(VStA, 1);
- // Store the odd subregs.
+ // Store the odd D registers.
Ops[0] = SDValue(VStA, 0); // MemAddr
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
- N->getOperand(Vec+3));
- Ops[NumVecs+4] = Pred;
- Ops[NumVecs+5] = PredReg;
- Ops[NumVecs+6] = Chain;
+ Ops[6] = Chain;
Opc = QOpcodes1[OpcodeIndex];
SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+7);
+ MVT::Other, Ops.data(), 7);
Chain = SDValue(VStB, 1);
ReplaceUses(SDValue(N, 0), Chain);
return NULL;
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
unsigned NumVecs, unsigned *DOpcodes,
- unsigned *QOpcodes0,
- unsigned *QOpcodes1) {
+ unsigned *QOpcodes) {
assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
- SDValue MemAddr, MemUpdate, MemOpc, Align;
- if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
+ SDValue MemAddr, Align;
+ if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
return NULL;
SDValue Chain = N->getOperand(0);
EVT VT = IsLoad ? N->getValueType(0) : N->getOperand(3).getValueType();
bool is64BitVector = VT.is64BitVector();
- // Quad registers are handled by load/store of subregs. Find the subreg info.
- unsigned NumElts = 0;
- int SubregIdx = 0;
- EVT RegVT = VT;
- if (!is64BitVector) {
- RegVT = GetNEONSubregVT(VT);
- NumElts = RegVT.getVectorNumElements();
- SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1;
- }
-
unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vld/vst lane type");
case MVT::v4i32: OpcodeIndex = 1; break;
}
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SmallVector<SDValue, 9> Ops;
+ SmallVector<SDValue, 7> Ops;
Ops.push_back(MemAddr);
- Ops.push_back(MemUpdate);
- Ops.push_back(MemOpc);
Ops.push_back(Align);
- unsigned Opc = 0;
- if (is64BitVector) {
- Opc = DOpcodes[OpcodeIndex];
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(N->getOperand(Vec+3));
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ Opc = QOpcodes[OpcodeIndex]);
+
+ SDValue SuperReg;
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+ if (NumVecs == 2) {
+ if (is64BitVector)
+ SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ else
+ SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
} else {
- // Check if this is loading the even or odd subreg of a Q register.
- if (Lane < NumElts) {
- Opc = QOpcodes0[OpcodeIndex];
- } else {
- Lane -= NumElts;
- Opc = QOpcodes1[OpcodeIndex];
- }
- // Extract the subregs of the input vector.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
- N->getOperand(Vec+3)));
+ SDValue V2 = N->getOperand(2+3);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(3+3);
+ if (is64BitVector)
+ SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ else
+ SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
}
+ Ops.push_back(SuperReg);
Ops.push_back(getI32Imm(Lane));
Ops.push_back(Pred);
- Ops.push_back(PredReg);
+ Ops.push_back(Reg0);
Ops.push_back(Chain);
if (!IsLoad)
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+8);
-
- std::vector<EVT> ResTys(NumVecs, RegVT);
- ResTys.push_back(MVT::Other);
- SDNode *VLdLn =
- CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+8);
- // For a 64-bit vector load to D registers, nothing more needs to be done.
- if (is64BitVector)
- return VLdLn;
-
- // For 128-bit vectors, take the 64-bit results of the load and insert them
- // as subregs into the result.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDValue QuadVec = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT,
- N->getOperand(Vec+3),
- SDValue(VLdLn, Vec));
- ReplaceUses(SDValue(N, Vec), QuadVec);
- }
-
- Chain = SDValue(VLdLn, NumVecs);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 7);
+
+ EVT ResTy;
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
+
+ SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other,
+ Ops.data(), 7);
+ SuperReg = SDValue(VLdLn, 0);
+ Chain = SDValue(VLdLn, 1);
+
+ // Extract the subregisters.
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
ReplaceUses(SDValue(N, NumVecs), Chain);
return NULL;
}
+SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
+ unsigned Opc) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ unsigned FirstTblReg = IsExt ? 2 : 1;
+
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(FirstTblReg + 0);
+ SDValue V1 = N->getOperand(FirstTblReg + 1);
+ if (NumVecs == 2)
+ RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(FirstTblReg + 2);
+ // If it's a vtbl3, form a quad D-register and leave the last part as
+ // an undef.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(FirstTblReg + 3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+
+ // Now extract the D registers back out.
+ SmallVector<SDValue, 6> Ops;
+ if (IsExt)
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq));
+ if (NumVecs > 2)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, RegSeq));
+ if (NumVecs > 3)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, RegSeq));
+
+ Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
+ Ops.push_back(getAL(CurDAG)); // predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size());
+}
+
SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
- unsigned Opc) {
+ bool isSigned) {
if (!Subtarget->hasV6T2Ops())
return NULL;
+ unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
+ : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
+
+
+ // For unsigned extracts, check for a shift right and mask
+ unsigned And_imm = 0;
+ if (N->getOpcode() == ISD::AND) {
+ if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
+
+ // The immediate is a mask of the low bits iff imm & (imm+1) == 0
+ if (And_imm & (And_imm + 1))
+ return NULL;
+
+ unsigned Srl_imm = 0;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
+ Srl_imm)) {
+ assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
+
+ unsigned Width = CountTrailingOnes_32(And_imm);
+ unsigned LSB = Srl_imm;
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, MVT::i32),
+ CurDAG->getTargetConstant(Width, MVT::i32),
+ getAL(CurDAG), Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+ }
+ return NULL;
+ }
+
+ // Otherwise, we're looking for a shift of a shift
unsigned Shl_imm = 0;
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
if (!T)
return 0;
- if (Predicate_t2_so_imm(TrueVal.getNode())) {
+ if (Pred_t2_so_imm(TrueVal.getNode())) {
SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
if (!T)
return 0;
- if (Predicate_so_imm(TrueVal.getNode())) {
+ if (Pred_so_imm(TrueVal.getNode())) {
SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
}
// Pattern: (ARMcmov:i32 GPR:i32:$false,
- // (imm:i32)<<P:Predicate_so_imm>>:$true,
+ // (imm:i32)<<P:Pred_so_imm>>:$true,
// (imm:i32):$cc)
// Emits: (MOVCCi:i32 GPR:i32:$false,
// (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
}
+SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
+ // The only time a CONCAT_VECTORS operation can have legal types is when
+ // two 64-bit vectors are concatenated to a 128-bit vector.
+ EVT VT = N->getValueType(0);
+ if (!VT.is128BitVector() || N->getNumOperands() != 2)
+ llvm_unreachable("unexpected CONCAT_VECTORS");
+ DebugLoc dl = N->getDebugLoc();
+ SDValue V0 = N->getOperand(0);
+ SDValue V1 = N->getOperand(1);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+}
+
SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
SDNode *ResNode;
if (Subtarget->isThumb1Only()) {
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
ResNode = CurDAG->getMachineNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other,
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
}
}
- case ARMISD::DYN_ALLOC:
- return SelectDYN_ALLOC(N);
case ISD::SRL:
- if (SDNode *I = SelectV6T2BitfieldExtractOp(N,
- Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX))
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
return I;
break;
case ISD::SRA:
- if (SDNode *I = SelectV6T2BitfieldExtractOp(N,
- Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX))
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
return I;
break;
case ISD::MUL:
SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if (Subtarget->isThumb()) {
- SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 5);
+ SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6);
} else {
SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7);
}
break;
case ISD::AND: {
+ // Check for unsigned bitfield extract
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
+ return I;
+
// (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
// of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
// are entirely contributed by c2 and lower 16-bits are entirely contributed
return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
N->getOperand(0), getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32));
- case ARMISD::RBIT: {
- EVT VT = N->getValueType(0);
- SDValue Ops[] = { N->getOperand(0),
- getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(ARM::RBIT, dl, VT, Ops, 3);
- }
case ISD::UMUL_LOHI: {
if (Subtarget->isThumb1Only())
break;
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops,4);
+ return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops,4);
+ return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VZIPq32; break;
}
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VUZPq32; break;
}
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VTRNq32; break;
}
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
}
+ case ARMISD::BUILD_VECTOR: {
+ EVT VecVT = N->getValueType(0);
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned NumElts = VecVT.getVectorNumElements();
+ if (EltVT.getSimpleVT() == MVT::f64) {
+ assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
+ return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ }
+ assert(EltVT.getSimpleVT() == MVT::f32 &&
+ "unexpected type for BUILD_VECTOR");
+ if (NumElts == 2)
+ return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
+ return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3));
+ }
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: {
default:
break;
+ case Intrinsic::arm_neon_vld1: {
+ unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
+ ARM::VLD1d32, ARM::VLD1d64 };
+ unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
+ ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
+ return SelectVLD(N, 1, DOpcodes, QOpcodes, 0);
+ }
+
case Intrinsic::arm_neon_vld2: {
- unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
- ARM::VLD2d32, ARM::VLD2d64 };
- unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 };
+ unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo,
+ ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
+ ARM::VLD2q32Pseudo };
return SelectVLD(N, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld3: {
- unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16,
- ARM::VLD3d32, ARM::VLD3d64 };
- unsigned QOpcodes0[] = { ARM::VLD3q8a, ARM::VLD3q16a, ARM::VLD3q32a };
- unsigned QOpcodes1[] = { ARM::VLD3q8b, ARM::VLD3q16b, ARM::VLD3q32b };
+ unsigned DOpcodes[] = { ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo,
+ ARM::VLD3d32Pseudo, ARM::VLD1d64TPseudo };
+ unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
+ ARM::VLD3q16oddPseudo_UPD,
+ ARM::VLD3q32oddPseudo_UPD };
return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld4: {
- unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16,
- ARM::VLD4d32, ARM::VLD4d64 };
- unsigned QOpcodes0[] = { ARM::VLD4q8a, ARM::VLD4q16a, ARM::VLD4q32a };
- unsigned QOpcodes1[] = { ARM::VLD4q8b, ARM::VLD4q16b, ARM::VLD4q32b };
+ unsigned DOpcodes[] = { ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo,
+ ARM::VLD4d32Pseudo, ARM::VLD1d64QPseudo };
+ unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
+ ARM::VLD4q16oddPseudo_UPD,
+ ARM::VLD4q32oddPseudo_UPD };
return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld2lane: {
- unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 };
- unsigned QOpcodes0[] = { ARM::VLD2LNq16a, ARM::VLD2LNq32a };
- unsigned QOpcodes1[] = { ARM::VLD2LNq16b, ARM::VLD2LNq32b };
- return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo,
+ ARM::VLD2LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo };
+ return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vld3lane: {
- unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 };
- unsigned QOpcodes0[] = { ARM::VLD3LNq16a, ARM::VLD3LNq32a };
- unsigned QOpcodes1[] = { ARM::VLD3LNq16b, ARM::VLD3LNq32b };
- return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd16Pseudo,
+ ARM::VLD3LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo };
+ return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vld4lane: {
- unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 };
- unsigned QOpcodes0[] = { ARM::VLD4LNq16a, ARM::VLD4LNq32a };
- unsigned QOpcodes1[] = { ARM::VLD4LNq16b, ARM::VLD4LNq32b };
- return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd16Pseudo,
+ ARM::VLD4LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo };
+ return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vst1: {
+ unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
+ ARM::VST1d32, ARM::VST1d64 };
+ unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
+ ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
+ return SelectVST(N, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst2: {
- unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
- ARM::VST2d32, ARM::VST2d64 };
- unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 };
+ unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo,
+ ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
+ ARM::VST2q32Pseudo };
return SelectVST(N, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst3: {
- unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16,
- ARM::VST3d32, ARM::VST3d64 };
- unsigned QOpcodes0[] = { ARM::VST3q8a, ARM::VST3q16a, ARM::VST3q32a };
- unsigned QOpcodes1[] = { ARM::VST3q8b, ARM::VST3q16b, ARM::VST3q32b };
+ unsigned DOpcodes[] = { ARM::VST3d8Pseudo, ARM::VST3d16Pseudo,
+ ARM::VST3d32Pseudo, ARM::VST1d64TPseudo };
+ unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
+ ARM::VST3q16oddPseudo_UPD,
+ ARM::VST3q32oddPseudo_UPD };
return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst4: {
- unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16,
- ARM::VST4d32, ARM::VST4d64 };
- unsigned QOpcodes0[] = { ARM::VST4q8a, ARM::VST4q16a, ARM::VST4q32a };
- unsigned QOpcodes1[] = { ARM::VST4q8b, ARM::VST4q16b, ARM::VST4q32b };
+ unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo,
+ ARM::VST4d32Pseudo, ARM::VST1d64QPseudo };
+ unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
+ ARM::VST4q16oddPseudo_UPD,
+ ARM::VST4q32oddPseudo_UPD };
return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst2lane: {
- unsigned DOpcodes[] = { ARM::VST2LNd8, ARM::VST2LNd16, ARM::VST2LNd32 };
- unsigned QOpcodes0[] = { ARM::VST2LNq16a, ARM::VST2LNq32a };
- unsigned QOpcodes1[] = { ARM::VST2LNq16b, ARM::VST2LNq32b };
- return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo, ARM::VST2LNd16Pseudo,
+ ARM::VST2LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo };
+ return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst3lane: {
- unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 };
- unsigned QOpcodes0[] = { ARM::VST3LNq16a, ARM::VST3LNq32a };
- unsigned QOpcodes1[] = { ARM::VST3LNq16b, ARM::VST3LNq32b };
- return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo, ARM::VST3LNd16Pseudo,
+ ARM::VST3LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo };
+ return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst4lane: {
- unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 };
- unsigned QOpcodes0[] = { ARM::VST4LNq16a, ARM::VST4LNq32a };
- unsigned QOpcodes1[] = { ARM::VST4LNq16b, ARM::VST4LNq32b };
- return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo, ARM::VST4LNd16Pseudo,
+ ARM::VST4LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo };
+ return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes);
+ }
}
+ break;
+ }
+
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+
+ case Intrinsic::arm_neon_vtbl2:
+ return SelectVTBL(N, false, 2, ARM::VTBL2);
+ case Intrinsic::arm_neon_vtbl3:
+ return SelectVTBL(N, false, 3, ARM::VTBL3);
+ case Intrinsic::arm_neon_vtbl4:
+ return SelectVTBL(N, false, 4, ARM::VTBL4);
+
+ case Intrinsic::arm_neon_vtbx2:
+ return SelectVTBL(N, true, 2, ARM::VTBX2);
+ case Intrinsic::arm_neon_vtbx3:
+ return SelectVTBL(N, true, 3, ARM::VTBX3);
+ case Intrinsic::arm_neon_vtbx4:
+ return SelectVTBL(N, true, 4, ARM::VTBX4);
}
+ break;
}
+
+ case ISD::CONCAT_VECTORS:
+ return SelectConcatVector(N);
}
return SelectCode(N);