static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
cl::desc("Generate tail calls (TEMPORARY OPTION)."),
- cl::init(false));
+ cl::init(true));
static cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
cl::desc("Generate calls via indirect call instructions."),
cl::init(false));
+static cl::opt<bool>
+ARMInterworking("arm-interworking", cl::Hidden,
+ cl::desc("Enable / disable ARM interworking (for debugging only)"),
+ cl::init(true));
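+// Interworking refers to calls that may switch between ARM and Thumb state;
+// such calls must use BX/BLX-style branches rather than plain B/BL.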
+
static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
}
setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
- if (llvm::ModelWithRegSequence())
- setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
- else
- setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
// doesn't yet know how to not do that for SjLj.
setExceptionSelectorRegister(ARM::R0);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ // Handle atomics directly for ARMv[67] (except for Thumb1), otherwise
+ // use the default expansion.
+ bool canHandleAtomics =
+ (Subtarget->hasV7Ops() ||
+ (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()));
+ if (canHandleAtomics) {
+ // membarrier needs custom lowering; the rest are legal and handled
+ // normally.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ } else {
+ // Set them all for expansion, which will force libcalls.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+ // Since the libcalls include locking, fold in the fences.
+ setShouldFoldAtomicFences(true);
+ }
+ // 64-bit versions are always libcalls (for now).
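+ // (Presumably because lowering via the 64-bit exclusive-access instructions
+ // ldrexd/strexd is not implemented yet.)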
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
// If the subtarget does not have extract instructions, sign_extend_inreg
// needs to be expanded. Extract is available in ARM mode on v6 and up,
// and on most Thumb2 implementations.
- if ((!Subtarget->isThumb() && !Subtarget->hasV6Ops())
+ if (!Subtarget->hasV6Ops()
|| (Subtarget->isThumb2() && !Subtarget->hasT2ExtractPack())) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
}
} else if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
+ } else if (!IsSibCall) {
assert(VA.isMemLoc());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
+ // Tail call byval lowering might overwrite argument registers, so in the
+ // case of tail call optimization the copies to registers are lowered later.
+ if (!isTailCall)
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
// For tail calls lower the arguments to the 'real' stack slot.
if (isTailCall) {
getTargetMachine().getRelocationModel() != Reloc::Static;
isARMFunc = !Subtarget->isThumb() || isStub;
// ARM call to a local ARM function is predicable.
- isLocalARMFunc = !Subtarget->isThumb() && !isExt;
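+ // When interworking is disabled, even an external function can be assumed
+ // to be ARM, so the call remains predicable.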
+ isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
// tBX takes a register source operand.
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
-
const Function *CallerF = DAG.getMachineFunction().getFunction();
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
// Look for obvious safe cases to perform tail call optimization that do not
// require ABI changes. This is what gcc calls sibcall.
- // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
- // emit a special epilogue.
- // Not sure yet if this is true on ARM.
-//?? if (RegInfo->needsStackRealignment(MF))
-//?? return false;
-
- // Do not sibcall optimize vararg calls unless the call site is not passing any
- // arguments.
+ // Do not sibcall optimize vararg calls unless the call site passes no
+ // arguments.
if (isVarArg && !Outs.empty())
return false;
if (isCalleeStructRet || isCallerStructRet)
return false;
+ // FIXME: Completely disable sibcalls for Thumb1 since Thumb1RegisterInfo::
+ // emitEpilogue is not ready for them.
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ if (isa<ExternalSymbolSDNode>(Callee))
+ return false;
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ // On Thumb, for the moment, we can only do this to functions defined in
+ // this compilation, or to indirect calls. A Thumb B to an ARM function is
+ // not easily fixed up in the linker, unlike BL.
+ if (Subtarget->isThumb()) {
+ const GlobalValue *GV = G->getGlobal();
+ if (GV->isDeclaration() || GV->isWeakForLinker())
+ return false;
+ }
+ }
+
// If the calling conventions do not match, then we'd better make sure the
// results are returned in the same way as what the caller expects.
if (!CCMatch) {
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *Subtarget)
- const {
+ const ARMSubtarget *Subtarget) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
DebugLoc dl = Op.getDebugLoc();
switch (IntNo) {
}
static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *Subtarget) {
+ const ARMSubtarget *Subtarget) {
DebugLoc dl = Op.getDebugLoc();
SDValue Op5 = Op.getOperand(5);
- SDValue Res;
unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
- if (isDeviceBarrier) {
- if (Subtarget->hasV7Ops())
- Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0));
- else
- Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0),
- DAG.getConstant(0, MVT::i32));
- } else {
- if (Subtarget->hasV7Ops())
- Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
- else
- Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
- DAG.getConstant(0, MVT::i32));
- }
- return Res;
+ // v6 and v7 can both handle barriers directly, but they need to be handled
+ // a bit differently. Thumb1 and pre-v6 ARM mode use a libcall instead and
+ // should never get here.
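+ // (The extra zero operand in the v6 case presumably supplies the register
+ // value for the MCR-based barrier encoding, which should be zero.)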
+ unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER;
+ if (Subtarget->hasV7Ops())
+ return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0));
+ else if (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())
+ return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ llvm_unreachable("Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
+ return SDValue();
}
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
DAG.getConstant(0, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
DAG.getConstant(1, MVT::i32));
- return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, DstVT,
+ DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
}
// Turn f64->i64 into VMOVRRD.
return Result;
}
-/// isVMOVSplat - Check if the specified splat value corresponds to an immediate
-/// VMOV instruction, and if so, return the constant being splatted.
-static SDValue isVMOVSplat(uint64_t SplatBits, uint64_t SplatUndef,
- unsigned SplatBitSize, SelectionDAG &DAG) {
+/// isNEONModifiedImm - Check if the specified splat value corresponds to a
+/// valid vector constant for a NEON instruction with a "modified immediate"
+/// operand (e.g., VMOV). If so, return either the constant being
+/// splatted or the encoded value, depending on the DoEncode parameter. The
+/// format of the encoded value is: bit12=Op, bits11-8=Cmode,
+/// bits7-0=Immediate.
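+/// For example, under this scheme a 16-bit splat of 0x5500 is encoded as
+/// Op=0, Cmode=0xa, Imm=0x55, i.e. an encoded value of 0x0a55 (when DoEncode
+/// is true).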
+static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
+ unsigned SplatBitSize, SelectionDAG &DAG,
+ bool isVMOV, bool DoEncode) {
+ unsigned Op, Cmode, Imm;
+ EVT VT;
+
+ // SplatBitSize is set to the smallest size that splats the vector, so a
+ // zero vector will always have SplatBitSize == 8. However, NEON modified
+ // immediate instructions other than VMOV do not support the 8-bit encoding
+ // of a zero vector, and the default encoding of zero is supposed to be the
+ // 32-bit version.
+ if (SplatBits == 0)
+ SplatBitSize = 32;
+
+ Op = 0;
switch (SplatBitSize) {
case 8:
- // Any 1-byte value is OK.
+ // Any 1-byte value is OK. Op=0, Cmode=1110.
assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
- return DAG.getTargetConstant(SplatBits, MVT::i8);
+ Cmode = 0xe;
+ Imm = SplatBits;
+ VT = MVT::i8;
+ break;
case 16:
// NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
- if ((SplatBits & ~0xff) == 0 ||
- (SplatBits & ~0xff00) == 0)
- return DAG.getTargetConstant(SplatBits, MVT::i16);
- break;
+ VT = MVT::i16;
+ if ((SplatBits & ~0xff) == 0) {
+ // Value = 0x00nn: Op=x, Cmode=100x.
+ Cmode = 0x8;
+ Imm = SplatBits;
+ break;
+ }
+ if ((SplatBits & ~0xff00) == 0) {
+ // Value = 0xnn00: Op=x, Cmode=101x.
+ Cmode = 0xa;
+ Imm = SplatBits >> 8;
+ break;
+ }
+ return SDValue();
case 32:
// NEON's 32-bit VMOV supports splat values where:
// * only one byte is nonzero, or
// * the least significant byte is 0xff and the second byte is nonzero, or
// * the least significant 2 bytes are 0xff and the third is nonzero.
- if ((SplatBits & ~0xff) == 0 ||
- (SplatBits & ~0xff00) == 0 ||
- (SplatBits & ~0xff0000) == 0 ||
- (SplatBits & ~0xff000000) == 0)
- return DAG.getTargetConstant(SplatBits, MVT::i32);
+ VT = MVT::i32;
+ if ((SplatBits & ~0xff) == 0) {
+ // Value = 0x000000nn: Op=x, Cmode=000x.
+ Cmode = 0;
+ Imm = SplatBits;
+ break;
+ }
+ if ((SplatBits & ~0xff00) == 0) {
+ // Value = 0x0000nn00: Op=x, Cmode=001x.
+ Cmode = 0x2;
+ Imm = SplatBits >> 8;
+ break;
+ }
+ if ((SplatBits & ~0xff0000) == 0) {
+ // Value = 0x00nn0000: Op=x, Cmode=010x.
+ Cmode = 0x4;
+ Imm = SplatBits >> 16;
+ break;
+ }
+ if ((SplatBits & ~0xff000000) == 0) {
+ // Value = 0xnn000000: Op=x, Cmode=011x.
+ Cmode = 0x6;
+ Imm = SplatBits >> 24;
+ break;
+ }
if ((SplatBits & ~0xffff) == 0 &&
- ((SplatBits | SplatUndef) & 0xff) == 0xff)
- return DAG.getTargetConstant(SplatBits | 0xff, MVT::i32);
+ ((SplatBits | SplatUndef) & 0xff) == 0xff) {
+ // Value = 0x0000nnff: Op=x, Cmode=1100.
+ Cmode = 0xc;
+ Imm = SplatBits >> 8;
+ SplatBits |= 0xff;
+ break;
+ }
if ((SplatBits & ~0xffffff) == 0 &&
- ((SplatBits | SplatUndef) & 0xffff) == 0xffff)
- return DAG.getTargetConstant(SplatBits | 0xffff, MVT::i32);
+ ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
+ // Value = 0x00nnffff: Op=x, Cmode=1101.
+ Cmode = 0xd;
+ Imm = SplatBits >> 16;
+ SplatBits |= 0xffff;
+ break;
+ }
// Note: there are a few 32-bit splat values (specifically: 00ffff00,
// ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
// VMOV.I32. A (very) minor optimization would be to replicate the value
// and fall through here to test for a valid 64-bit splat. But, then the
// caller would also need to check and handle the change in size.
- break;
+ return SDValue();
case 64: {
// NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
+ if (!isVMOV)
+ return SDValue();
uint64_t BitMask = 0xff;
uint64_t Val = 0;
+ unsigned ImmMask = 1;
+ Imm = 0;
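+ // Build the 8-bit immediate one bit per byte: a set bit means the
+ // corresponding byte of the splat value is all-ones.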
for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
- if (((SplatBits | SplatUndef) & BitMask) == BitMask)
+ if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
Val |= BitMask;
- else if ((SplatBits & BitMask) != 0)
+ Imm |= ImmMask;
+ } else if ((SplatBits & BitMask) != 0) {
return SDValue();
+ }
BitMask <<= 8;
+ ImmMask <<= 1;
}
- return DAG.getTargetConstant(Val, MVT::i64);
+ // Op=1, Cmode=1110.
+ Op = 1;
+ Cmode = 0xe;
+ SplatBits = Val;
+ VT = MVT::i64;
+ break;
}
default:
- llvm_unreachable("unexpected size for isVMOVSplat");
- break;
+ llvm_unreachable("unexpected size for isNEONModifiedImm");
+ return SDValue();
}
- return SDValue();
+ if (DoEncode)
+ return DAG.getTargetConstant((Op << 12) | (Cmode << 8) | Imm, MVT::i32);
+ return DAG.getTargetConstant(SplatBits, VT);
}
-/// getVMOVImm - If this is a build_vector of constants which can be
-/// formed by using a VMOV instruction of the specified element size,
-/// return the constant being splatted. The ByteSize field indicates the
-/// number of bytes of each element [1248].
-SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
+
+/// getNEONModImm - If this is a valid vector constant for a NEON instruction
+/// with a "modified immediate" operand (e.g., VMOV) of the specified element
+/// size, return the encoded value for that immediate. The ByteSize field
+/// indicates the number of bytes of each element [1248].
+SDValue ARM::getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV,
+ SelectionDAG &DAG) {
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
if (SplatBitSize > ByteSize * 8)
return SDValue();
- return isVMOVSplat(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
- SplatBitSize, DAG);
+ return isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
+ SplatBitSize, DAG, isVMOV, true);
}
static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
bool HasAnyUndefs;
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
- SDValue Val = isVMOVSplat(SplatBits.getZExtValue(),
- SplatUndef.getZExtValue(), SplatBitSize, DAG);
+ // Check if an immediate VMOV works.
+ SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+ SplatUndef.getZExtValue(),
+ SplatBitSize, DAG, true, false);
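+ // (DoEncode is false here, so on success Val is the splatted constant
+ // itself rather than the encoded immediate.)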
if (Val.getNode())
return BuildSplat(Val, VT, DAG, dl);
}
bool ReverseVEXT;
unsigned Imm, WhichResult;
- return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
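+ // Shuffles with 32- or 64-bit elements are lowered as sequences of
+ // ARMISD::BUILD_VECTOR nodes (see LowerVECTOR_SHUFFLE), so any mask with
+ // elements that wide is acceptable here.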
+ return (EltSize >= 32 ||
+ ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isVREVMask(M, VT, 64) ||
isVREVMask(M, VT, 32) ||
isVREVMask(M, VT, 16) ||
// of the same type so that they get CSEd properly.
SVN->getMask(ShuffleMask);
- if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
- int Lane = SVN->getSplatIndex();
- // If this is undef splat, generate it via "just" vdup, if possible.
- if (Lane == -1) Lane = 0;
-
- if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
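+ // The NEON-specific shuffle nodes below only handle 8-, 16- and 32-bit
+ // elements; shuffles with wider elements fall through to the
+ // ARMISD::BUILD_VECTOR expansion at the end of this function.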
+ if (EltSize <= 32) {
+ if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
+ int Lane = SVN->getSplatIndex();
+ // If this is undef splat, generate it via "just" vdup, if possible.
+ if (Lane == -1) Lane = 0;
+
+ if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+ }
+ return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
+ DAG.getConstant(Lane, MVT::i32));
}
- return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
- DAG.getConstant(Lane, MVT::i32));
- }
- bool ReverseVEXT;
- unsigned Imm;
- if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
- if (ReverseVEXT)
- std::swap(V1, V2);
- return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
- DAG.getConstant(Imm, MVT::i32));
- }
-
- if (isVREVMask(ShuffleMask, VT, 64))
- return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
- if (isVREVMask(ShuffleMask, VT, 32))
- return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
- if (isVREVMask(ShuffleMask, VT, 16))
- return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
-
- // Check for Neon shuffles that modify both input vectors in place.
- // If both results are used, i.e., if there are two shuffles with the same
- // source operands and with masks corresponding to both results of one of
- // these operations, DAG memoization will ensure that a single node is
- // used for both shuffles.
- unsigned WhichResult;
- if (isVTRNMask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
- V1, V2).getValue(WhichResult);
- if (isVUZPMask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
- V1, V2).getValue(WhichResult);
- if (isVZIPMask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
- V1, V2).getValue(WhichResult);
+ bool ReverseVEXT;
+ unsigned Imm;
+ if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
+ if (ReverseVEXT)
+ std::swap(V1, V2);
+ return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
+ DAG.getConstant(Imm, MVT::i32));
+ }
- if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
- V1, V1).getValue(WhichResult);
- if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
- V1, V1).getValue(WhichResult);
- if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
- V1, V1).getValue(WhichResult);
+ if (isVREVMask(ShuffleMask, VT, 64))
+ return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 32))
+ return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 16))
+ return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
+
+ // Check for Neon shuffles that modify both input vectors in place.
+ // If both results are used, i.e., if there are two shuffles with the same
+ // source operands and with masks corresponding to both results of one of
+ // these operations, DAG memoization will ensure that a single node is
+ // used for both shuffles.
+ unsigned WhichResult;
+ if (isVTRNMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVUZPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVZIPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+
+ if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ }
// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
}
// Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
if (EltSize >= 32) {
// Do the expansion with floating-point types, since that is what the VFP
// registers are defined to use, and since i64 is not legal.
}
}
if (StringRef("{cc}").equals_lower(Constraint))
- return std::make_pair(0U, ARM::CCRRegisterClass);
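+ // The "cc" constraint names the condition codes, i.e. CPSR; return it
+ // explicitly rather than leaving the register unspecified.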
+ return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}