setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
+ // NEON only has FMA instructions as of VFP4.
+ if (!Subtarget->hasVFP4()) {
+ setOperationAction(ISD::FMA, MVT::v2f32, Expand);
+ setOperationAction(ISD::FMA, MVT::v4f32, Expand);
+ }
+
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setSchedulingPreference(Sched::Hybrid);
//// temporary - rewrite interface to use type
- maxStoresPerMemset = 8;
- maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
- maxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
- maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
- maxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
- maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+ MaxStoresPerMemset = 8;
+ MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
+ MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+ MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
+ MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
setMinStackArgumentAlignment(4);
- benefitFromCodePlacementOpt = true;
+ BenefitFromCodePlacementOpt = true;
// Prefer likely predicted branches to selects on out-of-order cores.
- predictableSelectIsExpensive = Subtarget->isLikeA9();
+ PredictableSelectIsExpensive = Subtarget->isLikeA9();
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
isVarArg));
- // If this is the first return lowered for this function, add
- // the regs to the liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps;
+ RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Copy the result values into the output registers.
for (unsigned i = 0, realRVLocIdx = 0;
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
HalfGPRs.getValue(1), Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
// Extract the 2nd half and fall through to handle it as an f64 value.
DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
Flag);
// Guarantee that all emitted copies are
// stuck together, avoiding something bad.
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
- SDValue result;
+ // Update chain and glue.
+ RetOps[0] = Chain;
if (Flag.getNode())
- result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
- else // Return Void
- result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
+ RetOps.push_back(Flag);
- return result;
+ return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
+ RetOps.data(), RetOps.size());
}
bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
EVT PtrVT = getPointerTy();
- DebugLoc dl = Op.getDebugLoc();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
SDValue CPAddr;
unsigned PCAdj = (RelocM != Reloc::PIC_)
}
// The remaining GPRs hold either the beginning of variable-argument
-// data, or the beginning of an aggregate passed by value (usuall
+// data, or the beginning of an aggregate passed by value (usually
// byval). Either way, we allocate stack slots adjacent to the data
// provided by our caller, and store the unallocated registers there.
// If this is a variadic function, the va_list pointer will begin with
return true;
}
+/// \return true if this is a reverse operation on an vector.
+static bool isReverseMask(ArrayRef<int> M, EVT VT) {
+ unsigned NumElts = VT.getVectorNumElements();
+ // Make sure the mask has the right size.
+ if (NumElts != M.size())
+ return false;
+
+ // Look for <15, ..., 3, -1, 1, 0>.
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
+ return false;
+
+ return true;
+}
+
// If N is an integer constant that can be moved into a register in one
// instruction, return an SDValue of such a constant (will become a MOV
// instruction). Otherwise return null.
isVZIPMask(M, VT, WhichResult) ||
isVTRN_v_undef_Mask(M, VT, WhichResult) ||
isVUZP_v_undef_Mask(M, VT, WhichResult) ||
- isVZIP_v_undef_Mask(M, VT, WhichResult));
+ isVZIP_v_undef_Mask(M, VT, WhichResult) ||
+ ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
&VTBLMask[0], 8));
}
+static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
+ SelectionDAG &DAG) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue OpLHS = Op.getOperand(0);
+ EVT VT = OpLHS.getValueType();
+
+ assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
+ "Expect an v8i16/v16i8 type");
+ OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
+ // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now,
+ // extract the first 8 bytes into the top double word and the last 8 bytes
+ // into the bottom double word. The v8i16 case is similar.
+ unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
+ return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
+ DAG.getConstant(ExtractNum, MVT::i32));
+}
+
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
+ if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
+ return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
+
if (VT == MVT::v8i8) {
SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
if (NewOp.getNode())
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
unsigned trap_opcode;
- if (Subtarget->isThumb()) {
+ if (Subtarget->isThumb())
trap_opcode = ARM::tTRAP;
- } else {
- if (Subtarget->useNaClTrap())
- trap_opcode = ARM::TRAPNaCl;
- else
- trap_opcode = ARM::TRAP;
- }
+ else
+ trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
+
BuildMI(TrapBB, dl, TII->get(trap_opcode));
DispatchBB->addSuccessor(TrapBB);