#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
-#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
}
void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
- addRegisterClass(VT, ARM::DPRRegisterClass);
+ addRegisterClass(VT, &ARM::DPRRegClass);
addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}
void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
- addRegisterClass(VT, ARM::QPRRegisterClass);
+ addRegisterClass(VT, &ARM::QPRRegClass);
addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
}
if (Subtarget->isThumb1Only())
- addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
+ addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
else
- addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
+ addRegisterClass(MVT::i32, &ARM::GPRRegClass);
if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
- addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
+ addRegisterClass(MVT::f32, &ARM::SPRRegClass);
if (!Subtarget->isFPOnlySP())
- addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
+ addRegisterClass(MVT::f64, &ARM::DPRRegClass);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
}
setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
-
+
setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
- setOperationAction(ISD::FMA, MVT::f64, Expand);
- setOperationAction(ISD::FMA, MVT::f32, Expand);
+ if (!Subtarget->hasVFP4()) {
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+ }
// Various VFP goodness
if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
// the cost is 1 for both f32 and f64.
case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
- RRC = ARM::DPRRegisterClass;
+ RRC = &ARM::DPRRegClass;
// When NEON is used for SP, only half of the register file is available
// because operations that define both SP and DP results will be constrained
// to the VFP2 class (D0-D15). We currently model this constraint prior to
break;
case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
case MVT::v4f32: case MVT::v2f64:
- RRC = ARM::DPRRegisterClass;
+ RRC = &ARM::DPRRegClass;
Cost = 2;
break;
case MVT::v4i64:
- RRC = ARM::DPRRegisterClass;
+ RRC = &ARM::DPRRegClass;
Cost = 4;
break;
case MVT::v8i64:
- RRC = ARM::DPRRegisterClass;
+ RRC = &ARM::DPRRegClass;
Cost = 8;
break;
}
// load / store 4 to 8 consecutive D registers.
if (Subtarget->hasNEON()) {
if (VT == MVT::v4i64)
- return ARM::QQPRRegisterClass;
- else if (VT == MVT::v8i64)
- return ARM::QQQQPRRegisterClass;
+ return &ARM::QQPRRegClass;
+ if (VT == MVT::v8i64)
+ return &ARM::QQQQPRRegClass;
}
return TargetLowering::getRegClassFor(VT);
}
/// and then confiscate the rest of the parameter registers to insure
/// this.
void
-llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
+ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
unsigned reg = State->AllocateReg(GPRArgRegs, 4);
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
return result;
}
-bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
+bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
if (N->getNumValues() != 1)
return false;
if (!N->hasNUsesOfValue(1, 0))
return false;
- unsigned NumCopies = 0;
- SDNode* Copies[2] = { 0, 0 };
- SDNode *Use = *N->use_begin();
- if (Use->getOpcode() == ISD::CopyToReg) {
- Copies[NumCopies++] = Use;
- } else if (Use->getOpcode() == ARMISD::VMOVRRD) {
+ SDValue TCChain = Chain;
+ SDNode *Copy = *N->use_begin();
+ if (Copy->getOpcode() == ISD::CopyToReg) {
+ // If the copy has a glue operand, we conservatively assume it isn't safe to
+ // perform a tail call.
+ if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
+ return false;
+ TCChain = Copy->getOperand(0);
+ } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
+ SDNode *VMov = Copy;
// f64 returned in a pair of GPRs.
- for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end();
+ SmallPtrSet<SDNode*, 2> Copies;
+ for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
UI != UE; ++UI) {
if (UI->getOpcode() != ISD::CopyToReg)
return false;
- Copies[UI.getUse().getResNo()] = *UI;
- ++NumCopies;
+ Copies.insert(*UI);
}
- } else if (Use->getOpcode() == ISD::BITCAST) {
+ if (Copies.size() > 2)
+ return false;
+
+ for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
+ UI != UE; ++UI) {
+ SDValue UseChain = UI->getOperand(0);
+ if (Copies.count(UseChain.getNode()))
+ // Second CopyToReg
+ Copy = *UI;
+ else
+ // First CopyToReg
+ TCChain = UseChain;
+ }
+ } else if (Copy->getOpcode() == ISD::BITCAST) {
// f32 returned in a single GPR.
- if (!Use->hasNUsesOfValue(1, 0))
+ if (!Copy->hasOneUse())
return false;
- Use = *Use->use_begin();
- if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0))
+ Copy = *Copy->use_begin();
+ if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
return false;
- Copies[NumCopies++] = Use;
+ Chain = Copy->getOperand(0);
} else {
return false;
}
- if (NumCopies != 1 && NumCopies != 2)
- return false;
-
bool HasRet = false;
- for (unsigned i = 0; i < NumCopies; ++i) {
- SDNode *Copy = Copies[i];
- for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
- UI != UE; ++UI) {
- if (UI->getOpcode() == ISD::CopyToReg) {
- SDNode *Use = *UI;
- if (Use == Copies[0] || ((NumCopies == 2) && (Use == Copies[1])))
- continue;
- return false;
- }
- if (UI->getOpcode() != ARMISD::RET_FLAG)
- return false;
- HasRet = true;
- }
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != ARMISD::RET_FLAG)
+ return false;
+ HasRet = true;
}
- return HasRet;
+ if (!HasRet)
+ return false;
+
+ Chain = TCChain;
+ return true;
}
bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
- if (!EnableARMTailCalls)
+ if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
return false;
if (!CI->isTailCall())
const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
- RC = ARM::tGPRRegisterClass;
+ RC = &ARM::tGPRRegClass;
else
- RC = ARM::GPRRegisterClass;
+ RC = &ARM::GPRRegClass;
// Transform the arguments stored in physical registers into virtual ones.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
- RC = ARM::tGPRRegisterClass;
+ RC = &ARM::tGPRRegClass;
else
- RC = ARM::GPRRegisterClass;
+ RC = &ARM::GPRRegClass;
unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
const TargetRegisterClass *RC;
if (RegVT == MVT::f32)
- RC = ARM::SPRRegisterClass;
+ RC = &ARM::SPRRegClass;
else if (RegVT == MVT::f64)
- RC = ARM::DPRRegisterClass;
+ RC = &ARM::DPRRegClass;
else if (RegVT == MVT::v2f64)
- RC = ARM::QPRRegisterClass;
+ RC = &ARM::QPRRegClass;
else if (RegVT == MVT::i32)
- RC = (AFI->isThumb1OnlyFunction() ?
- ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
+ RC = AFI->isThumb1OnlyFunction() ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
else
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
return Result;
}
-SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *ST) const {
- if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
- return SDValue();
-
- ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
- assert(Op.getValueType() == MVT::f32 &&
- "ConstantFP custom lowering should only occur for f32.");
-
- APFloat FPVal = CFP->getValueAPF();
- int ImmVal = ARM_AM::getFP32Imm(FPVal);
- if (ImmVal == -1)
- return SDValue();
-
- DebugLoc DL = Op.getDebugLoc();
- SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
- SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, NewVal);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
- DAG.getConstant(0, MVT::i32));
-}
-
/// isNEONModifiedImm - Check if the specified splat value corresponds to a
/// valid vector constant for a NEON instruction with a "modified immediate"
/// operand (e.g., VMOV). If so, return the encoded value.
return DAG.getTargetConstant(EncodedVal, MVT::i32);
}
+SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const {
+ if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
+ return SDValue();
+
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
+ assert(Op.getValueType() == MVT::f32 &&
+ "ConstantFP custom lowering should only occur for f32.");
+
+ // Try splatting with a VMOV.f32...
+ APFloat FPVal = CFP->getValueAPF();
+ int ImmVal = ARM_AM::getFP32Imm(FPVal);
+ if (ImmVal != -1) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
+ SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
+ NewVal);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ // If that fails, try a VMOV.i32
+ EVT VMovVT;
+ unsigned iVal = FPVal.bitcastToAPInt().getZExtValue();
+ SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false,
+ VMOVModImm);
+ if (NewVal != SDValue()) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
+ NewVal);
+ SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
+ VecConstant);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ // Finally, try a VMVN.i32
+ NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false,
+ VMVNModImm);
+ if (NewVal != SDValue()) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
+ SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
+ VecConstant);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ return SDValue();
+}
+
+
static bool isVEXTMask(ArrayRef<int> M, EVT VT,
bool &ReverseVEXT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
bool isThumb2 = Subtarget->isThumb2();
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- unsigned scratch =
- MRI.createVirtualRegister(isThumb2 ? ARM::rGPRRegisterClass
- : ARM::GPRRegisterClass);
+ unsigned scratch = MRI.createVirtualRegister(isThumb2 ?
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass);
if (isThumb2) {
- MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
- MRI.constrainRegClass(oldval, ARM::rGPRRegisterClass);
- MRI.constrainRegClass(newval, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(oldval, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(newval, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc;
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
if (isThumb2) {
- MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
- MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc;
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- const TargetRegisterClass *TRC =
- isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ const TargetRegisterClass *TRC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
if (isThumb2) {
- MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
- MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc, extendOpc;
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- const TargetRegisterClass *TRC =
- isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ const TargetRegisterClass *TRC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned scratch2 = MRI.createVirtualRegister(TRC);
// Sign extend the value, if necessary.
if (signExtend && extendOpc) {
- oldval = MRI.createVirtualRegister(ARM::GPRRegisterClass);
+ oldval = MRI.createVirtualRegister(&ARM::GPRRegClass);
AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
.addReg(dest)
.addImm(0));
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
if (isThumb2) {
- MRI.constrainRegClass(destlo, ARM::rGPRRegisterClass);
- MRI.constrainRegClass(desthi, ARM::rGPRRegisterClass);
- MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
}
unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD;
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- const TargetRegisterClass *TRC =
- isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ const TargetRegisterClass *TRC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
unsigned storesuccess = MRI.createVirtualRegister(TRC);
// thisMBB:
ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
- const TargetRegisterClass *TRC =
- isThumb ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ const TargetRegisterClass *TRC = isThumb ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
// Grab constant pool and fixed stack memory operands.
MachineMemOperand *CPMMO =
MachineFrameInfo *MFI = MF->getFrameInfo();
int FI = MFI->getFunctionContextIndex();
- const TargetRegisterClass *TRC =
- Subtarget->isThumb() ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ const TargetRegisterClass *TRC = Subtarget->isThumb() ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
// Get a mapping of the call site numbers to all of the landing pads they're
// associated with.
DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
unsigned MaxCSNum = 0;
MachineModuleInfo &MMI = MF->getMMI();
- for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) {
+ for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
+ ++BB) {
if (!BB->isLandingPad()) continue;
// FIXME: We should assert that the EH_LABEL is the first MI in the landing
BuildMI(DispatchBB, dl, TII->get(ARM::tInt_eh_sjlj_dispatchsetup));
else if (!Subtarget->hasVFP2())
BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup_nofp));
- else
+ else
BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
unsigned NumLPads = LPadList.size();
for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
unsigned Reg = SavedRegs[i];
if (Subtarget->isThumb2() &&
- !ARM::tGPRRegisterClass->contains(Reg) &&
- !ARM::hGPRRegisterClass->contains(Reg))
+ !ARM::tGPRRegClass.contains(Reg) &&
+ !ARM::hGPRRegClass.contains(Reg))
continue;
- else if (Subtarget->isThumb1Only() &&
- !ARM::tGPRRegisterClass->contains(Reg))
+ if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
continue;
- else if (!Subtarget->isThumb() &&
- !ARM::GPRRegisterClass->contains(Reg))
+ if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
continue;
if (!DefRegs[Reg])
MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
MachineRegisterInfo &MRI = Fn->getRegInfo();
// In Thumb mode S must not be specified if source register is the SP or
// PC and if destination register is the SP, so restrict register class
- unsigned NewMovDstReg = MRI.createVirtualRegister(
- isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
- unsigned NewRsbDstReg = MRI.createVirtualRegister(
- isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
+ unsigned NewMovDstReg = MRI.createVirtualRegister(isThumb2 ?
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass);
+ unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ?
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass);
// Transfer the remainder of BB and its successor edges to sinkMBB.
SinkBB->splice(SinkBB->begin(), BB,
/// ISD::STORE.
static SDValue PerformSTORECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
- // Bitcast an i64 store extracted from a vector to f64.
- // Otherwise, the i64 value will be legalized to a pair of i32 values.
StoreSDNode *St = cast<StoreSDNode>(N);
+ if (St->isVolatile())
+ return SDValue();
+
+ // Optimize trunc store (of multiple scalars) to shuffle and store. First,
+ // pack all of the elements in one place. Next, store to memory in fewer
+ // chunks.
SDValue StVal = St->getValue();
- if (!ISD::isNormalStore(St) || St->isVolatile())
+ EVT VT = StVal.getValueType();
+ if (St->isTruncatingStore() && VT.isVector()) {
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT StVT = St->getMemoryVT();
+ unsigned NumElems = VT.getVectorNumElements();
+ assert(StVT != VT && "Cannot truncate to the same type");
+ unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
+
+ // From, To sizes and ElemCount must be pow of two
+ if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
+
+ // We are going to use the original vector elt for storing.
+ // Accumulated smaller vector elements must be a multiple of the store size.
+ if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
+
+ unsigned SizeRatio = FromEltSz / ToEltSz;
+ assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
+
+ // Create a type on which we perform the shuffle.
+ EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
+ NumElems*SizeRatio);
+ assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ DebugLoc DL = St->getDebugLoc();
+ SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio;
+
+ // Can't shuffle using an illegal type.
+ if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
+ DAG.getUNDEF(WideVec.getValueType()),
+ ShuffleVec.data());
+ // At this point all of the data is stored at the bottom of the
+ // register. We now need to save it to mem.
+
+ // Find the largest store unit
+ MVT StoreType = MVT::i8;
+ for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
+ tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
+ MVT Tp = (MVT::SimpleValueType)tp;
+ if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
+ StoreType = Tp;
+ }
+ // Didn't find a legal store type.
+ if (!TLI.isTypeLegal(StoreType))
+ return SDValue();
+
+ // Bitcast the original vector into a vector of store-size units
+ EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
+ StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
+ assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
+ SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
+ SmallVector<SDValue, 8> Chains;
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
+ TLI.getPointerTy());
+ SDValue BasePtr = St->getBasePtr();
+
+ // Perform one or more big stores into memory.
+ unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
+ for (unsigned I = 0; I < E; I++) {
+ SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ StoreType, ShuffWide,
+ DAG.getIntPtrConstant(I));
+ SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
+ Increment);
+ Chains.push_back(Ch);
+ }
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0],
+ Chains.size());
+ }
+
+ if (!ISD::isNormalStore(St))
return SDValue();
+ // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
+ // ARM stores of arguments in the same cache line.
if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
- StVal.getNode()->hasOneUse() && !St->isVolatile()) {
+ StVal.getNode()->hasOneUse()) {
SelectionDAG &DAG = DCI.DAG;
DebugLoc DL = St->getDebugLoc();
SDValue BasePtr = St->getBasePtr();
StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
+ // Bitcast an i64 store extracted from a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
SelectionDAG &DAG = DCI.DAG;
DebugLoc dl = StVal.getDebugLoc();
SDValue IntVec = StVal.getOperand(0);
if (Res.getNode()) {
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
- DAG.ComputeMaskedBits(SDValue(N,0), Mask, KnownZero, KnownOne);
+ DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne);
// Capture demanded bits information that would be otherwise lost.
if (KnownZero == 0xfffffffe)
Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
/// a register against the immediate without having to materialize the
/// immediate into a register.
bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ // Thumb2 and ARM modes can use cmn for negative immediates.
if (!Subtarget->isThumb())
- return ARM_AM::getSOImmVal(Imm) != -1;
+ return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1;
if (Subtarget->isThumb2())
- return ARM_AM::getT2SOImmVal(Imm) != -1;
+ return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1;
+ // Thumb1 doesn't have cmn, and only 8-bit immediates.
return Imm >= 0 && Imm <= 255;
}
}
void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
- KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
switch (Op.getOpcode()) {
default: break;
case ARMISD::CMOV: {
// Bits are known zero/one if known on the LHS and RHS.
- DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+ DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
if (KnownZero == 0 && KnownOne == 0) return;
APInt KnownZeroRHS, KnownOneRHS;
- DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
- KnownZeroRHS, KnownOneRHS, Depth+1);
+ DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
KnownZero &= KnownZeroRHS;
KnownOne &= KnownOneRHS;
return;
switch (Constraint[0]) {
case 'l': // Low regs or general regs.
if (Subtarget->isThumb())
- return RCPair(0U, ARM::tGPRRegisterClass);
- else
- return RCPair(0U, ARM::GPRRegisterClass);
+ return RCPair(0U, &ARM::tGPRRegClass);
+ return RCPair(0U, &ARM::GPRRegClass);
case 'h': // High regs or no regs.
if (Subtarget->isThumb())
- return RCPair(0U, ARM::hGPRRegisterClass);
+ return RCPair(0U, &ARM::hGPRRegClass);
break;
case 'r':
- return RCPair(0U, ARM::GPRRegisterClass);
+ return RCPair(0U, &ARM::GPRRegClass);
case 'w':
if (VT == MVT::f32)
- return RCPair(0U, ARM::SPRRegisterClass);
+ return RCPair(0U, &ARM::SPRRegClass);
if (VT.getSizeInBits() == 64)
- return RCPair(0U, ARM::DPRRegisterClass);
+ return RCPair(0U, &ARM::DPRRegClass);
if (VT.getSizeInBits() == 128)
- return RCPair(0U, ARM::QPRRegisterClass);
+ return RCPair(0U, &ARM::QPRRegClass);
break;
case 'x':
if (VT == MVT::f32)
- return RCPair(0U, ARM::SPR_8RegisterClass);
+ return RCPair(0U, &ARM::SPR_8RegClass);
if (VT.getSizeInBits() == 64)
- return RCPair(0U, ARM::DPR_8RegisterClass);
+ return RCPair(0U, &ARM::DPR_8RegClass);
if (VT.getSizeInBits() == 128)
- return RCPair(0U, ARM::QPR_8RegisterClass);
+ return RCPair(0U, &ARM::QPR_8RegClass);
break;
case 't':
if (VT == MVT::f32)
- return RCPair(0U, ARM::SPRRegisterClass);
+ return RCPair(0U, &ARM::SPRRegClass);
break;
}
}
if (StringRef("{cc}").equals_lower(Constraint))
- return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);
+ return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}