STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
+STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
namespace {
static cl::opt<bool>
SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
+ SDValue TransformFPLoadStorePair(SDNode *N);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
return SDValue();
}
+/// TransformFPLoadStorePair - For a given floating point load / store pair,
+/// if the load value isn't used by any other operations, then consider
+/// transforming the pair to integer load / store operations if the target
+/// deems the transformation profitable.
+SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
+ Value.hasOneUse() &&
+ Chain == SDValue(Value.getNode(), 1)) {
+ LoadSDNode *LD = cast<LoadSDNode>(Value);
+ EVT VT = LD->getMemoryVT();
+ if (!VT.isFloatingPoint() ||
+ VT != ST->getMemoryVT() ||
+ LD->isNonTemporal() ||
+ ST->isNonTemporal() ||
+ LD->getPointerInfo().getAddrSpace() != 0 ||
+ ST->getPointerInfo().getAddrSpace() != 0)
+ return SDValue();
+
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
+ !TLI.isOperationLegal(ISD::STORE, IntVT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
+ return SDValue();
+
+ unsigned LDAlign = LD->getAlignment();
+ unsigned STAlign = ST->getAlignment();
+ const Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy);
+ if (LDAlign < ABIAlign || STAlign < ABIAlign)
+ return SDValue();
+
+ SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(),
+ LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(),
+ false, false, LDAlign);
+
+ SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(),
+ NewLD, ST->getBasePtr(),
+ ST->getPointerInfo(),
+ false, false, STAlign);
+
+ AddToWorkList(NewLD.getNode());
+ AddToWorkList(NewST.getNode());
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1),
+ &DeadNodes);
+ ++LdStFP2Int;
+ return NewST;
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
}
}
+ // Try transforming a pair floating point load / store ops to integer
+ // load / store ops.
+ SDValue NewST = TransformFPLoadStorePair(N);
+ if (NewST.getNode())
+ return NewST;
+
if (CombinerAA) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const;
- virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-
virtual const char *getTargetNodeName(unsigned Opcode) const;
virtual MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const;
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
+
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
/// unaligned memory accesses. of the specified type.
/// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
--- /dev/null
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+; Check if the f32 load / store pair are optimized to i32 load / store.
+; rdar://8944252
+
+define void @t(i32 %width, float* nocapture %src, float* nocapture %dst, i32 %index) nounwind {
+; CHECK: t:
+entry:
+ %src6 = bitcast float* %src to i8*
+ %0 = icmp eq i32 %width, 0
+ br i1 %0, label %return, label %bb
+
+bb:
+; CHECK: ldr [[REGISTER:(r[0-9]+)]], [r1], r3
+; CHECK: str [[REGISTER]], [r2], #4
+ %j.05 = phi i32 [ %2, %bb ], [ 0, %entry ]
+ %tmp = mul i32 %j.05, %index
+ %uglygep = getelementptr i8* %src6, i32 %tmp
+ %src_addr.04 = bitcast i8* %uglygep to float*
+ %dst_addr.03 = getelementptr float* %dst, i32 %j.05
+ %1 = load float* %src_addr.04, align 4
+ store float %1, float* %dst_addr.03, align 4
+ %2 = add i32 %j.05, 1
+ %exitcond = icmp eq i32 %2, %width
+ br i1 %exitcond, label %return, label %bb
+
+return:
+ ret void
+}