Given a pair of floating point load and store, if there are no other uses of the load, transform the pair to an integer load and store when the target deems it profitable.

author Evan Cheng <evan.cheng@apple.com>

Wed, 2 Feb 2011 01:06:55 +0000 (01:06 +0000)

committer Evan Cheng <evan.cheng@apple.com>

Wed, 2 Feb 2011 01:06:55 +0000 (01:06 +0000)
author Evan Cheng <evan.cheng@apple.com>
Wed, 2 Feb 2011 01:06:55 +0000 (01:06 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Wed, 2 Feb 2011 01:06:55 +0000 (01:06 +0000)
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h

index c81d1f76d019cb69a7b10ccb589fc2d68f174555..5141b7b56229777cbb38cd4d2b223e8c4765b98c 100644 (file)
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -963,6 +963,13 @@ public:
      return isTypeLegal(VT);
    }
  
+  /// isDesirableToTransformToIntegerOp - Return true if it is profitable for
+  /// dag combiner to transform a floating point op of specified opcode to an
+  /// equivalent integer op. e.g. f32 load -> i32 load can be profitable on ARM.
+  virtual bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const {
+    return false;
+  }
+
    /// IsDesirableToPromoteOp - This method query the target whether it is
    /// beneficial for dag combiner to promote the specified node. If true, it
    /// should return the desired promotion type by reference.
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index e91592b2429be2523e1473dafb629e1f3937f474..dd7d56ab9d29f59afa2e4f01d959ba38f0eca323 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -42,6 +42,7 @@ STATISTIC(NodesCombined   , "Number of dag nodes combined");
  STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
  STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
  STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
+STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
  
  namespace {
    static cl::opt<bool>
@@ -234,6 +235,7 @@ namespace {
      SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
      SDValue ReduceLoadWidth(SDNode *N);
      SDValue ReduceLoadOpStoreWidth(SDNode *N);
+    SDValue TransformFPLoadStorePair(SDNode *N);
  
      SDValue GetDemandedBits(SDValue V, const APInt &Mask);
  
@@ -6111,6 +6113,63 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
    return SDValue();
  }
  
+/// TransformFPLoadStorePair - For a given floating point load / store pair,
+/// if the load value isn't used by any other operations, then consider
+/// transforming the pair to integer load / store operations if the target
+/// deems the transformation profitable.
+SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
+  StoreSDNode *ST  = cast<StoreSDNode>(N);
+  SDValue Chain = ST->getChain();
+  SDValue Value = ST->getValue();
+  if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
+      Value.hasOneUse() &&
+      Chain == SDValue(Value.getNode(), 1)) {
+    LoadSDNode *LD = cast<LoadSDNode>(Value);
+    EVT VT = LD->getMemoryVT();
+    if (!VT.isFloatingPoint() ||
+        VT != ST->getMemoryVT() ||
+        LD->isNonTemporal() ||
+        ST->isNonTemporal() ||
+        LD->getPointerInfo().getAddrSpace() != 0 ||
+        ST->getPointerInfo().getAddrSpace() != 0)
+      return SDValue();
+
+    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+    if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
+        !TLI.isOperationLegal(ISD::STORE, IntVT) ||
+        !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
+        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
+      return SDValue();
+
+    unsigned LDAlign = LD->getAlignment();
+    unsigned STAlign = ST->getAlignment();
+    const Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
+    unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy);
+    if (LDAlign < ABIAlign || STAlign < ABIAlign)
+      return SDValue();
+
+    SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(),
+                                LD->getChain(), LD->getBasePtr(),
+                                LD->getPointerInfo(),
+                                false, false, LDAlign);
+
+    SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(),
+                                 NewLD, ST->getBasePtr(),
+                                 ST->getPointerInfo(),
+                                 false, false, STAlign);
+
+    AddToWorkList(NewLD.getNode());
+    AddToWorkList(NewST.getNode());
+    WorkListRemover DeadNodes(*this);
+    DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1),
+                                  &DeadNodes);
+    ++LdStFP2Int;
+    return NewST;
+  }
+
+  return SDValue();
+}
+
  SDValue DAGCombiner::visitSTORE(SDNode *N) {
    StoreSDNode *ST  = cast<StoreSDNode>(N);
    SDValue Chain = ST->getChain();
@@ -6210,6 +6269,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
      }
    }
  
+  // Try transforming a pair of floating point load / store ops to integer
+  // load / store ops.
+  SDValue NewST = TransformFPLoadStorePair(N);
+  if (NewST.getNode())
+    return NewST;
+
    if (CombinerAA) {
      // Walk up chain skipping non-aliasing memory nodes.
      SDValue BetterChain = FindBetterChain(N, Chain);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index 50557671ab511d1e101886a43c16412bc1c31e44..92ea6cb0f8963be856ca4b43356ae616ac0a187b 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -5724,6 +5724,11 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
    return SDValue();
  }
  
+bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
+                                                          EVT VT) const {
+  return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
+}
+
  bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
    if (!Subtarget->allowsUnalignedMem())
      return false;
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h

index 28bf60c8c289c97de0736e1e5ea1be7ca11c825c..b06b8d3e152be95b8c1dba3bb5c5316d0121cafa 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -213,14 +213,16 @@ namespace llvm {
      virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
                                      SelectionDAG &DAG) const;
  
-    virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-
      virtual const char *getTargetNodeName(unsigned Opcode) const;
  
      virtual MachineBasicBlock *
        EmitInstrWithCustomInserter(MachineInstr *MI,
                                    MachineBasicBlock *MBB) const;
  
+    virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+    bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
+
      /// allowsUnalignedMemoryAccesses - Returns true if the target allows
      /// unaligned memory accesses. of the specified type.
      /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
diff --git a/test/CodeGen/ARM/ldst-f32-2-i32.ll b/test/CodeGen/ARM/ldst-f32-2-i32.ll

new file mode 100644 (file)

index 0000000..2d016f6
--- /dev/null
+++ b/test/CodeGen/ARM/ldst-f32-2-i32.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+; Check that the f32 load / store pair is optimized to i32 load / store.
+; rdar://8944252
+
+define void @t(i32 %width, float* nocapture %src, float* nocapture %dst, i32 %index) nounwind {
+; CHECK: t:
+entry:
+  %src6 = bitcast float* %src to i8*
+  %0 = icmp eq i32 %width, 0
+  br i1 %0, label %return, label %bb
+
+bb:
+; CHECK: ldr [[REGISTER:(r[0-9]+)]], [r1], r3
+; CHECK: str [[REGISTER]], [r2], #4
+  %j.05 = phi i32 [ %2, %bb ], [ 0, %entry ]
+  %tmp = mul i32 %j.05, %index
+  %uglygep = getelementptr i8* %src6, i32 %tmp
+  %src_addr.04 = bitcast i8* %uglygep to float*
+  %dst_addr.03 = getelementptr float* %dst, i32 %j.05
+  %1 = load float* %src_addr.04, align 4
+  store float %1, float* %dst_addr.03, align 4
+  %2 = add i32 %j.05, 1
+  %exitcond = icmp eq i32 %2, %width
+  br i1 %exitcond, label %return, label %bb
+
+return:
+  ret void
+}
author	Evan Cheng <evan.cheng@apple.com>
	Wed, 2 Feb 2011 01:06:55 +0000 (01:06 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Wed, 2 Feb 2011 01:06:55 +0000 (01:06 +0000)
include/llvm/Target/TargetLowering.h		patch \| blob \| history
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
lib/Target/ARM/ARMISelLowering.cpp		patch \| blob \| history
lib/Target/ARM/ARMISelLowering.h		patch \| blob \| history
test/CodeGen/ARM/ldst-f32-2-i32.ll	[new file with mode: 0644]	patch \| blob