DAGCombine's logic for forming pre- and post- indexed loads / stores were being

author Evan Cheng <evan.cheng@apple.com>

Fri, 13 Jan 2012 01:37:24 +0000 (01:37 +0000)

committer Evan Cheng <evan.cheng@apple.com>

Fri, 13 Jan 2012 01:37:24 +0000 (01:37 +0000)
author Evan Cheng <evan.cheng@apple.com>
Fri, 13 Jan 2012 01:37:24 +0000 (01:37 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Fri, 13 Jan 2012 01:37:24 +0000 (01:37 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 6d35b1200e06ca6fdd88d68bef9d00b78e868dda..c52a60b6f2985a9060b20e896200b8be2df01cd2 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5984,6 +5984,44 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
    return SDValue();
  }
  
+/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that
+/// uses N as its base pointer and that N may be folded in the load / store
+/// addressing mode. FIXME: This currently only looks for folding of
+/// [reg +/- imm] addressing modes.
+static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
+                                    SelectionDAG &DAG,
+                                    const TargetLowering &TLI) {
+  EVT VT;
+  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
+    if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+      return false;
+    VT = Use->getValueType(0);
+  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
+    if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+      return false;
+    VT = ST->getValue().getValueType();
+  } else
+    return false;
+
+  TargetLowering::AddrMode AM;
+  if (N->getOpcode() == ISD::ADD) {
+    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    if (Offset)
+      AM.BaseOffs = Offset->getSExtValue();
+    else
+      return false;
+  } else if (N->getOpcode() == ISD::SUB) {
+    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    if (Offset)
+      AM.BaseOffs = -Offset->getSExtValue();
+    else
+      return false;
+  } else
+    return false;
+
+  return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
+}
+
  /// CombineToPreIndexedLoadStore - Try turning a load / store into a
  /// pre-indexed load / store when the base pointer is an add or subtract
  /// and it has other uses besides the load / store. After the
@@ -6070,10 +6108,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
      if (N->hasPredecessorHelper(Use, Visited, Worklist))
        return false;
  
-    if (!((Use->getOpcode() == ISD::LOAD &&
-           cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
-          (Use->getOpcode() == ISD::STORE &&
-           cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
+    // If Ptr may be folded in addressing mode of other use, then it's
+    // not profitable to do this transformation.
+    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
        RealUse = true;
    }
  
@@ -6170,7 +6207,8 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
          continue;
  
        // Try turning it into a post-indexed load / store except when
-      // 1) All uses are load / store ops that use it as base ptr.
+      // 1) All uses are load / store ops that use it as base ptr (and
+      //    it may be folded as addressing mmode).
        // 2) Op must be independent of N, i.e. Op is neither a predecessor
        //    nor a successor of N. Otherwise, if Op is folded that would
        //    create a cycle.
@@ -6193,10 +6231,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
            for (SDNode::use_iterator III = Use->use_begin(),
                   EEE = Use->use_end(); III != EEE; ++III) {
              SDNode *UseUse = *III;
-            if (!((UseUse->getOpcode() == ISD::LOAD &&
-                   cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) ||
-                  (UseUse->getOpcode() == ISD::STORE &&
-                   cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use)))
+            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) 
                RealUse = true;
            }
  
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll

index 964cef084f17712fbe23cb8c80b37c9e1ebec633..521ffa1c9e3ad02dd54ba4c14e2ac5173f0df2f5 100644 (file)
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -54,12 +54,12 @@ declare i8* @malloc(...)
  define fastcc void @test4(i16 %addr) nounwind {
  entry:
  ; A8: test4:
-; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
-; A8: str [[REG]], [r0, r1, lsl #2]
+; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
+; A8: str [[REG]], [r0]
  
  ; A9: test4:
-; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
-; A9: str [[REG]], [r0, r1, lsl #2]
+; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
+; A9: str [[REG]], [r0]
    %0 = tail call i8* (...)* @malloc(i32 undef) nounwind
    %1 = bitcast i8* %0 to i32*
    %2 = sext i16 %addr to i32
diff --git a/test/CodeGen/ARM/test-sharedidx.ll b/test/CodeGen/ARM/test-sharedidx.ll

new file mode 100644 (file)

index 0000000..07c22a1
--- /dev/null
+++ b/test/CodeGen/ARM/test-sharedidx.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a9 -stress-ivchain | FileCheck %s
+
+; @sharedidx is an unrolled variant of this loop:
+;  for (unsigned long i = 0; i < len; i += s) {
+;    c[i] = a[i] + b[i];
+;  }
+; where 's' cannot be folded into the addressing mode.
+;
+; This is not quite profitable to chain. But with -stress-ivchain, we
+; can form three address chains in place of the shared induction
+; variable.
+
+; rdar://10674430
+define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
+entry:
+; CHECK: sharedidx:
+  %cmp8 = icmp eq i32 %len, 0
+  br i1 %cmp8, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body.3
+; CHECK: %for.body
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i8* %a, i32 %i.09
+  %0 = load i8* %arrayidx, align 1
+  %conv6 = zext i8 %0 to i32
+  %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09
+  %1 = load i8* %arrayidx1, align 1
+  %conv27 = zext i8 %1 to i32
+  %add = add nsw i32 %conv27, %conv6
+  %conv3 = trunc i32 %add to i8
+  %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09
+  store i8 %conv3, i8* %arrayidx4, align 1
+  %add5 = add i32 %i.09, %s
+  %cmp = icmp ult i32 %add5, %len
+  br i1 %cmp, label %for.body.1, label %for.end
+
+for.end:                                          ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
+  ret void
+
+for.body.1:                                       ; preds = %for.body
+; CHECK: %for.body.1
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5
+  %2 = load i8* %arrayidx.1, align 1
+  %conv6.1 = zext i8 %2 to i32
+  %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5
+  %3 = load i8* %arrayidx1.1, align 1
+  %conv27.1 = zext i8 %3 to i32
+  %add.1 = add nsw i32 %conv27.1, %conv6.1
+  %conv3.1 = trunc i32 %add.1 to i8
+  %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5
+  store i8 %conv3.1, i8* %arrayidx4.1, align 1
+  %add5.1 = add i32 %add5, %s
+  %cmp.1 = icmp ult i32 %add5.1, %len
+  br i1 %cmp.1, label %for.body.2, label %for.end
+
+for.body.2:                                       ; preds = %for.body.1
+; CHECK: %for.body.2
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1
+  %4 = load i8* %arrayidx.2, align 1
+  %conv6.2 = zext i8 %4 to i32
+  %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1
+  %5 = load i8* %arrayidx1.2, align 1
+  %conv27.2 = zext i8 %5 to i32
+  %add.2 = add nsw i32 %conv27.2, %conv6.2
+  %conv3.2 = trunc i32 %add.2 to i8
+  %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1
+  store i8 %conv3.2, i8* %arrayidx4.2, align 1
+  %add5.2 = add i32 %add5.1, %s
+  %cmp.2 = icmp ult i32 %add5.2, %len
+  br i1 %cmp.2, label %for.body.3, label %for.end
+
+for.body.3:                                       ; preds = %for.body.2
+; CHECK: %for.body.3
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2
+  %6 = load i8* %arrayidx.3, align 1
+  %conv6.3 = zext i8 %6 to i32
+  %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2
+  %7 = load i8* %arrayidx1.3, align 1
+  %conv27.3 = zext i8 %7 to i32
+  %add.3 = add nsw i32 %conv27.3, %conv6.3
+  %conv3.3 = trunc i32 %add.3 to i8
+  %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2
+  store i8 %conv3.3, i8* %arrayidx4.3, align 1
+  %add5.3 = add i32 %add5.2, %s
+  %cmp.3 = icmp ult i32 %add5.3, %len
+  br i1 %cmp.3, label %for.body, label %for.end
+}
author	Evan Cheng <evan.cheng@apple.com>
	Fri, 13 Jan 2012 01:37:24 +0000 (01:37 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Fri, 13 Jan 2012 01:37:24 +0000 (01:37 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
test/CodeGen/ARM/shifter_operand.ll		patch \| blob \| history
test/CodeGen/ARM/test-sharedidx.ll	[new file with mode: 0644]	patch \| blob