Loosen up iv reuse to allow reuse of the same stride but a larger type when truncating from the larger type to the smaller type is free.

author Evan Cheng <evan.cheng@apple.com>

Fri, 26 Oct 2007 01:56:11 +0000 (01:56 +0000)

committer Evan Cheng <evan.cheng@apple.com>

Fri, 26 Oct 2007 01:56:11 +0000 (01:56 +0000)
author Evan Cheng <evan.cheng@apple.com>
Fri, 26 Oct 2007 01:56:11 +0000 (01:56 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Fri, 26 Oct 2007 01:56:11 +0000 (01:56 +0000)
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h

index d55a8af72278e4d3875be5034862532792aea88d..e092145ebe6da9c3091028cf8227505055cd2d4a 100644 (file)
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -104,7 +104,7 @@ public:
    /// isPow2DivCheap() - Return true if pow2 div is cheaper than a chain of
    /// srl/add/sra.
    bool isPow2DivCheap() const { return Pow2DivIsCheap; }
-  
+
    /// getSetCCResultTy - Return the ValueType of the result of setcc operations.
    ///
    MVT::ValueType getSetCCResultTy() const { return SetCCResultTy; }
@@ -994,6 +994,13 @@ public:
    /// TODO: Handle pre/postinc as well.
    virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const;
  
+  /// isTruncateFree - Return true if it's free to truncate a value of
+  /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
+  /// register EAX to i16 by referencing its sub-register AX.
+  virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+    return false;
+  }
+  
    //===--------------------------------------------------------------------===//
    // Div utility functions
    //
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt

index ccd15be4e6b16dd71f1fa47fdb6ddffd216c496c..41b38d84c8aa3c7aa9b6ae2fe0ff27e16f518c0e 100644 (file)
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -339,20 +339,18 @@ void foo(int N) {
    for (i = 0; i < N; i++) { X = i; Y = i*4; }
  }
  
-LBB1_1:        #bb.preheader
-       xorl %ecx, %ecx
-       xorw %dx, %dx
-LBB1_2:        #bb
-       movl L_X$non_lazy_ptr, %esi
-       movw %dx, (%esi)
-       movw %dx, %si
-       shlw $2, %si
-       movl L_Y$non_lazy_ptr, %edi
-       movw %si, (%edi)
-       incl %ecx
-       incw %dx
-       cmpl %eax, %ecx
-       jne LBB1_2      #bb
+LBB1_1:        # entry.bb_crit_edge
+       xorl    %ecx, %ecx
+       xorw    %dx, %dx
+LBB1_2:        # bb
+       movl    L_X$non_lazy_ptr, %esi
+       movw    %cx, (%esi)
+       movl    L_Y$non_lazy_ptr, %esi
+       movw    %dx, (%esi)
+       addw    $4, %dx
+       incl    %ecx
+       cmpl    %eax, %ecx
+       jne     LBB1_2  # bb
  
  vs.
  
@@ -367,11 +365,7 @@ L4:
         cmpl    %edx, %edi
         jne     L4
  
-There are 3 issues:
-
-1. Lack of post regalloc LICM.
-2. LSR unable to reused IV for a different type (i16 vs. i32) even though
-   the cast would be free.
+This is due to the lack of post regalloc LICM.
  
  //===---------------------------------------------------------------------===//
  
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 4d528abb6b4b12b0585eed8b59fe84ea9c9782e3..172aa5338b94abef708d3ca6a6d0c04fafd04f70 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5122,6 +5122,13 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
  }
  
  
+bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+  if (!Ty1->isInteger() || !Ty2->isInteger())
+    return false;
+  return Ty1->getPrimitiveSizeInBits() > Ty2->getPrimitiveSizeInBits();
+}
+
+
  /// isShuffleMaskLegal - Targets can use this to indicate that they only
  /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
  /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h

index 7123adaad27d6e7a9b486bb671b5433321f1fbf0..b68de5a675394dab578f17b9522ddbf19f890d8a 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -359,6 +359,11 @@ namespace llvm {
      /// by AM is legal for this target, for a load/store of the specified type.
      virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
  
+    /// isTruncateFree - Return true if it's free to truncate a value of
+    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
+    /// register EAX to i16 by referencing its sub-register AX.
+    virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
+  
      /// isShuffleMaskLegal - Targets can use this to indicate that they only
      /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
      /// By default, if a target supports the VECTOR_SHUFFLE node, all mask
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp

index fbe3171f81ffa47fde8ed4e5171218c251160bcb..d81ea2b7994ab9dcae80e3b06ffe66435eb29ffa 100644 (file)
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -178,7 +178,7 @@ private:
      bool FindIVForUser(ICmpInst *Cond, IVStrideUse *&CondUse,
                         const SCEVHandle *&CondStride);
      bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
-    unsigned CheckForIVReuse(bool, const SCEVHandle&,
+    unsigned CheckForIVReuse(bool, bool, const SCEVHandle&,
                               IVExpr&, const Type*,
                               const std::vector<BasedUser>& UsersToProcess);
      bool ValidStride(bool, int64_t,
@@ -980,15 +980,17 @@ bool LoopStrengthReduce::ValidStride(bool HasBaseReg,
  
  /// RequiresTypeConversion - Returns true if converting Ty to NewTy is not
  /// a nop.
-bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty,
-                                                const Type *NewTy) {
-  if (Ty == NewTy)
+bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
+                                                const Type *Ty2) {
+  if (Ty1 == Ty2)
      return false;
-  return (!Ty->canLosslesslyBitCastTo(NewTy) &&
-          !(isa<PointerType>(NewTy) &&
-            Ty->canLosslesslyBitCastTo(UIntPtrTy)) &&
-          !(isa<PointerType>(Ty) &&
-            NewTy->canLosslesslyBitCastTo(UIntPtrTy)));
+  if (TLI && TLI->isTruncateFree(Ty1, Ty2))
+    return false;
+  return (!Ty1->canLosslesslyBitCastTo(Ty2) &&
+          !(isa<PointerType>(Ty2) &&
+            Ty1->canLosslesslyBitCastTo(UIntPtrTy)) &&
+          !(isa<PointerType>(Ty1) &&
+            Ty2->canLosslesslyBitCastTo(UIntPtrTy)));
  }
  
  /// CheckForIVReuse - Returns the multiple if the stride is the multiple
@@ -997,20 +999,23 @@ bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty,
  /// this stride to be rewritten as prev iv * factor. It returns 0 if no
  /// reuse is possible.
  unsigned LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
+                                bool AllUsesAreAddresses,
                                  const SCEVHandle &Stride, 
                                  IVExpr &IV, const Type *Ty,
                                  const std::vector<BasedUser>& UsersToProcess) {
    if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
      int64_t SInt = SC->getValue()->getSExtValue();
-    if (SInt == 1) return 0;
-
      for (std::map<SCEVHandle, IVsOfOneStride>::iterator SI= IVsByStride.begin(),
             SE = IVsByStride.end(); SI != SE; ++SI) {
        int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
-      if (SInt != -SSInt &&
+      if (SI->first != Stride &&
            (unsigned(abs(SInt)) < SSInt || (SInt % SSInt) != 0))
          continue;
        int64_t Scale = SInt / SSInt;
+      // When scale is 1, we don't need to worry about whether the
+      // multiplication can be folded into the addressing mode.
+      if (!AllUsesAreAddresses && Scale != 1)
+        continue;
        // Check that this stride is valid for all the types used for loads and
        // stores; if it can be used for some and not others, we might as well use
        // the original stride everywhere, since we have to create the IV for it
@@ -1021,7 +1026,7 @@ unsigned LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
            // FIXME: Only handle base == 0 for now.
            // Only reuse previous IV if it would not require a type conversion.
            if (isZero(II->Base) &&
-              !RequiresTypeConversion(II->Base->getType(),Ty)) {
+              !RequiresTypeConversion(II->Base->getType(), Ty)) {
              IV = *II;
              return Scale;
            }
@@ -1183,10 +1188,9 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
                     SE->getIntegerSCEV(0, Type::Int32Ty),
                     0, 0);
    unsigned RewriteFactor = 0;
-  if (AllUsesAreAddresses)
-    RewriteFactor = CheckForIVReuse(HaveCommonExprs, Stride, ReuseIV,
-                                    CommonExprs->getType(),
-                                    UsersToProcess);
+  RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
+                                  Stride, ReuseIV, CommonExprs->getType(),
+                                  UsersToProcess);
    if (RewriteFactor != 0) {
      DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride
           << " and BASE " << *ReuseIV.Base << " :\n";
diff --git a/test/CodeGen/X86/loop-strength-reduce5.ll b/test/CodeGen/X86/loop-strength-reduce5.ll

new file mode 100644 (file)

index 0000000..6e037e2
--- /dev/null
+++ b/test/CodeGen/X86/loop-strength-reduce5.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep inc | count 1
+
+@X = weak global i16 0         ; <i16*> [#uses=1]
+@Y = weak global i16 0         ; <i16*> [#uses=1]
+
+define void @foo(i32 %N) {
+entry:
+       %tmp1019 = icmp sgt i32 %N, 0           ; <i1> [#uses=1]
+       br i1 %tmp1019, label %bb, label %return
+
+bb:            ; preds = %bb, %entry
+       %i.014.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]         ; <i32> [#uses=2]
+       %tmp1 = trunc i32 %i.014.0 to i16               ; <i16> [#uses=2]
+       volatile store i16 %tmp1, i16* @X, align 2
+       %tmp34 = shl i16 %tmp1, 2               ; <i16> [#uses=1]
+       volatile store i16 %tmp34, i16* @Y, align 2
+       %indvar.next = add i32 %i.014.0, 1              ; <i32> [#uses=2]
+       %exitcond = icmp eq i32 %indvar.next, %N                ; <i1> [#uses=1]
+       br i1 %exitcond, label %return, label %bb
+
+return:                ; preds = %bb, %entry
+       ret void
+}
author	Evan Cheng <evan.cheng@apple.com>
	Fri, 26 Oct 2007 01:56:11 +0000 (01:56 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Fri, 26 Oct 2007 01:56:11 +0000 (01:56 +0000)
include/llvm/Target/TargetLowering.h		patch \| blob \| history
lib/Target/X86/README.txt		patch \| blob \| history
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.h		patch \| blob \| history
lib/Transforms/Scalar/LoopStrengthReduce.cpp		patch \| blob \| history
test/CodeGen/X86/loop-strength-reduce5.ll	[new file with mode: 0644]	patch \| blob