Revert "LoopVectorizer: Only allow vectorization of intrinsics."

author Arnold Schwaighofer <aschwaighofer@apple.com>

Mon, 23 Sep 2013 14:54:39 +0000 (14:54 +0000)

committer Arnold Schwaighofer <aschwaighofer@apple.com>

Mon, 23 Sep 2013 14:54:39 +0000 (14:54 +0000)
author Arnold Schwaighofer <aschwaighofer@apple.com>
Mon, 23 Sep 2013 14:54:39 +0000 (14:54 +0000)
committer Arnold Schwaighofer <aschwaighofer@apple.com>
Mon, 23 Sep 2013 14:54:39 +0000 (14:54 +0000)
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp

index 02029e6ae06df4db2cedf2c047fb1a507a307dbb..e3bae02c7f90bf09d41b8d8278e4709707681644 100644 (file)
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1809,6 +1809,31 @@ LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) {
    }
  }
  
+static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I,
+                                              Intrinsic::ID ValidIntrinsicID) {
+  if (I.getNumArgOperands() != 1 ||
+      !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+      I.getType() != I.getArgOperand(0)->getType() ||
+      !I.onlyReadsMemory())
+    return Intrinsic::not_intrinsic;
+
+  return ValidIntrinsicID;
+}
+
+static Intrinsic::ID checkBinaryFloatSignature(const CallInst &I,
+                                               Intrinsic::ID ValidIntrinsicID) {
+  if (I.getNumArgOperands() != 2 ||
+      !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+      !I.getArgOperand(1)->getType()->isFloatingPointTy() ||
+      I.getType() != I.getArgOperand(0)->getType() ||
+      I.getType() != I.getArgOperand(1)->getType() ||
+      !I.onlyReadsMemory())
+    return Intrinsic::not_intrinsic;
+
+  return ValidIntrinsicID;
+}
+
+
  static Intrinsic::ID
  getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
    // If we have an intrinsic call, check if it is trivially vectorizable.
@@ -1847,8 +1872,9 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
    LibFunc::Func Func;
    Function *F = CI->getCalledFunction();
    // We're going to make assumptions on the semantics of the functions, check
-  // that the target knows that it's available in this environment.
-  if (!F || !TLI->getLibFunc(F->getName(), Func))
+  // that the target knows that it's available in this environment and it does
+  // not have local linkage.
+  if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(F->getName(), Func))
      return Intrinsic::not_intrinsic;
  
    // Otherwise check if we have a call to a function that can be turned into a
@@ -1859,67 +1885,67 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
    case LibFunc::sin:
    case LibFunc::sinf:
    case LibFunc::sinl:
-    return Intrinsic::sin;
+    return checkUnaryFloatSignature(*CI, Intrinsic::sin);
    case LibFunc::cos:
    case LibFunc::cosf:
    case LibFunc::cosl:
-    return Intrinsic::cos;
+    return checkUnaryFloatSignature(*CI, Intrinsic::cos);
    case LibFunc::exp:
    case LibFunc::expf:
    case LibFunc::expl:
-    return Intrinsic::exp;
+    return checkUnaryFloatSignature(*CI, Intrinsic::exp);
    case LibFunc::exp2:
    case LibFunc::exp2f:
    case LibFunc::exp2l:
-    return Intrinsic::exp2;
+    return checkUnaryFloatSignature(*CI, Intrinsic::exp2);
    case LibFunc::log:
    case LibFunc::logf:
    case LibFunc::logl:
-    return Intrinsic::log;
+    return checkUnaryFloatSignature(*CI, Intrinsic::log);
    case LibFunc::log10:
    case LibFunc::log10f:
    case LibFunc::log10l:
-    return Intrinsic::log10;
+    return checkUnaryFloatSignature(*CI, Intrinsic::log10);
    case LibFunc::log2:
    case LibFunc::log2f:
    case LibFunc::log2l:
-    return Intrinsic::log2;
+    return checkUnaryFloatSignature(*CI, Intrinsic::log2);
    case LibFunc::fabs:
    case LibFunc::fabsf:
    case LibFunc::fabsl:
-    return Intrinsic::fabs;
+    return checkUnaryFloatSignature(*CI, Intrinsic::fabs);
    case LibFunc::copysign:
    case LibFunc::copysignf:
    case LibFunc::copysignl:
-    return Intrinsic::copysign;
+    return checkBinaryFloatSignature(*CI, Intrinsic::copysign);
    case LibFunc::floor:
    case LibFunc::floorf:
    case LibFunc::floorl:
-    return Intrinsic::floor;
+    return checkUnaryFloatSignature(*CI, Intrinsic::floor);
    case LibFunc::ceil:
    case LibFunc::ceilf:
    case LibFunc::ceill:
-    return Intrinsic::ceil;
+    return checkUnaryFloatSignature(*CI, Intrinsic::ceil);
    case LibFunc::trunc:
    case LibFunc::truncf:
    case LibFunc::truncl:
-    return Intrinsic::trunc;
+    return checkUnaryFloatSignature(*CI, Intrinsic::trunc);
    case LibFunc::rint:
    case LibFunc::rintf:
    case LibFunc::rintl:
-    return Intrinsic::rint;
+    return checkUnaryFloatSignature(*CI, Intrinsic::rint);
    case LibFunc::nearbyint:
    case LibFunc::nearbyintf:
    case LibFunc::nearbyintl:
-    return Intrinsic::nearbyint;
+    return checkUnaryFloatSignature(*CI, Intrinsic::nearbyint);
    case LibFunc::round:
    case LibFunc::roundf:
    case LibFunc::roundl:
-    return Intrinsic::round;
+    return checkUnaryFloatSignature(*CI, Intrinsic::round);
    case LibFunc::pow:
    case LibFunc::powf:
    case LibFunc::powl:
-    return Intrinsic::pow;
+    return checkBinaryFloatSignature(*CI, Intrinsic::pow);
    }
  
    return Intrinsic::not_intrinsic;
@@ -2925,18 +2951,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
        // We still don't handle functions. However, we can ignore dbg intrinsic
        // calls and we do handle certain intrinsic and libm functions.
        CallInst *CI = dyn_cast<CallInst>(it);
-      if (CI) {
+      if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) {
          DEBUG(dbgs() << "LV: Found a call site.\n");
-
-        if (!isa<IntrinsicInst>(it)) {
-          DEBUG(dbgs() << "LV: We only vectorize intrinsics.\n");
-          return false;
-        }
-
-        if (!getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) {
-          DEBUG(dbgs() << "LV: Found an unknown intrinsic.\n");
-          return false;
-        }
+        return false;
        }
  
        // Check that the instruction return type is vectorizable.
diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll

index 99d6646e673abb1215b41a49ef8e1dba066e7fcb..c3d570c03a77fbf9ebcf97fe42f91b547243f846 100644 (file)
--- a/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/test/Transforms/LoopVectorize/intrinsic.ll
@@ -1018,7 +1018,7 @@ for.body:                                         ; preds = %entry, %for.body
    %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    %arrayidx = getelementptr inbounds float* %x, i64 %indvars.iv
    %0 = load float* %arrayidx, align 4
-  %call = tail call float @llvm.fabs.f32(float %0) nounwind readnone
+  %call = tail call float @fabsf(float %0) nounwind readnone
    store float %call, float* %arrayidx, align 4
    %indvars.iv.next = add i64 %indvars.iv, 1
    %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -1029,31 +1029,64 @@ for.end:                                          ; preds = %for.body
    ret void
  }
  
+declare float @fabsf(float) nounwind readnone
+
  declare double @llvm.pow.f64(double, double) nounwind readnone
  
  
-;CHECK: @not_intrin
-;CHECK: @round
-;CHECK-NOT: @round
-;CHECK: ret
-define void @not_intrin(i32* nocapture %A) nounwind ssp uwtable {
-  br label %1
-
-; <label>:1                                       ; preds = %1, %0
-  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
-  %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
-  %3 = load i32* %2, align 4
-  %4 = add nsw i32 %3, 3
-  store i32 %4, i32* %2, align 4
-  %5 = trunc i64 %indvars.iv to i32
-  tail call void @round(i32 %5) nounwind
+
+; Make sure we don't replace calls to functions with standard library function
+; signatures but defined with internal linkage.
+
+define internal float @roundf(float %x) nounwind readnone {
+  ret float 0.00000000
+}
+; CHECK-LABEL: internal_round
+; CHECK-NOT:  load <4 x float>
+
+define void @internal_round(float* nocapture %x) nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float* %x, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %call = tail call float @roundf(float %0) nounwind readnone
+  store float %call, float* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; Make sure we don't replace calls to functions with standard library names but
+; different signatures.
+
+declare void @round(double %f)
+
+; CHECK-LABEL: wrong_signature
+; CHECK-NOT:  load <4 x double>
+
+define void @wrong_signature(double* nocapture %x) nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds double* %x, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 4
+  store double %0, double* %arrayidx, align 4
+  tail call void @round(double %0) nounwind readnone
    %indvars.iv.next = add i64 %indvars.iv, 1
    %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, 256
-  br i1 %exitcond, label %6, label %1
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
  
-; <label>:6                                       ; preds = %1
+for.end:                                          ; preds = %for.body
    ret void
  }
  
-declare void @round(i32)
author	Arnold Schwaighofer <aschwaighofer@apple.com>
	Mon, 23 Sep 2013 14:54:39 +0000 (14:54 +0000)
committer	Arnold Schwaighofer <aschwaighofer@apple.com>
	Mon, 23 Sep 2013 14:54:39 +0000 (14:54 +0000)
lib/Transforms/Vectorize/LoopVectorize.cpp		patch | blob | history
test/Transforms/LoopVectorize/intrinsic.ll		patch | blob | history