From b0ee2374cea63f2edcc1bf092f6f46f3cc60da62 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Wed, 9 Apr 2014 14:20:47 +0000 Subject: [PATCH] SLPVectorizer: Only vectorize intrinsics whose operands are widened equally The vectorizer only knows how to vectorize intrinsics by widening all operands by the same factor. Patch by Tyler Nowicki! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205855 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Transforms/Utils/VectorUtils.h | 54 +++++++++++++++++++ lib/Transforms/Vectorize/LoopVectorize.cpp | 31 +++-------- lib/Transforms/Vectorize/SLPVectorizer.cpp | 5 +- .../X86/non-vectorizable-intrinsic.ll | 36 +++++++++++++ 4 files changed, 100 insertions(+), 26 deletions(-) create mode 100644 include/llvm/Transforms/Utils/VectorUtils.h create mode 100644 test/Transforms/SLPVectorizer/X86/non-vectorizable-intrinsic.ll diff --git a/include/llvm/Transforms/Utils/VectorUtils.h b/include/llvm/Transforms/Utils/VectorUtils.h new file mode 100644 index 00000000000..9b5c8619544 --- /dev/null +++ b/include/llvm/Transforms/Utils/VectorUtils.h @@ -0,0 +1,54 @@ +//===- llvm/Transforms/Utils/VectorUtils.h - Vector utilities -*- C++ -*-=====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines some vectorizer utilities. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_VECTORUTILS_H +#define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H + +namespace llvm { + +/// \brief Identify if the intrinsic is trivially vectorizable. +/// +/// This method returns true if the intrinsic's argument types are all +/// scalars for the scalar form of the intrinsic and all vectors for +/// the vector form of the intrinsic. 
+static inline bool isTriviallyVectorizable(Intrinsic::ID ID) { + switch (ID) { + case Intrinsic::sqrt: + case Intrinsic::sin: + case Intrinsic::cos: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::log: + case Intrinsic::log10: + case Intrinsic::log2: + case Intrinsic::fabs: + case Intrinsic::copysign: + case Intrinsic::floor: + case Intrinsic::ceil: + case Intrinsic::trunc: + case Intrinsic::rint: + case Intrinsic::nearbyint: + case Intrinsic::round: + case Intrinsic::ctpop: + case Intrinsic::pow: + case Intrinsic::fma: + case Intrinsic::fmuladd: + return true; + default: + return false; + } +} + +} // llvm namespace + +#endif diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index c5de6bc6bbe..9bf5e3ccb6e 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -91,6 +91,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/VectorUtils.h" #include #include @@ -2266,32 +2267,12 @@ static Intrinsic::ID getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) { // If we have an intrinsic call, check if it is trivially vectorizable. 
if (IntrinsicInst *II = dyn_cast(CI)) { - switch (II->getIntrinsicID()) { - case Intrinsic::sqrt: - case Intrinsic::sin: - case Intrinsic::cos: - case Intrinsic::exp: - case Intrinsic::exp2: - case Intrinsic::log: - case Intrinsic::log10: - case Intrinsic::log2: - case Intrinsic::fabs: - case Intrinsic::copysign: - case Intrinsic::floor: - case Intrinsic::ceil: - case Intrinsic::trunc: - case Intrinsic::rint: - case Intrinsic::nearbyint: - case Intrinsic::round: - case Intrinsic::pow: - case Intrinsic::fma: - case Intrinsic::fmuladd: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - return II->getIntrinsicID(); - default: + Intrinsic::ID ID = II->getIntrinsicID(); + if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start || + ID == Intrinsic::lifetime_end) + return ID; + else return Intrinsic::not_intrinsic; - } } if (!TLI) diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index ca55b4d6c90..6a78d756ccc 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -41,6 +41,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/VectorUtils.h" #include #include @@ -949,7 +950,9 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { case Instruction::Call: { // Check if the calls are all to the same vectorizable intrinsic. IntrinsicInst *II = dyn_cast(VL[0]); - if (II==NULL) { + Intrinsic::ID ID = II ? 
II->getIntrinsicID() : Intrinsic::not_intrinsic; + + if (!isTriviallyVectorizable(ID)) { newTreeEntry(VL, false); DEBUG(dbgs() << "SLP: Non-vectorizable call.\n"); return; diff --git a/test/Transforms/SLPVectorizer/X86/non-vectorizable-intrinsic.ll b/test/Transforms/SLPVectorizer/X86/non-vectorizable-intrinsic.ll new file mode 100644 index 00000000000..b250735874c --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/non-vectorizable-intrinsic.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -slp-vectorizer -o - -S -slp-threshold=-1000 + +target datalayout = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64" +target triple = "nvptx--nvidiacl" + +; CTLZ cannot be vectorized currently because the second argument is a scalar +; for both the scalar and vector forms of the intrinsic. In the future it +; should be possible to vectorize such functions. +; Test causes an assert if LLVM tries to vectorize CTLZ. + +define <2 x i8> @cltz_test(<2 x i8> %x) #0 { +entry: + %0 = extractelement <2 x i8> %x, i32 0 + %call.i = call i8 @llvm.ctlz.i8(i8 %0, i1 false) + %vecinit = insertelement <2 x i8> undef, i8 %call.i, i32 0 + %1 = extractelement <2 x i8> %x, i32 1 + %call.i4 = call i8 @llvm.ctlz.i8(i8 %1, i1 false) + %vecinit2 = insertelement <2 x i8> %vecinit, i8 %call.i4, i32 1 + ret <2 x i8> %vecinit2 +} + +define <2 x i8> @cltz_test2(<2 x i8> %x) #1 { +entry: + %0 = extractelement <2 x i8> %x, i32 0 + %1 = extractelement <2 x i8> %x, i32 1 + %call.i = call i8 @llvm.ctlz.i8(i8 %0, i1 false) + %call.i4 = call i8 @llvm.ctlz.i8(i8 %1, i1 false) + %vecinit = insertelement <2 x i8> undef, i8 %call.i, i32 0 + %vecinit2 = insertelement <2 x i8> %vecinit, i8 %call.i4, i32 1 + ret <2 x i8> %vecinit2 +} + +declare i8 @llvm.ctlz.i8(i8, i1) #3 + +attributes #0 = { alwaysinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" 
} +attributes #1 = { nounwind readnone } -- 2.34.1