From b86dff862f6ff434a903f99b36d73a5e5280d9e4 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sun, 21 Apr 2013 08:05:59 +0000 Subject: [PATCH] SLPVectorize: Add support for vectorization of casts. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179975 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/VecUtils.cpp | 69 +++++++++++++++++++++++ test/Transforms/SLPVectorizer/X86/cast.ll | 38 +++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 test/Transforms/SLPVectorizer/X86/cast.ll diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp index 7701d080ffd..9b9436683b1 100644 --- a/lib/Transforms/Vectorize/VecUtils.cpp +++ b/lib/Transforms/Vectorize/VecUtils.cpp @@ -328,6 +328,18 @@ void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) { } switch (Opcode) { + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: @@ -445,6 +457,41 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) { } switch (Opcode) { + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + int Cost = 0; + ValueList Operands; + Type *SrcTy = VL0->getOperand(0)->getType(); + // Prepare the operand vector. + for (unsigned j = 0; j < VL.size(); ++j) { + Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); + // Check that the casted type is the same for all users.
+ if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy) + return getScalarizationCost(VecTy); + } + + Cost += getTreeCost_rec(Operands, Depth+1); + if (Cost >= max_cost) return max_cost; + + // Calculate the cost of this instruction. + int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(), + VL0->getType(), SrcTy); + + VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size()); + int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy); + Cost += (VecCost - ScalarCost); + return Cost; + } case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: @@ -583,6 +630,28 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) { } switch (Opcode) { + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + ValueList INVL; + for (int i = 0; i < VF; ++i) + INVL.push_back(cast<Instruction>(VL[i])->getOperand(0)); + Value *InVec = vectorizeTree_rec(INVL, VF); + IRBuilder<> Builder(GetLastInstr(VL, VF)); + CastInst *CI = dyn_cast<CastInst>(VL0); + Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy); + VectorizedValues[VL0] = V; + return V; + } case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: diff --git a/test/Transforms/SLPVectorizer/X86/cast.ll b/test/Transforms/SLPVectorizer/X86/cast.ll new file mode 100644 index 00000000000..344dbbca2c5 --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/cast.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; int
foo(int * restrict A, char * restrict B) { +; A[0] = B[0]; +; A[1] = B[1]; +; A[2] = B[2]; +; A[3] = B[3]; +; } +;CHECK: @foo +;CHECK: load <4 x i8> +;CHECK: sext +;CHECK: store <4 x i32> +define i32 @foo(i32* noalias nocapture %A, i8* noalias nocapture %B) { +entry: + %0 = load i8* %B, align 1 + %conv = sext i8 %0 to i32 + store i32 %conv, i32* %A, align 4 + %arrayidx2 = getelementptr inbounds i8* %B, i64 1 + %1 = load i8* %arrayidx2, align 1 + %conv3 = sext i8 %1 to i32 + %arrayidx4 = getelementptr inbounds i32* %A, i64 1 + store i32 %conv3, i32* %arrayidx4, align 4 + %arrayidx5 = getelementptr inbounds i8* %B, i64 2 + %2 = load i8* %arrayidx5, align 1 + %conv6 = sext i8 %2 to i32 + %arrayidx7 = getelementptr inbounds i32* %A, i64 2 + store i32 %conv6, i32* %arrayidx7, align 4 + %arrayidx8 = getelementptr inbounds i8* %B, i64 3 + %3 = load i8* %arrayidx8, align 1 + %conv9 = sext i8 %3 to i32 + %arrayidx10 = getelementptr inbounds i32* %A, i64 3 + store i32 %conv9, i32* %arrayidx10, align 4 + ret i32 undef +} + -- 2.34.1