From 43afab3bdb46bc4d3b5568540428920311821891 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Thu, 9 Jul 2015 07:42:48 +0000 Subject: [PATCH] Extended syntax of vector version of getelementptr instruction. The justification of this change is here: http://lists.cs.uiuc.edu/pipermail/llvmdev/2015-March/082989.html According to the current GEP syntax, vector GEP requires that each index must be a vector with the same number of elements. %A = getelementptr i8, <4 x i8*> %ptrs, <4 x i64> %offsets In this implementation I let each index be either a vector or a scalar. All vector indices must have the same number of elements. A scalar index is treated as a splat vector of that value. (1) %A = getelementptr i8, i8* %ptr, <4 x i64> %offsets or (2) %A = getelementptr i8, <4 x i8*> %ptrs, i64 %offset In all cases the %A type is <4 x i8*> In case (2) we add the same offset to all pointers. Case (1) covers the C[B[i]] case, where we have the same base C and different offsets B[i]. The documentation is updated. http://reviews.llvm.org/D10496 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241788 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/LangRef.rst | 58 +++++++++++++++++-- include/llvm/IR/Instructions.h | 8 ++- lib/AsmParser/LLParser.cpp | 18 +++--- .../SelectionDAG/SelectionDAGBuilder.cpp | 29 +++++++++- lib/IR/Verifier.cpp | 23 ++++---- test/Assembler/getelementptr_vec_idx1.ll | 4 +- test/Assembler/getelementptr_vec_idx2.ll | 22 +++++-- test/Assembler/getelementptr_vec_idx3.ll | 4 +- test/CodeGen/X86/vector-gep.ll | 22 +++++++ 9 files changed, 152 insertions(+), 36 deletions(-) diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 2e4bcbe7302..ca5c16c0f49 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -6718,7 +6718,8 @@ Overview: The '``getelementptr``' instruction is used to get the address of a subelement of an :ref:`aggregate <t_aggregate>` data structure. It performs -address calculation only and does not access memory. 
+address calculation only and does not access memory. The instruction can also +be used to calculate a vector of such addresses. Arguments: """""""""" @@ -6844,12 +6845,61 @@ Example: ; yields i32*:iptr %iptr = getelementptr [10 x i32], [10 x i32]* @arr, i16 0, i16 0 -In cases where the pointer argument is a vector of pointers, each index -must be a vector with the same number of elements. For example: +Vector of pointers: +""""""""""""""""""" + +The ``getelementptr`` returns a vector of pointers, instead of a single address, +when one or more of its arguments is a vector. In such cases, all vector +arguments should have the same number of elements, and every scalar argument +will be effectively broadcast into a vector during address calculation. + +.. code-block:: llvm + + ; All arguments are vectors: + ; A[i] = ptrs[i] + offsets[i]*sizeof(i8) + %A = getelementptr i8, <4 x i8*> %ptrs, <4 x i64> %offsets + + ; Add the same scalar offset to each pointer of a vector: + ; A[i] = ptrs[i] + offset*sizeof(i8) + %A = getelementptr i8, <4 x i8*> %ptrs, i64 %offset + + ; Add distinct offsets to the same pointer: + ; A[i] = ptr + offsets[i]*sizeof(i8) + %A = getelementptr i8, i8* %ptr, <4 x i64> %offsets + + ; In all cases described above the type of the result is <4 x i8*> + +The two following instructions are equivalent: + +.. code-block:: llvm + + getelementptr %struct.ST, <4 x %struct.ST*> %s, <4 x i64> %ind1, + <4 x i32> <i32 2, i32 2, i32 2, i32 2>, + <4 x i32> <i32 1, i32 1, i32 1, i32 1>, + <4 x i32> %ind4, + <4 x i64> <i64 13, i64 13, i64 13, i64 13> + + getelementptr %struct.ST, <4 x %struct.ST*> %s, <4 x i64> %ind1, + i32 2, i32 1, <4 x i32> %ind4, i64 13 + +Let's look at the C code, where the vector version of ``getelementptr`` +makes sense: + +.. code-block:: c + + // Let's assume that we vectorize the following loop: + double *A, *B; int *C; + for (int i = 0; i < size; ++i) { + A[i] = B[C[i]]; + } .. 
code-block:: llvm - %A = getelementptr i8, <4 x i8*> %ptrs, <4 x i64> %offsets, + ; get pointers for 8 elements from array B + %ptrs = getelementptr double, double* %B, <8 x i32> %C + ; load 8 elements from array B into A + %A = call <8 x double> @llvm.masked.gather.v8f64(<8 x double*> %ptrs, + i32 8, <8 x i1> %mask, <8 x double> %passthru) Conversion Operations --------------------- diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h index c5890f01ea7..62723e44c0c 100644 --- a/include/llvm/IR/Instructions.h +++ b/include/llvm/IR/Instructions.h @@ -990,10 +990,14 @@ public: Ptr->getType()->getPointerAddressSpace()); // Vector GEP if (Ptr->getType()->isVectorTy()) { - unsigned NumElem = cast(Ptr->getType())->getNumElements(); + unsigned NumElem = Ptr->getType()->getVectorNumElements(); return VectorType::get(PtrTy, NumElem); } - + for (Value *Index : IdxList) + if (Index->getType()->isVectorTy()) { + unsigned NumElem = Index->getType()->getVectorNumElements(); + return VectorType::get(PtrTy, NumElem); + } // Scalar GEP return PtrTy; } diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index b3c7fa087d4..91a88bc91fd 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -2873,8 +2873,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { if (ValTy->isVectorTy() != BaseType->isVectorTy()) return Error(ID.Loc, "getelementptr index type missmatch"); if (ValTy->isVectorTy()) { - unsigned ValNumEl = cast(ValTy)->getNumElements(); - unsigned PtrNumEl = cast(BaseType)->getNumElements(); + unsigned ValNumEl = ValTy->getVectorNumElements(); + unsigned PtrNumEl = BaseType->getVectorNumElements(); if (ValNumEl != PtrNumEl) return Error( ID.Loc, @@ -5572,6 +5572,11 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { SmallVector Indices; bool AteExtraComma = false; + // GEP returns a vector of pointers if at least one of parameters is a vector. 
+ // All vector parameters should have the same vector width. + unsigned GEPWidth = BaseType->isVectorTy() ? + BaseType->getVectorNumElements() : 0; + while (EatIfPresent(lltok::comma)) { if (Lex.getKind() == lltok::MetadataVar) { AteExtraComma = true; @@ -5580,14 +5585,13 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { if (ParseTypeAndValue(Val, EltLoc, PFS)) return true; if (!Val->getType()->getScalarType()->isIntegerTy()) return Error(EltLoc, "getelementptr index must be an integer"); - if (Val->getType()->isVectorTy() != Ptr->getType()->isVectorTy()) - return Error(EltLoc, "getelementptr index type missmatch"); + if (Val->getType()->isVectorTy()) { - unsigned ValNumEl = cast(Val->getType())->getNumElements(); - unsigned PtrNumEl = cast(Ptr->getType())->getNumElements(); - if (ValNumEl != PtrNumEl) + unsigned ValNumEl = Val->getType()->getVectorNumElements(); + if (GEPWidth && GEPWidth != ValNumEl) return Error(EltLoc, "getelementptr vector index has a wrong number of elements"); + GEPWidth = ValNumEl; } Indices.push_back(Val); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 4f0fc6e1eb5..345bd0ae2dc 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2787,6 +2787,16 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue N = getValue(Op0); SDLoc dl = getCurSDLoc(); + // Normalize Vector GEP - all scalar operands should be converted to the + // splat vector. + unsigned VectorWidth = I.getType()->isVectorTy() ? 
+ cast(I.getType())->getVectorNumElements() : 0; + + if (VectorWidth && !N.getValueType().isVector()) { + MVT VT = MVT::getVectorVT(N.getValueType().getSimpleVT(), VectorWidth); + SmallVector Ops(VectorWidth, N); + N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + } for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { const Value *Idx = *OI; @@ -2807,12 +2817,20 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { unsigned PtrSize = PtrTy.getSizeInBits(); APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty)); - // If this is a constant subscript, handle it quickly. - if (const auto *CI = dyn_cast(Idx)) { + // If this is a scalar constant or a splat vector of constants, + // handle it quickly. + const auto *CI = dyn_cast(Idx); + if (!CI && isa(Idx) && + cast(Idx)->getSplatValue()) + CI = cast(cast(Idx)->getSplatValue()); + + if (CI) { if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); - SDValue OffsVal = DAG.getConstant(Offs, dl, PtrTy); + SDValue OffsVal = VectorWidth ? + DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) : + DAG.getConstant(Offs, dl, PtrTy); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal); continue; } @@ -2820,6 +2838,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // N = N + Idx * ElementSize; SDValue IdxN = getValue(Idx); + if (!IdxN.getValueType().isVector() && VectorWidth) { + MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth); + SmallVector Ops(VectorWidth, IdxN); + IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + } // If the index is smaller or larger than intptr_t, truncate or extend // it. 
IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index a2ce7d56b38..647920f23da 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -2538,10 +2538,6 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { Assert(isa(TargetTy), "GEP base pointer is not a vector or a vector of pointers", &GEP); Assert(GEP.getSourceElementType()->isSized(), "GEP into unsized type!", &GEP); - Assert(GEP.getPointerOperandType()->isVectorTy() == - GEP.getType()->isVectorTy(), - "Vector GEP must return a vector value", &GEP); - SmallVector Idxs(GEP.idx_begin(), GEP.idx_end()); Type *ElTy = GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs); @@ -2551,17 +2547,20 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { GEP.getResultElementType() == ElTy, "GEP is not of right type for indices!", &GEP, ElTy); - if (GEP.getPointerOperandType()->isVectorTy()) { + if (GEP.getType()->isVectorTy()) { // Additional checks for vector GEPs. 
- unsigned GepWidth = GEP.getPointerOperandType()->getVectorNumElements(); - Assert(GepWidth == GEP.getType()->getVectorNumElements(), - "Vector GEP result width doesn't match operand's", &GEP); + unsigned GEPWidth = GEP.getType()->getVectorNumElements(); + if (GEP.getPointerOperandType()->isVectorTy()) + Assert(GEPWidth == GEP.getPointerOperandType()->getVectorNumElements(), + "Vector GEP result width doesn't match operand's", &GEP); for (unsigned i = 0, e = Idxs.size(); i != e; ++i) { Type *IndexTy = Idxs[i]->getType(); - Assert(IndexTy->isVectorTy(), "Vector GEP must have vector indices!", - &GEP); - unsigned IndexWidth = IndexTy->getVectorNumElements(); - Assert(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP); + if (IndexTy->isVectorTy()) { + unsigned IndexWidth = IndexTy->getVectorNumElements(); + Assert(IndexWidth == GEPWidth, "Invalid GEP index vector width", &GEP); + } + Assert(IndexTy->getScalarType()->isIntegerTy(), + "All GEP indices should be of integer type"); } } visitInstruction(GEP); diff --git a/test/Assembler/getelementptr_vec_idx1.ll b/test/Assembler/getelementptr_vec_idx1.ll index 084a31e7234..12160574d4e 100644 --- a/test/Assembler/getelementptr_vec_idx1.ll +++ b/test/Assembler/getelementptr_vec_idx1.ll @@ -1,8 +1,8 @@ ; RUN: not llvm-as < %s >/dev/null 2> %t ; RUN: FileCheck %s < %t -; Test that a vector index is only used with a vector pointer. 
+; Test that a vector GEP may be used with a scalar base, the result is a vector of pointers -; CHECK: getelementptr index type missmatch +; CHECK: '%w' defined with type '<2 x i32*> define i32 @test(i32* %a) { %w = getelementptr i32, i32* %a, <2 x i32> diff --git a/test/Assembler/getelementptr_vec_idx2.ll b/test/Assembler/getelementptr_vec_idx2.ll index 638fcb8b67d..be294098c9e 100644 --- a/test/Assembler/getelementptr_vec_idx2.ll +++ b/test/Assembler/getelementptr_vec_idx2.ll @@ -1,10 +1,24 @@ ; RUN: not llvm-as < %s >/dev/null 2> %t ; RUN: FileCheck %s < %t -; Test that a vector pointer is only used with a vector index. +; Test that a vector pointer may be used with a scalar index. +; Test that a vector pointer and vector index should have the same vector width -; CHECK: getelementptr index type missmatch - -define <2 x i32> @test(<2 x i32*> %a) { +; This code is correct +define <2 x i32*> @test2(<2 x i32*> %a) { %w = getelementptr i32, <2 x i32*> %a, i32 2 + ret <2 x i32*> %w +} + +; This code is correct +define <2 x i32*> @test3(i32* %a) { + %w = getelementptr i32, i32* %a, <2 x i32> + ret <2 x i32*> %w +} + +; CHECK: getelementptr vector index has a wrong number of elements + +define <2 x i32> @test1(<2 x i32*> %a) { + %w = getelementptr i32, <2 x i32*> %a, <4 x i32> ret <2 x i32> %w } + diff --git a/test/Assembler/getelementptr_vec_idx3.ll b/test/Assembler/getelementptr_vec_idx3.ll index ac94459e23d..767c817cc62 100644 --- a/test/Assembler/getelementptr_vec_idx3.ll +++ b/test/Assembler/getelementptr_vec_idx3.ll @@ -1,8 +1,8 @@ ; RUN: not llvm-as < %s >/dev/null 2> %t ; RUN: FileCheck %s < %t -; Test that vector indices have the same number of elements as the pointer. 
+; Test that a vector GEP may be used with a scalar base, the result is a vector of pointers -; CHECK: getelementptr index type missmatch +; CHECK: '%w' defined with type '<2 x <4 x i32>*>' define <4 x i32> @test(<4 x i32>* %a) { %w = getelementptr <4 x i32>, <4 x i32>* %a, <2 x i32> diff --git a/test/CodeGen/X86/vector-gep.ll b/test/CodeGen/X86/vector-gep.ll index ce98e6759b6..47878360ca0 100644 --- a/test/CodeGen/X86/vector-gep.ll +++ b/test/CodeGen/X86/vector-gep.ll @@ -92,3 +92,25 @@ entry: ;CHECK: ret } +;CHECK-LABEL: AGEP7: +define <4 x i8*> @AGEP7(<4 x i8*> %param, i32 %off) nounwind { +entry: +;CHECK: vbroadcastss +;CHECK: vpadd + %A = getelementptr i8, <4 x i8*> %param, i32 %off + ret <4 x i8*> %A +;CHECK: ret +} + +;CHECK-LABEL: AGEP8: +define <4 x i16*> @AGEP8(i16* %param, <4 x i32> %off) nounwind { +entry: +; Multiply offset by two (add it to itself). +;CHECK: vpadd +; add the base to the offset +;CHECK: vbroadcastss +;CHECK-NEXT: vpadd + %A = getelementptr i16, i16* %param, <4 x i32> %off + ret <4 x i16*> %A +;CHECK: ret +} -- 2.34.1