From 43afab3bdb46bc4d3b5568540428920311821891 Mon Sep 17 00:00:00 2001
From: Elena Demikhovsky <elena.demikhovsky@intel.com>
Date: Thu, 9 Jul 2015 07:42:48 +0000
Subject: [PATCH] Extended syntax of vector version of getelementptr
 instruction.

The justification of this change is here: http://lists.cs.uiuc.edu/pipermail/llvmdev/2015-March/082989.html

According to the current GEP syntax, vector GEP requires that each index must be a vector with the same number of elements.

%A = getelementptr i8, <4 x i8*> %ptrs, <4 x i64> %offsets

In this implementation I let each index be or vector or scalar. All vector indices must have the same number of elements. The scalar value will mean the splat vector value.

(1) %A = getelementptr i8, i8* %ptr, <4 x i64> %offsets
or
(2) %A = getelementptr i8, <4 x i8*> %ptrs, i64 %offset

In all cases the %A type is <4 x i8*>

In the case (2) we add the same offset to all pointers.

The case (1) covers C[B[i]] case, when we have the same base C and different offsets B[i].

The documentation is updated.

http://reviews.llvm.org/D10496




git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241788 91177308-0d34-0410-b5e6-96231b3b80d8
---
 docs/LangRef.rst                              | 58 +++++++++++++++++--
 include/llvm/IR/Instructions.h                |  8 ++-
 lib/AsmParser/LLParser.cpp                    | 18 +++---
 .../SelectionDAG/SelectionDAGBuilder.cpp      | 29 +++++++++-
 lib/IR/Verifier.cpp                           | 23 ++++----
 test/Assembler/getelementptr_vec_idx1.ll      |  4 +-
 test/Assembler/getelementptr_vec_idx2.ll      | 22 +++++--
 test/Assembler/getelementptr_vec_idx3.ll      |  4 +-
 test/CodeGen/X86/vector-gep.ll                | 22 +++++++
 9 files changed, 152 insertions(+), 36 deletions(-)
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index 2e4bcbe7302..ca5c16c0f49 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -6718,7 +6718,8 @@ Overview:
 
 The '``getelementptr``' instruction is used to get the address of a
 subelement of an :ref:`aggregate <t_aggregate>` data structure. It performs
-address calculation only and does not access memory.
+address calculation only and does not access memory. The instruction can also
+be used to calculate a vector of such addresses.
 
 Arguments:
 """"""""""
@@ -6844,12 +6845,61 @@ Example:
         ; yields i32*:iptr
         %iptr = getelementptr [10 x i32], [10 x i32]* @arr, i16 0, i16 0
 
-In cases where the pointer argument is a vector of pointers, each index
-must be a vector with the same number of elements. For example:
+Vector of pointers:
+"""""""""""""""""""
+
+The ``getelementptr`` returns a vector of pointers, instead of a single address,
+when one or more of its arguments is a vector. In such cases, all vector
+arguments should have the same number of elements, and every scalar argument
+will be effectively broadcast into a vector during address calculation.
+
+.. code-block:: llvm
+
+     ; All arguments are vectors:
+     ;   A[i] = ptrs[i] + offsets[i]*sizeof(i8)
+     %A = getelementptr i8, <4 x i8*> %ptrs, <4 x i64> %offsets
+     
+     ; Add the same scalar offset to each pointer of a vector:
+     ;   A[i] = ptrs[i] + offset*sizeof(i8)
+     %A = getelementptr i8, <4 x i8*> %ptrs, i64 %offset
+     
+     ; Add distinct offsets to the same pointer:
+     ;   A[i] = ptr + offsets[i]*sizeof(i8)
+     %A = getelementptr i8, i8* %ptr, <4 x i64> %offsets
+     
+     ; In all cases described above the type of the result is <4 x i8*>
+
+The two following instructions are equivalent:
+
+.. code-block:: llvm
+
+     getelementptr  %struct.ST, <4 x %struct.ST*> %s, <4 x i64> %ind1,
+       <4 x i32> <i32 2, i32 2, i32 2, i32 2>,
+       <4 x i32> <i32 1, i32 1, i32 1, i32 1>,
+       <4 x i32> %ind4,
+       <4 x i64> <i64 13, i64 13, i64 13, i64 13>
+     
+     getelementptr  %struct.ST, <4 x %struct.ST*> %s, <4 x i64> %ind1,
+       i32 2, i32 1, <4 x i32> %ind4, i64 13
+
+Let's look at the C code, where the vector version of ``getelementptr``
+makes sense:
+
+.. code-block:: c
+
+    // Let's assume that we vectorize the following loop:
+    double *A, B; int *C;
+    for (int i = 0; i < size; ++i) {
+      A[i] = B[C[i]];
+    }
 
 .. code-block:: llvm
 
-     %A = getelementptr i8, <4 x i8*> %ptrs, <4 x i64> %offsets,
+    ; get pointers for 8 elements from array B
+    %ptrs = getelementptr double, double* %B, <8 x i32> %C
+    ; load 8 elements from array B into A
+    %A = call <8 x double> @llvm.masked.gather.v8f64(<8 x double*> %ptrs,
+         i32 8, <8 x i1> %mask, <8 x double> %passthru)
 
 Conversion Operations
 ---------------------
diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h
index c5890f01ea7..62723e44c0c 100644
--- a/include/llvm/IR/Instructions.h
+++ b/include/llvm/IR/Instructions.h
@@ -990,10 +990,14 @@ public:
                                    Ptr->getType()->getPointerAddressSpace());
     // Vector GEP
     if (Ptr->getType()->isVectorTy()) {
-      unsigned NumElem = cast<VectorType>(Ptr->getType())->getNumElements();
+      unsigned NumElem = Ptr->getType()->getVectorNumElements();
       return VectorType::get(PtrTy, NumElem);
     }
-
+    for (Value *Index : IdxList)
+      if (Index->getType()->isVectorTy()) {
+        unsigned NumElem = Index->getType()->getVectorNumElements();
+        return VectorType::get(PtrTy, NumElem);
+      }
     // Scalar GEP
     return PtrTy;
   }
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index b3c7fa087d4..91a88bc91fd 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -2873,8 +2873,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
         if (ValTy->isVectorTy() != BaseType->isVectorTy())
           return Error(ID.Loc, "getelementptr index type missmatch");
         if (ValTy->isVectorTy()) {
-          unsigned ValNumEl = cast<VectorType>(ValTy)->getNumElements();
-          unsigned PtrNumEl = cast<VectorType>(BaseType)->getNumElements();
+          unsigned ValNumEl = ValTy->getVectorNumElements();
+          unsigned PtrNumEl = BaseType->getVectorNumElements();
           if (ValNumEl != PtrNumEl)
             return Error(
                 ID.Loc,
@@ -5572,6 +5572,11 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
 
   SmallVector<Value*, 16> Indices;
   bool AteExtraComma = false;
+  // GEP returns a vector of pointers if at least one of parameters is a vector.
+  // All vector parameters should have the same vector width.
+  unsigned GEPWidth = BaseType->isVectorTy() ?
+    BaseType->getVectorNumElements() : 0;
+
   while (EatIfPresent(lltok::comma)) {
     if (Lex.getKind() == lltok::MetadataVar) {
       AteExtraComma = true;
@@ -5580,14 +5585,13 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
     if (ParseTypeAndValue(Val, EltLoc, PFS)) return true;
     if (!Val->getType()->getScalarType()->isIntegerTy())
       return Error(EltLoc, "getelementptr index must be an integer");
-    if (Val->getType()->isVectorTy() != Ptr->getType()->isVectorTy())
-      return Error(EltLoc, "getelementptr index type missmatch");
+
     if (Val->getType()->isVectorTy()) {
-      unsigned ValNumEl = cast<VectorType>(Val->getType())->getNumElements();
-      unsigned PtrNumEl = cast<VectorType>(Ptr->getType())->getNumElements();
-      if (ValNumEl != PtrNumEl)
+      unsigned ValNumEl = Val->getType()->getVectorNumElements();
+      if (GEPWidth && GEPWidth != ValNumEl)
         return Error(EltLoc,
           "getelementptr vector index has a wrong number of elements");
+      GEPWidth = ValNumEl;
     }
     Indices.push_back(Val);
   }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 4f0fc6e1eb5..345bd0ae2dc 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2787,6 +2787,16 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
   SDValue N = getValue(Op0);
   SDLoc dl = getCurSDLoc();
 
+  // Normalize Vector GEP - all scalar operands should be converted to the
+  // splat vector.
+  unsigned VectorWidth = I.getType()->isVectorTy() ?
+    cast<VectorType>(I.getType())->getVectorNumElements() : 0;
+
+  if (VectorWidth && !N.getValueType().isVector()) {
+    MVT VT = MVT::getVectorVT(N.getValueType().getSimpleVT(), VectorWidth);
+    SmallVector<SDValue, 16> Ops(VectorWidth, N);
+    N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+  }
   for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
        OI != E; ++OI) {
     const Value *Idx = *OI;
@@ -2807,12 +2817,20 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
       unsigned PtrSize = PtrTy.getSizeInBits();
       APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty));
 
-      // If this is a constant subscript, handle it quickly.
-      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
+      // If this is a scalar constant or a splat vector of constants,
+      // handle it quickly.
+      const auto *CI = dyn_cast<ConstantInt>(Idx);
+      if (!CI && isa<ConstantDataVector>(Idx) &&
+          cast<ConstantDataVector>(Idx)->getSplatValue())
+        CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue());
+
+      if (CI) {
         if (CI->isZero())
           continue;
         APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
-        SDValue OffsVal = DAG.getConstant(Offs, dl, PtrTy);
+        SDValue OffsVal = VectorWidth ?
+          DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) :
+          DAG.getConstant(Offs, dl, PtrTy);
         N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal);
         continue;
       }
@@ -2820,6 +2838,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
       // N = N + Idx * ElementSize;
       SDValue IdxN = getValue(Idx);
 
+      if (!IdxN.getValueType().isVector() && VectorWidth) {
+        MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth);
+        SmallVector<SDValue, 16> Ops(VectorWidth, IdxN);
+        IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);      
+      }
       // If the index is smaller or larger than intptr_t, truncate or extend
       // it.
       IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index a2ce7d56b38..647920f23da 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -2538,10 +2538,6 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   Assert(isa<PointerType>(TargetTy),
          "GEP base pointer is not a vector or a vector of pointers", &GEP);
   Assert(GEP.getSourceElementType()->isSized(), "GEP into unsized type!", &GEP);
-  Assert(GEP.getPointerOperandType()->isVectorTy() ==
-             GEP.getType()->isVectorTy(),
-         "Vector GEP must return a vector value", &GEP);
-
   SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
   Type *ElTy =
       GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs);
@@ -2551,17 +2547,20 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
              GEP.getResultElementType() == ElTy,
          "GEP is not of right type for indices!", &GEP, ElTy);
 
-  if (GEP.getPointerOperandType()->isVectorTy()) {
+  if (GEP.getType()->isVectorTy()) {
     // Additional checks for vector GEPs.
-    unsigned GepWidth = GEP.getPointerOperandType()->getVectorNumElements();
-    Assert(GepWidth == GEP.getType()->getVectorNumElements(),
-           "Vector GEP result width doesn't match operand's", &GEP);
+    unsigned GEPWidth = GEP.getType()->getVectorNumElements();
+    if (GEP.getPointerOperandType()->isVectorTy())
+      Assert(GEPWidth == GEP.getPointerOperandType()->getVectorNumElements(),
+             "Vector GEP result width doesn't match operand's", &GEP);
     for (unsigned i = 0, e = Idxs.size(); i != e; ++i) {
       Type *IndexTy = Idxs[i]->getType();
-      Assert(IndexTy->isVectorTy(), "Vector GEP must have vector indices!",
-             &GEP);
-      unsigned IndexWidth = IndexTy->getVectorNumElements();
-      Assert(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP);
+      if (IndexTy->isVectorTy()) {
+        unsigned IndexWidth = IndexTy->getVectorNumElements();
+        Assert(IndexWidth == GEPWidth, "Invalid GEP index vector width", &GEP);
+      }
+      Assert(IndexTy->getScalarType()->isIntegerTy(),
+             "All GEP indices should be of integer type");
     }
   }
   visitInstruction(GEP);
diff --git a/test/Assembler/getelementptr_vec_idx1.ll b/test/Assembler/getelementptr_vec_idx1.ll
index 084a31e7234..12160574d4e 100644
--- a/test/Assembler/getelementptr_vec_idx1.ll
+++ b/test/Assembler/getelementptr_vec_idx1.ll
@@ -1,8 +1,8 @@
 ; RUN: not llvm-as < %s >/dev/null 2> %t
 ; RUN: FileCheck %s < %t
-; Test that a vector index is only used with a vector pointer.
+; Test that a vector GEP may be used with a scalar base, the result is a vector of pointers
 
-; CHECK: getelementptr index type missmatch
+; CHECK: '%w' defined with type '<2 x i32*>
 
 define i32 @test(i32* %a) {
   %w = getelementptr i32, i32* %a, <2 x i32> <i32 5, i32 9>
diff --git a/test/Assembler/getelementptr_vec_idx2.ll b/test/Assembler/getelementptr_vec_idx2.ll
index 638fcb8b67d..be294098c9e 100644
--- a/test/Assembler/getelementptr_vec_idx2.ll
+++ b/test/Assembler/getelementptr_vec_idx2.ll
@@ -1,10 +1,24 @@
 ; RUN: not llvm-as < %s >/dev/null 2> %t
 ; RUN: FileCheck %s < %t
-; Test that a vector pointer is only used with a vector index.
+; Test that a vector pointer may be used with a scalar index.
+; Test that a vector pointer and vector index should have the same vector width
 
-; CHECK: getelementptr index type missmatch
-
-define <2 x i32> @test(<2 x i32*> %a) {
+; This code is correct
+define <2 x i32*> @test2(<2 x i32*> %a) {
   %w = getelementptr i32, <2 x i32*> %a, i32 2
+  ret <2 x i32*> %w
+}
+
+; This code is correct
+define <2 x i32*> @test3(i32* %a) {
+  %w = getelementptr i32, i32* %a, <2 x i32> <i32 2, i32 2>
+  ret <2 x i32*> %w
+}
+
+; CHECK: getelementptr vector index has a wrong number of elements
+
+define <2 x i32> @test1(<2 x i32*> %a) {
+  %w = getelementptr i32, <2 x i32*> %a, <4 x i32><i32 2, i32 2, i32 2, i32 2>
   ret <2 x i32> %w
 }
+
diff --git a/test/Assembler/getelementptr_vec_idx3.ll b/test/Assembler/getelementptr_vec_idx3.ll
index ac94459e23d..767c817cc62 100644
--- a/test/Assembler/getelementptr_vec_idx3.ll
+++ b/test/Assembler/getelementptr_vec_idx3.ll
@@ -1,8 +1,8 @@
 ; RUN: not llvm-as < %s >/dev/null 2> %t
 ; RUN: FileCheck %s < %t
-; Test that vector indices have the same number of elements as the pointer.
+; Test that a vector GEP may be used with a scalar base, the result is a vector of pointers
 
-; CHECK: getelementptr index type missmatch
+; CHECK: '%w' defined with type '<2 x <4 x i32>*>'
 
 define <4 x i32> @test(<4 x i32>* %a) {
   %w = getelementptr <4 x i32>, <4 x i32>* %a, <2 x i32> <i32 5, i32 9>
diff --git a/test/CodeGen/X86/vector-gep.ll b/test/CodeGen/X86/vector-gep.ll
index ce98e6759b6..47878360ca0 100644
--- a/test/CodeGen/X86/vector-gep.ll
+++ b/test/CodeGen/X86/vector-gep.ll
@@ -92,3 +92,25 @@ entry:
 ;CHECK: ret
 }
 
+;CHECK-LABEL: AGEP7:
+define <4 x i8*> @AGEP7(<4 x i8*> %param, i32 %off) nounwind {
+entry:
+;CHECK: vbroadcastss
+;CHECK: vpadd
+  %A = getelementptr i8, <4 x i8*> %param, i32 %off
+  ret <4 x i8*> %A
+;CHECK: ret
+}
+
+;CHECK-LABEL: AGEP8:
+define <4 x i16*> @AGEP8(i16* %param, <4 x i32> %off) nounwind {
+entry:
+; Multiply offset by two (add it to itself).
+;CHECK: vpadd
+; add the base to the offset
+;CHECK: vbroadcastss
+;CHECK-NEXT: vpadd
+  %A = getelementptr i16, i16* %param, <4 x i32> %off
+  ret <4 x i16*> %A
+;CHECK: ret
+}
-- 
2.34.1