test/Transforms/ScalarRepl/dynamic-vector-gep.ll

   1 ; RUN: opt < %s -scalarrepl -S | FileCheck %s
   2
   3 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" ; NOTE(review): the leading "E" requests a big-endian layout while the triple below is x86_64 - confirm this pairing is intentional
   4 target triple = "x86_64-apple-darwin10.0.0" ; target triple the module is compiled for
   5
   6 ; CHECK: @test1
   7 ; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float>
   8 ; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]]
   9 ; CHECK: memset
  10 ; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
  11
  12 ; Split the array but don't replace the memset with an insert
  13 ; element as it's not a constant offset.
  14 ; The load, however, can be replaced with an extract element.
  15 define float @test1(i32 %idx1, i32 %idx2) { ; memset through a dynamic lane address, then a dynamic lane load
  16 entry:
  17   %0 = alloca [4 x <4 x float>] ; stack array holding four 4-lane float vectors
  18   store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0 ; zero the whole array
  19   %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1 ; float* to lane %idx1 of array element 0
  20   %cast = bitcast float* %ptr1 to i8* ; the memset intrinsic takes an i8* destination
  21   call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 4, i32 4, i1 false) ; zero 4 bytes (one float) at the dynamically chosen lane
  22   %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 1, i32 %idx2 ; float* to lane %idx2 of array element 1 (a different element than the memset)
  23   %ret = load float* %ptr2 ; dynamic-lane read, expected to become an extractelement
  24   ret float %ret
  25 }
  26
  27 ; CHECK: @test2
  28 ; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
  29 ; CHECK: extractelement <4 x float> %[[ins]], i32 %idx2
  30
  31 ; Do SROA on the array when it has dynamic vector reads and writes.
  32 define float @test2(i32 %idx1, i32 %idx2) { ; dynamic lane store and dynamic lane load on the same array element
  33 entry:
  34   %0 = alloca [4 x <4 x float>] ; stack array holding four 4-lane float vectors
  35   store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0 ; zero the whole array
  36   %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1 ; float* to lane %idx1 of array element 0
  37   store float 1.0, float* %ptr1 ; dynamic-lane write, expected to become an insertelement
  38   %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2 ; float* to lane %idx2 of the same array element 0
  39   %ret = load float* %ptr2 ; dynamic-lane read that must observe the store above
  40   ret float %ret
  41 }
  42
  43 ; CHECK: test3
  44 ; CHECK: %0 = alloca [4 x <4 x float>]
  45 ; CHECK-NOT: alloca
  46
  47 ; Don't do SROA on a dynamically indexed vector when it spans
  48 ; more than one array element of the alloca array it is within.
  49 define float @test3(i32 %idx1, i32 %idx2) { ; dynamic indexing through a <16 x float> view of the whole array
  50 entry:
  51   %0 = alloca [4 x <4 x float>] ; stack array holding four 4-lane float vectors
  52   store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0 ; initial store uses the array type (contrast with test4)
  53   %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>* ; reinterpret all four elements as one 16-lane vector
  54   %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1 ; dynamic lane that may land in any of the four array elements
  55   store float 1.0, float* %ptr1
  56   %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2 ; second dynamic lane over the same 16-lane view
  57   %ret = load float* %ptr2
  58   ret float %ret
  59 }
  60
  61 ; CHECK: test4
  62 ; CHECK: insertelement <16 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
  63 ; CHECK: extractelement <16 x float> %0, i32 %idx2
  64
  65 ; Don't do SROA on a dynamically indexed vector when it spans
  66 ; more than one array element of the alloca array it is within.
  67 ; However, unlike test3, the store is on the vector type
  68 ; so SROA will convert the large alloca into the large vector
  69 ; type and do all accesses with insert/extract element
  70 define float @test4(i32 %idx1, i32 %idx2) { ; like test3, but the initial store is done on the <16 x float> type
  71 entry:
  72   %0 = alloca [4 x <4 x float>] ; stack array holding four 4-lane float vectors
  73   %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>* ; reinterpret all four elements as one 16-lane vector
  74   store <16 x float> zeroinitializer, <16 x float>* %bigvec ; whole-vector store through the 16-lane view
  75   %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1 ; dynamic lane of the 16-lane view
  76   store float 1.0, float* %ptr1 ; expected to become an insertelement on <16 x float>
  77   %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2 ; second dynamic lane of the 16-lane view
  78   %ret = load float* %ptr2 ; expected to become an extractelement on <16 x float>
  79   ret float %ret
  80 }
  81
  82 ; CHECK: @test5
  83 ; CHECK: %0 = alloca [4 x <4 x float>]
  84 ; CHECK-NOT: alloca
  85
  86 ; Don't do SROA as there is a second dynamically indexed array
  87 ; which may span multiple elements of the alloca.
  88 define float @test5(i32 %idx1, i32 %idx2) { ; adds a second dynamic GEP through a differently typed view of the alloca
  89 entry:
  90   %0 = alloca [4 x <4 x float>] ; stack array holding four 4-lane float vectors
  91   store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0 ; zero the whole array
  92   %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1 ; float* to lane %idx1 of array element 0
  93   %ptr2 = bitcast float* %ptr1 to [1 x <2 x float>]* ; view the dynamically chosen address as an array of one 2-lane vector
  94   %ptr3 = getelementptr [1 x <2 x float>]* %ptr2, i32 0, i32 0, i32 %idx1 ; dynamic index on top of an already dynamic base; %ptr3 has no users in this function
  95   store float 1.0, float* %ptr1 ; the store goes through %ptr1, not %ptr3
  96   %ptr4 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2 ; float* to lane %idx2 of array element 0
  97   %ret = load float* %ptr4
  98   ret float %ret
  99 }
 100
 101 ; CHECK: test6
 102 ; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
 103 ; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
 104
 105 %vector.pair = type { %vector.anon, %vector.anon } ; two wrapped vectors, fields 0 and 1
 106 %vector.anon = type { %vector } ; single-field wrapper around %vector
 107 %vector = type { <4 x float> } ; single-field wrapper around a 4-lane float vector
 108
 109 ; Dynamic GEPs on vectors were crashing when the vector was inside a struct
 110 ; as the new GEP for the new alloca might not include all the indices from
 111 ; the original GEP, just the indices it needs to get to the correct offset of
 112 ; some type, not necessarily the dynamic vector.
 113 ; This test makes sure we don't have this crash.
 114 define float @test6(i32 %idx1, i32 %idx2) { ; dynamic lane access to vectors nested inside structs
 115 entry:
 116   %0 = alloca %vector.pair ; struct of two nested single-vector wrappers
 117   store %vector.pair zeroinitializer, %vector.pair* %0 ; zero both nested vectors
 118   %ptr1 = getelementptr %vector.pair* %0, i32 0, i32 0, i32 0, i32 0, i32 %idx1 ; pair field 0 -> %vector -> <4 x float> -> lane %idx1
 119   store float 1.0, float* %ptr1 ; dynamic-lane write into the first vector
 120   %ptr2 = getelementptr %vector.pair* %0, i32 0, i32 1, i32 0, i32 0, i32 %idx2 ; pair field 1 -> %vector -> <4 x float> -> lane %idx2
 121   %ret = load float* %ptr2 ; dynamic-lane read from the second vector, which the store above does not touch
 122   ret float %ret
 123 }
 124
 125 ; CHECK: test7
 126 ; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
 127 ; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
 128
 129 %array.pair = type { [2 x %array.anon], %array.anon } ; field 0: array of two wrappers, field 1: a single wrapper
 130 %array.anon = type { [2 x %vector] } ; single-field wrapper around an array of two %vector structs
 131
 132 ; This is the same as test6 and tests the same crash, but on arrays.
 133 define float @test7(i32 %idx1, i32 %idx2) { ; dynamic lane access to vectors nested inside structs and arrays
 134 entry:
 135   %0 = alloca %array.pair ; struct mixing nested arrays and single-vector wrappers
 136   store %array.pair zeroinitializer, %array.pair* %0 ; zero every nested vector
 137   %ptr1 = getelementptr %array.pair* %0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 %idx1 ; field 0 -> array elt 0 -> [2 x %vector] -> elt 0 -> <4 x float> -> lane %idx1
 138   store float 1.0, float* %ptr1 ; dynamic-lane write into a vector under field 0
 139   %ptr2 = getelementptr %array.pair* %0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 %idx2 ; field 1 -> [2 x %vector] -> elt 0 -> <4 x float> -> lane %idx2
 140   %ret = load float* %ptr2 ; dynamic-lane read from a vector under field 1, which the store above does not touch
 141   ret float %ret
 142 }
 143
 144 ; CHECK: test8
 145 ; CHECK: %[[offset1:[\.a-z0-9]*]] = add i32 %idx1, 1
 146 ; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %[[offset1]]
 147 ; CHECK: %[[offset2:[\.a-z0-9]*]] = add i32 %idx2, 2
 148 ; CHECK: extractelement <4 x float> %[[ins]], i32 %[[offset2]]
 149
 150 ; Do SROA on the vector when it has dynamic vector reads and writes
 151 ; from a non-zero offset.
 152 define float @test8(i32 %idx1, i32 %idx2) { ; dynamic lane access starting from a non-zero constant lane
 153 entry:
 154   %0 = alloca <4 x float> ; a single 4-lane float vector on the stack
 155   store <4 x float> zeroinitializer, <4 x float>* %0 ; zero all four lanes
 156   %ptr1 = getelementptr <4 x float>* %0, i32 0, i32 1 ; float* to constant lane 1
 157   %ptr2 = bitcast float* %ptr1 to <3 x float>* ; view lanes 1..3 as a 3-lane vector
 158   %ptr3 = getelementptr <3 x float>* %ptr2, i32 0, i32 %idx1 ; lane %idx1 of that view, i.e. lane %idx1 + 1 of the original vector
 159   store float 1.0, float* %ptr3
 160   %ptr4 = getelementptr <4 x float>* %0, i32 0, i32 2 ; float* to constant lane 2
 161   %ptr5 = bitcast float* %ptr4 to <2 x float>* ; view lanes 2..3 as a 2-lane vector
 162   %ptr6 = getelementptr <2 x float>* %ptr5, i32 0, i32 %idx2 ; lane %idx2 of that view, i.e. lane %idx2 + 2 of the original vector
 163   %ret = load float* %ptr6
 164   ret float %ret
 165 }
 166
 167 declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1) ; memset intrinsic called by test1; operands: destination, fill byte, length, alignment, volatile flag