1 ; RUN: opt < %s -scalarrepl -S | FileCheck %s
3 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
4 target triple = "x86_64-apple-darwin10.0.0"
7 ; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float>
8 ; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]]
10 ; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
12 ; Split the array, but don't replace the memset with an insertelement
13 ; because it is not at a constant offset.
14 ; The load, however, can be replaced with an extractelement.
15 define float @test1(i32 %idx1, i32 %idx2) {
; Stack array of four <4 x float> vectors, zero-initialized by the store below.
17 %0 = alloca [4 x <4 x float>]
18 store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
; Address of a float lane in array element 0; the lane is chosen at run time by %idx1.
19 %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
; The memset intrinsic is declared on i8*, so the float pointer is cast first.
20 %cast = bitcast float* %ptr1 to i8*
; Zero 4 bytes (one float) at the dynamically selected address.
21 call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 4, i32 4, i1 false)
; Read a float lane of array element 1; the lane is chosen at run time by %idx2.
22 %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 1, i32 %idx2
23 %ret = load float* %ptr2
28 ; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
29 ; CHECK: extractelement <4 x float> %[[ins]], i32 %idx2
31 ; Do SROA on the array when it has dynamic vector reads and writes.
32 define float @test2(i32 %idx1, i32 %idx2) {
; Stack array of four <4 x float> vectors, zero-initialized by the store below.
34 %0 = alloca [4 x <4 x float>]
35 store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
; Write 1.0 into a lane of array element 0 selected at run time by %idx1.
36 %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
37 store float 1.0, float* %ptr1
; Read back a lane of the same array element (element 0), selected by %idx2.
38 %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2
39 %ret = load float* %ptr2
44 ; CHECK: %0 = alloca [4 x <4 x float>]
47 ; Don't do SROA on a dynamically indexed vector when it spans
48 ; more than one array element of the alloca array it is within.
49 define float @test3(i32 %idx1, i32 %idx2) {
; Stack array of four <4 x float> vectors, zero-initialized through the array type.
51 %0 = alloca [4 x <4 x float>]
52 store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
; Reinterpret the whole array as a single <16 x float>, so the dynamic
; indices below are not confined to one <4 x float> array element.
53 %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*
; Write 1.0 into the lane of the 16-wide view selected by %idx1.
54 %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1
55 store float 1.0, float* %ptr1
; Read the lane of the 16-wide view selected by %idx2.
56 %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2
57 %ret = load float* %ptr2
62 ; CHECK: insertelement <16 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
63 ; CHECK: extractelement <16 x float> %0, i32 %idx2
65 ; Don't do SROA on a dynamically indexed vector when it spans
66 ; more than one array element of the alloca array it is within.
67 ; However, unlike test3, the store is on the vector type
68 ; so SROA will convert the large alloca into the large vector
69 ; type and do all accesses with insert/extract element
70 define float @test4(i32 %idx1, i32 %idx2) {
; Same alloca type as test3, but every access (including the zero
; initialization) goes through the <16 x float> view of the memory.
72 %0 = alloca [4 x <4 x float>]
73 %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*
74 store <16 x float> zeroinitializer, <16 x float>* %bigvec
; Write 1.0 into the lane of the 16-wide view selected by %idx1.
75 %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1
76 store float 1.0, float* %ptr1
; Read the lane of the 16-wide view selected by %idx2.
77 %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2
78 %ret = load float* %ptr2
83 ; CHECK: %0 = alloca [4 x <4 x float>]
86 ; Don't do SROA, as there is a second dynamically indexed array
87 ; which may span multiple elements of the alloca.
88 define float @test5(i32 %idx1, i32 %idx2) {
; Stack array of four <4 x float> vectors, zero-initialized by the store below.
90 %0 = alloca [4 x <4 x float>]
91 store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
; Address of a lane in array element 0, selected at run time by %idx1.
92 %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
; Reinterpret that lane address as a [1 x <2 x float>] and index it
; dynamically a second time.
93 %ptr2 = bitcast float* %ptr1 to [1 x <2 x float>]*
; NOTE(review): %ptr3 is not referenced by any later instruction shown here;
; the store below goes through %ptr1. Presumably intentional -- confirm.
94 %ptr3 = getelementptr [1 x <2 x float>]* %ptr2, i32 0, i32 0, i32 %idx1
95 store float 1.0, float* %ptr1
; Read a lane of array element 0 selected at run time by %idx2.
96 %ptr4 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2
97 %ret = load float* %ptr4
102 ; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
103 ; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
105 %vector.pair = type { %vector.anon, %vector.anon }
106 %vector.anon = type { %vector }
107 %vector = type { <4 x float> }
109 ; Dynamic GEPs on vectors were crashing when the vector was inside a struct
110 ; as the new GEP for the new alloca might not include all the indices from
111 ; the original GEP, just the indices it needs to get to the correct offset of
112 ; some type, not necessarily the dynamic vector.
113 ; This test makes sure we don't have this crash.
114 define float @test6(i32 %idx1, i32 %idx2) {
; Stack slot holding two nested single-vector structs, zero-initialized.
116 %0 = alloca %vector.pair
117 store %vector.pair zeroinitializer, %vector.pair* %0
; Path: pair field 0 -> %vector.anon field 0 -> %vector field 0 (the
; <4 x float>) -> lane selected at run time by %idx1. Write 1.0 there.
118 %ptr1 = getelementptr %vector.pair* %0, i32 0, i32 0, i32 0, i32 0, i32 %idx1
119 store float 1.0, float* %ptr1
; Same nesting path through pair field 1; read the lane selected by %idx2.
120 %ptr2 = getelementptr %vector.pair* %0, i32 0, i32 1, i32 0, i32 0, i32 %idx2
121 %ret = load float* %ptr2
126 ; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
127 ; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
129 %array.pair = type { [2 x %array.anon], %array.anon }
130 %array.anon = type { [2 x %vector] }
132 ; This is the same as test6 and tests the same crash, but on arrays.
133 define float @test7(i32 %idx1, i32 %idx2) {
; Stack slot holding nested arrays of single-vector structs, zero-initialized.
135 %0 = alloca %array.pair
136 store %array.pair zeroinitializer, %array.pair* %0
; Path: pair field 0 ([2 x %array.anon]) -> element 0 -> field 0
; ([2 x %vector]) -> element 0 -> field 0 (the <4 x float>) -> lane
; selected at run time by %idx1. Write 1.0 there.
137 %ptr1 = getelementptr %array.pair* %0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 %idx1
138 store float 1.0, float* %ptr1
; Path: pair field 1 (%array.anon) -> field 0 ([2 x %vector]) -> element 0
; -> field 0 (the <4 x float>) -> lane selected at run time by %idx2.
139 %ptr2 = getelementptr %array.pair* %0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 %idx2
140 %ret = load float* %ptr2
145 ; CHECK: %[[offset1:[\.a-z0-9]*]] = add i32 %idx1, 1
146 ; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %[[offset1]]
147 ; CHECK: %[[offset2:[\.a-z0-9]*]] = add i32 %idx2, 2
148 ; CHECK: extractelement <4 x float> %[[ins]], i32 %[[offset2]]
150 ; Do SROA on the vector when it has dynamic vector reads and writes
151 ; from a non-zero offset.
152 define float @test8(i32 %idx1, i32 %idx2) {
; A single <4 x float> stack slot, zero-initialized.
154 %0 = alloca <4 x float>
155 store <4 x float> zeroinitializer, <4 x float>* %0
; Start at lane 1, view the remainder as <3 x float>, then index that view
; by %idx1: the written lane of the original vector is %idx1 + 1.
156 %ptr1 = getelementptr <4 x float>* %0, i32 0, i32 1
157 %ptr2 = bitcast float* %ptr1 to <3 x float>*
158 %ptr3 = getelementptr <3 x float>* %ptr2, i32 0, i32 %idx1
159 store float 1.0, float* %ptr3
; Start at lane 2, view the remainder as <2 x float>, then index that view
; by %idx2: the lane read from the original vector is %idx2 + 2.
160 %ptr4 = getelementptr <4 x float>* %0, i32 0, i32 2
161 %ptr5 = bitcast float* %ptr4 to <2 x float>*
162 %ptr6 = getelementptr <2 x float>* %ptr5, i32 0, i32 %idx2
163 %ret = load float* %ptr6
167 declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)