1 ; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s
2 ;; These test cases exercise the loop-interchange profitability model.
; Target description used when the test module is parsed.
4 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
5 target triple = "x86_64-unknown-linux-gnu"
; Two 100 x 100 arrays of i32, zero-initialized with common linkage; every
; loop nest in this file loads from and/or stores to them.
7 @A = common global [100 x [100 x i32]] zeroinitializer
8 @B = common global [100 x [100 x i32]] zeroinitializer
10 ;;---------------------------------------Test case 01---------------------------------
11 ;; Interchanging the loops enables vectorization and is therefore profitable. Check that the loops are interchanged.
12 ;; for(int i=1;i<N;i++)
13 ;; for(int j=1;j<N;j++)
14 ;; A[j][i] = A[j - 1][i] + B[j][i];
; Loop nest for test case 01. The outer induction variable (%indvars.iv30)
; is used as the last index into @A and @B, while the inner induction
; variable (%indvars.iv) is used as the middle (row) index, so the inner
; loop moves from row to row.
16 define void @interchange_01(i32 %N) {
; Guard: bypass the loop nest unless N > 1.
18 %cmp27 = icmp sgt i32 %N, 1
19 br i1 %cmp27, label %for.cond1.preheader.lr.ph, label %for.end16
21 for.cond1.preheader.lr.ph:
23 br label %for.body3.preheader
; Outer induction variable: starts at 1, updated value arrives from %for.inc14.
26 %indvars.iv30 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next31, %for.inc14 ]
; Inner induction variable: starts at 1, updated value arrives from %for.body3.
30 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 1, %for.body3.preheader ]
; %1 = inner - 1, the row index of the @A element that is loaded.
31 %1 = add nsw i64 %indvars.iv, -1
32 %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %1, i64 %indvars.iv30
33 %2 = load i32, i32* %arrayidx5
34 %arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv30
35 %3 = load i32, i32* %arrayidx9
36 %add = add nsw i32 %3, %2
; Store the sum to @A at [inner][outer].
37 %arrayidx13 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv30
38 store i32 %add, i32* %arrayidx13
; Inner latch: the exit test compares the pre-increment induction value,
; truncated to i32, against %0.
39 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
40 %lftr.wideiv = trunc i64 %indvars.iv to i32
41 %exitcond = icmp eq i32 %lftr.wideiv, %0
42 br i1 %exitcond, label %for.inc14, label %for.body3
; Outer latch: same exit-test shape, applied to the outer induction variable.
45 %indvars.iv.next31 = add nuw nsw i64 %indvars.iv30, 1
46 %lftr.wideiv32 = trunc i64 %indvars.iv30 to i32
47 %exitcond33 = icmp eq i32 %lftr.wideiv32, %0
48 br i1 %exitcond33, label %for.end16, label %for.body3.preheader
53 ;; Check part of the output IR to verify that the loops are interchanged.
54 ; CHECK-LABEL: @interchange_01
55 ; CHECK: for.body3.preheader: ; preds = %for.inc14, %for.cond1.preheader.lr.ph
56 ; CHECK: %indvars.iv30 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next31, %for.inc14 ]
57 ; CHECK: br label %for.body3.split2
59 ; CHECK: for.body3.preheader1: ; preds = %entry
60 ; CHECK: br label %for.body3
62 ; CHECK: for.body3: ; preds = %for.body3.preheader1, %for.body3.split
63 ; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3.split ], [ 1, %for.body3.preheader1 ]
64 ; CHECK: br label %for.cond1.preheader.lr.ph
66 ; CHECK: for.body3.split2: ; preds = %for.body3.preheader
67 ; CHECK: %1 = add nsw i64 %indvars.iv, -1
68 ; CHECK: %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %1, i64 %indvars.iv30
69 ; CHECK: %2 = load i32, i32* %arrayidx5
70 ; CHECK: %arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv30
71 ; CHECK: %3 = load i32, i32* %arrayidx9
72 ; CHECK: %add = add nsw i32 %3, %2
73 ; CHECK: %arrayidx13 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv30
74 ; CHECK: store i32 %add, i32* %arrayidx13
75 ; CHECK: br label %for.inc14
78 ;; ---------------------------------------Test case 02---------------------------------
79 ;; Check loop interchange profitability model.
80 ;; This tests the profitability model when the operands of getelementptr are not exactly the induction variables but
81 ;; the result of some arithmetic operation on them.
82 ;; for(int i=1;i<N;i++)
83 ;; for(int j=1;j<N;j++)
84 ;; A[j-1][i-1] = A[j - 1][i-1] + B[j-1][i-1];
; Loop nest for test case 02. Both array indices are derived values
; (induction variable minus one) rather than the induction variables
; themselves: the inner-derived value %2 is the middle (row) index and the
; outer-derived value %1 is the last index.
86 define void @interchange_02(i32 %N) {
; Guard: bypass the loop nest unless N > 1.
88 %cmp32 = icmp sgt i32 %N, 1
89 br i1 %cmp32, label %for.cond1.preheader.lr.ph, label %for.end21
91 for.cond1.preheader.lr.ph:
93 br label %for.body3.lr.ph
; Outer induction variable: starts at 1, updated value arrives from %for.inc19.
96 %indvars.iv35 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next36, %for.inc19 ]
; %1 = outer - 1.
97 %1 = add nsw i64 %indvars.iv35, -1
; Inner induction variable: starts at 1, updated value arrives from %for.body3.
101 %indvars.iv = phi i64 [ 1, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
; %2 = inner - 1.
102 %2 = add nsw i64 %indvars.iv, -1
103 %arrayidx6 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %2, i64 %1
104 %3 = load i32, i32* %arrayidx6
105 %arrayidx12 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @B, i64 0, i64 %2, i64 %1
106 %4 = load i32, i32* %arrayidx12
107 %add = add nsw i32 %4, %3
; The sum is written back through the same @A address it was loaded from.
108 store i32 %add, i32* %arrayidx6
; Inner latch: the exit test compares the pre-increment induction value,
; truncated to i32, against %0.
109 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
110 %lftr.wideiv = trunc i64 %indvars.iv to i32
111 %exitcond = icmp eq i32 %lftr.wideiv, %0
112 br i1 %exitcond, label %for.inc19, label %for.body3
; Outer latch: same exit-test shape, applied to the outer induction variable.
115 %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
116 %lftr.wideiv38 = trunc i64 %indvars.iv35 to i32
117 %exitcond39 = icmp eq i32 %lftr.wideiv38, %0
118 br i1 %exitcond39, label %for.end21, label %for.body3.lr.ph
;; Expected output for test case 02 with the loops interchanged: the block
;; for.body3.lr.ph branches into the split loop body, for.body3 is reached
;; from a new preheader off the entry block, and for.body3 in turn branches
;; to for.cond1.preheader.lr.ph.
123 ; CHECK-LABEL: @interchange_02
124 ; CHECK: for.body3.lr.ph: ; preds = %for.inc19, %for.cond1.preheader.lr.ph
125 ; CHECK: %indvars.iv35 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next36, %for.inc19 ]
126 ; CHECK: %0 = add nsw i64 %indvars.iv35, -1
127 ; CHECK: br label %for.body3.split1
129 ; CHECK: for.body3.preheader: ; preds = %entry
130 ; CHECK: %1 = add i32 %N, -1
131 ; CHECK: br label %for.body3
133 ; CHECK: for.body3: ; preds = %for.body3.preheader, %for.body3.split
134 ; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3.split ], [ 1, %for.body3.preheader ]
135 ; CHECK: br label %for.cond1.preheader.lr.ph
137 ; CHECK: for.body3.split1: ; preds = %for.body3.lr.ph
138 ; CHECK: %2 = add nsw i64 %indvars.iv, -1
139 ; CHECK: %arrayidx6 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %2, i64 %0
140 ; CHECK: %3 = load i32, i32* %arrayidx6
141 ; CHECK: %arrayidx12 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @B, i64 0, i64 %2, i64 %0
142 ; CHECK: %4 = load i32, i32* %arrayidx12
143 ; CHECK: %add = add nsw i32 %4, %3
144 ; CHECK: store i32 %add, i32* %arrayidx6
145 ; CHECK: br label %for.inc19
148 ;;---------------------------------------Test case 03---------------------------------
149 ;; Loop interchange is not profitable here. Check that the loops are left in their original order.
150 ;; for(int i=1;i<N;i++)
151 ;; for(int j=1;j<N;j++)
152 ;; A[i-1][j-1] = A[i - 1][j-1] + B[i][j];
; Loop nest for test case 03. Here the outer-derived value is the middle
; (row) index and the inner-derived value is the last index, so the inner
; loop already walks along a row of @A and @B.
154 define void @interchange_03(i32 %N){
; Guard: bypass the loop nest unless N > 1.
156 %cmp31 = icmp sgt i32 %N, 1
157 br i1 %cmp31, label %for.cond1.preheader.lr.ph, label %for.end19
159 for.cond1.preheader.lr.ph:
161 br label %for.body3.lr.ph
; Outer induction variable: starts at 1, updated value arrives from %for.inc17.
164 %indvars.iv34 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next35, %for.inc17 ]
; %1 = outer - 1, the row index used for @A.
165 %1 = add nsw i64 %indvars.iv34, -1
; Inner induction variable: starts at 1, updated value arrives from %for.body3.
169 %indvars.iv = phi i64 [ 1, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
; %2 = inner - 1, the last index used for @A.
170 %2 = add nsw i64 %indvars.iv, -1
171 %arrayidx6 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %1, i64 %2
172 %3 = load i32, i32* %arrayidx6
; @B is indexed with the unmodified induction variables: [outer][inner].
173 %arrayidx10 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @B, i64 0, i64 %indvars.iv34, i64 %indvars.iv
174 %4 = load i32, i32* %arrayidx10
175 %add = add nsw i32 %4, %3
; The sum is written back through the same @A address it was loaded from.
176 store i32 %add, i32* %arrayidx6
; Inner latch: the exit test compares the pre-increment induction value,
; truncated to i32, against %0.
177 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
178 %lftr.wideiv = trunc i64 %indvars.iv to i32
179 %exitcond = icmp eq i32 %lftr.wideiv, %0
180 br i1 %exitcond, label %for.inc17, label %for.body3
; Outer latch: same exit-test shape, applied to the outer induction variable.
183 %indvars.iv.next35 = add nuw nsw i64 %indvars.iv34, 1
184 %lftr.wideiv37 = trunc i64 %indvars.iv34 to i32
185 %exitcond38 = icmp eq i32 %lftr.wideiv37, %0
186 br i1 %exitcond38, label %for.end19, label %for.body3.lr.ph
192 ; CHECK-LABEL: @interchange_03
193 ; CHECK: for.body3.lr.ph:
194 ; CHECK: %indvars.iv34 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next35, %for.inc17 ]
195 ; CHECK: %1 = add nsw i64 %indvars.iv34, -1
196 ; CHECK: br label %for.body3.preheader
197 ; CHECK: for.body3.preheader:
198 ; CHECK: br label %for.body3
200 ; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 1, %for.body3.preheader ]
201 ; CHECK: %2 = add nsw i64 %indvars.iv, -1
202 ; CHECK: %arrayidx6 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %1, i64 %2
203 ; CHECK: %3 = load i32, i32* %arrayidx6
204 ; CHECK: %arrayidx10 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @B, i64 0, i64 %indvars.iv34, i64 %indvars.iv
205 ; CHECK: %4 = load i32, i32* %arrayidx10