test/CodeGen/ARM/lsr-on-unrolled-loops.ll

   1 ; RUN: llc -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 < %s | FileCheck %s
   2
   3 ; LSR should recognize that this is an unrolled loop which can use
   4 ; constant offset addressing, so that each of the following stores
   5 ; uses the same register.
   6
   7 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-128]
   8 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-96]
   9 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-64]
  10 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-32]
  11 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}]
  12 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #32]
  13 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #64]
  14 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #96]
  15
  ; Data layout for the test module: 'e' selects little-endian, 'p:32:32:32'
  ; gives 32-bit pointers, and 'n32' declares 32 bits as the native integer
  ; width.
  16 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
  17
  ; Numbered (unnamed) struct types %0 .. %23. @test takes a %0* and a %11*
  ; and reads field 65 of %0 and field 20 of %11, both loaded as i8*. The
  ; remaining types exist only because %0 refers to them, directly or through
  ; other types; %4 and %5 are opaque.
  18 %0 = type { %1*, %3*, %6*, i8*, i32, i32, %8*, i32, i32, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %9*], [4 x %10*], [4 x %10*], i32, %11*, i32, i32, [16 x i8], [16 x i8], [16 x i8], i32, i32, i8, i8, i8, i16, i16, i32, i8, i32, %12*, i32, i32, i32, i32, i8*, i32, [4 x %11*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %13*, %14*, %15*, %16*, %17*, %18*, %19*, %20*, %21*, %22*, %23* }
  19 %1 = type { void (%2*)*, void (%2*, i32)*, void (%2*)*, void (%2*, i8*)*, void (%2*)*, i32, %7, i32, i32, i8**, i32, i8**, i32, i32 }
  20 %2 = type { %1*, %3*, %6*, i8*, i32, i32 }
  21 %3 = type { i8* (%2*, i32, i32)*, i8* (%2*, i32, i32)*, i8** (%2*, i32, i32, i32)*, [64 x i16]** (%2*, i32, i32, i32)*, %4* (%2*, i32, i32, i32, i32, i32)*, %5* (%2*, i32, i32, i32, i32, i32)*, void (%2*)*, i8** (%2*, %4*, i32, i32, i32)*, [64 x i16]** (%2*, %5*, i32, i32, i32)*, void (%2*, i32)*, void (%2*)*, i32, i32 }
  22 %4 = type opaque
  23 %5 = type opaque
  24 %6 = type { void (%2*)*, i32, i32, i32, i32 }
  25 %7 = type { [8 x i32], [12 x i32] }
  26 %8 = type { i8*, i32, void (%0*)*, i32 (%0*)*, void (%0*, i32)*, i32 (%0*, i32)*, void (%0*)* }
  27 %9 = type { [64 x i16], i32 }
  28 %10 = type { [17 x i8], [256 x i8], i32 }
  29 %11 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %9*, i8* }
  30 %12 = type { %12*, i8, i32, i32, i8* }
  31 %13 = type { void (%0*)*, void (%0*)*, i32 }
  32 %14 = type { void (%0*, i32)*, void (%0*, i8**, i32*, i32)* }
  33 %15 = type { void (%0*)*, i32 (%0*)*, void (%0*)*, i32 (%0*, i8***)*, %5** }
  34 %16 = type { void (%0*, i32)*, void (%0*, i8***, i32*, i32, i8**, i32*, i32)* }
  35 %17 = type { i32 (%0*)*, void (%0*)*, void (%0*)*, void (%0*)*, i32, i32 }
  36 %18 = type { void (%0*)*, i32 (%0*)*, i32 (%0*)*, i32, i32, i32, i32 }
  37 %19 = type { void (%0*)*, i32 (%0*, [64 x i16]**)*, i32 }
  38 %20 = type { void (%0*)*, [10 x void (%0*, %11*, i16*, i8**, i32)*] }
  39 %21 = type { void (%0*)*, void (%0*, i8***, i32*, i32, i8**, i32*, i32)*, i32 }
  40 %22 = type { void (%0*)*, void (%0*, i8***, i32, i8**, i32)* }
  41 %23 = type { void (%0*, i32)*, void (%0*, i8**, i8**, i32)*, void (%0*)*, void (%0*)* }
  42
  ; @test: first LSR test input. It consists of two loops of 8 iterations each
  ; that communicate through the 64-float stack buffer %t.
  ;
  ;   Loop 1 (bb9 .. bb156, %t10 = 0..7) reads eight i16 values from %a2 at
  ;   indices (8 * %t10) | k and eight floats from %t8 at byte offsets
  ;   (32 * %t10) | 4k, then stores eight floats into %t at indices
  ;   %t10 + 8k, i.e. 32 bytes apart. Presumably these are the stores the
  ;   vstr.32 CHECK lines at the top of the file expect to share one base
  ;   register with constant offsets.
  ;
  ;   Loop 2 (bb167, %t168 = 0..7) reads the eight floats %t[(8 * %t168) | k],
  ;   combines them, and for each result stores one byte, looked up through
  ;   the table pointer %t6, into the row loaded from %a3[%t168] at byte
  ;   offsets %a4 .. %a4 + 7.
  ;
  ; %t6 is field 65 of %a0 and %t8 is field 20 of %a1 (see bb).
  43 define void @test(%0* nocapture %a0, %11* nocapture %a1, i16* nocapture %a2, i8** nocapture %a3, i32 %a4) nounwind {
  44 bb:
  45   %t = alloca [64 x float], align 4
  46   %t5 = getelementptr inbounds %0* %a0, i32 0, i32 65
  47   %t6 = load i8** %t5, align 4
  48   %t7 = getelementptr inbounds %11* %a1, i32 0, i32 20
  49   %t8 = load i8** %t7, align 4
  50   br label %bb9
  51
     ; Loop 1 header: form every address used by this iteration, then test
     ; whether the inputs at offsets 1 and 2 are both zero.
  52 bb9:
  53   %t10 = phi i32 [ 0, %bb ], [ %t157, %bb156 ]
  54   %t11 = add i32 %t10, 8
  55   %t12 = getelementptr [64 x float]* %t, i32 0, i32 %t11
  56   %t13 = add i32 %t10, 16
  57   %t14 = getelementptr [64 x float]* %t, i32 0, i32 %t13
  58   %t15 = add i32 %t10, 24
  59   %t16 = getelementptr [64 x float]* %t, i32 0, i32 %t15
  60   %t17 = add i32 %t10, 32
  61   %t18 = getelementptr [64 x float]* %t, i32 0, i32 %t17
  62   %t19 = add i32 %t10, 40
  63   %t20 = getelementptr [64 x float]* %t, i32 0, i32 %t19
  64   %t21 = add i32 %t10, 48
  65   %t22 = getelementptr [64 x float]* %t, i32 0, i32 %t21
  66   %t23 = add i32 %t10, 56
  67   %t24 = getelementptr [64 x float]* %t, i32 0, i32 %t23
  68   %t25 = getelementptr [64 x float]* %t, i32 0, i32 %t10
  69   %t26 = shl i32 %t10, 5
  70   %t27 = or i32 %t26, 8
  71   %t28 = getelementptr i8* %t8, i32 %t27
  72   %t29 = bitcast i8* %t28 to float*
  73   %t30 = or i32 %t26, 16
  74   %t31 = getelementptr i8* %t8, i32 %t30
  75   %t32 = bitcast i8* %t31 to float*
  76   %t33 = or i32 %t26, 24
  77   %t34 = getelementptr i8* %t8, i32 %t33
  78   %t35 = bitcast i8* %t34 to float*
  79   %t36 = or i32 %t26, 4
  80   %t37 = getelementptr i8* %t8, i32 %t36
  81   %t38 = bitcast i8* %t37 to float*
  82   %t39 = or i32 %t26, 12
  83   %t40 = getelementptr i8* %t8, i32 %t39
  84   %t41 = bitcast i8* %t40 to float*
  85   %t42 = or i32 %t26, 20
  86   %t43 = getelementptr i8* %t8, i32 %t42
  87   %t44 = bitcast i8* %t43 to float*
  88   %t45 = or i32 %t26, 28
  89   %t46 = getelementptr i8* %t8, i32 %t45
  90   %t47 = bitcast i8* %t46 to float*
  91   %t48 = getelementptr i8* %t8, i32 %t26
  92   %t49 = bitcast i8* %t48 to float*
  93   %t50 = shl i32 %t10, 3
  94   %t51 = or i32 %t50, 1
  95   %t52 = getelementptr i16* %a2, i32 %t51
  96   %t53 = or i32 %t50, 2
  97   %t54 = getelementptr i16* %a2, i32 %t53
  98   %t55 = or i32 %t50, 3
  99   %t56 = getelementptr i16* %a2, i32 %t55
 100   %t57 = or i32 %t50, 4
 101   %t58 = getelementptr i16* %a2, i32 %t57
 102   %t59 = or i32 %t50, 5
 103   %t60 = getelementptr i16* %a2, i32 %t59
 104   %t61 = or i32 %t50, 6
 105   %t62 = getelementptr i16* %a2, i32 %t61
 106   %t63 = or i32 %t50, 7
 107   %t64 = getelementptr i16* %a2, i32 %t63
 108   %t65 = getelementptr i16* %a2, i32 %t50
 109   %t66 = load i16* %t52, align 2
 110   %t67 = icmp eq i16 %t66, 0
 111   %t68 = load i16* %t54, align 2
 112   %t69 = icmp eq i16 %t68, 0
 113   %t70 = and i1 %t67, %t69
 114   br i1 %t70, label %bb71, label %bb91
 115
     ; bb71 .. bb83: continue the zero test for the inputs at offsets 3 through
     ; 7. The first non-zero value branches to the general path in bb91.
 116 bb71:
 117   %t72 = load i16* %t56, align 2
 118   %t73 = icmp eq i16 %t72, 0
 119   br i1 %t73, label %bb74, label %bb91
 120
 121 bb74:
 122   %t75 = load i16* %t58, align 2
 123   %t76 = icmp eq i16 %t75, 0
 124   br i1 %t76, label %bb77, label %bb91
 125
 126 bb77:
 127   %t78 = load i16* %t60, align 2
 128   %t79 = icmp eq i16 %t78, 0
 129   br i1 %t79, label %bb80, label %bb91
 130
 131 bb80:
 132   %t81 = load i16* %t62, align 2
 133   %t82 = icmp eq i16 %t81, 0
 134   br i1 %t82, label %bb83, label %bb91
 135
 136 bb83:
 137   %t84 = load i16* %t64, align 2
 138   %t85 = icmp eq i16 %t84, 0
 139   br i1 %t85, label %bb86, label %bb91
 140
     ; Shortcut path: the inputs at offsets 1..7 are all zero, so the input at
     ; offset 0 is scaled once and the same value is stored to all eight
     ; output slots of %t.
 141 bb86:
 142   %t87 = load i16* %t65, align 2
 143   %t88 = sitofp i16 %t87 to float
 144   %t89 = load float* %t49, align 4
 145   %t90 = fmul float %t88, %t89
 146   store float %t90, float* %t25, align 4
 147   store float %t90, float* %t12, align 4
 148   store float %t90, float* %t14, align 4
 149   store float %t90, float* %t16, align 4
 150   store float %t90, float* %t18, align 4
 151   store float %t90, float* %t20, align 4
 152   store float %t90, float* %t22, align 4
 153   store float %t90, float* %t24, align 4
 154   br label %bb156
 155
     ; General path: each of the eight inputs is converted to float and
     ; multiplied by its matching float from %t8; the products are combined by
     ; the fadd/fsub/fmul network below and the eight results are stored to %t.
 156 bb91:
 157   %t92 = load i16* %t65, align 2
 158   %t93 = sitofp i16 %t92 to float
 159   %t94 = load float* %t49, align 4
 160   %t95 = fmul float %t93, %t94
 161   %t96 = sitofp i16 %t68 to float
 162   %t97 = load float* %t29, align 4
 163   %t98 = fmul float %t96, %t97
 164   %t99 = load i16* %t58, align 2
 165   %t100 = sitofp i16 %t99 to float
 166   %t101 = load float* %t32, align 4
 167   %t102 = fmul float %t100, %t101
 168   %t103 = load i16* %t62, align 2
 169   %t104 = sitofp i16 %t103 to float
 170   %t105 = load float* %t35, align 4
 171   %t106 = fmul float %t104, %t105
 172   %t107 = fadd float %t95, %t102
 173   %t108 = fsub float %t95, %t102
 174   %t109 = fadd float %t98, %t106
 175   %t110 = fsub float %t98, %t106
 176   %t111 = fmul float %t110, 0x3FF6A09E60000000
 177   %t112 = fsub float %t111, %t109
 178   %t113 = fadd float %t107, %t109
 179   %t114 = fsub float %t107, %t109
 180   %t115 = fadd float %t108, %t112
 181   %t116 = fsub float %t108, %t112
 182   %t117 = sitofp i16 %t66 to float
 183   %t118 = load float* %t38, align 4
 184   %t119 = fmul float %t117, %t118
 185   %t120 = load i16* %t56, align 2
 186   %t121 = sitofp i16 %t120 to float
 187   %t122 = load float* %t41, align 4
 188   %t123 = fmul float %t121, %t122
 189   %t124 = load i16* %t60, align 2
 190   %t125 = sitofp i16 %t124 to float
 191   %t126 = load float* %t44, align 4
 192   %t127 = fmul float %t125, %t126
 193   %t128 = load i16* %t64, align 2
 194   %t129 = sitofp i16 %t128 to float
 195   %t130 = load float* %t47, align 4
 196   %t131 = fmul float %t129, %t130
 197   %t132 = fadd float %t127, %t123
 198   %t133 = fsub float %t127, %t123
 199   %t134 = fadd float %t119, %t131
 200   %t135 = fsub float %t119, %t131
 201   %t136 = fadd float %t134, %t132
 202   %t137 = fsub float %t134, %t132
 203   %t138 = fmul float %t137, 0x3FF6A09E60000000
 204   %t139 = fadd float %t133, %t135
 205   %t140 = fmul float %t139, 0x3FFD906BC0000000
 206   %t141 = fmul float %t135, 0x3FF1517A80000000
 207   %t142 = fsub float %t141, %t140
 208   %t143 = fmul float %t133, 0xC004E7AEA0000000
 209   %t144 = fadd float %t143, %t140
 210   %t145 = fsub float %t144, %t136
 211   %t146 = fsub float %t138, %t145
 212   %t147 = fadd float %t142, %t146
 213   %t148 = fadd float %t113, %t136
 214   store float %t148, float* %t25, align 4
 215   %t149 = fsub float %t113, %t136
 216   store float %t149, float* %t24, align 4
 217   %t150 = fadd float %t115, %t145
 218   store float %t150, float* %t12, align 4
 219   %t151 = fsub float %t115, %t145
 220   store float %t151, float* %t22, align 4
 221   %t152 = fadd float %t116, %t146
 222   store float %t152, float* %t14, align 4
 223   %t153 = fsub float %t116, %t146
 224   store float %t153, float* %t20, align 4
 225   %t154 = fadd float %t114, %t147
 226   store float %t154, float* %t18, align 4
 227   %t155 = fsub float %t114, %t147
 228   store float %t155, float* %t16, align 4
 229   br label %bb156
 230
     ; Loop 1 latch: advance %t10 and leave the loop after 8 iterations.
 231 bb156:
 232   %t157 = add i32 %t10, 1
 233   %t158 = icmp eq i32 %t157, 8
 234   br i1 %t158, label %bb159, label %bb9
 235
     ; Loop 2 preheader: precompute the byte offsets %a4 + 1 .. %a4 + 7 used by
     ; the byte stores in bb167.
 236 bb159:
 237   %t160 = add i32 %a4, 7
 238   %t161 = add i32 %a4, 1
 239   %t162 = add i32 %a4, 6
 240   %t163 = add i32 %a4, 2
 241   %t164 = add i32 %a4, 5
 242   %t165 = add i32 %a4, 4
 243   %t166 = add i32 %a4, 3
 244   br label %bb167
 245
     ; Loop 2 body (single block, %t168 = 0..7). Each of the eight results is
     ; converted with fptosi, turned into a table index as
     ; (((x + 4) >> 3) & 1023) + 128, loaded as a byte from %t6 and stored to
     ; the row pointer %t186.
 246 bb167:
 247   %t168 = phi i32 [ 0, %bb159 ], [ %t293, %bb167 ]
 248   %t169 = getelementptr i8** %a3, i32 %t168
 249   %t170 = shl i32 %t168, 3
 250   %t171 = or i32 %t170, 4
 251   %t172 = getelementptr [64 x float]* %t, i32 0, i32 %t171
 252   %t173 = or i32 %t170, 2
 253   %t174 = getelementptr [64 x float]* %t, i32 0, i32 %t173
 254   %t175 = or i32 %t170, 6
 255   %t176 = getelementptr [64 x float]* %t, i32 0, i32 %t175
 256   %t177 = or i32 %t170, 5
 257   %t178 = getelementptr [64 x float]* %t, i32 0, i32 %t177
 258   %t179 = or i32 %t170, 3
 259   %t180 = getelementptr [64 x float]* %t, i32 0, i32 %t179
 260   %t181 = or i32 %t170, 1
 261   %t182 = getelementptr [64 x float]* %t, i32 0, i32 %t181
 262   %t183 = or i32 %t170, 7
 263   %t184 = getelementptr [64 x float]* %t, i32 0, i32 %t183
 264   %t185 = getelementptr [64 x float]* %t, i32 0, i32 %t170
 265   %t186 = load i8** %t169, align 4
 266   %t187 = getelementptr inbounds i8* %t186, i32 %a4
 267   %t188 = load float* %t185, align 4
 268   %t189 = load float* %t172, align 4
 269   %t190 = fadd float %t188, %t189
 270   %t191 = fsub float %t188, %t189
 271   %t192 = load float* %t174, align 4
 272   %t193 = load float* %t176, align 4
 273   %t194 = fadd float %t192, %t193
 274   %t195 = fsub float %t192, %t193
 275   %t196 = fmul float %t195, 0x3FF6A09E60000000
 276   %t197 = fsub float %t196, %t194
 277   %t198 = fadd float %t190, %t194
 278   %t199 = fsub float %t190, %t194
 279   %t200 = fadd float %t191, %t197
 280   %t201 = fsub float %t191, %t197
 281   %t202 = load float* %t178, align 4
 282   %t203 = load float* %t180, align 4
 283   %t204 = fadd float %t202, %t203
 284   %t205 = fsub float %t202, %t203
 285   %t206 = load float* %t182, align 4
 286   %t207 = load float* %t184, align 4
 287   %t208 = fadd float %t206, %t207
 288   %t209 = fsub float %t206, %t207
 289   %t210 = fadd float %t208, %t204
 290   %t211 = fsub float %t208, %t204
 291   %t212 = fmul float %t211, 0x3FF6A09E60000000
 292   %t213 = fadd float %t205, %t209
 293   %t214 = fmul float %t213, 0x3FFD906BC0000000
 294   %t215 = fmul float %t209, 0x3FF1517A80000000
 295   %t216 = fsub float %t215, %t214
 296   %t217 = fmul float %t205, 0xC004E7AEA0000000
 297   %t218 = fadd float %t217, %t214
 298   %t219 = fsub float %t218, %t210
 299   %t220 = fsub float %t212, %t219
 300   %t221 = fadd float %t216, %t220
 301   %t222 = fadd float %t198, %t210
 302   %t223 = fptosi float %t222 to i32
 303   %t224 = add nsw i32 %t223, 4
 304   %t225 = lshr i32 %t224, 3
 305   %t226 = and i32 %t225, 1023
 306   %t227 = add i32 %t226, 128
 307   %t228 = getelementptr inbounds i8* %t6, i32 %t227
 308   %t229 = load i8* %t228, align 1
 309   store i8 %t229, i8* %t187, align 1
 310   %t230 = fsub float %t198, %t210
 311   %t231 = fptosi float %t230 to i32
 312   %t232 = add nsw i32 %t231, 4
 313   %t233 = lshr i32 %t232, 3
 314   %t234 = and i32 %t233, 1023
 315   %t235 = add i32 %t234, 128
 316   %t236 = getelementptr inbounds i8* %t6, i32 %t235
 317   %t237 = load i8* %t236, align 1
 318   %t238 = getelementptr inbounds i8* %t186, i32 %t160
 319   store i8 %t237, i8* %t238, align 1
 320   %t239 = fadd float %t200, %t219
 321   %t240 = fptosi float %t239 to i32
 322   %t241 = add nsw i32 %t240, 4
 323   %t242 = lshr i32 %t241, 3
 324   %t243 = and i32 %t242, 1023
 325   %t244 = add i32 %t243, 128
 326   %t245 = getelementptr inbounds i8* %t6, i32 %t244
 327   %t246 = load i8* %t245, align 1
 328   %t247 = getelementptr inbounds i8* %t186, i32 %t161
 329   store i8 %t246, i8* %t247, align 1
 330   %t248 = fsub float %t200, %t219
 331   %t249 = fptosi float %t248 to i32
 332   %t250 = add nsw i32 %t249, 4
 333   %t251 = lshr i32 %t250, 3
 334   %t252 = and i32 %t251, 1023
 335   %t253 = add i32 %t252, 128
 336   %t254 = getelementptr inbounds i8* %t6, i32 %t253
 337   %t255 = load i8* %t254, align 1
 338   %t256 = getelementptr inbounds i8* %t186, i32 %t162
 339   store i8 %t255, i8* %t256, align 1
 340   %t257 = fadd float %t201, %t220
 341   %t258 = fptosi float %t257 to i32
 342   %t259 = add nsw i32 %t258, 4
 343   %t260 = lshr i32 %t259, 3
 344   %t261 = and i32 %t260, 1023
 345   %t262 = add i32 %t261, 128
 346   %t263 = getelementptr inbounds i8* %t6, i32 %t262
 347   %t264 = load i8* %t263, align 1
 348   %t265 = getelementptr inbounds i8* %t186, i32 %t163
 349   store i8 %t264, i8* %t265, align 1
 350   %t266 = fsub float %t201, %t220
 351   %t267 = fptosi float %t266 to i32
 352   %t268 = add nsw i32 %t267, 4
 353   %t269 = lshr i32 %t268, 3
 354   %t270 = and i32 %t269, 1023
 355   %t271 = add i32 %t270, 128
 356   %t272 = getelementptr inbounds i8* %t6, i32 %t271
 357   %t273 = load i8* %t272, align 1
 358   %t274 = getelementptr inbounds i8* %t186, i32 %t164
 359   store i8 %t273, i8* %t274, align 1
 360   %t275 = fadd float %t199, %t221
 361   %t276 = fptosi float %t275 to i32
 362   %t277 = add nsw i32 %t276, 4
 363   %t278 = lshr i32 %t277, 3
 364   %t279 = and i32 %t278, 1023
 365   %t280 = add i32 %t279, 128
 366   %t281 = getelementptr inbounds i8* %t6, i32 %t280
 367   %t282 = load i8* %t281, align 1
 368   %t283 = getelementptr inbounds i8* %t186, i32 %t165
 369   store i8 %t282, i8* %t283, align 1
 370   %t284 = fsub float %t199, %t221
 371   %t285 = fptosi float %t284 to i32
 372   %t286 = add nsw i32 %t285, 4
 373   %t287 = lshr i32 %t286, 3
 374   %t288 = and i32 %t287, 1023
 375   %t289 = add i32 %t288, 128
 376   %t290 = getelementptr inbounds i8* %t6, i32 %t289
 377   %t291 = load i8* %t290, align 1
 378   %t292 = getelementptr inbounds i8* %t186, i32 %t166
 379   store i8 %t291, i8* %t292, align 1
 380   %t293 = add nsw i32 %t168, 1
 381   %t294 = icmp eq i32 %t293, 8
 382   br i1 %t294, label %bb295, label %bb167
 383
 384 bb295:
 385   ret void
 386 }
 387
 ; Named struct types used by @longest_match. The function receives a
 ; %struct.internal_state* and reads its fields 11, 13, 14, 16, 27, 29, 30, 31,
 ; 35 and 36 by index, and stores to field 28. The other types are present only
 ; because %struct.internal_state refers to them.
 388 %struct.ct_data_s = type { %union.anon, %union.anon }
 389 %struct.gz_header = type { i32, i32, i32, i32, i8*, i32, i32, i8*, i32, i8*, i32, i32, i32 }
 390 %struct.internal_state = type { %struct.z_stream*, i32, i8*, i32, i8*, i32, i32, %struct.gz_header*, i32, i8, i32, i32, i32, i32, i8*, i32, i16*, i16*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [573 x %struct.ct_data_s], [61 x %struct.ct_data_s], [39 x %struct.ct_data_s], %struct.tree_desc_s, %struct.tree_desc_s, %struct.tree_desc_s, [16 x i16], [573 x i32], i32, i32, [573 x i8], i8*, i32, i32, i16*, i32, i32, i32, i32, i16, i32 }
 391 %struct.static_tree_desc = type { i32 }
 392 %struct.tree_desc_s = type { %struct.ct_data_s*, i32, %struct.static_tree_desc* }
 393 %struct.z_stream = type { i8*, i32, i32, i8*, i32, i32, i8*, %struct.internal_state*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8*, i32, i32, i32 }
 394 %union.anon = type { i16 }
 395
 ; @longest_match: second LSR test input, compiled with optsize.
 ;
 ; Shape of the function:
 ;   entry / bb / bb2  load fields of %s and derive loop-invariant values.
 ;   bb6 .. bb24       outer loop over candidate positions %cur_match_addr.0,
 ;                     counted by %indvar78 up to %tmp81.
 ;   bb11 .. bb18      inner loop that compares eight byte pairs of the buffer
 ;                     %3 per iteration, one pair per basic block.
 ;   bb25              returns the unsigned minimum of the best length found
 ;                     and %32.
 ;
 ; The CHECK lines embedded in bb24 require the outer loop's exit test to be
 ; emitted as a count-down (subs ..., #1 followed by bne.w).
 396 define i32 @longest_match(%struct.internal_state* %s, i32 %cur_match) nounwind optsize {
 397 entry:
 398   %0 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 31 ; <i32*> [#uses=1]
 399   %1 = load i32* %0, align 4                      ; <i32> [#uses=2]
 400   %2 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 14 ; <i8**> [#uses=1]
 401   %3 = load i8** %2, align 4                      ; <i8*> [#uses=27]
 402   %4 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 27 ; <i32*> [#uses=1]
 403   %5 = load i32* %4, align 4                      ; <i32> [#uses=17]
 404   %6 = getelementptr inbounds i8* %3, i32 %5      ; <i8*> [#uses=1]
 405   %7 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 30 ; <i32*> [#uses=1]
 406   %8 = load i32* %7, align 4                      ; <i32> [#uses=4]
 407   %9 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 36 ; <i32*> [#uses=1]
 408   %10 = load i32* %9, align 4                     ; <i32> [#uses=2]
 409   %11 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 11 ; <i32*> [#uses=1]
 410   %12 = load i32* %11, align 4                    ; <i32> [#uses=2]
 411   %13 = add i32 %12, -262                         ; <i32> [#uses=1]
 412   %14 = icmp ugt i32 %5, %13                      ; <i1> [#uses=1]
 413   br i1 %14, label %bb, label %bb2
 414
 415 bb:                                               ; preds = %entry
 416   %15 = add i32 %5, 262                           ; <i32> [#uses=1]
 417   %16 = sub i32 %15, %12                          ; <i32> [#uses=1]
 418   br label %bb2
 419
     ; %iftmp.48.0 is (%5 + 262 - %12) when %5 > %12 - 262 (unsigned), else 0.
     ; It is the lower bound tested in bb23 before the outer loop continues.
 420 bb2:                                              ; preds = %bb, %entry
 421   %iftmp.48.0 = phi i32 [ %16, %bb ], [ 0, %entry ] ; <i32> [#uses=1]
 422   %17 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 16 ; <i16**> [#uses=1]
 423   %18 = load i16** %17, align 4                   ; <i16*> [#uses=1]
 424   %19 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 13 ; <i32*> [#uses=1]
 425   %20 = load i32* %19, align 4                    ; <i32> [#uses=1]
 426   %.sum = add i32 %5, 258                         ; <i32> [#uses=2]
 427   %21 = getelementptr inbounds i8* %3, i32 %.sum  ; <i8*> [#uses=1]
 428   %22 = add nsw i32 %5, -1                        ; <i32> [#uses=1]
 429   %.sum30 = add i32 %22, %8                       ; <i32> [#uses=1]
 430   %23 = getelementptr inbounds i8* %3, i32 %.sum30 ; <i8*> [#uses=1]
 431   %24 = load i8* %23, align 1                     ; <i8> [#uses=1]
 432   %.sum31 = add i32 %8, %5                        ; <i32> [#uses=1]
 433   %25 = getelementptr inbounds i8* %3, i32 %.sum31 ; <i8*> [#uses=1]
 434   %26 = load i8* %25, align 1                     ; <i8> [#uses=1]
 435   %27 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 35 ; <i32*> [#uses=1]
 436   %28 = load i32* %27, align 4                    ; <i32> [#uses=1]
 437   %29 = lshr i32 %1, 2                            ; <i32> [#uses=1]
 438   %30 = icmp ult i32 %8, %28                      ; <i1> [#uses=1]
 439   %. = select i1 %30, i32 %1, i32 %29             ; <i32> [#uses=1]
 440   %31 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 29 ; <i32*> [#uses=1]
 441   %32 = load i32* %31, align 4                    ; <i32> [#uses=4]
 442   %33 = icmp ugt i32 %10, %32                     ; <i1> [#uses=1]
 443   %nice_match.0.ph = select i1 %33, i32 %32, i32 %10 ; <i32> [#uses=1]
 444   %34 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 28 ; <i32*> [#uses=1]
 445   %35 = ptrtoint i8* %21 to i32                   ; <i32> [#uses=1]
 446   %36 = add nsw i32 %5, 257                       ; <i32> [#uses=1]
 447   %tmp81 = add i32 %., -1                         ; <i32> [#uses=1]
 448   br label %bb6
 449
     ; Outer loop header. bb6 .. bb9 are four cheap byte comparisons between
     ; the candidate at %cur_match_addr.0 and the reference position %5; any
     ; mismatch skips straight to bb23.
 450 bb6:                                              ; preds = %bb24, %bb2
 451   %indvar78 = phi i32 [ 0, %bb2 ], [ %indvar.next79, %bb24 ] ; <i32> [#uses=2]
 452   %best_len.2 = phi i32 [ %8, %bb2 ], [ %best_len.0, %bb24 ] ; <i32> [#uses=8]
 453   %scan_end1.1 = phi i8 [ %24, %bb2 ], [ %scan_end1.0, %bb24 ] ; <i8> [#uses=6]
 454   %cur_match_addr.0 = phi i32 [ %cur_match, %bb2 ], [ %90, %bb24 ] ; <i32> [#uses=14]
 455   %scan_end.1 = phi i8 [ %26, %bb2 ], [ %scan_end.0, %bb24 ] ; <i8> [#uses=6]
 456   %37 = getelementptr inbounds i8* %3, i32 %cur_match_addr.0 ; <i8*> [#uses=1]
 457   %.sum32 = add i32 %cur_match_addr.0, %best_len.2 ; <i32> [#uses=1]
 458   %38 = getelementptr inbounds i8* %3, i32 %.sum32 ; <i8*> [#uses=1]
 459   %39 = load i8* %38, align 1                     ; <i8> [#uses=1]
 460   %40 = icmp eq i8 %39, %scan_end.1               ; <i1> [#uses=1]
 461   br i1 %40, label %bb7, label %bb23
 462
 463 bb7:                                              ; preds = %bb6
 464   %41 = add nsw i32 %best_len.2, -1               ; <i32> [#uses=1]
 465   %.sum33 = add i32 %41, %cur_match_addr.0        ; <i32> [#uses=1]
 466   %42 = getelementptr inbounds i8* %3, i32 %.sum33 ; <i8*> [#uses=1]
 467   %43 = load i8* %42, align 1                     ; <i8> [#uses=1]
 468   %44 = icmp eq i8 %43, %scan_end1.1              ; <i1> [#uses=1]
 469   br i1 %44, label %bb8, label %bb23
 470
 471 bb8:                                              ; preds = %bb7
 472   %45 = load i8* %37, align 1                     ; <i8> [#uses=1]
 473   %46 = load i8* %6, align 1                      ; <i8> [#uses=1]
 474   %47 = icmp eq i8 %45, %46                       ; <i1> [#uses=1]
 475   br i1 %47, label %bb9, label %bb23
 476
 477 bb9:                                              ; preds = %bb8
 478   %.sum34 = add i32 %cur_match_addr.0, 1          ; <i32> [#uses=1]
 479   %48 = getelementptr inbounds i8* %3, i32 %.sum34 ; <i8*> [#uses=1]
 480   %49 = load i8* %48, align 1                     ; <i8> [#uses=1]
 481   %.sum88 = add i32 %5, 1                         ; <i32> [#uses=1]
 482   %50 = getelementptr inbounds i8* %3, i32 %.sum88 ; <i8*> [#uses=1]
 483   %51 = load i8* %50, align 1                     ; <i8> [#uses=1]
 484   %52 = icmp eq i8 %49, %51                       ; <i1> [#uses=1]
 485   br i1 %52, label %bb10, label %bb23
 486
     ; Inner loop preheader: candidate-side offsets %cur_match_addr.0 + 3 .. + 10,
     ; to which bb11/bb12 add 8 * %indvar.
 487 bb10:                                             ; preds = %bb9
 488   %tmp39 = add i32 %cur_match_addr.0, 10          ; <i32> [#uses=1]
 489   %tmp41 = add i32 %cur_match_addr.0, 9           ; <i32> [#uses=1]
 490   %tmp44 = add i32 %cur_match_addr.0, 8           ; <i32> [#uses=1]
 491   %tmp47 = add i32 %cur_match_addr.0, 7           ; <i32> [#uses=1]
 492   %tmp50 = add i32 %cur_match_addr.0, 6           ; <i32> [#uses=1]
 493   %tmp53 = add i32 %cur_match_addr.0, 5           ; <i32> [#uses=1]
 494   %tmp56 = add i32 %cur_match_addr.0, 4           ; <i32> [#uses=1]
 495   %tmp59 = add i32 %cur_match_addr.0, 3           ; <i32> [#uses=1]
 496   br label %bb11
 497
     ; Inner loop (bb11 .. bb18): each block compares one byte at
     ; %3[%5 + 8 * %indvar + k] against %3[%cur_match_addr.0 + 8 * %indvar + k]
     ; for k = 3 .. 10. The first mismatch exits to bb20; bb18 also exits once
     ; %.sum89 is no longer (signed) less than %.sum = %5 + 258.
 498 bb11:                                             ; preds = %bb18, %bb10
 499   %indvar = phi i32 [ %indvar.next, %bb18 ], [ 0, %bb10 ] ; <i32> [#uses=2]
 500   %tmp = shl i32 %indvar, 3                       ; <i32> [#uses=16]
 501   %tmp40 = add i32 %tmp39, %tmp                   ; <i32> [#uses=1]
 502   %scevgep = getelementptr i8* %3, i32 %tmp40     ; <i8*> [#uses=1]
 503   %tmp42 = add i32 %tmp41, %tmp                   ; <i32> [#uses=1]
 504   %scevgep43 = getelementptr i8* %3, i32 %tmp42   ; <i8*> [#uses=1]
 505   %tmp45 = add i32 %tmp44, %tmp                   ; <i32> [#uses=1]
 506   %scevgep46 = getelementptr i8* %3, i32 %tmp45   ; <i8*> [#uses=1]
 507   %tmp48 = add i32 %tmp47, %tmp                   ; <i32> [#uses=1]
 508   %scevgep49 = getelementptr i8* %3, i32 %tmp48   ; <i8*> [#uses=1]
 509   %tmp51 = add i32 %tmp50, %tmp                   ; <i32> [#uses=1]
 510   %scevgep52 = getelementptr i8* %3, i32 %tmp51   ; <i8*> [#uses=1]
 511   %tmp54 = add i32 %tmp53, %tmp                   ; <i32> [#uses=1]
 512   %scevgep55 = getelementptr i8* %3, i32 %tmp54   ; <i8*> [#uses=1]
 513   %tmp60 = add i32 %tmp59, %tmp                   ; <i32> [#uses=1]
 514   %scevgep61 = getelementptr i8* %3, i32 %tmp60   ; <i8*> [#uses=1]
 515   %tmp62 = add i32 %tmp, 10                       ; <i32> [#uses=1]
 516   %.sum89 = add i32 %5, %tmp62                    ; <i32> [#uses=2]
 517   %scevgep63 = getelementptr i8* %3, i32 %.sum89  ; <i8*> [#uses=2]
 518   %tmp64 = add i32 %tmp, 9                        ; <i32> [#uses=1]
 519   %.sum90 = add i32 %5, %tmp64                    ; <i32> [#uses=1]
 520   %scevgep65 = getelementptr i8* %3, i32 %.sum90  ; <i8*> [#uses=2]
 521   %tmp66 = add i32 %tmp, 8                        ; <i32> [#uses=1]
 522   %.sum91 = add i32 %5, %tmp66                    ; <i32> [#uses=1]
 523   %scevgep67 = getelementptr i8* %3, i32 %.sum91  ; <i8*> [#uses=2]
 524   %tmp6883 = or i32 %tmp, 7                       ; <i32> [#uses=1]
 525   %.sum92 = add i32 %5, %tmp6883                  ; <i32> [#uses=1]
 526   %scevgep69 = getelementptr i8* %3, i32 %.sum92  ; <i8*> [#uses=2]
 527   %tmp7084 = or i32 %tmp, 6                       ; <i32> [#uses=1]
 528   %.sum93 = add i32 %5, %tmp7084                  ; <i32> [#uses=1]
 529   %scevgep71 = getelementptr i8* %3, i32 %.sum93  ; <i8*> [#uses=2]
 530   %tmp7285 = or i32 %tmp, 5                       ; <i32> [#uses=1]
 531   %.sum94 = add i32 %5, %tmp7285                  ; <i32> [#uses=1]
 532   %scevgep73 = getelementptr i8* %3, i32 %.sum94  ; <i8*> [#uses=2]
 533   %tmp7486 = or i32 %tmp, 4                       ; <i32> [#uses=1]
 534   %.sum95 = add i32 %5, %tmp7486                  ; <i32> [#uses=1]
 535   %scevgep75 = getelementptr i8* %3, i32 %.sum95  ; <i8*> [#uses=2]
 536   %tmp7687 = or i32 %tmp, 3                       ; <i32> [#uses=1]
 537   %.sum96 = add i32 %5, %tmp7687                  ; <i32> [#uses=1]
 538   %scevgep77 = getelementptr i8* %3, i32 %.sum96  ; <i8*> [#uses=2]
 539   %53 = load i8* %scevgep77, align 1              ; <i8> [#uses=1]
 540   %54 = load i8* %scevgep61, align 1              ; <i8> [#uses=1]
 541   %55 = icmp eq i8 %53, %54                       ; <i1> [#uses=1]
 542   br i1 %55, label %bb12, label %bb20
 543
 544 bb12:                                             ; preds = %bb11
 545   %tmp57 = add i32 %tmp56, %tmp                   ; <i32> [#uses=1]
 546   %scevgep58 = getelementptr i8* %3, i32 %tmp57   ; <i8*> [#uses=1]
 547   %56 = load i8* %scevgep75, align 1              ; <i8> [#uses=1]
 548   %57 = load i8* %scevgep58, align 1              ; <i8> [#uses=1]
 549   %58 = icmp eq i8 %56, %57                       ; <i1> [#uses=1]
 550   br i1 %58, label %bb13, label %bb20
 551
 552 bb13:                                             ; preds = %bb12
 553   %59 = load i8* %scevgep73, align 1              ; <i8> [#uses=1]
 554   %60 = load i8* %scevgep55, align 1              ; <i8> [#uses=1]
 555   %61 = icmp eq i8 %59, %60                       ; <i1> [#uses=1]
 556   br i1 %61, label %bb14, label %bb20
 557
 558 bb14:                                             ; preds = %bb13
 559   %62 = load i8* %scevgep71, align 1              ; <i8> [#uses=1]
 560   %63 = load i8* %scevgep52, align 1              ; <i8> [#uses=1]
 561   %64 = icmp eq i8 %62, %63                       ; <i1> [#uses=1]
 562   br i1 %64, label %bb15, label %bb20
 563
 564 bb15:                                             ; preds = %bb14
 565   %65 = load i8* %scevgep69, align 1              ; <i8> [#uses=1]
 566   %66 = load i8* %scevgep49, align 1              ; <i8> [#uses=1]
 567   %67 = icmp eq i8 %65, %66                       ; <i1> [#uses=1]
 568   br i1 %67, label %bb16, label %bb20
 569
 570 bb16:                                             ; preds = %bb15
 571   %68 = load i8* %scevgep67, align 1              ; <i8> [#uses=1]
 572   %69 = load i8* %scevgep46, align 1              ; <i8> [#uses=1]
 573   %70 = icmp eq i8 %68, %69                       ; <i1> [#uses=1]
 574   br i1 %70, label %bb17, label %bb20
 575
 576 bb17:                                             ; preds = %bb16
 577   %71 = load i8* %scevgep65, align 1              ; <i8> [#uses=1]
 578   %72 = load i8* %scevgep43, align 1              ; <i8> [#uses=1]
 579   %73 = icmp eq i8 %71, %72                       ; <i1> [#uses=1]
 580   br i1 %73, label %bb18, label %bb20
 581
 582 bb18:                                             ; preds = %bb17
 583   %74 = load i8* %scevgep63, align 1              ; <i8> [#uses=1]
 584   %75 = load i8* %scevgep, align 1                ; <i8> [#uses=1]
 585   %76 = icmp eq i8 %74, %75                       ; <i1> [#uses=1]
 586   %77 = icmp slt i32 %.sum89, %.sum               ; <i1> [#uses=1]
 587   %or.cond = and i1 %76, %77                      ; <i1> [#uses=1]
 588   %indvar.next = add i32 %indvar, 1               ; <i32> [#uses=1]
 589   br i1 %or.cond, label %bb11, label %bb20
 590
     ; %scan.3 is the address at which the inner loop stopped. %80 is that
     ; address minus the address of %3[%5 + 258], plus 258, i.e. the stop
     ; offset relative to position %5. It is treated as the new length if it
     ; exceeds %best_len.2.
 591 bb20:                                             ; preds = %bb18, %bb17, %bb16, %bb15, %bb14, %bb13, %bb12, %bb11
 592   %scan.3 = phi i8* [ %scevgep77, %bb11 ], [ %scevgep75, %bb12 ], [ %scevgep73, %bb13 ], [ %scevgep71, %bb14 ], [ %scevgep69, %bb15 ], [ %scevgep67, %bb16 ], [ %scevgep65, %bb17 ], [ %scevgep63, %bb18 ] ; <i8*> [#uses=1]
 593   %78 = ptrtoint i8* %scan.3 to i32               ; <i32> [#uses=1]
 594   %79 = sub nsw i32 %78, %35                      ; <i32> [#uses=2]
 595   %80 = add i32 %79, 258                          ; <i32> [#uses=5]
 596   %81 = icmp sgt i32 %80, %best_len.2             ; <i1> [#uses=1]
 597   br i1 %81, label %bb21, label %bb23
 598
     ; New best length: record the candidate position in field 28 of %s and
     ; leave the search (bb25) once the length reaches %nice_match.0.ph.
 599 bb21:                                             ; preds = %bb20
 600   store i32 %cur_match_addr.0, i32* %34, align 4
 601   %82 = icmp slt i32 %80, %nice_match.0.ph        ; <i1> [#uses=1]
 602   br i1 %82, label %bb22, label %bb25
 603
     ; Reload the two bytes at offsets %80 - 1 and %80 from position %5; they
     ; become the scan_end1/scan_end values compared in bb6 and bb7.
 604 bb22:                                             ; preds = %bb21
 605   %.sum37 = add i32 %36, %79                      ; <i32> [#uses=1]
 606   %83 = getelementptr inbounds i8* %3, i32 %.sum37 ; <i8*> [#uses=1]
 607   %84 = load i8* %83, align 1                     ; <i8> [#uses=1]
 608   %.sum38 = add i32 %80, %5                       ; <i32> [#uses=1]
 609   %85 = getelementptr inbounds i8* %3, i32 %.sum38 ; <i8*> [#uses=1]
 610   %86 = load i8* %85, align 1                     ; <i8> [#uses=1]
 611   br label %bb23
 612
     ; Fetch the next candidate: %90 is the zero-extended i16 loaded from
     ; %18[%cur_match_addr.0 & %20]. The search ends (bb25) unless %90 is
     ; (unsigned) greater than %iftmp.48.0.
 613 bb23:                                             ; preds = %bb22, %bb20, %bb9, %bb8, %bb7, %bb6
 614   %best_len.0 = phi i32 [ %best_len.2, %bb6 ], [ %best_len.2, %bb7 ], [ %best_len.2, %bb8 ], [ %best_len.2, %bb9 ], [ %80, %bb22 ], [ %best_len.2, %bb20 ] ; <i32> [#uses=3]
 615   %scan_end1.0 = phi i8 [ %scan_end1.1, %bb6 ], [ %scan_end1.1, %bb7 ], [ %scan_end1.1, %bb8 ], [ %scan_end1.1, %bb9 ], [ %84, %bb22 ], [ %scan_end1.1, %bb20 ] ; <i8> [#uses=1]
 616   %scan_end.0 = phi i8 [ %scan_end.1, %bb6 ], [ %scan_end.1, %bb7 ], [ %scan_end.1, %bb8 ], [ %scan_end.1, %bb9 ], [ %86, %bb22 ], [ %scan_end.1, %bb20 ] ; <i8> [#uses=1]
 617   %87 = and i32 %cur_match_addr.0, %20            ; <i32> [#uses=1]
 618   %88 = getelementptr inbounds i16* %18, i32 %87  ; <i16*> [#uses=1]
 619   %89 = load i16* %88, align 2                    ; <i16> [#uses=1]
 620   %90 = zext i16 %89 to i32                       ; <i32> [#uses=2]
 621   %91 = icmp ugt i32 %90, %iftmp.48.0             ; <i1> [#uses=1]
 622   br i1 %91, label %bb24, label %bb25
 623
     ; Outer loop latch: the loop exits when %indvar78 reaches %tmp81. This is
     ; the comparison the CHECK lines below expect to become a count-down.
 624 bb24:                                             ; preds = %bb23
 625
 626 ; LSR should use count-down iteration to avoid requiring the trip count
 627 ; in a register.
 628
 629 ;      CHECK: @ %bb24
 630 ; CHECK: subs{{.*}} {{(r[0-9]+)|(lr)}}, #1
 631 ; CHECK: bne.w
 632
 633   %92 = icmp eq i32 %tmp81, %indvar78             ; <i1> [#uses=1]
 634   %indvar.next79 = add i32 %indvar78, 1           ; <i32> [#uses=1]
 635   br i1 %92, label %bb25, label %bb6
 636
     ; Result: the best length, clamped to at most %32 (unsigned compare).
 637 bb25:                                             ; preds = %bb24, %bb23, %bb21
 638   %best_len.1 = phi i32 [ %best_len.0, %bb23 ], [ %best_len.0, %bb24 ], [ %80, %bb21 ] ; <i32> [#uses=2]
 639   %93 = icmp ugt i32 %best_len.1, %32             ; <i1> [#uses=1]
 640   %merge = select i1 %93, i32 %32, i32 %best_len.1 ; <i32> [#uses=1]
 641   ret i32 %merge
 642 }