test/CodeGen/X86/machine-combiner.ll

   1 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=SSE
   2 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=AVX
   3
   4 ; Verify that the first two adds are independent regardless of how the inputs are
   5 ; commuted. The destination registers are used as source registers for the third add.
   6
   7 define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
   8 ; SSE-LABEL: reassociate_adds1:
   9 ; SSE:       # BB#0:
  10 ; SSE-NEXT:    addss %xmm1, %xmm0
  11 ; SSE-NEXT:    addss %xmm3, %xmm2
  12 ; SSE-NEXT:    addss %xmm2, %xmm0
  13 ; SSE-NEXT:    retq
  14 ;
  15 ; AVX-LABEL: reassociate_adds1:
  16 ; AVX:       # BB#0:
  17 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  18 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  19 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  20 ; AVX-NEXT:    retq
       ; Serial chain in the IR: ((x0 + x1) + x2) + x3, no operands commuted.
       ; Expected code: the first two adds read disjoint registers (xmm0/xmm1 and
       ; xmm2/xmm3); only the third add combines their results.
  21   %t0 = fadd float %x0, %x1
  22   %t1 = fadd float %t0, %x2
  23   %t2 = fadd float %t1, %x3
  24   ret float %t2
  25 }
  26
  27 define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
  28 ; SSE-LABEL: reassociate_adds2:
  29 ; SSE:       # BB#0:
  30 ; SSE-NEXT:    addss %xmm1, %xmm0
  31 ; SSE-NEXT:    addss %xmm3, %xmm2
  32 ; SSE-NEXT:    addss %xmm2, %xmm0
  33 ; SSE-NEXT:    retq
  34 ;
  35 ; AVX-LABEL: reassociate_adds2:
  36 ; AVX:       # BB#0:
  37 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  38 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  39 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  40 ; AVX-NEXT:    retq
       ; Serial three-add chain in which the second fadd has its operands
       ; commuted (%x2 first, %t0 second).
       ; Expected code: the first two adds read disjoint registers (xmm0/xmm1 and
       ; xmm2/xmm3); only the third add combines their results.
  41   %t0 = fadd float %x0, %x1
  42   %t1 = fadd float %x2, %t0
  43   %t2 = fadd float %t1, %x3
  44   ret float %t2
  45 }
  46
  47 define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
  48 ; SSE-LABEL: reassociate_adds3:
  49 ; SSE:       # BB#0:
  50 ; SSE-NEXT:    addss %xmm1, %xmm0
  51 ; SSE-NEXT:    addss %xmm3, %xmm2
  52 ; SSE-NEXT:    addss %xmm2, %xmm0
  53 ; SSE-NEXT:    retq
  54 ;
  55 ; AVX-LABEL: reassociate_adds3:
  56 ; AVX:       # BB#0:
  57 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  58 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  59 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  60 ; AVX-NEXT:    retq
       ; Serial three-add chain in which the third fadd has its operands
       ; commuted (%x3 first, %t1 second).
       ; Expected code: the first two adds read disjoint registers (xmm0/xmm1 and
       ; xmm2/xmm3); only the third add combines their results.
  61   %t0 = fadd float %x0, %x1
  62   %t1 = fadd float %t0, %x2
  63   %t2 = fadd float %x3, %t1
  64   ret float %t2
  65 }
  66
  67 define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
  68 ; SSE-LABEL: reassociate_adds4:
  69 ; SSE:       # BB#0:
  70 ; SSE-NEXT:    addss %xmm1, %xmm0
  71 ; SSE-NEXT:    addss %xmm3, %xmm2
  72 ; SSE-NEXT:    addss %xmm2, %xmm0
  73 ; SSE-NEXT:    retq
  74 ;
  75 ; AVX-LABEL: reassociate_adds4:
  76 ; AVX:       # BB#0:
  77 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  78 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  79 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  80 ; AVX-NEXT:    retq
       ; Serial three-add chain in which both the second and the third fadd have
       ; their operands commuted (%x2 before %t0, %x3 before %t1).
       ; Expected code: the first two adds read disjoint registers (xmm0/xmm1 and
       ; xmm2/xmm3); only the third add combines their results.
  81   %t0 = fadd float %x0, %x1
  82   %t1 = fadd float %x2, %t0
  83   %t2 = fadd float %x3, %t1
  84   ret float %t2
  85 }
  86
  87 ; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
  88 ; produced because that would cost more compile time.
  89
  90 define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
  91 ; SSE-LABEL: reassociate_adds5:
  92 ; SSE:       # BB#0:
  93 ; SSE-NEXT:    addss %xmm1, %xmm0
  94 ; SSE-NEXT:    addss %xmm3, %xmm2
  95 ; SSE-NEXT:    addss %xmm2, %xmm0
  96 ; SSE-NEXT:    addss %xmm5, %xmm4
  97 ; SSE-NEXT:    addss %xmm6, %xmm4
  98 ; SSE-NEXT:    addss %xmm4, %xmm0
  99 ; SSE-NEXT:    addss %xmm7, %xmm0
 100 ; SSE-NEXT:    retq
 101 ;
 102 ; AVX-LABEL: reassociate_adds5:
 103 ; AVX:       # BB#0:
 104 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 105 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
 106 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 107 ; AVX-NEXT:    vaddss %xmm5, %xmm4, %xmm1
 108 ; AVX-NEXT:    vaddss %xmm6, %xmm1, %xmm1
 109 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 110 ; AVX-NEXT:    vaddss %xmm7, %xmm0, %xmm0
 111 ; AVX-NEXT:    retq
       ; Serial chain of seven fadds over eight inputs; every fadd takes the
       ; previous result as its first operand.
       ; Expected code: still seven adds, but partial sums of xmm2/xmm3 and of
       ; xmm4/xmm5/xmm6 are formed separately and then added into the running
       ; total in xmm0; xmm7 is added last.
 112   %t0 = fadd float %x0, %x1
 113   %t1 = fadd float %t0, %x2
 114   %t2 = fadd float %t1, %x3
 115   %t3 = fadd float %t2, %x4
 116   %t4 = fadd float %t3, %x5
 117   %t5 = fadd float %t4, %x6
 118   %t6 = fadd float %t5, %x7
 119   ret float %t6
 120 }
 121
 122 ; Verify that we only need two associative operations to reassociate the operands.
 123 ; Also, we should reassociate such that the result of the high latency division
 124 ; is used by the final 'add' rather than reassociating the %x3 operand with the
 125 ; division. The latter reassociation would not improve anything.
 126
 127 define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
 128 ; SSE-LABEL: reassociate_adds6:
 129 ; SSE:       # BB#0:
 130 ; SSE-NEXT:    divss %xmm1, %xmm0
 131 ; SSE-NEXT:    addss %xmm3, %xmm2
 132 ; SSE-NEXT:    addss %xmm2, %xmm0
 133 ; SSE-NEXT:    retq
 134 ;
 135 ; AVX-LABEL: reassociate_adds6:
 136 ; AVX:       # BB#0:
 137 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 138 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
 139 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 140 ; AVX-NEXT:    retq
       ; An fdiv feeds two chained fadds: x3 + (x2 + (x0 / x1)).
       ; Expected code: the add of xmm2 and xmm3 does not read the division
       ; result; the quotient is consumed only by the last add.
 141   %t0 = fdiv float %x0, %x1
 142   %t1 = fadd float %x2, %t0
 143   %t2 = fadd float %x3, %t1
 144   ret float %t2
 145 }
 146
 147 ; Verify that SSE and AVX scalar single-precision multiplies are reassociated.
 148
 149 define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
 150 ; SSE-LABEL: reassociate_muls1:
 151 ; SSE:       # BB#0:
 152 ; SSE-NEXT:    divss %xmm1, %xmm0
 153 ; SSE-NEXT:    mulss %xmm3, %xmm2
 154 ; SSE-NEXT:    mulss %xmm2, %xmm0
 155 ; SSE-NEXT:    retq
 156 ;
 157 ; AVX-LABEL: reassociate_muls1:
 158 ; AVX:       # BB#0:
 159 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 160 ; AVX-NEXT:    vmulss %xmm3, %xmm2, %xmm1
 161 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 162 ; AVX-NEXT:    retq
       ; An fdiv feeds two chained fmuls: x3 * (x2 * (x0 / x1)).
       ; Expected code: the multiply of xmm2 and xmm3 does not read the division
       ; result; the quotient is consumed only by the last multiply.
 163   %t0 = fdiv float %x0, %x1
 164   %t1 = fmul float %x2, %t0
 165   %t2 = fmul float %x3, %t1
 166   ret float %t2
 167 }
 168
 169 ; Verify that SSE and AVX scalar double-precision adds are reassociated.
 170
 171 define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
 172 ; SSE-LABEL: reassociate_adds_double:
 173 ; SSE:       # BB#0:
 174 ; SSE-NEXT:    divsd %xmm1, %xmm0
 175 ; SSE-NEXT:    addsd %xmm3, %xmm2
 176 ; SSE-NEXT:    addsd %xmm2, %xmm0
 177 ; SSE-NEXT:    retq
 178 ;
 179 ; AVX-LABEL: reassociate_adds_double:
 180 ; AVX:       # BB#0:
 181 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 182 ; AVX-NEXT:    vaddsd %xmm3, %xmm2, %xmm1
 183 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 184 ; AVX-NEXT:    retq
       ; Scalar double: an fdiv feeds two chained fadds, x3 + (x2 + (x0 / x1)).
       ; Expected code: the add of xmm2 and xmm3 does not read the division
       ; result; the quotient is consumed only by the last add.
 185   %t0 = fdiv double %x0, %x1
 186   %t1 = fadd double %x2, %t0
 187   %t2 = fadd double %x3, %t1
 188   ret double %t2
 189 }
 190
 191 ; Verify that SSE and AVX scalar double-precision multiplies are reassociated.
 192
 193 define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
 194 ; SSE-LABEL: reassociate_muls_double:
 195 ; SSE:       # BB#0:
 196 ; SSE-NEXT:    divsd %xmm1, %xmm0
 197 ; SSE-NEXT:    mulsd %xmm3, %xmm2
 198 ; SSE-NEXT:    mulsd %xmm2, %xmm0
 199 ; SSE-NEXT:    retq
 200 ;
 201 ; AVX-LABEL: reassociate_muls_double:
 202 ; AVX:       # BB#0:
 203 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 204 ; AVX-NEXT:    vmulsd %xmm3, %xmm2, %xmm1
 205 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 206 ; AVX-NEXT:    retq
       ; Scalar double: an fdiv feeds two chained fmuls, x3 * (x2 * (x0 / x1)).
       ; Expected code: the multiply of xmm2 and xmm3 does not read the division
       ; result; the quotient is consumed only by the last multiply.
 207   %t0 = fdiv double %x0, %x1
 208   %t1 = fmul double %x2, %t0
 209   %t2 = fmul double %x3, %t1
 210   ret double %t2
 211 }
 212
 213 ; Verify that SSE and AVX 128-bit vector single-precision adds are reassociated.
 214
 215 define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 216 ; SSE-LABEL: reassociate_adds_v4f32:
 217 ; SSE:       # BB#0:
 218 ; SSE-NEXT:    mulps %xmm1, %xmm0
 219 ; SSE-NEXT:    addps %xmm3, %xmm2
 220 ; SSE-NEXT:    addps %xmm2, %xmm0
 221 ; SSE-NEXT:    retq
 222 ;
 223 ; AVX-LABEL: reassociate_adds_v4f32:
 224 ; AVX:       # BB#0:
 225 ; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
 226 ; AVX-NEXT:    vaddps %xmm3, %xmm2, %xmm1
 227 ; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
 228 ; AVX-NEXT:    retq
       ; <4 x float>: an fmul feeds two chained fadds, x3 + (x2 + (x0 * x1)).
       ; Expected code: the add of xmm2 and xmm3 does not read the product; the
       ; product is consumed only by the last add.
 229   %t0 = fmul <4 x float> %x0, %x1
 230   %t1 = fadd <4 x float> %x2, %t0
 231   %t2 = fadd <4 x float> %x3, %t1
 232   ret <4 x float> %t2
 233 }
 234
 235 ; Verify that SSE and AVX 128-bit vector double-precision adds are reassociated.
 236
 237 define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
 238 ; SSE-LABEL: reassociate_adds_v2f64:
 239 ; SSE:       # BB#0:
 240 ; SSE-NEXT:    mulpd %xmm1, %xmm0
 241 ; SSE-NEXT:    addpd %xmm3, %xmm2
 242 ; SSE-NEXT:    addpd %xmm2, %xmm0
 243 ; SSE-NEXT:    retq
 244 ;
 245 ; AVX-LABEL: reassociate_adds_v2f64:
 246 ; AVX:       # BB#0:
 247 ; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
 248 ; AVX-NEXT:    vaddpd %xmm3, %xmm2, %xmm1
 249 ; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
 250 ; AVX-NEXT:    retq
       ; <2 x double>: an fmul feeds two chained fadds, x3 + (x2 + (x0 * x1)).
       ; Expected code: the add of xmm2 and xmm3 does not read the product; the
       ; product is consumed only by the last add.
 251   %t0 = fmul <2 x double> %x0, %x1
 252   %t1 = fadd <2 x double> %x2, %t0
 253   %t2 = fadd <2 x double> %x3, %t1
 254   ret <2 x double> %t2
 255 }
 256
 257 ; Verify that SSE and AVX 128-bit vector single-precision multiplies are reassociated.
 258
 259 define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 260 ; SSE-LABEL: reassociate_muls_v4f32:
 261 ; SSE:       # BB#0:
 262 ; SSE-NEXT:    addps %xmm1, %xmm0
 263 ; SSE-NEXT:    mulps %xmm3, %xmm2
 264 ; SSE-NEXT:    mulps %xmm2, %xmm0
 265 ; SSE-NEXT:    retq
 266 ;
 267 ; AVX-LABEL: reassociate_muls_v4f32:
 268 ; AVX:       # BB#0:
 269 ; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
 270 ; AVX-NEXT:    vmulps %xmm3, %xmm2, %xmm1
 271 ; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
 272 ; AVX-NEXT:    retq
       ; <4 x float>: an fadd feeds two chained fmuls, x3 * (x2 * (x0 + x1)).
       ; Expected code: the multiply of xmm2 and xmm3 does not read the sum; the
       ; sum is consumed only by the last multiply.
 273   %t0 = fadd <4 x float> %x0, %x1
 274   %t1 = fmul <4 x float> %x2, %t0
 275   %t2 = fmul <4 x float> %x3, %t1
 276   ret <4 x float> %t2
 277 }
 278
 279 ; Verify that SSE and AVX 128-bit vector double-precision multiplies are reassociated.
 280
 281 define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
 282 ; SSE-LABEL: reassociate_muls_v2f64:
 283 ; SSE:       # BB#0:
 284 ; SSE-NEXT:    addpd %xmm1, %xmm0
 285 ; SSE-NEXT:    mulpd %xmm3, %xmm2
 286 ; SSE-NEXT:    mulpd %xmm2, %xmm0
 287 ; SSE-NEXT:    retq
 288 ;
 289 ; AVX-LABEL: reassociate_muls_v2f64:
 290 ; AVX:       # BB#0:
 291 ; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
 292 ; AVX-NEXT:    vmulpd %xmm3, %xmm2, %xmm1
 293 ; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
 294 ; AVX-NEXT:    retq
       ; <2 x double>: an fadd feeds two chained fmuls, x3 * (x2 * (x0 + x1)).
       ; Expected code: the multiply of xmm2 and xmm3 does not read the sum; the
       ; sum is consumed only by the last multiply.
 295   %t0 = fadd <2 x double> %x0, %x1
 296   %t1 = fmul <2 x double> %x2, %t0
 297   %t2 = fmul <2 x double> %x3, %t1
 298   ret <2 x double> %t2
 299 }
 300
 301 ; Verify that AVX 256-bit vector single-precision adds are reassociated.
 302
 303 define <8 x float> @reassociate_adds_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
 304 ; AVX-LABEL: reassociate_adds_v8f32:
 305 ; AVX:       # BB#0:
 306 ; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
 307 ; AVX-NEXT:    vaddps %ymm3, %ymm2, %ymm1
 308 ; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
 309 ; AVX-NEXT:    retq
       ; <8 x float>: an fmul feeds two chained fadds, x3 + (x2 + (x0 * x1)).
       ; Only the AVX run has checks for this function.
       ; Expected code: the add of ymm2 and ymm3 does not read the product; the
       ; product is consumed only by the last add.
 310   %t0 = fmul <8 x float> %x0, %x1
 311   %t1 = fadd <8 x float> %x2, %t0
 312   %t2 = fadd <8 x float> %x3, %t1
 313   ret <8 x float> %t2
 314 }
 315
 316 ; Verify that AVX 256-bit vector double-precision adds are reassociated.
 317
 318 define <4 x double> @reassociate_adds_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
 319 ; AVX-LABEL: reassociate_adds_v4f64:
 320 ; AVX:       # BB#0:
 321 ; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
 322 ; AVX-NEXT:    vaddpd %ymm3, %ymm2, %ymm1
 323 ; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
 324 ; AVX-NEXT:    retq
       ; <4 x double>: an fmul feeds two chained fadds, x3 + (x2 + (x0 * x1)).
       ; Only the AVX run has checks for this function.
       ; Expected code: the add of ymm2 and ymm3 does not read the product; the
       ; product is consumed only by the last add.
 325   %t0 = fmul <4 x double> %x0, %x1
 326   %t1 = fadd <4 x double> %x2, %t0
 327   %t2 = fadd <4 x double> %x3, %t1
 328   ret <4 x double> %t2
 329 }
 330
 331 ; Verify that AVX 256-bit vector single-precision multiplies are reassociated.
 332
 333 define <8 x float> @reassociate_muls_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
 334 ; AVX-LABEL: reassociate_muls_v8f32:
 335 ; AVX:       # BB#0:
 336 ; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
 337 ; AVX-NEXT:    vmulps %ymm3, %ymm2, %ymm1
 338 ; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
 339 ; AVX-NEXT:    retq
       ; <8 x float>: an fadd feeds two chained fmuls, x3 * (x2 * (x0 + x1)).
       ; Only the AVX run has checks for this function.
       ; Expected code: the multiply of ymm2 and ymm3 does not read the sum; the
       ; sum is consumed only by the last multiply.
 340   %t0 = fadd <8 x float> %x0, %x1
 341   %t1 = fmul <8 x float> %x2, %t0
 342   %t2 = fmul <8 x float> %x3, %t1
 343   ret <8 x float> %t2
 344 }
 345
 346 ; Verify that AVX 256-bit vector double-precision multiplies are reassociated.
 347
 348 define <4 x double> @reassociate_muls_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
 349 ; AVX-LABEL: reassociate_muls_v4f64:
 350 ; AVX:       # BB#0:
 351 ; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
 352 ; AVX-NEXT:    vmulpd %ymm3, %ymm2, %ymm1
 353 ; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
 354 ; AVX-NEXT:    retq
       ; <4 x double>: an fadd feeds two chained fmuls, x3 * (x2 * (x0 + x1)).
       ; Only the AVX run has checks for this function.
       ; Expected code: the multiply of ymm2 and ymm3 does not read the sum; the
       ; sum is consumed only by the last multiply.
 355   %t0 = fadd <4 x double> %x0, %x1
 356   %t1 = fmul <4 x double> %x2, %t0
 357   %t2 = fmul <4 x double> %x3, %t1
 358   ret <4 x double> %t2
 359 }
 360
 361 ; Verify that SSE and AVX scalar single-precision minimum ops are reassociated.
 362
 363 define float @reassociate_mins_single(float %x0, float %x1, float %x2, float %x3) {
 364 ; SSE-LABEL: reassociate_mins_single:
 365 ; SSE:       # BB#0:
 366 ; SSE-NEXT:    divss %xmm1, %xmm0
 367 ; SSE-NEXT:    minss %xmm3, %xmm2
 368 ; SSE-NEXT:    minss %xmm2, %xmm0
 369 ; SSE-NEXT:    retq
 370 ;
 371 ; AVX-LABEL: reassociate_mins_single:
 372 ; AVX:       # BB#0:
 373 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 374 ; AVX-NEXT:    vminss %xmm3, %xmm2, %xmm1
 375 ; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
 376 ; AVX-NEXT:    retq
       ; t0 = x0 / x1. Each fcmp olt + select pair yields its first operand when
       ; it compares less than the second, and the second operand otherwise.
       ; Expected code: both pairs become min instructions; the min of xmm2 and
       ; xmm3 does not read the division result, which only the last min consumes.
 377   %t0 = fdiv float %x0, %x1
 378   %cmp1 = fcmp olt float %x2, %t0
 379   %sel1 = select i1 %cmp1, float %x2, float %t0
 380   %cmp2 = fcmp olt float %x3, %sel1
 381   %sel2 = select i1 %cmp2, float %x3, float %sel1
 382   ret float %sel2
 383 }
 384
 385 ; Verify that SSE and AVX scalar single-precision maximum ops are reassociated.
 386
 387 define float @reassociate_maxs_single(float %x0, float %x1, float %x2, float %x3) {
 388 ; SSE-LABEL: reassociate_maxs_single:
 389 ; SSE:       # BB#0:
 390 ; SSE-NEXT:    divss %xmm1, %xmm0
 391 ; SSE-NEXT:    maxss %xmm3, %xmm2
 392 ; SSE-NEXT:    maxss %xmm2, %xmm0
 393 ; SSE-NEXT:    retq
 394 ;
 395 ; AVX-LABEL: reassociate_maxs_single:
 396 ; AVX:       # BB#0:
 397 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 398 ; AVX-NEXT:    vmaxss %xmm3, %xmm2, %xmm1
 399 ; AVX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
 400 ; AVX-NEXT:    retq
       ; t0 = x0 / x1. Each fcmp ogt + select pair yields its first operand when
       ; it compares greater than the second, and the second operand otherwise.
       ; Expected code: both pairs become max instructions; the max of xmm2 and
       ; xmm3 does not read the division result, which only the last max consumes.
 401   %t0 = fdiv float %x0, %x1
 402   %cmp1 = fcmp ogt float %x2, %t0
 403   %sel1 = select i1 %cmp1, float %x2, float %t0
 404   %cmp2 = fcmp ogt float %x3, %sel1
 405   %sel2 = select i1 %cmp2, float %x3, float %sel1
 406   ret float %sel2
 407 }
 408
 409 ; Verify that SSE and AVX scalar double-precision minimum ops are reassociated.
 410
 411 define double @reassociate_mins_double(double %x0, double %x1, double %x2, double %x3) {
 412 ; SSE-LABEL: reassociate_mins_double:
 413 ; SSE:       # BB#0:
 414 ; SSE-NEXT:    divsd %xmm1, %xmm0
 415 ; SSE-NEXT:    minsd %xmm3, %xmm2
 416 ; SSE-NEXT:    minsd %xmm2, %xmm0
 417 ; SSE-NEXT:    retq
 418 ;
 419 ; AVX-LABEL: reassociate_mins_double:
 420 ; AVX:       # BB#0:
 421 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 422 ; AVX-NEXT:    vminsd %xmm3, %xmm2, %xmm1
 423 ; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
 424 ; AVX-NEXT:    retq
       ; Scalar double. t0 = x0 / x1. Each fcmp olt + select pair yields its first
       ; operand when it compares less than the second, else the second operand.
       ; Expected code: both pairs become min instructions; the min of xmm2 and
       ; xmm3 does not read the division result, which only the last min consumes.
 425   %t0 = fdiv double %x0, %x1
 426   %cmp1 = fcmp olt double %x2, %t0
 427   %sel1 = select i1 %cmp1, double %x2, double %t0
 428   %cmp2 = fcmp olt double %x3, %sel1
 429   %sel2 = select i1 %cmp2, double %x3, double %sel1
 430   ret double %sel2
 431 }
 432
 433 ; Verify that SSE and AVX scalar double-precision maximum ops are reassociated.
 434
 435 define double @reassociate_maxs_double(double %x0, double %x1, double %x2, double %x3) {
 436 ; SSE-LABEL: reassociate_maxs_double:
 437 ; SSE:       # BB#0:
 438 ; SSE-NEXT:    divsd %xmm1, %xmm0
 439 ; SSE-NEXT:    maxsd %xmm3, %xmm2
 440 ; SSE-NEXT:    maxsd %xmm2, %xmm0
 441 ; SSE-NEXT:    retq
 442 ;
 443 ; AVX-LABEL: reassociate_maxs_double:
 444 ; AVX:       # BB#0:
 445 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 446 ; AVX-NEXT:    vmaxsd %xmm3, %xmm2, %xmm1
 447 ; AVX-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
 448 ; AVX-NEXT:    retq
       ; Scalar double. t0 = x0 / x1. Each fcmp ogt + select pair yields its first
       ; operand when it compares greater than the second, else the second operand.
       ; Expected code: both pairs become max instructions; the max of xmm2 and
       ; xmm3 does not read the division result, which only the last max consumes.
 449   %t0 = fdiv double %x0, %x1
 450   %cmp1 = fcmp ogt double %x2, %t0
 451   %sel1 = select i1 %cmp1, double %x2, double %t0
 452   %cmp2 = fcmp ogt double %x3, %sel1
 453   %sel2 = select i1 %cmp2, double %x3, double %sel1
 454   ret double %sel2
 455 }
 456
 457 ; Verify that SSE and AVX 128-bit vector single-precision minimum ops are reassociated.
 458
 459 define <4 x float> @reassociate_mins_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 460 ; SSE-LABEL: reassociate_mins_v4f32:
 461 ; SSE:       # BB#0:
 462 ; SSE-NEXT:    addps %xmm1, %xmm0
 463 ; SSE-NEXT:    minps %xmm3, %xmm2
 464 ; SSE-NEXT:    minps %xmm2, %xmm0
 465 ; SSE-NEXT:    retq
 466 ;
 467 ; AVX-LABEL: reassociate_mins_v4f32:
 468 ; AVX:       # BB#0:
 469 ; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
 470 ; AVX-NEXT:    vminps %xmm3, %xmm2, %xmm1
 471 ; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
 472 ; AVX-NEXT:    retq
       ; <4 x float>. t0 = x0 + x1. Each fcmp olt + select pair yields, per element,
       ; its first operand when it compares less than the second, else the second.
       ; Expected code: both pairs become min instructions; the min of xmm2 and
       ; xmm3 does not read the sum, which only the last min consumes.
 473   %t0 = fadd <4 x float> %x0, %x1
 474   %cmp1 = fcmp olt <4 x float> %x2, %t0
 475   %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
 476   %cmp2 = fcmp olt <4 x float> %x3, %sel1
 477   %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
 478   ret <4 x float> %sel2
 479 }
 480
 481 ; Verify that SSE and AVX 128-bit vector single-precision maximum ops are reassociated.
 482
 483 define <4 x float> @reassociate_maxs_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 484 ; SSE-LABEL: reassociate_maxs_v4f32:
 485 ; SSE:       # BB#0:
 486 ; SSE-NEXT:    addps %xmm1, %xmm0
 487 ; SSE-NEXT:    maxps %xmm3, %xmm2
 488 ; SSE-NEXT:    maxps %xmm2, %xmm0
 489 ; SSE-NEXT:    retq
 490 ;
 491 ; AVX-LABEL: reassociate_maxs_v4f32:
 492 ; AVX:       # BB#0:
 493 ; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
 494 ; AVX-NEXT:    vmaxps %xmm3, %xmm2, %xmm1
 495 ; AVX-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
 496 ; AVX-NEXT:    retq
       ; <4 x float>. t0 = x0 + x1. Each fcmp ogt + select pair yields, per element,
       ; its first operand when it compares greater than the second, else the second.
       ; Expected code: both pairs become max instructions; the max of xmm2 and
       ; xmm3 does not read the sum, which only the last max consumes.
 497   %t0 = fadd <4 x float> %x0, %x1
 498   %cmp1 = fcmp ogt <4 x float> %x2, %t0
 499   %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
 500   %cmp2 = fcmp ogt <4 x float> %x3, %sel1
 501   %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
 502   ret <4 x float> %sel2
 503 }
 504
 505 ; Verify that SSE and AVX 128-bit vector double-precision minimum ops are reassociated.
 506
 507 define <2 x double> @reassociate_mins_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
 508 ; SSE-LABEL: reassociate_mins_v2f64:
 509 ; SSE:       # BB#0:
 510 ; SSE-NEXT:    addpd %xmm1, %xmm0
 511 ; SSE-NEXT:    minpd %xmm3, %xmm2
 512 ; SSE-NEXT:    minpd %xmm2, %xmm0
 513 ; SSE-NEXT:    retq
 514 ;
 515 ; AVX-LABEL: reassociate_mins_v2f64:
 516 ; AVX:       # BB#0:
 517 ; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
 518 ; AVX-NEXT:    vminpd %xmm3, %xmm2, %xmm1
 519 ; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
 520 ; AVX-NEXT:    retq
       ; <2 x double>. t0 = x0 + x1. Each fcmp olt + select pair yields, per element,
       ; its first operand when it compares less than the second, else the second.
       ; Expected code: both pairs become min instructions; the min of xmm2 and
       ; xmm3 does not read the sum, which only the last min consumes.
 521   %t0 = fadd <2 x double> %x0, %x1
 522   %cmp1 = fcmp olt <2 x double> %x2, %t0
 523   %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
 524   %cmp2 = fcmp olt <2 x double> %x3, %sel1
 525   %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
 526   ret <2 x double> %sel2
 527 }
 528
 529 ; Verify that SSE and AVX 128-bit vector double-precision maximum ops are reassociated.
 530
 531 define <2 x double> @reassociate_maxs_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
 532 ; SSE-LABEL: reassociate_maxs_v2f64:
 533 ; SSE:       # BB#0:
 534 ; SSE-NEXT:    addpd %xmm1, %xmm0
 535 ; SSE-NEXT:    maxpd %xmm3, %xmm2
 536 ; SSE-NEXT:    maxpd %xmm2, %xmm0
 537 ; SSE-NEXT:    retq
 538 ;
 539 ; AVX-LABEL: reassociate_maxs_v2f64:
 540 ; AVX:       # BB#0:
 541 ; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
 542 ; AVX-NEXT:    vmaxpd %xmm3, %xmm2, %xmm1
 543 ; AVX-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
 544 ; AVX-NEXT:    retq
       ; <2 x double>. t0 = x0 + x1. Each fcmp ogt + select pair yields, per element,
       ; its first operand when it compares greater than the second, else the second.
       ; Expected code: both pairs become max instructions; the max of xmm2 and
       ; xmm3 does not read the sum, which only the last max consumes.
 545   %t0 = fadd <2 x double> %x0, %x1
 546   %cmp1 = fcmp ogt <2 x double> %x2, %t0
 547   %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
 548   %cmp2 = fcmp ogt <2 x double> %x3, %sel1
 549   %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
 550   ret <2 x double> %sel2
 551 }
 552
 553 ; Verify that AVX 256-bit vector single-precision minimum ops are reassociated.
 554
 555 define <8 x float> @reassociate_mins_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
 556 ; AVX-LABEL: reassociate_mins_v8f32:
 557 ; AVX:       # BB#0:
 558 ; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
 559 ; AVX-NEXT:    vminps %ymm3, %ymm2, %ymm1
 560 ; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
 561 ; AVX-NEXT:    retq
       ; <8 x float>; only the AVX run has checks for this function.
       ; t0 = x0 + x1. Each fcmp olt + select pair yields, per element, its first
       ; operand when it compares less than the second, else the second.
       ; Expected code: both pairs become min instructions; the min of ymm2 and
       ; ymm3 does not read the sum, which only the last min consumes.
 562   %t0 = fadd <8 x float> %x0, %x1
 563   %cmp1 = fcmp olt <8 x float> %x2, %t0
 564   %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
 565   %cmp2 = fcmp olt <8 x float> %x3, %sel1
 566   %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
 567   ret <8 x float> %sel2
 568 }
 569
 570 ; Verify that AVX 256-bit vector single-precision maximum ops are reassociated.
 571
 572 define <8 x float> @reassociate_maxs_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
 573 ; AVX-LABEL: reassociate_maxs_v8f32:
 574 ; AVX:       # BB#0:
 575 ; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
 576 ; AVX-NEXT:    vmaxps %ymm3, %ymm2, %ymm1
 577 ; AVX-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
 578 ; AVX-NEXT:    retq
       ; <8 x float>; only the AVX run has checks for this function.
       ; t0 = x0 + x1. Each fcmp ogt + select pair yields, per element, its first
       ; operand when it compares greater than the second, else the second.
       ; Expected code: both pairs become max instructions; the max of ymm2 and
       ; ymm3 does not read the sum, which only the last max consumes.
 579   %t0 = fadd <8 x float> %x0, %x1
 580   %cmp1 = fcmp ogt <8 x float> %x2, %t0
 581   %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
 582   %cmp2 = fcmp ogt <8 x float> %x3, %sel1
 583   %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
 584   ret <8 x float> %sel2
 585 }
 586
 587 ; Verify that AVX 256-bit vector double-precision minimum ops are reassociated.
 588
 589 define <4 x double> @reassociate_mins_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
 590 ; AVX-LABEL: reassociate_mins_v4f64:
 591 ; AVX:       # BB#0:
 592 ; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
 593 ; AVX-NEXT:    vminpd %ymm3, %ymm2, %ymm1
 594 ; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
 595 ; AVX-NEXT:    retq
       ; <4 x double>; only the AVX run has checks for this function.
       ; t0 = x0 + x1. Each fcmp olt + select pair yields, per element, its first
       ; operand when it compares less than the second, else the second.
       ; Expected code: both pairs become min instructions; the min of ymm2 and
       ; ymm3 does not read the sum, which only the last min consumes.
 596   %t0 = fadd <4 x double> %x0, %x1
 597   %cmp1 = fcmp olt <4 x double> %x2, %t0
 598   %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
 599   %cmp2 = fcmp olt <4 x double> %x3, %sel1
 600   %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
 601   ret <4 x double> %sel2
 602 }
 603
 604 ; Verify that AVX 256-bit vector double-precision maximum ops are reassociated.
 605
 606 define <4 x double> @reassociate_maxs_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
 607 ; AVX-LABEL: reassociate_maxs_v4f64:
 608 ; AVX:       # BB#0:
 609 ; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
 610 ; AVX-NEXT:    vmaxpd %ymm3, %ymm2, %ymm1
 611 ; AVX-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
 612 ; AVX-NEXT:    retq
       ; <4 x double>; only the AVX run has checks for this function.
       ; t0 = x0 + x1. Each fcmp ogt + select pair yields, per element, its first
       ; operand when it compares greater than the second, else the second.
       ; Expected code: both pairs become max instructions; the max of ymm2 and
       ; ymm3 does not read the sum, which only the last max consumes.
 613   %t0 = fadd <4 x double> %x0, %x1
 614   %cmp1 = fcmp ogt <4 x double> %x2, %t0
 615   %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
 616   %cmp2 = fcmp ogt <4 x double> %x3, %sel1
 617   %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
 618   ret <4 x double> %sel2
 619 }
 620
 621 ; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
 622 ; Verify that reassociation is not happening needlessly or wrongly.
 623
  624 declare double @bar() ; declared without a body here; the two test functions that follow each call it four times
 625
  626 define double @reassociate_adds_from_calls() {
  627 ; AVX-LABEL: reassociate_adds_from_calls:
  628 ; AVX:       callq   bar
  629 ; AVX-NEXT:  vmovsd  %xmm0, 16(%rsp)
  630 ; AVX-NEXT:  callq   bar
  631 ; AVX-NEXT:  vmovsd  %xmm0, 8(%rsp)
  632 ; AVX-NEXT:  callq   bar
  633 ; AVX-NEXT:  vmovsd  %xmm0, (%rsp)
  634 ; AVX-NEXT:  callq   bar
  635 ; AVX-NEXT:  vmovsd  (%rsp), %xmm1
  636 ; AVX:       vaddsd  8(%rsp), %xmm1, %xmm1
  637 ; AVX-NEXT:  vaddsd  %xmm0, %xmm1, %xmm0
  638 ; AVX-NEXT:  vaddsd  16(%rsp), %xmm0, %xmm0
  639
        ; Four calls to bar produce the addends; the IR then sums them as the
        ; serial chain ((x0 + x1) + x2) + x3. Only the AVX run has checks here.
        ; Expected code: after each of the first three calls xmm0 is stored to a
        ; stack slot (16, 8, then 0 bytes from rsp); after the fourth call come
        ; three vaddsd, each of which reads the result of the previous one.
  640   %x0 = call double @bar()
  641   %x1 = call double @bar()
  642   %x2 = call double @bar()
  643   %x3 = call double @bar()
  644   %t0 = fadd double %x0, %x1
  645   %t1 = fadd double %t0, %x2
  646   %t2 = fadd double %t1, %x3
  647   ret double %t2
  648 }
 649
  650 define double @already_reassociated() {
  651 ; AVX-LABEL: already_reassociated:
  652 ; AVX:       callq   bar
  653 ; AVX-NEXT:  vmovsd  %xmm0, 16(%rsp)
  654 ; AVX-NEXT:  callq   bar
  655 ; AVX-NEXT:  vmovsd  %xmm0, 8(%rsp)
  656 ; AVX-NEXT:  callq   bar
  657 ; AVX-NEXT:  vmovsd  %xmm0, (%rsp)
  658 ; AVX-NEXT:  callq   bar
  659 ; AVX-NEXT:  vaddsd  (%rsp), %xmm0, %xmm0
  660 ; AVX-NEXT:  vaddsd  8(%rsp), %xmm0, %xmm0
  661 ; AVX-NEXT:  vaddsd  16(%rsp), %xmm0, %xmm0
  662
        ; Four calls to bar produce the addends; the IR sums them as a balanced
        ; tree, (x0 + x1) + (x2 + x3). Only the AVX run has checks here.
        ; Expected code: after each of the first three calls xmm0 is stored to a
        ; stack slot; after the fourth call come three vaddsd, each adding one
        ; stack slot into xmm0.
        ; NOTE(review): the expected adds form a serial chain through xmm0 even
        ; though the IR computes two independent sums - confirm this is the
        ; intended output for this test.
  663   %x0 = call double @bar()
  664   %x1 = call double @bar()
  665   %x2 = call double @bar()
  666   %x3 = call double @bar()
  667   %t0 = fadd double %x0, %x1
  668   %t1 = fadd double %x2, %x3
  669   %t2 = fadd double %t0, %t1
  670   ret double %t2
  671 }
 672