test/CodeGen/X86/sse-scalar-fp-arith.ll

   1 ; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
   2 ; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
   3 ; RUN: llc -mcpu=x86-64 -mattr=+avx < %s | FileCheck --check-prefix=AVX %s
   4
   5 target triple = "x86_64-unknown-unknown"
   6
   7 ; Ensure that the backend no longer emits unnecessary vector insert
   8 ; instructions immediately after SSE scalar fp instructions
   9 ; like addss or mulss.
  10
  11 define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) { ; scalar float add of lane 0 of %a and %b, written back into lane 0 of %a; checks below expect one addss/vaddss then retq
  12 ; SSE-LABEL: test_add_ss:
  13 ; SSE:       # BB#0:
  14 ; SSE-NEXT:    addss %xmm1, %xmm0
  15 ; SSE-NEXT:    retq
  16 ;
  17 ; AVX-LABEL: test_add_ss:
  18 ; AVX:       # BB#0:
  19 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  20 ; AVX-NEXT:    retq
  21   %1 = extractelement <4 x float> %b, i32 0 ; lane 0 of %b
  22   %2 = extractelement <4 x float> %a, i32 0 ; lane 0 of %a
  23   %add = fadd float %2, %1 ; a[0] + b[0]
  24   %3 = insertelement <4 x float> %a, float %add, i32 0 ; lane 0 of %a replaced by the sum; lanes 1-3 keep %a's values
  25   ret <4 x float> %3
  26 }
  27
  28 define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) { ; scalar float subtract of lane 0 (%a minus %b), written back into lane 0 of %a; checks below expect one subss/vsubss then retq
  29 ; SSE-LABEL: test_sub_ss:
  30 ; SSE:       # BB#0:
  31 ; SSE-NEXT:    subss %xmm1, %xmm0
  32 ; SSE-NEXT:    retq
  33 ;
  34 ; AVX-LABEL: test_sub_ss:
  35 ; AVX:       # BB#0:
  36 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
  37 ; AVX-NEXT:    retq
  38   %1 = extractelement <4 x float> %b, i32 0 ; lane 0 of %b
  39   %2 = extractelement <4 x float> %a, i32 0 ; lane 0 of %a
  40   %sub = fsub float %2, %1 ; a[0] - b[0]
  41   %3 = insertelement <4 x float> %a, float %sub, i32 0 ; lane 0 of %a replaced by the difference; lanes 1-3 keep %a's values
  42   ret <4 x float> %3
  43 }
  44
  45 define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) { ; scalar float multiply of lane 0 of %a and %b, written back into lane 0 of %a; checks below expect one mulss/vmulss then retq
  46 ; SSE-LABEL: test_mul_ss:
  47 ; SSE:       # BB#0:
  48 ; SSE-NEXT:    mulss %xmm1, %xmm0
  49 ; SSE-NEXT:    retq
  50 ;
  51 ; AVX-LABEL: test_mul_ss:
  52 ; AVX:       # BB#0:
  53 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
  54 ; AVX-NEXT:    retq
  55   %1 = extractelement <4 x float> %b, i32 0 ; lane 0 of %b
  56   %2 = extractelement <4 x float> %a, i32 0 ; lane 0 of %a
  57   %mul = fmul float %2, %1 ; a[0] * b[0]
  58   %3 = insertelement <4 x float> %a, float %mul, i32 0 ; lane 0 of %a replaced by the product; lanes 1-3 keep %a's values
  59   ret <4 x float> %3
  60 }
  61
  62 define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) { ; scalar float divide of lane 0 (%a over %b), written back into lane 0 of %a; checks below expect one divss/vdivss then retq
  63 ; SSE-LABEL: test_div_ss:
  64 ; SSE:       # BB#0:
  65 ; SSE-NEXT:    divss %xmm1, %xmm0
  66 ; SSE-NEXT:    retq
  67 ;
  68 ; AVX-LABEL: test_div_ss:
  69 ; AVX:       # BB#0:
  70 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
  71 ; AVX-NEXT:    retq
  72   %1 = extractelement <4 x float> %b, i32 0 ; lane 0 of %b
  73   %2 = extractelement <4 x float> %a, i32 0 ; lane 0 of %a
  74   %div = fdiv float %2, %1 ; a[0] / b[0]
  75   %3 = insertelement <4 x float> %a, float %div, i32 0 ; lane 0 of %a replaced by the quotient; lanes 1-3 keep %a's values
  76   ret <4 x float> %3
  77 }
  78
  79 define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) { ; scalar double add of lane 0 of %a and %b, written back into lane 0 of %a; checks below expect one addsd/vaddsd then retq
  80 ; SSE-LABEL: test_add_sd:
  81 ; SSE:       # BB#0:
  82 ; SSE-NEXT:    addsd %xmm1, %xmm0
  83 ; SSE-NEXT:    retq
  84 ;
  85 ; AVX-LABEL: test_add_sd:
  86 ; AVX:       # BB#0:
  87 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
  88 ; AVX-NEXT:    retq
  89   %1 = extractelement <2 x double> %b, i32 0 ; lane 0 of %b
  90   %2 = extractelement <2 x double> %a, i32 0 ; lane 0 of %a
  91   %add = fadd double %2, %1 ; a[0] + b[0]
  92   %3 = insertelement <2 x double> %a, double %add, i32 0 ; lane 0 of %a replaced by the sum; lane 1 keeps %a's value
  93   ret <2 x double> %3
  94 }
  95
  96 define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) { ; scalar double subtract of lane 0 (%a minus %b), written back into lane 0 of %a; checks below expect one subsd/vsubsd then retq
  97 ; SSE-LABEL: test_sub_sd:
  98 ; SSE:       # BB#0:
  99 ; SSE-NEXT:    subsd %xmm1, %xmm0
 100 ; SSE-NEXT:    retq
 101 ;
 102 ; AVX-LABEL: test_sub_sd:
 103 ; AVX:       # BB#0:
 104 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
 105 ; AVX-NEXT:    retq
 106   %1 = extractelement <2 x double> %b, i32 0 ; lane 0 of %b
 107   %2 = extractelement <2 x double> %a, i32 0 ; lane 0 of %a
 108   %sub = fsub double %2, %1 ; a[0] - b[0]
 109   %3 = insertelement <2 x double> %a, double %sub, i32 0 ; lane 0 of %a replaced by the difference; lane 1 keeps %a's value
 110   ret <2 x double> %3
 111 }
 112
 113 define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) { ; scalar double multiply of lane 0 of %a and %b, written back into lane 0 of %a; checks below expect one mulsd/vmulsd then retq
 114 ; SSE-LABEL: test_mul_sd:
 115 ; SSE:       # BB#0:
 116 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 117 ; SSE-NEXT:    retq
 118 ;
 119 ; AVX-LABEL: test_mul_sd:
 120 ; AVX:       # BB#0:
 121 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 122 ; AVX-NEXT:    retq
 123   %1 = extractelement <2 x double> %b, i32 0 ; lane 0 of %b
 124   %2 = extractelement <2 x double> %a, i32 0 ; lane 0 of %a
 125   %mul = fmul double %2, %1 ; a[0] * b[0]
 126   %3 = insertelement <2 x double> %a, double %mul, i32 0 ; lane 0 of %a replaced by the product; lane 1 keeps %a's value
 127   ret <2 x double> %3
 128 }
 129
 130 define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) { ; scalar double divide of lane 0 (%a over %b), written back into lane 0 of %a; checks below expect one divsd/vdivsd then retq
 131 ; SSE-LABEL: test_div_sd:
 132 ; SSE:       # BB#0:
 133 ; SSE-NEXT:    divsd %xmm1, %xmm0
 134 ; SSE-NEXT:    retq
 135 ;
 136 ; AVX-LABEL: test_div_sd:
 137 ; AVX:       # BB#0:
 138 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 139 ; AVX-NEXT:    retq
 140   %1 = extractelement <2 x double> %b, i32 0 ; lane 0 of %b
 141   %2 = extractelement <2 x double> %a, i32 0 ; lane 0 of %a
 142   %div = fdiv double %2, %1 ; a[0] / b[0]
 143   %3 = insertelement <2 x double> %a, double %div, i32 0 ; lane 0 of %a replaced by the quotient; lane 1 keeps %a's value
 144   ret <2 x double> %3
 145 }
 146
 147 define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) { ; lane-0 float add whose result is inserted into %b rather than %a; SSE checks expect addss into %xmm1 plus movaps to %xmm0, AVX checks expect a single vaddss
 148 ; SSE-LABEL: test2_add_ss:
 149 ; SSE:       # BB#0:
 150 ; SSE-NEXT:    addss %xmm0, %xmm1
 151 ; SSE-NEXT:    movaps %xmm1, %xmm0
 152 ; SSE-NEXT:    retq
 153 ;
 154 ; AVX-LABEL: test2_add_ss:
 155 ; AVX:       # BB#0:
 156 ; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 157 ; AVX-NEXT:    retq
 158   %1 = extractelement <4 x float> %a, i32 0 ; lane 0 of %a
 159   %2 = extractelement <4 x float> %b, i32 0 ; lane 0 of %b
 160   %add = fadd float %1, %2 ; a[0] + b[0]
 161   %3 = insertelement <4 x float> %b, float %add, i32 0 ; lane 0 of %b replaced by the sum; lanes 1-3 keep %b's values
 162   ret <4 x float> %3
 163 }
 164
 165 define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) { ; lane-0 float subtract (%b minus %a) whose result is inserted into %b; SSE checks expect subss into %xmm1 plus movaps to %xmm0, AVX checks expect a single vsubss
 166 ; SSE-LABEL: test2_sub_ss:
 167 ; SSE:       # BB#0:
 168 ; SSE-NEXT:    subss %xmm0, %xmm1
 169 ; SSE-NEXT:    movaps %xmm1, %xmm0
 170 ; SSE-NEXT:    retq
 171 ;
 172 ; AVX-LABEL: test2_sub_ss:
 173 ; AVX:       # BB#0:
 174 ; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
 175 ; AVX-NEXT:    retq
 176   %1 = extractelement <4 x float> %a, i32 0 ; lane 0 of %a
 177   %2 = extractelement <4 x float> %b, i32 0 ; lane 0 of %b
 178   %sub = fsub float %2, %1 ; b[0] - a[0]
 179   %3 = insertelement <4 x float> %b, float %sub, i32 0 ; lane 0 of %b replaced by the difference; lanes 1-3 keep %b's values
 180   ret <4 x float> %3
 181 }
 182
 183 define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) { ; lane-0 float multiply whose result is inserted into %b rather than %a; SSE checks expect mulss into %xmm1 plus movaps to %xmm0, AVX checks expect a single vmulss
 184 ; SSE-LABEL: test2_mul_ss:
 185 ; SSE:       # BB#0:
 186 ; SSE-NEXT:    mulss %xmm0, %xmm1
 187 ; SSE-NEXT:    movaps %xmm1, %xmm0
 188 ; SSE-NEXT:    retq
 189 ;
 190 ; AVX-LABEL: test2_mul_ss:
 191 ; AVX:       # BB#0:
 192 ; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 193 ; AVX-NEXT:    retq
 194   %1 = extractelement <4 x float> %a, i32 0 ; lane 0 of %a
 195   %2 = extractelement <4 x float> %b, i32 0 ; lane 0 of %b
 196   %mul = fmul float %1, %2 ; a[0] * b[0]
 197   %3 = insertelement <4 x float> %b, float %mul, i32 0 ; lane 0 of %b replaced by the product; lanes 1-3 keep %b's values
 198   ret <4 x float> %3
 199 }
 200
 201 define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) { ; lane-0 float divide (%b over %a) whose result is inserted into %b; SSE checks expect divss into %xmm1 plus movaps to %xmm0, AVX checks expect a single vdivss
 202 ; SSE-LABEL: test2_div_ss:
 203 ; SSE:       # BB#0:
 204 ; SSE-NEXT:    divss %xmm0, %xmm1
 205 ; SSE-NEXT:    movaps %xmm1, %xmm0
 206 ; SSE-NEXT:    retq
 207 ;
 208 ; AVX-LABEL: test2_div_ss:
 209 ; AVX:       # BB#0:
 210 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
 211 ; AVX-NEXT:    retq
 212   %1 = extractelement <4 x float> %a, i32 0 ; lane 0 of %a
 213   %2 = extractelement <4 x float> %b, i32 0 ; lane 0 of %b
 214   %div = fdiv float %2, %1 ; b[0] / a[0]
 215   %3 = insertelement <4 x float> %b, float %div, i32 0 ; lane 0 of %b replaced by the quotient; lanes 1-3 keep %b's values
 216   ret <4 x float> %3
 217 }
 218
 219 define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) { ; lane-0 double add whose result is inserted into %b rather than %a; SSE checks expect addsd into %xmm1 plus movaps to %xmm0, AVX checks expect a single vaddsd
 220 ; SSE-LABEL: test2_add_sd:
 221 ; SSE:       # BB#0:
 222 ; SSE-NEXT:    addsd %xmm0, %xmm1
 223 ; SSE-NEXT:    movaps %xmm1, %xmm0
 224 ; SSE-NEXT:    retq
 225 ;
 226 ; AVX-LABEL: test2_add_sd:
 227 ; AVX:       # BB#0:
 228 ; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
 229 ; AVX-NEXT:    retq
 230   %1 = extractelement <2 x double> %a, i32 0 ; lane 0 of %a
 231   %2 = extractelement <2 x double> %b, i32 0 ; lane 0 of %b
 232   %add = fadd double %1, %2 ; a[0] + b[0]
 233   %3 = insertelement <2 x double> %b, double %add, i32 0 ; lane 0 of %b replaced by the sum; lane 1 keeps %b's value
 234   ret <2 x double> %3
 235 }
 236
 237 define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) { ; lane-0 double subtract (%b minus %a) whose result is inserted into %b; SSE checks expect subsd into %xmm1 plus movaps to %xmm0, AVX checks expect a single vsubsd
 238 ; SSE-LABEL: test2_sub_sd:
 239 ; SSE:       # BB#0:
 240 ; SSE-NEXT:    subsd %xmm0, %xmm1
 241 ; SSE-NEXT:    movaps %xmm1, %xmm0
 242 ; SSE-NEXT:    retq
 243 ;
 244 ; AVX-LABEL: test2_sub_sd:
 245 ; AVX:       # BB#0:
 246 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
 247 ; AVX-NEXT:    retq
 248   %1 = extractelement <2 x double> %a, i32 0 ; lane 0 of %a
 249   %2 = extractelement <2 x double> %b, i32 0 ; lane 0 of %b
 250   %sub = fsub double %2, %1 ; b[0] - a[0]
 251   %3 = insertelement <2 x double> %b, double %sub, i32 0 ; lane 0 of %b replaced by the difference; lane 1 keeps %b's value
 252   ret <2 x double> %3
 253 }
 254
 255 define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) { ; lane-0 double multiply whose result is inserted into %b rather than %a; SSE checks expect mulsd into %xmm1 plus movaps to %xmm0, AVX checks expect a single vmulsd
 256 ; SSE-LABEL: test2_mul_sd:
 257 ; SSE:       # BB#0:
 258 ; SSE-NEXT:    mulsd %xmm0, %xmm1
 259 ; SSE-NEXT:    movaps %xmm1, %xmm0
 260 ; SSE-NEXT:    retq
 261 ;
 262 ; AVX-LABEL: test2_mul_sd:
 263 ; AVX:       # BB#0:
 264 ; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
 265 ; AVX-NEXT:    retq
 266   %1 = extractelement <2 x double> %a, i32 0 ; lane 0 of %a
 267   %2 = extractelement <2 x double> %b, i32 0 ; lane 0 of %b
 268   %mul = fmul double %1, %2 ; a[0] * b[0]
 269   %3 = insertelement <2 x double> %b, double %mul, i32 0 ; lane 0 of %b replaced by the product; lane 1 keeps %b's value
 270   ret <2 x double> %3
 271 }
 272
 273 define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) { ; lane-0 double divide (%b over %a) whose result is inserted into %b; SSE checks expect divsd into %xmm1 plus movaps to %xmm0, AVX checks expect a single vdivsd
 274 ; SSE-LABEL: test2_div_sd:
 275 ; SSE:       # BB#0:
 276 ; SSE-NEXT:    divsd %xmm0, %xmm1
 277 ; SSE-NEXT:    movaps %xmm1, %xmm0
 278 ; SSE-NEXT:    retq
 279 ;
 280 ; AVX-LABEL: test2_div_sd:
 281 ; AVX:       # BB#0:
 282 ; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
 283 ; AVX-NEXT:    retq
 284   %1 = extractelement <2 x double> %a, i32 0 ; lane 0 of %a
 285   %2 = extractelement <2 x double> %b, i32 0 ; lane 0 of %b
 286   %div = fdiv double %2, %1 ; b[0] / a[0]
 287   %3 = insertelement <2 x double> %b, double %div, i32 0 ; lane 0 of %b replaced by the quotient; lane 1 keeps %b's value
 288   ret <2 x double> %3
 289 }
 290
 291 define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) { ; two chained lane-0 float adds, final value inserted into lane 0 of %a; checks expect exactly two addss/vaddss then retq
 292 ; SSE-LABEL: test_multiple_add_ss:
 293 ; SSE:       # BB#0:
 294 ; SSE-NEXT:    addss %xmm0, %xmm1
 295 ; SSE-NEXT:    addss %xmm1, %xmm0
 296 ; SSE-NEXT:    retq
 297 ;
 298 ; AVX-LABEL: test_multiple_add_ss:
 299 ; AVX:       # BB#0:
 300 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm1
 301 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 302 ; AVX-NEXT:    retq
 303   %1 = extractelement <4 x float> %b, i32 0 ; lane 0 of %b
 304   %2 = extractelement <4 x float> %a, i32 0 ; lane 0 of %a
 305   %add = fadd float %2, %1 ; a[0] + b[0]
 306   %add2 = fadd float %2, %add ; a[0] + (a[0] + b[0])
 307   %3 = insertelement <4 x float> %a, float %add2, i32 0 ; lane 0 of %a replaced by %add2; lanes 1-3 keep %a's values
 308   ret <4 x float> %3
 309 }
 310
 311 define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) { ; two chained lane-0 float subtracts, final value inserted into lane 0 of %a; SSE checks expect a movaps copy of %xmm0 into %xmm2 before the two subss, AVX checks expect two vsubss
 312 ; SSE-LABEL: test_multiple_sub_ss:
 313 ; SSE:       # BB#0:
 314 ; SSE-NEXT:    movaps %xmm0, %xmm2
 315 ; SSE-NEXT:    subss %xmm1, %xmm2
 316 ; SSE-NEXT:    subss %xmm2, %xmm0
 317 ; SSE-NEXT:    retq
 318 ;
 319 ; AVX-LABEL: test_multiple_sub_ss:
 320 ; AVX:       # BB#0:
 321 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm1
 322 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
 323 ; AVX-NEXT:    retq
 324   %1 = extractelement <4 x float> %b, i32 0 ; lane 0 of %b
 325   %2 = extractelement <4 x float> %a, i32 0 ; lane 0 of %a
 326   %sub = fsub float %2, %1 ; a[0] - b[0]
 327   %sub2 = fsub float %2, %sub ; a[0] - (a[0] - b[0])
 328   %3 = insertelement <4 x float> %a, float %sub2, i32 0 ; lane 0 of %a replaced by %sub2; lanes 1-3 keep %a's values
 329   ret <4 x float> %3
 330 }
 331
 332 define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) { ; two chained lane-0 float multiplies, final value inserted into lane 0 of %a; checks expect exactly two mulss/vmulss then retq
 333 ; SSE-LABEL: test_multiple_mul_ss:
 334 ; SSE:       # BB#0:
 335 ; SSE-NEXT:    mulss %xmm0, %xmm1
 336 ; SSE-NEXT:    mulss %xmm1, %xmm0
 337 ; SSE-NEXT:    retq
 338 ;
 339 ; AVX-LABEL: test_multiple_mul_ss:
 340 ; AVX:       # BB#0:
 341 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm1
 342 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 343 ; AVX-NEXT:    retq
 344   %1 = extractelement <4 x float> %b, i32 0 ; lane 0 of %b
 345   %2 = extractelement <4 x float> %a, i32 0 ; lane 0 of %a
 346   %mul = fmul float %2, %1 ; a[0] * b[0]
 347   %mul2 = fmul float %2, %mul ; a[0] * (a[0] * b[0])
 348   %3 = insertelement <4 x float> %a, float %mul2, i32 0 ; lane 0 of %a replaced by %mul2; lanes 1-3 keep %a's values
 349   ret <4 x float> %3
 350 }
 351
 352 define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) { ; two chained lane-0 float divides, final value inserted into lane 0 of %a; SSE checks expect a movaps copy of %xmm0 into %xmm2 before the two divss, AVX checks expect two vdivss
 353 ; SSE-LABEL: test_multiple_div_ss:
 354 ; SSE:       # BB#0:
 355 ; SSE-NEXT:    movaps %xmm0, %xmm2
 356 ; SSE-NEXT:    divss %xmm1, %xmm2
 357 ; SSE-NEXT:    divss %xmm2, %xmm0
 358 ; SSE-NEXT:    retq
 359 ;
 360 ; AVX-LABEL: test_multiple_div_ss:
 361 ; AVX:       # BB#0:
 362 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm1
 363 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 364 ; AVX-NEXT:    retq
 365   %1 = extractelement <4 x float> %b, i32 0 ; lane 0 of %b
 366   %2 = extractelement <4 x float> %a, i32 0 ; lane 0 of %a
 367   %div = fdiv float %2, %1 ; a[0] / b[0]
 368   %div2 = fdiv float %2, %div ; a[0] / (a[0] / b[0])
 369   %3 = insertelement <4 x float> %a, float %div2, i32 0 ; lane 0 of %a replaced by %div2; lanes 1-3 keep %a's values
 370   ret <4 x float> %3
 371 }
 372
 373 ; Ensure that the backend selects SSE/AVX scalar fp instructions
 374 ; from a packed fp instruction plus a vector insert.
 375
 376 define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) { ; packed float add followed by a shuffle that keeps only lane 0 of the sum; checks expect a single scalar addss/vaddss then retq
 377 ; SSE-LABEL: insert_test_add_ss:
 378 ; SSE:       # BB#0:
 379 ; SSE-NEXT:    addss %xmm1, %xmm0
 380 ; SSE-NEXT:    retq
 381 ;
 382 ; AVX-LABEL: insert_test_add_ss:
 383 ; AVX:       # BB#0:
 384 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 385 ; AVX-NEXT:    retq
 386   %1 = fadd <4 x float> %a, %b ; packed %a + %b over all four lanes
 387   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7> ; lane 0 from %1, lanes 1-3 from %a (mask indices 5-7 select the second operand)
 388   ret <4 x float> %2
 389 }
 390
 391 define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) { ; packed float subtract followed by a shuffle that keeps only lane 0 of the difference; checks expect a single scalar subss/vsubss then retq
 392 ; SSE-LABEL: insert_test_sub_ss:
 393 ; SSE:       # BB#0:
 394 ; SSE-NEXT:    subss %xmm1, %xmm0
 395 ; SSE-NEXT:    retq
 396 ;
 397 ; AVX-LABEL: insert_test_sub_ss:
 398 ; AVX:       # BB#0:
 399 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
 400 ; AVX-NEXT:    retq
 401   %1 = fsub <4 x float> %a, %b ; packed %a - %b over all four lanes
 402   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7> ; lane 0 from %1, lanes 1-3 from %a (mask indices 5-7 select the second operand)
 403   ret <4 x float> %2
 404 }
 405
 406 define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) { ; packed float multiply followed by a shuffle that keeps only lane 0 of the product; checks expect a single scalar mulss/vmulss then retq
 407 ; SSE-LABEL: insert_test_mul_ss:
 408 ; SSE:       # BB#0:
 409 ; SSE-NEXT:    mulss %xmm1, %xmm0
 410 ; SSE-NEXT:    retq
 411 ;
 412 ; AVX-LABEL: insert_test_mul_ss:
 413 ; AVX:       # BB#0:
 414 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 415 ; AVX-NEXT:    retq
 416   %1 = fmul <4 x float> %a, %b ; packed %a * %b over all four lanes
 417   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7> ; lane 0 from %1, lanes 1-3 from %a (mask indices 5-7 select the second operand)
 418   ret <4 x float> %2
 419 }
 420
 421 define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) { ; packed float divide followed by a shuffle that keeps only lane 0 of the quotient; checks expect a single scalar divss/vdivss then retq
 422 ; SSE-LABEL: insert_test_div_ss:
 423 ; SSE:       # BB#0:
 424 ; SSE-NEXT:    divss %xmm1, %xmm0
 425 ; SSE-NEXT:    retq
 426 ;
 427 ; AVX-LABEL: insert_test_div_ss:
 428 ; AVX:       # BB#0:
 429 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 430 ; AVX-NEXT:    retq
 431   %1 = fdiv <4 x float> %a, %b ; packed %a / %b over all four lanes
 432   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7> ; lane 0 from %1, lanes 1-3 from %a (mask indices 5-7 select the second operand)
 433   ret <4 x float> %2
 434 }
 435
 436 define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) { ; packed double add followed by a shuffle that keeps only lane 0 of the sum; checks expect a single scalar addsd/vaddsd then retq
 437 ; SSE-LABEL: insert_test_add_sd:
 438 ; SSE:       # BB#0:
 439 ; SSE-NEXT:    addsd %xmm1, %xmm0
 440 ; SSE-NEXT:    retq
 441 ;
 442 ; AVX-LABEL: insert_test_add_sd:
 443 ; AVX:       # BB#0:
 444 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 445 ; AVX-NEXT:    retq
 446   %1 = fadd <2 x double> %a, %b ; packed %a + %b over both lanes
 447   %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3> ; lane 0 from %1, lane 1 from %a (mask index 3 selects lane 1 of the second operand)
 448   ret <2 x double> %2
 449 }
 450
 451 define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) { ; packed double subtract followed by a shuffle that keeps only lane 0 of the difference; checks expect a single scalar subsd/vsubsd then retq
 452 ; SSE-LABEL: insert_test_sub_sd:
 453 ; SSE:       # BB#0:
 454 ; SSE-NEXT:    subsd %xmm1, %xmm0
 455 ; SSE-NEXT:    retq
 456 ;
 457 ; AVX-LABEL: insert_test_sub_sd:
 458 ; AVX:       # BB#0:
 459 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
 460 ; AVX-NEXT:    retq
 461   %1 = fsub <2 x double> %a, %b ; packed %a - %b over both lanes
 462   %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3> ; lane 0 from %1, lane 1 from %a (mask index 3 selects lane 1 of the second operand)
 463   ret <2 x double> %2
 464 }
 465
 466 define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) { ; packed double multiply followed by a shuffle that keeps only lane 0 of the product; checks expect a single scalar mulsd/vmulsd then retq
 467 ; SSE-LABEL: insert_test_mul_sd:
 468 ; SSE:       # BB#0:
 469 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 470 ; SSE-NEXT:    retq
 471 ;
 472 ; AVX-LABEL: insert_test_mul_sd:
 473 ; AVX:       # BB#0:
 474 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 475 ; AVX-NEXT:    retq
 476   %1 = fmul <2 x double> %a, %b ; packed %a * %b over both lanes
 477   %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3> ; lane 0 from %1, lane 1 from %a (mask index 3 selects lane 1 of the second operand)
 478   ret <2 x double> %2
 479 }
 480
 481 define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) { ; packed double divide followed by a shuffle that keeps only lane 0 of the quotient; checks expect a single scalar divsd/vdivsd then retq
 482 ; SSE-LABEL: insert_test_div_sd:
 483 ; SSE:       # BB#0:
 484 ; SSE-NEXT:    divsd %xmm1, %xmm0
 485 ; SSE-NEXT:    retq
 486 ;
 487 ; AVX-LABEL: insert_test_div_sd:
 488 ; AVX:       # BB#0:
 489 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 490 ; AVX-NEXT:    retq
 491   %1 = fdiv <2 x double> %a, %b ; packed %a / %b over both lanes
 492   %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3> ; lane 0 from %1, lane 1 from %a (mask index 3 selects lane 1 of the second operand)
 493   ret <2 x double> %2
 494 }
 495
 496 define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) { ; packed float add with %b as first operand, shuffled so only lane 0 of the sum lands in %b; SSE checks expect addss into %xmm1 plus movaps to %xmm0, AVX checks expect a single vaddss
 497 ; SSE-LABEL: insert_test2_add_ss:
 498 ; SSE:       # BB#0:
 499 ; SSE-NEXT:    addss %xmm0, %xmm1
 500 ; SSE-NEXT:    movaps %xmm1, %xmm0
 501 ; SSE-NEXT:    retq
 502 ;
 503 ; AVX-LABEL: insert_test2_add_ss:
 504 ; AVX:       # BB#0:
 505 ; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 506 ; AVX-NEXT:    retq
 507   %1 = fadd <4 x float> %b, %a ; packed %b + %a over all four lanes
 508   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7> ; lane 0 from %1, lanes 1-3 from %b (mask indices 5-7 select the second operand)
 509   ret <4 x float> %2
 510 }
 511
 512 define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) { ; packed float subtract (%b minus %a), shuffled so only lane 0 of the difference lands in %b; SSE checks expect subss into %xmm1 plus movaps to %xmm0, AVX checks expect a single vsubss
 513 ; SSE-LABEL: insert_test2_sub_ss:
 514 ; SSE:       # BB#0:
 515 ; SSE-NEXT:    subss %xmm0, %xmm1
 516 ; SSE-NEXT:    movaps %xmm1, %xmm0
 517 ; SSE-NEXT:    retq
 518 ;
 519 ; AVX-LABEL: insert_test2_sub_ss:
 520 ; AVX:       # BB#0:
 521 ; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
 522 ; AVX-NEXT:    retq
 523   %1 = fsub <4 x float> %b, %a ; packed %b - %a over all four lanes
 524   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7> ; lane 0 from %1, lanes 1-3 from %b (mask indices 5-7 select the second operand)
 525   ret <4 x float> %2
 526 }
 527
 528 define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) { ; packed float multiply with %b as first operand, shuffled so only lane 0 of the product lands in %b; SSE checks expect mulss into %xmm1 plus movaps to %xmm0, AVX checks expect a single vmulss
 529 ; SSE-LABEL: insert_test2_mul_ss:
 530 ; SSE:       # BB#0:
 531 ; SSE-NEXT:    mulss %xmm0, %xmm1
 532 ; SSE-NEXT:    movaps %xmm1, %xmm0
 533 ; SSE-NEXT:    retq
 534 ;
 535 ; AVX-LABEL: insert_test2_mul_ss:
 536 ; AVX:       # BB#0:
 537 ; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 538 ; AVX-NEXT:    retq
 539   %1 = fmul <4 x float> %b, %a ; packed %b * %a over all four lanes
 540   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7> ; lane 0 from %1, lanes 1-3 from %b (mask indices 5-7 select the second operand)
 541   ret <4 x float> %2
 542 }
 543
 544 define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) { ; packed float divide (%b over %a), shuffled so only lane 0 of the quotient lands in %b; SSE checks expect divss into %xmm1 plus movaps to %xmm0, AVX checks expect a single vdivss
 545 ; SSE-LABEL: insert_test2_div_ss:
 546 ; SSE:       # BB#0:
 547 ; SSE-NEXT:    divss %xmm0, %xmm1
 548 ; SSE-NEXT:    movaps %xmm1, %xmm0
 549 ; SSE-NEXT:    retq
 550 ;
 551 ; AVX-LABEL: insert_test2_div_ss:
 552 ; AVX:       # BB#0:
 553 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
 554 ; AVX-NEXT:    retq
 555   %1 = fdiv <4 x float> %b, %a ; packed %b / %a over all four lanes
 556   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7> ; lane 0 from %1, lanes 1-3 from %b (mask indices 5-7 select the second operand)
 557   ret <4 x float> %2
 558 }
 559
 560 define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) { ; packed double add with %b as first operand, shuffled so only lane 0 of the sum lands in %b; SSE checks expect addsd into %xmm1 plus movaps to %xmm0, AVX checks expect a single vaddsd
 561 ; SSE-LABEL: insert_test2_add_sd:
 562 ; SSE:       # BB#0:
 563 ; SSE-NEXT:    addsd %xmm0, %xmm1
 564 ; SSE-NEXT:    movaps %xmm1, %xmm0
 565 ; SSE-NEXT:    retq
 566 ;
 567 ; AVX-LABEL: insert_test2_add_sd:
 568 ; AVX:       # BB#0:
 569 ; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
 570 ; AVX-NEXT:    retq
 571   %1 = fadd <2 x double> %b, %a ; packed %b + %a over both lanes
 572   %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3> ; lane 0 from %1, lane 1 from %b (mask index 3 selects lane 1 of the second operand)
 573   ret <2 x double> %2
 574 }
 575
 576 define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) { ; packed double subtract (%b minus %a), shuffled so only lane 0 of the difference lands in %b; SSE checks expect subsd into %xmm1 plus movaps to %xmm0, AVX checks expect a single vsubsd
 577 ; SSE-LABEL: insert_test2_sub_sd:
 578 ; SSE:       # BB#0:
 579 ; SSE-NEXT:    subsd %xmm0, %xmm1
 580 ; SSE-NEXT:    movaps %xmm1, %xmm0
 581 ; SSE-NEXT:    retq
 582 ;
 583 ; AVX-LABEL: insert_test2_sub_sd:
 584 ; AVX:       # BB#0:
 585 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
 586 ; AVX-NEXT:    retq
 587   %1 = fsub <2 x double> %b, %a ; packed %b - %a over both lanes
 588   %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3> ; lane 0 from %1, lane 1 from %b (mask index 3 selects lane 1 of the second operand)
 589   ret <2 x double> %2
 590 }
 591
 592 define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) { ; packed double multiply with %b as first operand, shuffled so only lane 0 of the product lands in %b; SSE checks expect mulsd into %xmm1 plus movaps to %xmm0, AVX checks expect a single vmulsd
 593 ; SSE-LABEL: insert_test2_mul_sd:
 594 ; SSE:       # BB#0:
 595 ; SSE-NEXT:    mulsd %xmm0, %xmm1
 596 ; SSE-NEXT:    movaps %xmm1, %xmm0
 597 ; SSE-NEXT:    retq
 598 ;
 599 ; AVX-LABEL: insert_test2_mul_sd:
 600 ; AVX:       # BB#0:
 601 ; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
 602 ; AVX-NEXT:    retq
 603   %1 = fmul <2 x double> %b, %a ; packed %b * %a over both lanes
 604   %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3> ; lane 0 from %1, lane 1 from %b (mask index 3 selects lane 1 of the second operand)
 605   ret <2 x double> %2
 606 }
 607
 608 define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) { ; packed double divide (%b over %a), shuffled so only lane 0 of the quotient lands in %b; SSE checks expect divsd into %xmm1 plus movaps to %xmm0, AVX checks expect a single vdivsd
 609 ; SSE-LABEL: insert_test2_div_sd:
 610 ; SSE:       # BB#0:
 611 ; SSE-NEXT:    divsd %xmm0, %xmm1
 612 ; SSE-NEXT:    movaps %xmm1, %xmm0
 613 ; SSE-NEXT:    retq
 614 ;
 615 ; AVX-LABEL: insert_test2_div_sd:
 616 ; AVX:       # BB#0:
 617 ; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
 618 ; AVX-NEXT:    retq
 619   %1 = fdiv <2 x double> %b, %a ; packed %b / %a over both lanes
 620   %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3> ; lane 0 from %1, lane 1 from %b (mask index 3 selects lane 1 of the second operand)
 621   ret <2 x double> %2
 622 }
 623
 624 define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) { ; packed float add followed by a constant-mask select that keeps only lane 0 of the sum; checks expect a single scalar addss/vaddss then retq
 625 ; SSE-LABEL: insert_test3_add_ss:
 626 ; SSE:       # BB#0:
 627 ; SSE-NEXT:    addss %xmm1, %xmm0
 628 ; SSE-NEXT:    retq
 629 ;
 630 ; AVX-LABEL: insert_test3_add_ss:
 631 ; AVX:       # BB#0:
 632 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 633 ; AVX-NEXT:    retq
 634   %1 = fadd <4 x float> %a, %b ; packed %a + %b over all four lanes
 635   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1 ; false in lane 0 picks %1 there; true in lanes 1-3 picks %a
 636   ret <4 x float> %2
 637 }
 638
 639 define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) { ; packed float subtract followed by a constant-mask select that keeps only lane 0 of the difference; checks expect a single scalar subss/vsubss then retq
 640 ; SSE-LABEL: insert_test3_sub_ss:
 641 ; SSE:       # BB#0:
 642 ; SSE-NEXT:    subss %xmm1, %xmm0
 643 ; SSE-NEXT:    retq
 644 ;
 645 ; AVX-LABEL: insert_test3_sub_ss:
 646 ; AVX:       # BB#0:
 647 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
 648 ; AVX-NEXT:    retq
 649   %1 = fsub <4 x float> %a, %b ; packed %a - %b over all four lanes
 650   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1 ; false in lane 0 picks %1 there; true in lanes 1-3 picks %a
 651   ret <4 x float> %2
 652 }
 653
 654 define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) { ; packed float multiply followed by a constant-mask select that keeps only lane 0 of the product; checks expect a single scalar mulss/vmulss then retq
 655 ; SSE-LABEL: insert_test3_mul_ss:
 656 ; SSE:       # BB#0:
 657 ; SSE-NEXT:    mulss %xmm1, %xmm0
 658 ; SSE-NEXT:    retq
 659 ;
 660 ; AVX-LABEL: insert_test3_mul_ss:
 661 ; AVX:       # BB#0:
 662 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 663 ; AVX-NEXT:    retq
 664   %1 = fmul <4 x float> %a, %b ; packed %a * %b over all four lanes
 665   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1 ; false in lane 0 picks %1 there; true in lanes 1-3 picks %a
 666   ret <4 x float> %2
 667 }
 668
 669 define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) { ; packed float divide followed by a constant-mask select that keeps only lane 0 of the quotient; checks expect a single scalar divss/vdivss then retq
 670 ; SSE-LABEL: insert_test3_div_ss:
 671 ; SSE:       # BB#0:
 672 ; SSE-NEXT:    divss %xmm1, %xmm0
 673 ; SSE-NEXT:    retq
 674 ;
 675 ; AVX-LABEL: insert_test3_div_ss:
 676 ; AVX:       # BB#0:
 677 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 678 ; AVX-NEXT:    retq
 679   %1 = fdiv <4 x float> %a, %b ; packed %a / %b over all four lanes
 680   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1 ; false in lane 0 picks %1 there; true in lanes 1-3 picks %a
 681   ret <4 x float> %2
 682 }
 683
 684 define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) { ; packed double add followed by a constant-mask select that keeps only lane 0 of the sum; checks expect a single scalar addsd/vaddsd then retq
 685 ; SSE-LABEL: insert_test3_add_sd:
 686 ; SSE:       # BB#0:
 687 ; SSE-NEXT:    addsd %xmm1, %xmm0
 688 ; SSE-NEXT:    retq
 689 ;
 690 ; AVX-LABEL: insert_test3_add_sd:
 691 ; AVX:       # BB#0:
 692 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 693 ; AVX-NEXT:    retq
 694   %1 = fadd <2 x double> %a, %b ; packed %a + %b over both lanes
 695   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1 ; false in lane 0 picks %1 there; true in lane 1 picks %a
 696   ret <2 x double> %2
 697 }
 698
 699 define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) { ; packed double subtract followed by a constant-mask select that keeps only lane 0 of the difference; checks expect a single scalar subsd/vsubsd then retq
 700 ; SSE-LABEL: insert_test3_sub_sd:
 701 ; SSE:       # BB#0:
 702 ; SSE-NEXT:    subsd %xmm1, %xmm0
 703 ; SSE-NEXT:    retq
 704 ;
 705 ; AVX-LABEL: insert_test3_sub_sd:
 706 ; AVX:       # BB#0:
 707 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
 708 ; AVX-NEXT:    retq
 709   %1 = fsub <2 x double> %a, %b ; packed %a - %b over both lanes
 710   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1 ; false in lane 0 picks %1 there; true in lane 1 picks %a
 711   ret <2 x double> %2
 712 }
 713
 714 define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) { ; packed double multiply followed by a constant-mask select that keeps only lane 0 of the product; checks expect a single scalar mulsd/vmulsd then retq
 715 ; SSE-LABEL: insert_test3_mul_sd:
 716 ; SSE:       # BB#0:
 717 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 718 ; SSE-NEXT:    retq
 719 ;
 720 ; AVX-LABEL: insert_test3_mul_sd:
 721 ; AVX:       # BB#0:
 722 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 723 ; AVX-NEXT:    retq
 724   %1 = fmul <2 x double> %a, %b ; packed %a * %b over both lanes
 725   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1 ; false in lane 0 picks %1 there; true in lane 1 picks %a
 726   ret <2 x double> %2
 727 }
 728
 729 define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) { ; packed double divide followed by a constant-mask select that keeps only lane 0 of the quotient; checks expect a single scalar divsd/vdivsd then retq
 730 ; SSE-LABEL: insert_test3_div_sd:
 731 ; SSE:       # BB#0:
 732 ; SSE-NEXT:    divsd %xmm1, %xmm0
 733 ; SSE-NEXT:    retq
 734 ;
 735 ; AVX-LABEL: insert_test3_div_sd:
 736 ; AVX:       # BB#0:
 737 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 738 ; AVX-NEXT:    retq
 739   %1 = fdiv <2 x double> %a, %b ; packed %a / %b over both lanes
 740   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1 ; false in lane 0 picks %1 there; true in lane 1 picks %a
 741   ret <2 x double> %2
 742 }
 743
 744 define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) { ; packed float add with %b as first operand, then a constant-mask select keeping only lane 0 of the sum in %b; SSE checks expect addss into %xmm1 plus movaps to %xmm0, AVX checks expect a single vaddss
 745 ; SSE-LABEL: insert_test4_add_ss:
 746 ; SSE:       # BB#0:
 747 ; SSE-NEXT:    addss %xmm0, %xmm1
 748 ; SSE-NEXT:    movaps %xmm1, %xmm0
 749 ; SSE-NEXT:    retq
 750 ;
 751 ; AVX-LABEL: insert_test4_add_ss:
 752 ; AVX:       # BB#0:
 753 ; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 754 ; AVX-NEXT:    retq
 755   %1 = fadd <4 x float> %b, %a ; packed %b + %a over all four lanes
 756   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1 ; false in lane 0 picks %1 there; true in lanes 1-3 picks %b
 757   ret <4 x float> %2
 758 }
 759
 760 define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) { ; packed float subtract (%b minus %a), then a constant-mask select keeping only lane 0 of the difference in %b; SSE checks expect subss into %xmm1 plus movaps to %xmm0, AVX checks expect a single vsubss
 761 ; SSE-LABEL: insert_test4_sub_ss:
 762 ; SSE:       # BB#0:
 763 ; SSE-NEXT:    subss %xmm0, %xmm1
 764 ; SSE-NEXT:    movaps %xmm1, %xmm0
 765 ; SSE-NEXT:    retq
 766 ;
 767 ; AVX-LABEL: insert_test4_sub_ss:
 768 ; AVX:       # BB#0:
 769 ; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
 770 ; AVX-NEXT:    retq
 771   %1 = fsub <4 x float> %b, %a ; packed %b - %a over all four lanes
 772   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1 ; false in lane 0 picks %1 there; true in lanes 1-3 picks %b
 773   ret <4 x float> %2
 774 }
 775
 776 define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) { ; packed float multiply with %b as first operand, then a constant-mask select keeping only lane 0 of the product in %b; SSE checks expect mulss into %xmm1 plus movaps to %xmm0, AVX checks expect a single vmulss
 777 ; SSE-LABEL: insert_test4_mul_ss:
 778 ; SSE:       # BB#0:
 779 ; SSE-NEXT:    mulss %xmm0, %xmm1
 780 ; SSE-NEXT:    movaps %xmm1, %xmm0
 781 ; SSE-NEXT:    retq
 782 ;
 783 ; AVX-LABEL: insert_test4_mul_ss:
 784 ; AVX:       # BB#0:
 785 ; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 786 ; AVX-NEXT:    retq
 787   %1 = fmul <4 x float> %b, %a ; packed %b * %a over all four lanes
 788   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1 ; false in lane 0 picks %1 there; true in lanes 1-3 picks %b
 789   ret <4 x float> %2
 790 }
 791
 792 define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) { ; packed float divide (%b over %a), then a constant-mask select keeping only lane 0 of the quotient in %b; SSE checks expect divss into %xmm1 plus movaps to %xmm0, AVX checks expect a single vdivss
 793 ; SSE-LABEL: insert_test4_div_ss:
 794 ; SSE:       # BB#0:
 795 ; SSE-NEXT:    divss %xmm0, %xmm1
 796 ; SSE-NEXT:    movaps %xmm1, %xmm0
 797 ; SSE-NEXT:    retq
 798 ;
 799 ; AVX-LABEL: insert_test4_div_ss:
 800 ; AVX:       # BB#0:
 801 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
 802 ; AVX-NEXT:    retq
 803   %1 = fdiv <4 x float> %b, %a ; packed %b / %a over all four lanes
 804   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1 ; false in lane 0 picks %1 there; true in lanes 1-3 picks %b
 805   ret <4 x float> %2
 806 }
 807
 808 define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) { ; packed double add with %b as first operand, then a constant-mask select keeping only lane 0 of the sum in %b; SSE checks expect addsd into %xmm1 plus movaps to %xmm0, AVX checks expect a single vaddsd
 809 ; SSE-LABEL: insert_test4_add_sd:
 810 ; SSE:       # BB#0:
 811 ; SSE-NEXT:    addsd %xmm0, %xmm1
 812 ; SSE-NEXT:    movaps %xmm1, %xmm0
 813 ; SSE-NEXT:    retq
 814 ;
 815 ; AVX-LABEL: insert_test4_add_sd:
 816 ; AVX:       # BB#0:
 817 ; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
 818 ; AVX-NEXT:    retq
 819   %1 = fadd <2 x double> %b, %a ; packed %b + %a over both lanes
 820   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1 ; false in lane 0 picks %1 there; true in lane 1 picks %b
 821   ret <2 x double> %2
 822 }
 823
 824 define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) { ; packed double subtract (%b minus %a), then a constant-mask select keeping only lane 0 of the difference in %b; SSE checks expect subsd into %xmm1 plus movaps to %xmm0, AVX checks expect a single vsubsd
 825 ; SSE-LABEL: insert_test4_sub_sd:
 826 ; SSE:       # BB#0:
 827 ; SSE-NEXT:    subsd %xmm0, %xmm1
 828 ; SSE-NEXT:    movaps %xmm1, %xmm0
 829 ; SSE-NEXT:    retq
 830 ;
 831 ; AVX-LABEL: insert_test4_sub_sd:
 832 ; AVX:       # BB#0:
 833 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
 834 ; AVX-NEXT:    retq
 835   %1 = fsub <2 x double> %b, %a ; packed %b - %a over both lanes
 836   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1 ; false in lane 0 picks %1 there; true in lane 1 picks %b
 837   ret <2 x double> %2
 838 }
 839
 840 define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) { ; packed double multiply with %b as first operand, then a constant-mask select keeping only lane 0 of the product in %b; SSE checks expect mulsd into %xmm1 plus movaps to %xmm0, AVX checks expect a single vmulsd
 841 ; SSE-LABEL: insert_test4_mul_sd:
 842 ; SSE:       # BB#0:
 843 ; SSE-NEXT:    mulsd %xmm0, %xmm1
 844 ; SSE-NEXT:    movaps %xmm1, %xmm0
 845 ; SSE-NEXT:    retq
 846 ;
 847 ; AVX-LABEL: insert_test4_mul_sd:
 848 ; AVX:       # BB#0:
 849 ; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
 850 ; AVX-NEXT:    retq
 851   %1 = fmul <2 x double> %b, %a ; packed %b * %a over both lanes
 852   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1 ; false in lane 0 picks %1 there; true in lane 1 picks %b
 853   ret <2 x double> %2
 854 }
 855
 856 define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) { ; packed double divide (%b over %a), then a constant-mask select keeping only lane 0 of the quotient in %b; SSE checks expect divsd into %xmm1 plus movaps to %xmm0, AVX checks expect a single vdivsd
 857 ; SSE-LABEL: insert_test4_div_sd:
 858 ; SSE:       # BB#0:
 859 ; SSE-NEXT:    divsd %xmm0, %xmm1
 860 ; SSE-NEXT:    movaps %xmm1, %xmm0
 861 ; SSE-NEXT:    retq
 862 ;
 863 ; AVX-LABEL: insert_test4_div_sd:
 864 ; AVX:       # BB#0:
 865 ; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
 866 ; AVX-NEXT:    retq
 867   %1 = fdiv <2 x double> %b, %a ; packed %b / %a over both lanes
 868   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1 ; false in lane 0 picks %1 there; true in lane 1 picks %b
 869   ret <2 x double> %2
 870 }