test/CodeGen/X86/sse-scalar-fp-arith.ll

   1 ; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
   2 ; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
   3 ; RUN: llc -mcpu=x86-64 -mattr=+avx < %s | FileCheck --check-prefix=AVX %s
   4
   5 target triple = "x86_64-unknown-unknown"
   6
   7 ; Ensure that the backend no longer emits unnecessary vector insert
   8 ; instructions immediately after SSE scalar fp instructions
   9 ; like addss or mulss.
  10
  11 define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
     ; Extracts lane 0 of %a and %b, adds them as scalars, and inserts the sum
     ; back into lane 0 of %a. The checks expect a lone addss (vaddss with AVX)
     ; followed directly by the return, i.e. no extra vector insert.
  12 ; SSE-LABEL: test_add_ss:
  13 ; SSE:       # BB#0:
  14 ; SSE-NEXT:    addss %xmm1, %xmm0
  15 ; SSE-NEXT:    retq
  16 ;
  17 ; AVX-LABEL: test_add_ss:
  18 ; AVX:       # BB#0:
  19 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  20 ; AVX-NEXT:    retq
  21   %1 = extractelement <4 x float> %b, i32 0
  22   %2 = extractelement <4 x float> %a, i32 0
  23   %add = fadd float %2, %1
  24   %3 = insertelement <4 x float> %a, float %add, i32 0
  25   ret <4 x float> %3
  26 }
  27
  28 define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
     ; Computes %a[0] - %b[0] as a scalar fsub and inserts the difference back
     ; into lane 0 of %a. The checks expect a lone subss (vsubss with AVX)
     ; followed directly by the return.
  29 ; SSE-LABEL: test_sub_ss:
  30 ; SSE:       # BB#0:
  31 ; SSE-NEXT:    subss %xmm1, %xmm0
  32 ; SSE-NEXT:    retq
  33 ;
  34 ; AVX-LABEL: test_sub_ss:
  35 ; AVX:       # BB#0:
  36 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
  37 ; AVX-NEXT:    retq
  38   %1 = extractelement <4 x float> %b, i32 0
  39   %2 = extractelement <4 x float> %a, i32 0
  40   %sub = fsub float %2, %1
  41   %3 = insertelement <4 x float> %a, float %sub, i32 0
  42   ret <4 x float> %3
  43 }
  44
  45 define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
     ; Multiplies lane 0 of %a by lane 0 of %b as scalars and inserts the
     ; product back into lane 0 of %a. The checks expect a lone mulss
     ; (vmulss with AVX) followed directly by the return.
  46 ; SSE-LABEL: test_mul_ss:
  47 ; SSE:       # BB#0:
  48 ; SSE-NEXT:    mulss %xmm1, %xmm0
  49 ; SSE-NEXT:    retq
  50 ;
  51 ; AVX-LABEL: test_mul_ss:
  52 ; AVX:       # BB#0:
  53 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
  54 ; AVX-NEXT:    retq
  55   %1 = extractelement <4 x float> %b, i32 0
  56   %2 = extractelement <4 x float> %a, i32 0
  57   %mul = fmul float %2, %1
  58   %3 = insertelement <4 x float> %a, float %mul, i32 0
  59   ret <4 x float> %3
  60 }
  61
  62 define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
     ; Computes %a[0] / %b[0] as a scalar fdiv and inserts the quotient back
     ; into lane 0 of %a. The checks expect a lone divss (vdivss with AVX)
     ; followed directly by the return.
  63 ; SSE-LABEL: test_div_ss:
  64 ; SSE:       # BB#0:
  65 ; SSE-NEXT:    divss %xmm1, %xmm0
  66 ; SSE-NEXT:    retq
  67 ;
  68 ; AVX-LABEL: test_div_ss:
  69 ; AVX:       # BB#0:
  70 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
  71 ; AVX-NEXT:    retq
  72   %1 = extractelement <4 x float> %b, i32 0
  73   %2 = extractelement <4 x float> %a, i32 0
  74   %div = fdiv float %2, %1
  75   %3 = insertelement <4 x float> %a, float %div, i32 0
  76   ret <4 x float> %3
  77 }
  78
  79 define <4 x float> @test_sqrt_ss(<4 x float> %a) {
     ; Applies llvm.sqrt.f32 to lane 0 of %a and inserts the result back into
     ; lane 0 of %a. Unlike the binary-op tests, the checks here still expect a
     ; merge step after the sqrt: movss under the SSE2 run, blendps/vblendps
     ; under the SSE4.1 and AVX runs.
  80 ; SSE2-LABEL: test_sqrt_ss:
  81 ; SSE2:       # BB#0:
  82 ; SSE2-NEXT:   sqrtss %xmm0, %xmm1
  83 ; SSE2-NEXT:   movss %xmm1, %xmm0
  84 ; SSE2-NEXT:   retq
  85 ;
  86 ; SSE41-LABEL: test_sqrt_ss:
  87 ; SSE41:       # BB#0:
  88 ; SSE41-NEXT:  sqrtss %xmm0, %xmm1
  89 ; SSE41-NEXT:  blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
  90 ; SSE41-NEXT:  retq
  91 ;
  92 ; AVX-LABEL: test_sqrt_ss:
  93 ; AVX:       # BB#0:
  94 ; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm1
  95 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
  96 ; AVX-NEXT:    retq
  97   %1 = extractelement <4 x float> %a, i32 0
  98   %2 = call float @llvm.sqrt.f32(float %1)
  99   %3 = insertelement <4 x float> %a, float %2, i32 0
 100   ret <4 x float> %3
 101 }
     ; Intrinsic declaration required by the call in test_sqrt_ss.
 102 declare float @llvm.sqrt.f32(float)
 103
 104 define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
     ; Double-precision variant: adds lane 0 of %a and %b as scalars and inserts
     ; the sum back into lane 0 of %a. The checks expect a lone addsd
     ; (vaddsd with AVX) followed directly by the return.
 105 ; SSE-LABEL: test_add_sd:
 106 ; SSE:       # BB#0:
 107 ; SSE-NEXT:    addsd %xmm1, %xmm0
 108 ; SSE-NEXT:    retq
 109 ;
 110 ; AVX-LABEL: test_add_sd:
 111 ; AVX:       # BB#0:
 112 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 113 ; AVX-NEXT:    retq
 114   %1 = extractelement <2 x double> %b, i32 0
 115   %2 = extractelement <2 x double> %a, i32 0
 116   %add = fadd double %2, %1
 117   %3 = insertelement <2 x double> %a, double %add, i32 0
 118   ret <2 x double> %3
 119 }
 120
 121 define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
     ; Double-precision variant: computes %a[0] - %b[0] as a scalar fsub and
     ; inserts the difference back into lane 0 of %a. The checks expect a lone
     ; subsd (vsubsd with AVX) followed directly by the return.
 122 ; SSE-LABEL: test_sub_sd:
 123 ; SSE:       # BB#0:
 124 ; SSE-NEXT:    subsd %xmm1, %xmm0
 125 ; SSE-NEXT:    retq
 126 ;
 127 ; AVX-LABEL: test_sub_sd:
 128 ; AVX:       # BB#0:
 129 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
 130 ; AVX-NEXT:    retq
 131   %1 = extractelement <2 x double> %b, i32 0
 132   %2 = extractelement <2 x double> %a, i32 0
 133   %sub = fsub double %2, %1
 134   %3 = insertelement <2 x double> %a, double %sub, i32 0
 135   ret <2 x double> %3
 136 }
 137
 138 define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
     ; Double-precision variant: multiplies lane 0 of %a by lane 0 of %b as
     ; scalars and inserts the product back into lane 0 of %a. The checks expect
     ; a lone mulsd (vmulsd with AVX) followed directly by the return.
 139 ; SSE-LABEL: test_mul_sd:
 140 ; SSE:       # BB#0:
 141 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 142 ; SSE-NEXT:    retq
 143 ;
 144 ; AVX-LABEL: test_mul_sd:
 145 ; AVX:       # BB#0:
 146 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 147 ; AVX-NEXT:    retq
 148   %1 = extractelement <2 x double> %b, i32 0
 149   %2 = extractelement <2 x double> %a, i32 0
 150   %mul = fmul double %2, %1
 151   %3 = insertelement <2 x double> %a, double %mul, i32 0
 152   ret <2 x double> %3
 153 }
 154
 155 define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
     ; Double-precision variant: computes %a[0] / %b[0] as a scalar fdiv and
     ; inserts the quotient back into lane 0 of %a. The checks expect a lone
     ; divsd (vdivsd with AVX) followed directly by the return.
 156 ; SSE-LABEL: test_div_sd:
 157 ; SSE:       # BB#0:
 158 ; SSE-NEXT:    divsd %xmm1, %xmm0
 159 ; SSE-NEXT:    retq
 160 ;
 161 ; AVX-LABEL: test_div_sd:
 162 ; AVX:       # BB#0:
 163 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 164 ; AVX-NEXT:    retq
 165   %1 = extractelement <2 x double> %b, i32 0
 166   %2 = extractelement <2 x double> %a, i32 0
 167   %div = fdiv double %2, %1
 168   %3 = insertelement <2 x double> %a, double %div, i32 0
 169   ret <2 x double> %3
 170 }
 171
 172 define <2 x double> @test_sqrt_sd(<2 x double> %a) {
     ; Applies llvm.sqrt.f64 to lane 0 of %a and inserts the result back into
     ; lane 0 of %a. The checks still expect a merge after the sqrt: sqrtsd into
     ; xmm1 followed by movsd (vsqrtsd followed by vmovsd with AVX).
 173 ; SSE-LABEL: test_sqrt_sd:
 174 ; SSE:       # BB#0:
 175 ; SSE-NEXT:    sqrtsd %xmm0, %xmm1
 176 ; SSE-NEXT:    movsd %xmm1, %xmm0
 177 ; SSE-NEXT:    retq
 178 ;
 179 ; AVX-LABEL: test_sqrt_sd:
 180 ; AVX:       # BB#0:
 181 ; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm1
 182 ; AVX-NEXT:    vmovsd %xmm1, %xmm0, %xmm0
 183 ; AVX-NEXT:    retq
 184   %1 = extractelement <2 x double> %a, i32 0
 185   %2 = call double @llvm.sqrt.f64(double %1)
 186   %3 = insertelement <2 x double> %a, double %2, i32 0
 187   ret <2 x double> %3
 188 }
     ; Intrinsic declaration required by the call in test_sqrt_sd.
 189 declare double @llvm.sqrt.f64(double)
 190
 191 define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
     ; Adds lane 0 of %a and %b as scalars, but inserts the sum into lane 0 of
     ; %b (the second argument). The legacy-SSE checks expect addss writing xmm1
     ; plus a movaps copy into xmm0; the AVX checks expect a single vaddss.
 192 ; SSE-LABEL: test2_add_ss:
 193 ; SSE:       # BB#0:
 194 ; SSE-NEXT:    addss %xmm0, %xmm1
 195 ; SSE-NEXT:    movaps %xmm1, %xmm0
 196 ; SSE-NEXT:    retq
 197 ;
 198 ; AVX-LABEL: test2_add_ss:
 199 ; AVX:       # BB#0:
 200 ; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 201 ; AVX-NEXT:    retq
 202   %1 = extractelement <4 x float> %a, i32 0
 203   %2 = extractelement <4 x float> %b, i32 0
 204   %add = fadd float %1, %2
 205   %3 = insertelement <4 x float> %b, float %add, i32 0
 206   ret <4 x float> %3
 207 }
 208
 209 define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
     ; Computes %b[0] - %a[0] as a scalar fsub and inserts the difference into
     ; lane 0 of %b. The legacy-SSE checks expect subss writing xmm1 plus a
     ; movaps copy into xmm0; the AVX checks expect a single vsubss.
 210 ; SSE-LABEL: test2_sub_ss:
 211 ; SSE:       # BB#0:
 212 ; SSE-NEXT:    subss %xmm0, %xmm1
 213 ; SSE-NEXT:    movaps %xmm1, %xmm0
 214 ; SSE-NEXT:    retq
 215 ;
 216 ; AVX-LABEL: test2_sub_ss:
 217 ; AVX:       # BB#0:
 218 ; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
 219 ; AVX-NEXT:    retq
 220   %1 = extractelement <4 x float> %a, i32 0
 221   %2 = extractelement <4 x float> %b, i32 0
 222   %sub = fsub float %2, %1
 223   %3 = insertelement <4 x float> %b, float %sub, i32 0
 224   ret <4 x float> %3
 225 }
 226
 227 define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
     ; Multiplies lane 0 of %a and %b as scalars and inserts the product into
     ; lane 0 of %b. The legacy-SSE checks expect mulss writing xmm1 plus a
     ; movaps copy into xmm0; the AVX checks expect a single vmulss.
 228 ; SSE-LABEL: test2_mul_ss:
 229 ; SSE:       # BB#0:
 230 ; SSE-NEXT:    mulss %xmm0, %xmm1
 231 ; SSE-NEXT:    movaps %xmm1, %xmm0
 232 ; SSE-NEXT:    retq
 233 ;
 234 ; AVX-LABEL: test2_mul_ss:
 235 ; AVX:       # BB#0:
 236 ; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 237 ; AVX-NEXT:    retq
 238   %1 = extractelement <4 x float> %a, i32 0
 239   %2 = extractelement <4 x float> %b, i32 0
 240   %mul = fmul float %1, %2
 241   %3 = insertelement <4 x float> %b, float %mul, i32 0
 242   ret <4 x float> %3
 243 }
 244
 245 define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
     ; Computes %b[0] / %a[0] as a scalar fdiv and inserts the quotient into
     ; lane 0 of %b. The legacy-SSE checks expect divss writing xmm1 plus a
     ; movaps copy into xmm0; the AVX checks expect a single vdivss.
 246 ; SSE-LABEL: test2_div_ss:
 247 ; SSE:       # BB#0:
 248 ; SSE-NEXT:    divss %xmm0, %xmm1
 249 ; SSE-NEXT:    movaps %xmm1, %xmm0
 250 ; SSE-NEXT:    retq
 251 ;
 252 ; AVX-LABEL: test2_div_ss:
 253 ; AVX:       # BB#0:
 254 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
 255 ; AVX-NEXT:    retq
 256   %1 = extractelement <4 x float> %a, i32 0
 257   %2 = extractelement <4 x float> %b, i32 0
 258   %div = fdiv float %2, %1
 259   %3 = insertelement <4 x float> %b, float %div, i32 0
 260   ret <4 x float> %3
 261 }
 262
 263 define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
     ; Double-precision variant: adds lane 0 of %a and %b as scalars and inserts
     ; the sum into lane 0 of %b. The legacy-SSE checks expect addsd writing
     ; xmm1 plus a movapd copy into xmm0; the AVX checks expect a single vaddsd.
 264 ; SSE-LABEL: test2_add_sd:
 265 ; SSE:       # BB#0:
 266 ; SSE-NEXT:    addsd %xmm0, %xmm1
 267 ; SSE-NEXT:    movapd %xmm1, %xmm0
 268 ; SSE-NEXT:    retq
 269 ;
 270 ; AVX-LABEL: test2_add_sd:
 271 ; AVX:       # BB#0:
 272 ; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
 273 ; AVX-NEXT:    retq
 274   %1 = extractelement <2 x double> %a, i32 0
 275   %2 = extractelement <2 x double> %b, i32 0
 276   %add = fadd double %1, %2
 277   %3 = insertelement <2 x double> %b, double %add, i32 0
 278   ret <2 x double> %3
 279 }
 280
 281 define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
     ; Double-precision variant: computes %b[0] - %a[0] and inserts the
     ; difference into lane 0 of %b. The legacy-SSE checks expect subsd writing
     ; xmm1 plus a movapd copy into xmm0; the AVX checks expect a single vsubsd.
 282 ; SSE-LABEL: test2_sub_sd:
 283 ; SSE:       # BB#0:
 284 ; SSE-NEXT:    subsd %xmm0, %xmm1
 285 ; SSE-NEXT:    movapd %xmm1, %xmm0
 286 ; SSE-NEXT:    retq
 287 ;
 288 ; AVX-LABEL: test2_sub_sd:
 289 ; AVX:       # BB#0:
 290 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
 291 ; AVX-NEXT:    retq
 292   %1 = extractelement <2 x double> %a, i32 0
 293   %2 = extractelement <2 x double> %b, i32 0
 294   %sub = fsub double %2, %1
 295   %3 = insertelement <2 x double> %b, double %sub, i32 0
 296   ret <2 x double> %3
 297 }
 298
 299 define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
     ; Double-precision variant: multiplies lane 0 of %a and %b and inserts the
     ; product into lane 0 of %b. The legacy-SSE checks expect mulsd writing
     ; xmm1 plus a movapd copy into xmm0; the AVX checks expect a single vmulsd.
 300 ; SSE-LABEL: test2_mul_sd:
 301 ; SSE:       # BB#0:
 302 ; SSE-NEXT:    mulsd %xmm0, %xmm1
 303 ; SSE-NEXT:    movapd %xmm1, %xmm0
 304 ; SSE-NEXT:    retq
 305 ;
 306 ; AVX-LABEL: test2_mul_sd:
 307 ; AVX:       # BB#0:
 308 ; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
 309 ; AVX-NEXT:    retq
 310   %1 = extractelement <2 x double> %a, i32 0
 311   %2 = extractelement <2 x double> %b, i32 0
 312   %mul = fmul double %1, %2
 313   %3 = insertelement <2 x double> %b, double %mul, i32 0
 314   ret <2 x double> %3
 315 }
 316
 317 define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
     ; Double-precision variant: computes %b[0] / %a[0] and inserts the quotient
     ; into lane 0 of %b. The legacy-SSE checks expect divsd writing xmm1 plus a
     ; movapd copy into xmm0; the AVX checks expect a single vdivsd.
 318 ; SSE-LABEL: test2_div_sd:
 319 ; SSE:       # BB#0:
 320 ; SSE-NEXT:    divsd %xmm0, %xmm1
 321 ; SSE-NEXT:    movapd %xmm1, %xmm0
 322 ; SSE-NEXT:    retq
 323 ;
 324 ; AVX-LABEL: test2_div_sd:
 325 ; AVX:       # BB#0:
 326 ; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
 327 ; AVX-NEXT:    retq
 328   %1 = extractelement <2 x double> %a, i32 0
 329   %2 = extractelement <2 x double> %b, i32 0
 330   %div = fdiv double %2, %1
 331   %3 = insertelement <2 x double> %b, double %div, i32 0
 332   ret <2 x double> %3
 333 }
 334
 335 define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
     ; Chains two scalar fadds, %a[0] + (%a[0] + %b[0]), and inserts the final
     ; sum into lane 0 of %a. The checks expect exactly two addss/vaddss
     ; instructions before the return, with no vector insert in between.
 336 ; SSE-LABEL: test_multiple_add_ss:
 337 ; SSE:       # BB#0:
 338 ; SSE-NEXT:    addss %xmm0, %xmm1
 339 ; SSE-NEXT:    addss %xmm1, %xmm0
 340 ; SSE-NEXT:    retq
 341 ;
 342 ; AVX-LABEL: test_multiple_add_ss:
 343 ; AVX:       # BB#0:
 344 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm1
 345 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 346 ; AVX-NEXT:    retq
 347   %1 = extractelement <4 x float> %b, i32 0
 348   %2 = extractelement <4 x float> %a, i32 0
 349   %add = fadd float %2, %1
 350   %add2 = fadd float %2, %add
 351   %3 = insertelement <4 x float> %a, float %add2, i32 0
 352   ret <4 x float> %3
 353 }
 354
 355 define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
     ; Chains two scalar fsubs, %a[0] - (%a[0] - %b[0]), and inserts the result
     ; into lane 0 of %a. The legacy-SSE checks expect a movaps copy of xmm0 into
     ; xmm2 before the two subss instructions; the AVX checks expect two vsubss.
 356 ; SSE-LABEL: test_multiple_sub_ss:
 357 ; SSE:       # BB#0:
 358 ; SSE-NEXT:    movaps %xmm0, %xmm2
 359 ; SSE-NEXT:    subss %xmm1, %xmm2
 360 ; SSE-NEXT:    subss %xmm2, %xmm0
 361 ; SSE-NEXT:    retq
 362 ;
 363 ; AVX-LABEL: test_multiple_sub_ss:
 364 ; AVX:       # BB#0:
 365 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm1
 366 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
 367 ; AVX-NEXT:    retq
 368   %1 = extractelement <4 x float> %b, i32 0
 369   %2 = extractelement <4 x float> %a, i32 0
 370   %sub = fsub float %2, %1
 371   %sub2 = fsub float %2, %sub
 372   %3 = insertelement <4 x float> %a, float %sub2, i32 0
 373   ret <4 x float> %3
 374 }
 375
 376 define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
     ; Chains two scalar fmuls, %a[0] * (%a[0] * %b[0]), and inserts the final
     ; product into lane 0 of %a. The checks expect exactly two mulss/vmulss
     ; instructions before the return, with no vector insert in between.
 377 ; SSE-LABEL: test_multiple_mul_ss:
 378 ; SSE:       # BB#0:
 379 ; SSE-NEXT:    mulss %xmm0, %xmm1
 380 ; SSE-NEXT:    mulss %xmm1, %xmm0
 381 ; SSE-NEXT:    retq
 382 ;
 383 ; AVX-LABEL: test_multiple_mul_ss:
 384 ; AVX:       # BB#0:
 385 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm1
 386 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 387 ; AVX-NEXT:    retq
 388   %1 = extractelement <4 x float> %b, i32 0
 389   %2 = extractelement <4 x float> %a, i32 0
 390   %mul = fmul float %2, %1
 391   %mul2 = fmul float %2, %mul
 392   %3 = insertelement <4 x float> %a, float %mul2, i32 0
 393   ret <4 x float> %3
 394 }
 395
 396 define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
     ; Chains two scalar fdivs, %a[0] / (%a[0] / %b[0]), and inserts the result
     ; into lane 0 of %a. The legacy-SSE checks expect a movaps copy of xmm0 into
     ; xmm2 before the two divss instructions; the AVX checks expect two vdivss.
 397 ; SSE-LABEL: test_multiple_div_ss:
 398 ; SSE:       # BB#0:
 399 ; SSE-NEXT:    movaps %xmm0, %xmm2
 400 ; SSE-NEXT:    divss %xmm1, %xmm2
 401 ; SSE-NEXT:    divss %xmm2, %xmm0
 402 ; SSE-NEXT:    retq
 403 ;
 404 ; AVX-LABEL: test_multiple_div_ss:
 405 ; AVX:       # BB#0:
 406 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm1
 407 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 408 ; AVX-NEXT:    retq
 409   %1 = extractelement <4 x float> %b, i32 0
 410   %2 = extractelement <4 x float> %a, i32 0
 411   %div = fdiv float %2, %1
 412   %div2 = fdiv float %2, %div
 413   %3 = insertelement <4 x float> %a, float %div2, i32 0
 414   ret <4 x float> %3
 415 }
 416
 417 ; With SSE4.1 or greater, the shuffles in the following tests may
 418 ; be lowered to X86Blendi nodes.
 419
 420 define <4 x float> @blend_add_ss(<4 x float> %a, float %b) {
     ; %b is a scalar argument here. %b + %a[0] is inserted into lane 0 of an
     ; undef vector, then a shufflevector takes lane 0 from that vector and
     ; lanes 1-3 from %a. The checks expect a single addss/vaddss.
 421 ; SSE-LABEL: blend_add_ss:
 422 ; SSE:       # BB#0:
 423 ; SSE-NEXT:    addss %xmm1, %xmm0
 424 ; SSE-NEXT:    retq
 425 ;
 426 ; AVX-LABEL: blend_add_ss:
 427 ; AVX:       # BB#0:
 428 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 429 ; AVX-NEXT:    retq
 430
 431   %ext = extractelement <4 x float> %a, i32 0
 432   %op = fadd float %b, %ext
 433   %ins = insertelement <4 x float> undef, float %op, i32 0
 434   %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 435   ret <4 x float> %shuf
 436 }
 437
 438 define <4 x float> @blend_sub_ss(<4 x float> %a, float %b) {
     ; %b is a scalar argument here. %a[0] - %b is inserted into lane 0 of an
     ; undef vector, then a shufflevector takes lane 0 from that vector and
     ; lanes 1-3 from %a. The checks expect a single subss/vsubss.
 439 ; SSE-LABEL: blend_sub_ss:
 440 ; SSE:       # BB#0:
 441 ; SSE-NEXT:    subss %xmm1, %xmm0
 442 ; SSE-NEXT:    retq
 443 ;
 444 ; AVX-LABEL: blend_sub_ss:
 445 ; AVX:       # BB#0:
 446 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
 447 ; AVX-NEXT:    retq
 448
 449   %ext = extractelement <4 x float> %a, i32 0
 450   %op = fsub float %ext, %b
 451   %ins = insertelement <4 x float> undef, float %op, i32 0
 452   %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 453   ret <4 x float> %shuf
 454 }
 455
 456 define <4 x float> @blend_mul_ss(<4 x float> %a, float %b) {
     ; %b is a scalar argument here. %b * %a[0] is inserted into lane 0 of an
     ; undef vector, then a shufflevector takes lane 0 from that vector and
     ; lanes 1-3 from %a. The checks expect a single mulss/vmulss.
 457 ; SSE-LABEL: blend_mul_ss:
 458 ; SSE:       # BB#0:
 459 ; SSE-NEXT:    mulss %xmm1, %xmm0
 460 ; SSE-NEXT:    retq
 461 ;
 462 ; AVX-LABEL: blend_mul_ss:
 463 ; AVX:       # BB#0:
 464 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 465 ; AVX-NEXT:    retq
 466
 467   %ext = extractelement <4 x float> %a, i32 0
 468   %op = fmul float %b, %ext
 469   %ins = insertelement <4 x float> undef, float %op, i32 0
 470   %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 471   ret <4 x float> %shuf
 472 }
 473
 474 define <4 x float> @blend_div_ss(<4 x float> %a, float %b) {
     ; %b is a scalar argument here. %a[0] / %b is inserted into lane 0 of an
     ; undef vector, then a shufflevector takes lane 0 from that vector and
     ; lanes 1-3 from %a. The checks expect a single divss/vdivss.
 475 ; SSE-LABEL: blend_div_ss:
 476 ; SSE:       # BB#0:
 477 ; SSE-NEXT:    divss %xmm1, %xmm0
 478 ; SSE-NEXT:    retq
 479 ;
 480 ; AVX-LABEL: blend_div_ss:
 481 ; AVX:       # BB#0:
 482 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 483 ; AVX-NEXT:    retq
 484
 485   %ext = extractelement <4 x float> %a, i32 0
 486   %op = fdiv float %ext, %b
 487   %ins = insertelement <4 x float> undef, float %op, i32 0
 488   %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 489   ret <4 x float> %shuf
 490 }
 491
 492 define <2 x double> @blend_add_sd(<2 x double> %a, double %b) {
     ; Double-precision variant with a scalar %b. %b + %a[0] is inserted into
     ; lane 0 of an undef vector, then a shufflevector takes lane 0 from that
     ; vector and lane 1 from %a. The checks expect a single addsd/vaddsd.
 493 ; SSE-LABEL: blend_add_sd:
 494 ; SSE:       # BB#0:
 495 ; SSE-NEXT:    addsd %xmm1, %xmm0
 496 ; SSE-NEXT:    retq
 497 ;
 498 ; AVX-LABEL: blend_add_sd:
 499 ; AVX:       # BB#0:
 500 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 501 ; AVX-NEXT:    retq
 502
 503   %ext = extractelement <2 x double> %a, i32 0
 504   %op = fadd double %b, %ext
 505   %ins = insertelement <2 x double> undef, double %op, i32 0
 506   %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
 507   ret <2 x double> %shuf
 508 }
 509
 510 define <2 x double> @blend_sub_sd(<2 x double> %a, double %b) {
     ; Double-precision variant with a scalar %b. %a[0] - %b is inserted into
     ; lane 0 of an undef vector, then a shufflevector takes lane 0 from that
     ; vector and lane 1 from %a. The checks expect a single subsd/vsubsd.
 511 ; SSE-LABEL: blend_sub_sd:
 512 ; SSE:       # BB#0:
 513 ; SSE-NEXT:    subsd %xmm1, %xmm0
 514 ; SSE-NEXT:    retq
 515 ;
 516 ; AVX-LABEL: blend_sub_sd:
 517 ; AVX:       # BB#0:
 518 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
 519 ; AVX-NEXT:    retq
 520
 521   %ext = extractelement <2 x double> %a, i32 0
 522   %op = fsub double %ext, %b
 523   %ins = insertelement <2 x double> undef, double %op, i32 0
 524   %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
 525   ret <2 x double> %shuf
 526 }
 527
 528 define <2 x double> @blend_mul_sd(<2 x double> %a, double %b) {
     ; Double-precision variant with a scalar %b. %b * %a[0] is inserted into
     ; lane 0 of an undef vector, then a shufflevector takes lane 0 from that
     ; vector and lane 1 from %a. The checks expect a single mulsd/vmulsd.
 529 ; SSE-LABEL: blend_mul_sd:
 530 ; SSE:       # BB#0:
 531 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 532 ; SSE-NEXT:    retq
 533 ;
 534 ; AVX-LABEL: blend_mul_sd:
 535 ; AVX:       # BB#0:
 536 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 537 ; AVX-NEXT:    retq
 538
 539   %ext = extractelement <2 x double> %a, i32 0
 540   %op = fmul double %b, %ext
 541   %ins = insertelement <2 x double> undef, double %op, i32 0
 542   %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
 543   ret <2 x double> %shuf
 544 }
 545
 546 define <2 x double> @blend_div_sd(<2 x double> %a, double %b) {
     ; Double-precision variant with a scalar %b. %a[0] / %b is inserted into
     ; lane 0 of an undef vector, then a shufflevector takes lane 0 from that
     ; vector and lane 1 from %a. The checks expect a single divsd/vdivsd.
 547 ; SSE-LABEL: blend_div_sd:
 548 ; SSE:       # BB#0:
 549 ; SSE-NEXT:    divsd %xmm1, %xmm0
 550 ; SSE-NEXT:    retq
 551 ;
 552 ; AVX-LABEL: blend_div_sd:
 553 ; AVX:       # BB#0:
 554 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 555 ; AVX-NEXT:    retq
 556
 557   %ext = extractelement <2 x double> %a, i32 0
 558   %op = fdiv double %ext, %b
 559   %ins = insertelement <2 x double> undef, double %op, i32 0
 560   %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
 561   ret <2 x double> %shuf
 562 }
 563
 564 ; Ensure that the backend selects SSE/AVX scalar fp instructions
 565 ; from a packed fp instruction plus a vector insert.
 566
 567 define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fadd of %a and %b, followed by a shufflevector that keeps lane 0
     ; of the sum and lanes 1-3 of %a. The checks expect this to be selected as
     ; a single scalar addss/vaddss rather than a packed add plus a blend.
 568 ; SSE-LABEL: insert_test_add_ss:
 569 ; SSE:       # BB#0:
 570 ; SSE-NEXT:    addss %xmm1, %xmm0
 571 ; SSE-NEXT:    retq
 572 ;
 573 ; AVX-LABEL: insert_test_add_ss:
 574 ; AVX:       # BB#0:
 575 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 576 ; AVX-NEXT:    retq
 577   %1 = fadd <4 x float> %a, %b
 578   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 579   ret <4 x float> %2
 580 }
 581
 582 define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fsub (%a - %b), followed by a shufflevector that keeps lane 0 of
     ; the difference and lanes 1-3 of %a. The checks expect this to be selected
     ; as a single scalar subss/vsubss.
 583 ; SSE-LABEL: insert_test_sub_ss:
 584 ; SSE:       # BB#0:
 585 ; SSE-NEXT:    subss %xmm1, %xmm0
 586 ; SSE-NEXT:    retq
 587 ;
 588 ; AVX-LABEL: insert_test_sub_ss:
 589 ; AVX:       # BB#0:
 590 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
 591 ; AVX-NEXT:    retq
 592   %1 = fsub <4 x float> %a, %b
 593   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 594   ret <4 x float> %2
 595 }
 596
 597 define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fmul of %a and %b, followed by a shufflevector that keeps lane 0
     ; of the product and lanes 1-3 of %a. The checks expect this to be selected
     ; as a single scalar mulss/vmulss.
 598 ; SSE-LABEL: insert_test_mul_ss:
 599 ; SSE:       # BB#0:
 600 ; SSE-NEXT:    mulss %xmm1, %xmm0
 601 ; SSE-NEXT:    retq
 602 ;
 603 ; AVX-LABEL: insert_test_mul_ss:
 604 ; AVX:       # BB#0:
 605 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 606 ; AVX-NEXT:    retq
 607   %1 = fmul <4 x float> %a, %b
 608   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 609   ret <4 x float> %2
 610 }
 611
 612 define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fdiv (%a / %b), followed by a shufflevector that keeps lane 0 of
     ; the quotient and lanes 1-3 of %a. The checks expect this to be selected
     ; as a single scalar divss/vdivss.
 613 ; SSE-LABEL: insert_test_div_ss:
 614 ; SSE:       # BB#0:
 615 ; SSE-NEXT:    divss %xmm1, %xmm0
 616 ; SSE-NEXT:    retq
 617 ;
 618 ; AVX-LABEL: insert_test_div_ss:
 619 ; AVX:       # BB#0:
 620 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 621 ; AVX-NEXT:    retq
 622   %1 = fdiv <4 x float> %a, %b
 623   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 624   ret <4 x float> %2
 625 }
 626
 627 define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fadd of %a and %b, followed by a shufflevector that keeps
     ; lane 0 of the sum and lane 1 of %a. The checks expect this to be selected
     ; as a single scalar addsd/vaddsd.
 628 ; SSE-LABEL: insert_test_add_sd:
 629 ; SSE:       # BB#0:
 630 ; SSE-NEXT:    addsd %xmm1, %xmm0
 631 ; SSE-NEXT:    retq
 632 ;
 633 ; AVX-LABEL: insert_test_add_sd:
 634 ; AVX:       # BB#0:
 635 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 636 ; AVX-NEXT:    retq
 637   %1 = fadd <2 x double> %a, %b
 638   %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
 639   ret <2 x double> %2
 640 }
 641
 642 define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fsub (%a - %b), followed by a shufflevector that keeps
     ; lane 0 of the difference and lane 1 of %a. The checks expect this to be
     ; selected as a single scalar subsd/vsubsd.
 643 ; SSE-LABEL: insert_test_sub_sd:
 644 ; SSE:       # BB#0:
 645 ; SSE-NEXT:    subsd %xmm1, %xmm0
 646 ; SSE-NEXT:    retq
 647 ;
 648 ; AVX-LABEL: insert_test_sub_sd:
 649 ; AVX:       # BB#0:
 650 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
 651 ; AVX-NEXT:    retq
 652   %1 = fsub <2 x double> %a, %b
 653   %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
 654   ret <2 x double> %2
 655 }
 656
 657 define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fmul of %a and %b, followed by a shufflevector that keeps
     ; lane 0 of the product and lane 1 of %a. The checks expect this to be
     ; selected as a single scalar mulsd/vmulsd.
 658 ; SSE-LABEL: insert_test_mul_sd:
 659 ; SSE:       # BB#0:
 660 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 661 ; SSE-NEXT:    retq
 662 ;
 663 ; AVX-LABEL: insert_test_mul_sd:
 664 ; AVX:       # BB#0:
 665 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 666 ; AVX-NEXT:    retq
 667   %1 = fmul <2 x double> %a, %b
 668   %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
 669   ret <2 x double> %2
 670 }
 671
 672 define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fdiv (%a / %b), followed by a shufflevector that keeps
     ; lane 0 of the quotient and lane 1 of %a. The checks expect this to be
     ; selected as a single scalar divsd/vdivsd.
 673 ; SSE-LABEL: insert_test_div_sd:
 674 ; SSE:       # BB#0:
 675 ; SSE-NEXT:    divsd %xmm1, %xmm0
 676 ; SSE-NEXT:    retq
 677 ;
 678 ; AVX-LABEL: insert_test_div_sd:
 679 ; AVX:       # BB#0:
 680 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 681 ; AVX-NEXT:    retq
 682   %1 = fdiv <2 x double> %a, %b
 683   %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
 684   ret <2 x double> %2
 685 }
 686
 687 define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fadd (%b + %a), followed by a shufflevector that keeps lane 0 of
     ; the sum and lanes 1-3 of %b. The legacy-SSE checks expect addss writing
     ; xmm1 plus a movaps copy into xmm0; the AVX checks expect a single vaddss.
 688 ; SSE-LABEL: insert_test2_add_ss:
 689 ; SSE:       # BB#0:
 690 ; SSE-NEXT:    addss %xmm0, %xmm1
 691 ; SSE-NEXT:    movaps %xmm1, %xmm0
 692 ; SSE-NEXT:    retq
 693 ;
 694 ; AVX-LABEL: insert_test2_add_ss:
 695 ; AVX:       # BB#0:
 696 ; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 697 ; AVX-NEXT:    retq
 698   %1 = fadd <4 x float> %b, %a
 699   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 700   ret <4 x float> %2
 701 }
 702
 703 define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fsub (%b - %a), followed by a shufflevector that keeps lane 0 of
     ; the difference and lanes 1-3 of %b. The legacy-SSE checks expect subss
     ; writing xmm1 plus a movaps copy into xmm0; AVX expects a single vsubss.
 704 ; SSE-LABEL: insert_test2_sub_ss:
 705 ; SSE:       # BB#0:
 706 ; SSE-NEXT:    subss %xmm0, %xmm1
 707 ; SSE-NEXT:    movaps %xmm1, %xmm0
 708 ; SSE-NEXT:    retq
 709 ;
 710 ; AVX-LABEL: insert_test2_sub_ss:
 711 ; AVX:       # BB#0:
 712 ; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
 713 ; AVX-NEXT:    retq
 714   %1 = fsub <4 x float> %b, %a
 715   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 716   ret <4 x float> %2
 717 }
 718
 719 define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fmul (%b * %a), followed by a shufflevector that keeps lane 0 of
     ; the product and lanes 1-3 of %b. The legacy-SSE checks expect mulss
     ; writing xmm1 plus a movaps copy into xmm0; AVX expects a single vmulss.
 720 ; SSE-LABEL: insert_test2_mul_ss:
 721 ; SSE:       # BB#0:
 722 ; SSE-NEXT:    mulss %xmm0, %xmm1
 723 ; SSE-NEXT:    movaps %xmm1, %xmm0
 724 ; SSE-NEXT:    retq
 725 ;
 726 ; AVX-LABEL: insert_test2_mul_ss:
 727 ; AVX:       # BB#0:
 728 ; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 729 ; AVX-NEXT:    retq
 730   %1 = fmul <4 x float> %b, %a
 731   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 732   ret <4 x float> %2
 733 }
 734
 735 define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fdiv (%b / %a), followed by a shufflevector that keeps lane 0 of
     ; the quotient and lanes 1-3 of %b. The legacy-SSE checks expect divss
     ; writing xmm1 plus a movaps copy into xmm0; AVX expects a single vdivss.
 736 ; SSE-LABEL: insert_test2_div_ss:
 737 ; SSE:       # BB#0:
 738 ; SSE-NEXT:    divss %xmm0, %xmm1
 739 ; SSE-NEXT:    movaps %xmm1, %xmm0
 740 ; SSE-NEXT:    retq
 741 ;
 742 ; AVX-LABEL: insert_test2_div_ss:
 743 ; AVX:       # BB#0:
 744 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
 745 ; AVX-NEXT:    retq
 746   %1 = fdiv <4 x float> %b, %a
 747   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 748   ret <4 x float> %2
 749 }
 750
 751 define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fadd (%b + %a), followed by a shufflevector that keeps
     ; lane 0 of the sum and lane 1 of %b. The legacy-SSE checks expect addsd
     ; writing xmm1 plus a movapd copy into xmm0; AVX expects a single vaddsd.
 752 ; SSE-LABEL: insert_test2_add_sd:
 753 ; SSE:       # BB#0:
 754 ; SSE-NEXT:    addsd %xmm0, %xmm1
 755 ; SSE-NEXT:    movapd %xmm1, %xmm0
 756 ; SSE-NEXT:    retq
 757 ;
 758 ; AVX-LABEL: insert_test2_add_sd:
 759 ; AVX:       # BB#0:
 760 ; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
 761 ; AVX-NEXT:    retq
 762   %1 = fadd <2 x double> %b, %a
 763   %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
 764   ret <2 x double> %2
 765 }
 766
 767 define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fsub (%b - %a), followed by a shufflevector that keeps
     ; lane 0 of the difference and lane 1 of %b. The legacy-SSE checks expect
     ; subsd writing xmm1 plus a movapd copy into xmm0; AVX expects one vsubsd.
 768 ; SSE-LABEL: insert_test2_sub_sd:
 769 ; SSE:       # BB#0:
 770 ; SSE-NEXT:    subsd %xmm0, %xmm1
 771 ; SSE-NEXT:    movapd %xmm1, %xmm0
 772 ; SSE-NEXT:    retq
 773 ;
 774 ; AVX-LABEL: insert_test2_sub_sd:
 775 ; AVX:       # BB#0:
 776 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
 777 ; AVX-NEXT:    retq
 778   %1 = fsub <2 x double> %b, %a
 779   %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
 780   ret <2 x double> %2
 781 }
 782
 783 define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fmul (%b * %a), followed by a shufflevector that keeps
     ; lane 0 of the product and lane 1 of %b. The legacy-SSE checks expect
     ; mulsd writing xmm1 plus a movapd copy into xmm0; AVX expects one vmulsd.
 784 ; SSE-LABEL: insert_test2_mul_sd:
 785 ; SSE:       # BB#0:
 786 ; SSE-NEXT:    mulsd %xmm0, %xmm1
 787 ; SSE-NEXT:    movapd %xmm1, %xmm0
 788 ; SSE-NEXT:    retq
 789 ;
 790 ; AVX-LABEL: insert_test2_mul_sd:
 791 ; AVX:       # BB#0:
 792 ; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
 793 ; AVX-NEXT:    retq
 794   %1 = fmul <2 x double> %b, %a
 795   %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
 796   ret <2 x double> %2
 797 }
 798
 799 define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fdiv (%b / %a), followed by a shufflevector that keeps
     ; lane 0 of the quotient and lane 1 of %b. The legacy-SSE checks expect
     ; divsd writing xmm1 plus a movapd copy into xmm0; AVX expects one vdivsd.
 800 ; SSE-LABEL: insert_test2_div_sd:
 801 ; SSE:       # BB#0:
 802 ; SSE-NEXT:    divsd %xmm0, %xmm1
 803 ; SSE-NEXT:    movapd %xmm1, %xmm0
 804 ; SSE-NEXT:    retq
 805 ;
 806 ; AVX-LABEL: insert_test2_div_sd:
 807 ; AVX:       # BB#0:
 808 ; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
 809 ; AVX-NEXT:    retq
 810   %1 = fdiv <2 x double> %b, %a
 811   %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
 812   ret <2 x double> %2
 813 }
 814
 815 define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fadd of %a and %b, merged with %a through a select whose constant
     ; mask is false only in lane 0, so lane 0 comes from the sum and lanes 1-3
     ; from %a. The checks expect a single scalar addss/vaddss.
 816 ; SSE-LABEL: insert_test3_add_ss:
 817 ; SSE:       # BB#0:
 818 ; SSE-NEXT:    addss %xmm1, %xmm0
 819 ; SSE-NEXT:    retq
 820 ;
 821 ; AVX-LABEL: insert_test3_add_ss:
 822 ; AVX:       # BB#0:
 823 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 824 ; AVX-NEXT:    retq
 825   %1 = fadd <4 x float> %a, %b
 826   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
 827   ret <4 x float> %2
 828 }
 829
 830 define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fsub (%a - %b), merged with %a through a select whose constant
     ; mask is false only in lane 0, so lane 0 comes from the difference and
     ; lanes 1-3 from %a. The checks expect a single scalar subss/vsubss.
 831 ; SSE-LABEL: insert_test3_sub_ss:
 832 ; SSE:       # BB#0:
 833 ; SSE-NEXT:    subss %xmm1, %xmm0
 834 ; SSE-NEXT:    retq
 835 ;
 836 ; AVX-LABEL: insert_test3_sub_ss:
 837 ; AVX:       # BB#0:
 838 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
 839 ; AVX-NEXT:    retq
 840   %1 = fsub <4 x float> %a, %b
 841   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
 842   ret <4 x float> %2
 843 }
 844
 845 define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fmul of %a and %b, merged with %a through a select whose constant
     ; mask is false only in lane 0, so lane 0 comes from the product and
     ; lanes 1-3 from %a. The checks expect a single scalar mulss/vmulss.
 846 ; SSE-LABEL: insert_test3_mul_ss:
 847 ; SSE:       # BB#0:
 848 ; SSE-NEXT:    mulss %xmm1, %xmm0
 849 ; SSE-NEXT:    retq
 850 ;
 851 ; AVX-LABEL: insert_test3_mul_ss:
 852 ; AVX:       # BB#0:
 853 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 854 ; AVX-NEXT:    retq
 855   %1 = fmul <4 x float> %a, %b
 856   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
 857   ret <4 x float> %2
 858 }
 859
 860 define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fdiv (%a / %b), merged with %a through a select whose constant
     ; mask is false only in lane 0, so lane 0 comes from the quotient and
     ; lanes 1-3 from %a. The checks expect a single scalar divss/vdivss.
 861 ; SSE-LABEL: insert_test3_div_ss:
 862 ; SSE:       # BB#0:
 863 ; SSE-NEXT:    divss %xmm1, %xmm0
 864 ; SSE-NEXT:    retq
 865 ;
 866 ; AVX-LABEL: insert_test3_div_ss:
 867 ; AVX:       # BB#0:
 868 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 869 ; AVX-NEXT:    retq
 870   %1 = fdiv <4 x float> %a, %b
 871   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
 872   ret <4 x float> %2
 873 }
 874
 875 define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fadd of %a and %b, merged with %a through a select whose
     ; constant mask is <false, true>, so lane 0 comes from the sum and lane 1
     ; from %a. The checks expect a single scalar addsd/vaddsd.
 876 ; SSE-LABEL: insert_test3_add_sd:
 877 ; SSE:       # BB#0:
 878 ; SSE-NEXT:    addsd %xmm1, %xmm0
 879 ; SSE-NEXT:    retq
 880 ;
 881 ; AVX-LABEL: insert_test3_add_sd:
 882 ; AVX:       # BB#0:
 883 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 884 ; AVX-NEXT:    retq
 885   %1 = fadd <2 x double> %a, %b
 886   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
 887   ret <2 x double> %2
 888 }
 889
 890 define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fsub (%a - %b), merged with %a through a select whose
     ; constant mask is <false, true>, so lane 0 comes from the difference and
     ; lane 1 from %a. The checks expect a single scalar subsd/vsubsd.
 891 ; SSE-LABEL: insert_test3_sub_sd:
 892 ; SSE:       # BB#0:
 893 ; SSE-NEXT:    subsd %xmm1, %xmm0
 894 ; SSE-NEXT:    retq
 895 ;
 896 ; AVX-LABEL: insert_test3_sub_sd:
 897 ; AVX:       # BB#0:
 898 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
 899 ; AVX-NEXT:    retq
 900   %1 = fsub <2 x double> %a, %b
 901   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
 902   ret <2 x double> %2
 903 }
 904
 905 define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fmul of %a and %b, merged with %a through a select whose
     ; constant mask is <false, true>, so lane 0 comes from the product and
     ; lane 1 from %a. The checks expect a single scalar mulsd/vmulsd.
 906 ; SSE-LABEL: insert_test3_mul_sd:
 907 ; SSE:       # BB#0:
 908 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 909 ; SSE-NEXT:    retq
 910 ;
 911 ; AVX-LABEL: insert_test3_mul_sd:
 912 ; AVX:       # BB#0:
 913 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 914 ; AVX-NEXT:    retq
 915   %1 = fmul <2 x double> %a, %b
 916   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
 917   ret <2 x double> %2
 918 }
 919
 920 define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fdiv (%a / %b), merged with %a through a select whose
     ; constant mask is <false, true>, so lane 0 comes from the quotient and
     ; lane 1 from %a. The checks expect a single scalar divsd/vdivsd.
 921 ; SSE-LABEL: insert_test3_div_sd:
 922 ; SSE:       # BB#0:
 923 ; SSE-NEXT:    divsd %xmm1, %xmm0
 924 ; SSE-NEXT:    retq
 925 ;
 926 ; AVX-LABEL: insert_test3_div_sd:
 927 ; AVX:       # BB#0:
 928 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 929 ; AVX-NEXT:    retq
 930   %1 = fdiv <2 x double> %a, %b
 931   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
 932   ret <2 x double> %2
 933 }
 934
 935 define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) { ; result is <b[0]+a[0], b[1], b[2], b[3]>; built on %b, so the non-AVX checks expect the scalar add into xmm1 plus one register copy to xmm0
 936 ; SSE-LABEL: insert_test4_add_ss:
 937 ; SSE:       # BB#0:
 938 ; SSE-NEXT:    addss %xmm0, %xmm1
 939 ; SSE-NEXT:    movaps %xmm1, %xmm0
 940 ; SSE-NEXT:    retq
 941 ;
 942 ; AVX-LABEL: insert_test4_add_ss:
 943 ; AVX:       # BB#0:
 944 ; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 945 ; AVX-NEXT:    retq
 946   %1 = fadd <4 x float> %b, %a ; full-width add with %b as the first operand; only lane 0 of it is kept
 947   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1 ; false lane (0) takes %1, true lanes (1-3) take %b
 948   ret <4 x float> %2
 949 }
 950
 951 define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) { ; result is <b[0]-a[0], b[1], b[2], b[3]>; built on %b, so the non-AVX checks expect the scalar subtract into xmm1 plus one register copy to xmm0
 952 ; SSE-LABEL: insert_test4_sub_ss:
 953 ; SSE:       # BB#0:
 954 ; SSE-NEXT:    subss %xmm0, %xmm1
 955 ; SSE-NEXT:    movaps %xmm1, %xmm0
 956 ; SSE-NEXT:    retq
 957 ;
 958 ; AVX-LABEL: insert_test4_sub_ss:
 959 ; AVX:       # BB#0:
 960 ; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
 961 ; AVX-NEXT:    retq
 962   %1 = fsub <4 x float> %b, %a ; full-width b - a (note the operand order); only lane 0 of it is kept
 963   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1 ; false lane (0) takes %1, true lanes (1-3) take %b
 964   ret <4 x float> %2
 965 }
 966
 967 define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) { ; result is <b[0]*a[0], b[1], b[2], b[3]>; built on %b, so the non-AVX checks expect the scalar multiply into xmm1 plus one register copy to xmm0
 968 ; SSE-LABEL: insert_test4_mul_ss:
 969 ; SSE:       # BB#0:
 970 ; SSE-NEXT:    mulss %xmm0, %xmm1
 971 ; SSE-NEXT:    movaps %xmm1, %xmm0
 972 ; SSE-NEXT:    retq
 973 ;
 974 ; AVX-LABEL: insert_test4_mul_ss:
 975 ; AVX:       # BB#0:
 976 ; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 977 ; AVX-NEXT:    retq
 978   %1 = fmul <4 x float> %b, %a ; full-width multiply with %b as the first operand; only lane 0 of it is kept
 979   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1 ; false lane (0) takes %1, true lanes (1-3) take %b
 980   ret <4 x float> %2
 981 }
 982
 983 define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) { ; result is <b[0]/a[0], b[1], b[2], b[3]>; built on %b, so the non-AVX checks expect the scalar divide into xmm1 plus one register copy to xmm0
 984 ; SSE-LABEL: insert_test4_div_ss:
 985 ; SSE:       # BB#0:
 986 ; SSE-NEXT:    divss %xmm0, %xmm1
 987 ; SSE-NEXT:    movaps %xmm1, %xmm0
 988 ; SSE-NEXT:    retq
 989 ;
 990 ; AVX-LABEL: insert_test4_div_ss:
 991 ; AVX:       # BB#0:
 992 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
 993 ; AVX-NEXT:    retq
 994   %1 = fdiv <4 x float> %b, %a ; full-width b / a (note the operand order); only lane 0 of it is kept
 995   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1 ; false lane (0) takes %1, true lanes (1-3) take %b
 996   ret <4 x float> %2
 997 }
 998
 999 define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) { ; result is <b[0]+a[0], b[1]>; built on %b, so the non-AVX checks expect the scalar add into xmm1 plus one register copy to xmm0
1000 ; SSE-LABEL: insert_test4_add_sd:
1001 ; SSE:       # BB#0:
1002 ; SSE-NEXT:    addsd %xmm0, %xmm1
1003 ; SSE-NEXT:    movapd %xmm1, %xmm0
1004 ; SSE-NEXT:    retq
1005 ;
1006 ; AVX-LABEL: insert_test4_add_sd:
1007 ; AVX:       # BB#0:
1008 ; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
1009 ; AVX-NEXT:    retq
1010   %1 = fadd <2 x double> %b, %a ; full-width add with %b as the first operand; only lane 0 of it is kept
1011   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1 ; false lane (0) takes %1, true lane (1) takes %b
1012   ret <2 x double> %2
1013 }
1014
1015 define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) { ; result is <b[0]-a[0], b[1]>; built on %b, so the non-AVX checks expect the scalar subtract into xmm1 plus one register copy to xmm0
1016 ; SSE-LABEL: insert_test4_sub_sd:
1017 ; SSE:       # BB#0:
1018 ; SSE-NEXT:    subsd %xmm0, %xmm1
1019 ; SSE-NEXT:    movapd %xmm1, %xmm0
1020 ; SSE-NEXT:    retq
1021 ;
1022 ; AVX-LABEL: insert_test4_sub_sd:
1023 ; AVX:       # BB#0:
1024 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
1025 ; AVX-NEXT:    retq
1026   %1 = fsub <2 x double> %b, %a ; full-width b - a (note the operand order); only lane 0 of it is kept
1027   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1 ; false lane (0) takes %1, true lane (1) takes %b
1028   ret <2 x double> %2
1029 }
1030
1031 define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) { ; result is <b[0]*a[0], b[1]>; built on %b, so the non-AVX checks expect the scalar multiply into xmm1 plus one register copy to xmm0
1032 ; SSE-LABEL: insert_test4_mul_sd:
1033 ; SSE:       # BB#0:
1034 ; SSE-NEXT:    mulsd %xmm0, %xmm1
1035 ; SSE-NEXT:    movapd %xmm1, %xmm0
1036 ; SSE-NEXT:    retq
1037 ;
1038 ; AVX-LABEL: insert_test4_mul_sd:
1039 ; AVX:       # BB#0:
1040 ; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
1041 ; AVX-NEXT:    retq
1042   %1 = fmul <2 x double> %b, %a ; full-width multiply with %b as the first operand; only lane 0 of it is kept
1043   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1 ; false lane (0) takes %1, true lane (1) takes %b
1044   ret <2 x double> %2
1045 }
1046
1047 define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) { ; result is <b[0]/a[0], b[1]>; built on %b, so the non-AVX checks expect the scalar divide into xmm1 plus one register copy to xmm0
1048 ; SSE-LABEL: insert_test4_div_sd:
1049 ; SSE:       # BB#0:
1050 ; SSE-NEXT:    divsd %xmm0, %xmm1
1051 ; SSE-NEXT:    movapd %xmm1, %xmm0
1052 ; SSE-NEXT:    retq
1053 ;
1054 ; AVX-LABEL: insert_test4_div_sd:
1055 ; AVX:       # BB#0:
1056 ; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
1057 ; AVX-NEXT:    retq
1058   %1 = fdiv <2 x double> %b, %a ; full-width b / a (note the operand order); only lane 0 of it is kept
1059   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1 ; false lane (0) takes %1, true lane (1) takes %b
1060   ret <2 x double> %2
1061 }