test/CodeGen/X86/sse-scalar-fp-arith.ll

   1 ; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
   2 ; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s -x86-experimental-vector-shuffle-lowering | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
   3 ; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
   4 ; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s -x86-experimental-vector-shuffle-lowering | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
   5 ; RUN: llc -mcpu=x86-64 -mattr=+avx < %s | FileCheck --check-prefix=AVX %s
   6 ; RUN: llc -mcpu=x86-64 -mattr=+avx < %s -x86-experimental-vector-shuffle-lowering | FileCheck --check-prefix=AVX %s
   7
   8 target triple = "x86_64-unknown-unknown"
   9
  10 ; Ensure that the backend no longer emits unnecessary vector insert
  11 ; instructions immediately after SSE scalar fp instructions
  12 ; like addss or mulss.
  13
  14 define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
     ; Adds lane 0 of %a and lane 0 of %b as scalars and writes the sum back into
     ; lane 0 of %a. The checks below expect one addss (vaddss for AVX), then retq.
  15 ; SSE-LABEL: test_add_ss:
  16 ; SSE:       # BB#0:
  17 ; SSE-NEXT:    addss %xmm1, %xmm0
  18 ; SSE-NEXT:    retq
  19 ;
  20 ; AVX-LABEL: test_add_ss:
  21 ; AVX:       # BB#0:
  22 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  23 ; AVX-NEXT:    retq
  24   %1 = extractelement <4 x float> %b, i32 0
  25   %2 = extractelement <4 x float> %a, i32 0
  26   %add = fadd float %2, %1
  27   %3 = insertelement <4 x float> %a, float %add, i32 0
  28   ret <4 x float> %3
  29 }
  30
  31 define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
     ; Subtracts lane 0 of %b from lane 0 of %a as scalars and writes the difference
     ; back into lane 0 of %a. The checks expect one subss (vsubss for AVX), then retq.
  32 ; SSE-LABEL: test_sub_ss:
  33 ; SSE:       # BB#0:
  34 ; SSE-NEXT:    subss %xmm1, %xmm0
  35 ; SSE-NEXT:    retq
  36 ;
  37 ; AVX-LABEL: test_sub_ss:
  38 ; AVX:       # BB#0:
  39 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
  40 ; AVX-NEXT:    retq
  41   %1 = extractelement <4 x float> %b, i32 0
  42   %2 = extractelement <4 x float> %a, i32 0
  43   %sub = fsub float %2, %1
  44   %3 = insertelement <4 x float> %a, float %sub, i32 0
  45   ret <4 x float> %3
  46 }
  47
  48 define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
     ; Multiplies lane 0 of %a by lane 0 of %b as scalars and writes the product back
     ; into lane 0 of %a. The checks expect one mulss (vmulss for AVX), then retq.
  49 ; SSE-LABEL: test_mul_ss:
  50 ; SSE:       # BB#0:
  51 ; SSE-NEXT:    mulss %xmm1, %xmm0
  52 ; SSE-NEXT:    retq
  53 ;
  54 ; AVX-LABEL: test_mul_ss:
  55 ; AVX:       # BB#0:
  56 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
  57 ; AVX-NEXT:    retq
  58   %1 = extractelement <4 x float> %b, i32 0
  59   %2 = extractelement <4 x float> %a, i32 0
  60   %mul = fmul float %2, %1
  61   %3 = insertelement <4 x float> %a, float %mul, i32 0
  62   ret <4 x float> %3
  63 }
  64
  65 define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
     ; Divides lane 0 of %a by lane 0 of %b as scalars and writes the quotient back
     ; into lane 0 of %a. The checks expect one divss (vdivss for AVX), then retq.
  66 ; SSE-LABEL: test_div_ss:
  67 ; SSE:       # BB#0:
  68 ; SSE-NEXT:    divss %xmm1, %xmm0
  69 ; SSE-NEXT:    retq
  70 ;
  71 ; AVX-LABEL: test_div_ss:
  72 ; AVX:       # BB#0:
  73 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
  74 ; AVX-NEXT:    retq
  75   %1 = extractelement <4 x float> %b, i32 0
  76   %2 = extractelement <4 x float> %a, i32 0
  77   %div = fdiv float %2, %1
  78   %3 = insertelement <4 x float> %a, float %div, i32 0
  79   ret <4 x float> %3
  80 }
  81
  82 define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
     ; Double-precision variant: adds lane 0 of %a and lane 0 of %b and writes the sum
     ; back into lane 0 of %a. The checks expect one addsd (vaddsd for AVX), then retq.
  83 ; SSE-LABEL: test_add_sd:
  84 ; SSE:       # BB#0:
  85 ; SSE-NEXT:    addsd %xmm1, %xmm0
  86 ; SSE-NEXT:    retq
  87 ;
  88 ; AVX-LABEL: test_add_sd:
  89 ; AVX:       # BB#0:
  90 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
  91 ; AVX-NEXT:    retq
  92   %1 = extractelement <2 x double> %b, i32 0
  93   %2 = extractelement <2 x double> %a, i32 0
  94   %add = fadd double %2, %1
  95   %3 = insertelement <2 x double> %a, double %add, i32 0
  96   ret <2 x double> %3
  97 }
  98
  99 define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
     ; Double-precision variant: lane 0 of %a minus lane 0 of %b, written back into
     ; lane 0 of %a. The checks expect one subsd (vsubsd for AVX), then retq.
 100 ; SSE-LABEL: test_sub_sd:
 101 ; SSE:       # BB#0:
 102 ; SSE-NEXT:    subsd %xmm1, %xmm0
 103 ; SSE-NEXT:    retq
 104 ;
 105 ; AVX-LABEL: test_sub_sd:
 106 ; AVX:       # BB#0:
 107 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
 108 ; AVX-NEXT:    retq
 109   %1 = extractelement <2 x double> %b, i32 0
 110   %2 = extractelement <2 x double> %a, i32 0
 111   %sub = fsub double %2, %1
 112   %3 = insertelement <2 x double> %a, double %sub, i32 0
 113   ret <2 x double> %3
 114 }
 115
 116 define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
     ; Double-precision variant: lane 0 of %a times lane 0 of %b, written back into
     ; lane 0 of %a. The checks expect one mulsd (vmulsd for AVX), then retq.
 117 ; SSE-LABEL: test_mul_sd:
 118 ; SSE:       # BB#0:
 119 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 120 ; SSE-NEXT:    retq
 121 ;
 122 ; AVX-LABEL: test_mul_sd:
 123 ; AVX:       # BB#0:
 124 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 125 ; AVX-NEXT:    retq
 126   %1 = extractelement <2 x double> %b, i32 0
 127   %2 = extractelement <2 x double> %a, i32 0
 128   %mul = fmul double %2, %1
 129   %3 = insertelement <2 x double> %a, double %mul, i32 0
 130   ret <2 x double> %3
 131 }
 132
 133 define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
     ; Double-precision variant: lane 0 of %a divided by lane 0 of %b, written back
     ; into lane 0 of %a. The checks expect one divsd (vdivsd for AVX), then retq.
 134 ; SSE-LABEL: test_div_sd:
 135 ; SSE:       # BB#0:
 136 ; SSE-NEXT:    divsd %xmm1, %xmm0
 137 ; SSE-NEXT:    retq
 138 ;
 139 ; AVX-LABEL: test_div_sd:
 140 ; AVX:       # BB#0:
 141 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 142 ; AVX-NEXT:    retq
 143   %1 = extractelement <2 x double> %b, i32 0
 144   %2 = extractelement <2 x double> %a, i32 0
 145   %div = fdiv double %2, %1
 146   %3 = insertelement <2 x double> %a, double %div, i32 0
 147   ret <2 x double> %3
 148 }
 149
 150 define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
     ; Adds lane 0 of %a and lane 0 of %b, but inserts the sum into lane 0 of %b.
     ; The SSE checks expect addss into %xmm1 plus a movaps copy into %xmm0; the AVX
     ; checks expect a single vaddss.
 151 ; SSE-LABEL: test2_add_ss:
 152 ; SSE:       # BB#0:
 153 ; SSE-NEXT:    addss %xmm0, %xmm1
 154 ; SSE-NEXT:    movaps %xmm1, %xmm0
 155 ; SSE-NEXT:    retq
 156 ;
 157 ; AVX-LABEL: test2_add_ss:
 158 ; AVX:       # BB#0:
 159 ; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 160 ; AVX-NEXT:    retq
 161   %1 = extractelement <4 x float> %a, i32 0
 162   %2 = extractelement <4 x float> %b, i32 0
 163   %add = fadd float %1, %2
 164   %3 = insertelement <4 x float> %b, float %add, i32 0
 165   ret <4 x float> %3
 166 }
 167
 168 define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
     ; Computes lane 0 of %b minus lane 0 of %a and inserts it into lane 0 of %b.
     ; The SSE checks expect subss into %xmm1 plus a movaps copy into %xmm0; the AVX
     ; checks expect a single vsubss.
 169 ; SSE-LABEL: test2_sub_ss:
 170 ; SSE:       # BB#0:
 171 ; SSE-NEXT:    subss %xmm0, %xmm1
 172 ; SSE-NEXT:    movaps %xmm1, %xmm0
 173 ; SSE-NEXT:    retq
 174 ;
 175 ; AVX-LABEL: test2_sub_ss:
 176 ; AVX:       # BB#0:
 177 ; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
 178 ; AVX-NEXT:    retq
 179   %1 = extractelement <4 x float> %a, i32 0
 180   %2 = extractelement <4 x float> %b, i32 0
 181   %sub = fsub float %2, %1
 182   %3 = insertelement <4 x float> %b, float %sub, i32 0
 183   ret <4 x float> %3
 184 }
 185
 186 define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
     ; Multiplies lane 0 of %a by lane 0 of %b, but inserts the product into lane 0
     ; of %b. The SSE checks expect mulss into %xmm1 plus a movaps copy into %xmm0;
     ; the AVX checks expect a single vmulss.
 187 ; SSE-LABEL: test2_mul_ss:
 188 ; SSE:       # BB#0:
 189 ; SSE-NEXT:    mulss %xmm0, %xmm1
 190 ; SSE-NEXT:    movaps %xmm1, %xmm0
 191 ; SSE-NEXT:    retq
 192 ;
 193 ; AVX-LABEL: test2_mul_ss:
 194 ; AVX:       # BB#0:
 195 ; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 196 ; AVX-NEXT:    retq
 197   %1 = extractelement <4 x float> %a, i32 0
 198   %2 = extractelement <4 x float> %b, i32 0
 199   %mul = fmul float %1, %2
 200   %3 = insertelement <4 x float> %b, float %mul, i32 0
 201   ret <4 x float> %3
 202 }
 203
 204 define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
     ; Divides lane 0 of %b by lane 0 of %a and inserts the quotient into lane 0 of
     ; %b. The SSE checks expect divss into %xmm1 plus a movaps copy into %xmm0; the
     ; AVX checks expect a single vdivss.
 205 ; SSE-LABEL: test2_div_ss:
 206 ; SSE:       # BB#0:
 207 ; SSE-NEXT:    divss %xmm0, %xmm1
 208 ; SSE-NEXT:    movaps %xmm1, %xmm0
 209 ; SSE-NEXT:    retq
 210 ;
 211 ; AVX-LABEL: test2_div_ss:
 212 ; AVX:       # BB#0:
 213 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
 214 ; AVX-NEXT:    retq
 215   %1 = extractelement <4 x float> %a, i32 0
 216   %2 = extractelement <4 x float> %b, i32 0
 217   %div = fdiv float %2, %1
 218   %3 = insertelement <4 x float> %b, float %div, i32 0
 219   ret <4 x float> %3
 220 }
 221
 222 define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
     ; Double-precision variant: adds lane 0 of %a and lane 0 of %b, inserting the sum
     ; into lane 0 of %b. The SSE checks expect addsd into %xmm1 plus a movaps copy
     ; into %xmm0; the AVX checks expect a single vaddsd.
 223 ; SSE-LABEL: test2_add_sd:
 224 ; SSE:       # BB#0:
 225 ; SSE-NEXT:    addsd %xmm0, %xmm1
 226 ; SSE-NEXT:    movaps %xmm1, %xmm0
 227 ; SSE-NEXT:    retq
 228 ;
 229 ; AVX-LABEL: test2_add_sd:
 230 ; AVX:       # BB#0:
 231 ; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
 232 ; AVX-NEXT:    retq
 233   %1 = extractelement <2 x double> %a, i32 0
 234   %2 = extractelement <2 x double> %b, i32 0
 235   %add = fadd double %1, %2
 236   %3 = insertelement <2 x double> %b, double %add, i32 0
 237   ret <2 x double> %3
 238 }
 239
 240 define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
     ; Double-precision variant: lane 0 of %b minus lane 0 of %a, inserted into lane 0
     ; of %b. The SSE checks expect subsd into %xmm1 plus a movaps copy into %xmm0;
     ; the AVX checks expect a single vsubsd.
 241 ; SSE-LABEL: test2_sub_sd:
 242 ; SSE:       # BB#0:
 243 ; SSE-NEXT:    subsd %xmm0, %xmm1
 244 ; SSE-NEXT:    movaps %xmm1, %xmm0
 245 ; SSE-NEXT:    retq
 246 ;
 247 ; AVX-LABEL: test2_sub_sd:
 248 ; AVX:       # BB#0:
 249 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
 250 ; AVX-NEXT:    retq
 251   %1 = extractelement <2 x double> %a, i32 0
 252   %2 = extractelement <2 x double> %b, i32 0
 253   %sub = fsub double %2, %1
 254   %3 = insertelement <2 x double> %b, double %sub, i32 0
 255   ret <2 x double> %3
 256 }
 257
 258 define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
     ; Double-precision variant: lane 0 of %a times lane 0 of %b, inserted into lane 0
     ; of %b. The SSE checks expect mulsd into %xmm1 plus a movaps copy into %xmm0;
     ; the AVX checks expect a single vmulsd.
 259 ; SSE-LABEL: test2_mul_sd:
 260 ; SSE:       # BB#0:
 261 ; SSE-NEXT:    mulsd %xmm0, %xmm1
 262 ; SSE-NEXT:    movaps %xmm1, %xmm0
 263 ; SSE-NEXT:    retq
 264 ;
 265 ; AVX-LABEL: test2_mul_sd:
 266 ; AVX:       # BB#0:
 267 ; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
 268 ; AVX-NEXT:    retq
 269   %1 = extractelement <2 x double> %a, i32 0
 270   %2 = extractelement <2 x double> %b, i32 0
 271   %mul = fmul double %1, %2
 272   %3 = insertelement <2 x double> %b, double %mul, i32 0
 273   ret <2 x double> %3
 274 }
 275
 276 define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
     ; Double-precision variant: lane 0 of %b divided by lane 0 of %a, inserted into
     ; lane 0 of %b. The SSE checks expect divsd into %xmm1 plus a movaps copy into
     ; %xmm0; the AVX checks expect a single vdivsd.
 277 ; SSE-LABEL: test2_div_sd:
 278 ; SSE:       # BB#0:
 279 ; SSE-NEXT:    divsd %xmm0, %xmm1
 280 ; SSE-NEXT:    movaps %xmm1, %xmm0
 281 ; SSE-NEXT:    retq
 282 ;
 283 ; AVX-LABEL: test2_div_sd:
 284 ; AVX:       # BB#0:
 285 ; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
 286 ; AVX-NEXT:    retq
 287   %1 = extractelement <2 x double> %a, i32 0
 288   %2 = extractelement <2 x double> %b, i32 0
 289   %div = fdiv double %2, %1
 290   %3 = insertelement <2 x double> %b, double %div, i32 0
 291   ret <2 x double> %3
 292 }
 293
 294 define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
     ; Two chained scalar adds: a0 + (a0 + b0), where a0/b0 are lane 0 of %a/%b; the
     ; result is inserted into lane 0 of %a. The checks expect exactly two addss
     ; (vaddss for AVX) instructions before retq.
 295 ; SSE-LABEL: test_multiple_add_ss:
 296 ; SSE:       # BB#0:
 297 ; SSE-NEXT:    addss %xmm0, %xmm1
 298 ; SSE-NEXT:    addss %xmm1, %xmm0
 299 ; SSE-NEXT:    retq
 300 ;
 301 ; AVX-LABEL: test_multiple_add_ss:
 302 ; AVX:       # BB#0:
 303 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm1
 304 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 305 ; AVX-NEXT:    retq
 306   %1 = extractelement <4 x float> %b, i32 0
 307   %2 = extractelement <4 x float> %a, i32 0
 308   %add = fadd float %2, %1
 309   %add2 = fadd float %2, %add
 310   %3 = insertelement <4 x float> %a, float %add2, i32 0
 311   ret <4 x float> %3
 312 }
 313
 314 define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
     ; Two chained scalar subtracts: a0 - (a0 - b0), where a0/b0 are lane 0 of %a/%b;
     ; the result is inserted into lane 0 of %a. The SSE checks expect a movaps copy
     ; of %xmm0 into %xmm2 followed by two subss; the AVX checks expect two vsubss.
 315 ; SSE-LABEL: test_multiple_sub_ss:
 316 ; SSE:       # BB#0:
 317 ; SSE-NEXT:    movaps %xmm0, %xmm2
 318 ; SSE-NEXT:    subss %xmm1, %xmm2
 319 ; SSE-NEXT:    subss %xmm2, %xmm0
 320 ; SSE-NEXT:    retq
 321 ;
 322 ; AVX-LABEL: test_multiple_sub_ss:
 323 ; AVX:       # BB#0:
 324 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm1
 325 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
 326 ; AVX-NEXT:    retq
 327   %1 = extractelement <4 x float> %b, i32 0
 328   %2 = extractelement <4 x float> %a, i32 0
 329   %sub = fsub float %2, %1
 330   %sub2 = fsub float %2, %sub
 331   %3 = insertelement <4 x float> %a, float %sub2, i32 0
 332   ret <4 x float> %3
 333 }
 334
 335 define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
     ; Two chained scalar multiplies: a0 * (a0 * b0), where a0/b0 are lane 0 of
     ; %a/%b; the result is inserted into lane 0 of %a. The checks expect exactly two
     ; mulss (vmulss for AVX) instructions before retq.
 336 ; SSE-LABEL: test_multiple_mul_ss:
 337 ; SSE:       # BB#0:
 338 ; SSE-NEXT:    mulss %xmm0, %xmm1
 339 ; SSE-NEXT:    mulss %xmm1, %xmm0
 340 ; SSE-NEXT:    retq
 341 ;
 342 ; AVX-LABEL: test_multiple_mul_ss:
 343 ; AVX:       # BB#0:
 344 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm1
 345 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 346 ; AVX-NEXT:    retq
 347   %1 = extractelement <4 x float> %b, i32 0
 348   %2 = extractelement <4 x float> %a, i32 0
 349   %mul = fmul float %2, %1
 350   %mul2 = fmul float %2, %mul
 351   %3 = insertelement <4 x float> %a, float %mul2, i32 0
 352   ret <4 x float> %3
 353 }
 354
 355 define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
     ; Two chained scalar divides: a0 / (a0 / b0), where a0/b0 are lane 0 of %a/%b;
     ; the result is inserted into lane 0 of %a. The SSE checks expect a movaps copy
     ; of %xmm0 into %xmm2 followed by two divss; the AVX checks expect two vdivss.
 356 ; SSE-LABEL: test_multiple_div_ss:
 357 ; SSE:       # BB#0:
 358 ; SSE-NEXT:    movaps %xmm0, %xmm2
 359 ; SSE-NEXT:    divss %xmm1, %xmm2
 360 ; SSE-NEXT:    divss %xmm2, %xmm0
 361 ; SSE-NEXT:    retq
 362 ;
 363 ; AVX-LABEL: test_multiple_div_ss:
 364 ; AVX:       # BB#0:
 365 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm1
 366 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 367 ; AVX-NEXT:    retq
 368   %1 = extractelement <4 x float> %b, i32 0
 369   %2 = extractelement <4 x float> %a, i32 0
 370   %div = fdiv float %2, %1
 371   %div2 = fdiv float %2, %div
 372   %3 = insertelement <4 x float> %a, float %div2, i32 0
 373   ret <4 x float> %3
 374 }
 375
 376 ; Ensure that the backend selects SSE/AVX scalar fp instructions
 377 ; from a packed fp instruction plus a vector insert.
 378
 379 define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fadd of %a and %b, then a shufflevector that keeps lane 0 of the sum and
     ; takes lanes 1-3 from %a. The checks expect one addss (vaddss for AVX), then retq.
 380 ; SSE-LABEL: insert_test_add_ss:
 381 ; SSE:       # BB#0:
 382 ; SSE-NEXT:    addss %xmm1, %xmm0
 383 ; SSE-NEXT:    retq
 384 ;
 385 ; AVX-LABEL: insert_test_add_ss:
 386 ; AVX:       # BB#0:
 387 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 388 ; AVX-NEXT:    retq
 389   %1 = fadd <4 x float> %a, %b
 390   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 391   ret <4 x float> %2
 392 }
 393
 394 define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fsub (%a - %b), then a shufflevector that keeps lane 0 of the result and
     ; takes lanes 1-3 from %a. The checks expect one subss (vsubss for AVX), then retq.
 395 ; SSE-LABEL: insert_test_sub_ss:
 396 ; SSE:       # BB#0:
 397 ; SSE-NEXT:    subss %xmm1, %xmm0
 398 ; SSE-NEXT:    retq
 399 ;
 400 ; AVX-LABEL: insert_test_sub_ss:
 401 ; AVX:       # BB#0:
 402 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
 403 ; AVX-NEXT:    retq
 404   %1 = fsub <4 x float> %a, %b
 405   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 406   ret <4 x float> %2
 407 }
 408
 409 define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fmul of %a and %b, then a shufflevector that keeps lane 0 of the product
     ; and takes lanes 1-3 from %a. The checks expect one mulss (vmulss for AVX), then
     ; retq.
 410 ; SSE-LABEL: insert_test_mul_ss:
 411 ; SSE:       # BB#0:
 412 ; SSE-NEXT:    mulss %xmm1, %xmm0
 413 ; SSE-NEXT:    retq
 414 ;
 415 ; AVX-LABEL: insert_test_mul_ss:
 416 ; AVX:       # BB#0:
 417 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 418 ; AVX-NEXT:    retq
 419   %1 = fmul <4 x float> %a, %b
 420   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 421   ret <4 x float> %2
 422 }
 423
 424 define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fdiv (%a / %b), then a shufflevector that keeps lane 0 of the quotient
     ; and takes lanes 1-3 from %a. The checks expect one divss (vdivss for AVX), then
     ; retq.
 425 ; SSE-LABEL: insert_test_div_ss:
 426 ; SSE:       # BB#0:
 427 ; SSE-NEXT:    divss %xmm1, %xmm0
 428 ; SSE-NEXT:    retq
 429 ;
 430 ; AVX-LABEL: insert_test_div_ss:
 431 ; AVX:       # BB#0:
 432 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 433 ; AVX-NEXT:    retq
 434   %1 = fdiv <4 x float> %a, %b
 435   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 436   ret <4 x float> %2
 437 }
 438
 439 define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fadd of %a and %b, then a shufflevector that keeps lane 0 of the
     ; sum and takes lane 1 from %a. The checks expect one addsd (vaddsd for AVX),
     ; then retq.
 440 ; SSE-LABEL: insert_test_add_sd:
 441 ; SSE:       # BB#0:
 442 ; SSE-NEXT:    addsd %xmm1, %xmm0
 443 ; SSE-NEXT:    retq
 444 ;
 445 ; AVX-LABEL: insert_test_add_sd:
 446 ; AVX:       # BB#0:
 447 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 448 ; AVX-NEXT:    retq
 449   %1 = fadd <2 x double> %a, %b
 450   %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
 451   ret <2 x double> %2
 452 }
 453
 454 define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fsub (%a - %b), then a shufflevector that keeps lane 0 of the
     ; result and takes lane 1 from %a. The checks expect one subsd (vsubsd for AVX),
     ; then retq.
 455 ; SSE-LABEL: insert_test_sub_sd:
 456 ; SSE:       # BB#0:
 457 ; SSE-NEXT:    subsd %xmm1, %xmm0
 458 ; SSE-NEXT:    retq
 459 ;
 460 ; AVX-LABEL: insert_test_sub_sd:
 461 ; AVX:       # BB#0:
 462 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
 463 ; AVX-NEXT:    retq
 464   %1 = fsub <2 x double> %a, %b
 465   %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
 466   ret <2 x double> %2
 467 }
 468
 469 define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fmul of %a and %b, then a shufflevector that keeps lane 0 of the
     ; product and takes lane 1 from %a. The checks expect one mulsd (vmulsd for AVX),
     ; then retq.
 470 ; SSE-LABEL: insert_test_mul_sd:
 471 ; SSE:       # BB#0:
 472 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 473 ; SSE-NEXT:    retq
 474 ;
 475 ; AVX-LABEL: insert_test_mul_sd:
 476 ; AVX:       # BB#0:
 477 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 478 ; AVX-NEXT:    retq
 479   %1 = fmul <2 x double> %a, %b
 480   %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
 481   ret <2 x double> %2
 482 }
 483
 484 define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fdiv (%a / %b), then a shufflevector that keeps lane 0 of the
     ; quotient and takes lane 1 from %a. The checks expect one divsd (vdivsd for
     ; AVX), then retq.
 485 ; SSE-LABEL: insert_test_div_sd:
 486 ; SSE:       # BB#0:
 487 ; SSE-NEXT:    divsd %xmm1, %xmm0
 488 ; SSE-NEXT:    retq
 489 ;
 490 ; AVX-LABEL: insert_test_div_sd:
 491 ; AVX:       # BB#0:
 492 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 493 ; AVX-NEXT:    retq
 494   %1 = fdiv <2 x double> %a, %b
 495   %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
 496   ret <2 x double> %2
 497 }
 498
 499 define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fadd of %b and %a, then a shufflevector that keeps lane 0 of the sum and
     ; takes lanes 1-3 from %b. The SSE checks expect addss into %xmm1 plus a movaps
     ; copy into %xmm0; the AVX checks expect a single vaddss.
 500 ; SSE-LABEL: insert_test2_add_ss:
 501 ; SSE:       # BB#0:
 502 ; SSE-NEXT:    addss %xmm0, %xmm1
 503 ; SSE-NEXT:    movaps %xmm1, %xmm0
 504 ; SSE-NEXT:    retq
 505 ;
 506 ; AVX-LABEL: insert_test2_add_ss:
 507 ; AVX:       # BB#0:
 508 ; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 509 ; AVX-NEXT:    retq
 510   %1 = fadd <4 x float> %b, %a
 511   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 512   ret <4 x float> %2
 513 }
 514
 515 define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fsub (%b - %a), then a shufflevector that keeps lane 0 of the result and
     ; takes lanes 1-3 from %b. The SSE checks expect subss into %xmm1 plus a movaps
     ; copy into %xmm0; the AVX checks expect a single vsubss.
 516 ; SSE-LABEL: insert_test2_sub_ss:
 517 ; SSE:       # BB#0:
 518 ; SSE-NEXT:    subss %xmm0, %xmm1
 519 ; SSE-NEXT:    movaps %xmm1, %xmm0
 520 ; SSE-NEXT:    retq
 521 ;
 522 ; AVX-LABEL: insert_test2_sub_ss:
 523 ; AVX:       # BB#0:
 524 ; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
 525 ; AVX-NEXT:    retq
 526   %1 = fsub <4 x float> %b, %a
 527   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 528   ret <4 x float> %2
 529 }
 530
 531 define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fmul of %b and %a, then a shufflevector that keeps lane 0 of the product
     ; and takes lanes 1-3 from %b. The SSE checks expect mulss into %xmm1 plus a
     ; movaps copy into %xmm0; the AVX checks expect a single vmulss.
 532 ; SSE-LABEL: insert_test2_mul_ss:
 533 ; SSE:       # BB#0:
 534 ; SSE-NEXT:    mulss %xmm0, %xmm1
 535 ; SSE-NEXT:    movaps %xmm1, %xmm0
 536 ; SSE-NEXT:    retq
 537 ;
 538 ; AVX-LABEL: insert_test2_mul_ss:
 539 ; AVX:       # BB#0:
 540 ; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 541 ; AVX-NEXT:    retq
 542   %1 = fmul <4 x float> %b, %a
 543   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 544   ret <4 x float> %2
 545 }
 546
 547 define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fdiv (%b / %a), then a shufflevector that keeps lane 0 of the quotient
     ; and takes lanes 1-3 from %b. The SSE checks expect divss into %xmm1 plus a
     ; movaps copy into %xmm0; the AVX checks expect a single vdivss.
 548 ; SSE-LABEL: insert_test2_div_ss:
 549 ; SSE:       # BB#0:
 550 ; SSE-NEXT:    divss %xmm0, %xmm1
 551 ; SSE-NEXT:    movaps %xmm1, %xmm0
 552 ; SSE-NEXT:    retq
 553 ;
 554 ; AVX-LABEL: insert_test2_div_ss:
 555 ; AVX:       # BB#0:
 556 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
 557 ; AVX-NEXT:    retq
 558   %1 = fdiv <4 x float> %b, %a
 559   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 560   ret <4 x float> %2
 561 }
 562
 563 define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fadd of %b and %a, then a shufflevector that keeps lane 0 of the
     ; sum and takes lane 1 from %b. The SSE checks expect addsd into %xmm1 plus a
     ; movaps copy into %xmm0; the AVX checks expect a single vaddsd.
 564 ; SSE-LABEL: insert_test2_add_sd:
 565 ; SSE:       # BB#0:
 566 ; SSE-NEXT:    addsd %xmm0, %xmm1
 567 ; SSE-NEXT:    movaps %xmm1, %xmm0
 568 ; SSE-NEXT:    retq
 569 ;
 570 ; AVX-LABEL: insert_test2_add_sd:
 571 ; AVX:       # BB#0:
 572 ; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
 573 ; AVX-NEXT:    retq
 574   %1 = fadd <2 x double> %b, %a
 575   %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
 576   ret <2 x double> %2
 577 }
 578
 579 define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fsub (%b - %a), then a shufflevector that keeps lane 0 of the
     ; result and takes lane 1 from %b. The SSE checks expect subsd into %xmm1 plus a
     ; movaps copy into %xmm0; the AVX checks expect a single vsubsd.
 580 ; SSE-LABEL: insert_test2_sub_sd:
 581 ; SSE:       # BB#0:
 582 ; SSE-NEXT:    subsd %xmm0, %xmm1
 583 ; SSE-NEXT:    movaps %xmm1, %xmm0
 584 ; SSE-NEXT:    retq
 585 ;
 586 ; AVX-LABEL: insert_test2_sub_sd:
 587 ; AVX:       # BB#0:
 588 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
 589 ; AVX-NEXT:    retq
 590   %1 = fsub <2 x double> %b, %a
 591   %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
 592   ret <2 x double> %2
 593 }
 594
 595 define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fmul of %b and %a, then a shufflevector that keeps lane 0 of the
     ; product and takes lane 1 from %b. The SSE checks expect mulsd into %xmm1 plus a
     ; movaps copy into %xmm0; the AVX checks expect a single vmulsd.
 596 ; SSE-LABEL: insert_test2_mul_sd:
 597 ; SSE:       # BB#0:
 598 ; SSE-NEXT:    mulsd %xmm0, %xmm1
 599 ; SSE-NEXT:    movaps %xmm1, %xmm0
 600 ; SSE-NEXT:    retq
 601 ;
 602 ; AVX-LABEL: insert_test2_mul_sd:
 603 ; AVX:       # BB#0:
 604 ; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
 605 ; AVX-NEXT:    retq
 606   %1 = fmul <2 x double> %b, %a
 607   %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
 608   ret <2 x double> %2
 609 }
 610
 611 define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fdiv (%b / %a), then a shufflevector that keeps lane 0 of the
     ; quotient and takes lane 1 from %b. The SSE checks expect divsd into %xmm1 plus
     ; a movaps copy into %xmm0; the AVX checks expect a single vdivsd.
 612 ; SSE-LABEL: insert_test2_div_sd:
 613 ; SSE:       # BB#0:
 614 ; SSE-NEXT:    divsd %xmm0, %xmm1
 615 ; SSE-NEXT:    movaps %xmm1, %xmm0
 616 ; SSE-NEXT:    retq
 617 ;
 618 ; AVX-LABEL: insert_test2_div_sd:
 619 ; AVX:       # BB#0:
 620 ; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
 621 ; AVX-NEXT:    retq
 622   %1 = fdiv <2 x double> %b, %a
 623   %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
 624   ret <2 x double> %2
 625 }
 626
 627 define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fadd of %a and %b, then a select with a constant mask whose lane 0 is
     ; false: lane 0 comes from the sum, lanes 1-3 from %a. The checks expect one
     ; addss (vaddss for AVX), then retq.
 628 ; SSE-LABEL: insert_test3_add_ss:
 629 ; SSE:       # BB#0:
 630 ; SSE-NEXT:    addss %xmm1, %xmm0
 631 ; SSE-NEXT:    retq
 632 ;
 633 ; AVX-LABEL: insert_test3_add_ss:
 634 ; AVX:       # BB#0:
 635 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 636 ; AVX-NEXT:    retq
 637   %1 = fadd <4 x float> %a, %b
 638   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
 639   ret <4 x float> %2
 640 }
 641
 642 define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fsub (%a - %b), then a select with a constant mask whose lane 0 is
     ; false: lane 0 comes from the result, lanes 1-3 from %a. The checks expect one
     ; subss (vsubss for AVX), then retq.
 643 ; SSE-LABEL: insert_test3_sub_ss:
 644 ; SSE:       # BB#0:
 645 ; SSE-NEXT:    subss %xmm1, %xmm0
 646 ; SSE-NEXT:    retq
 647 ;
 648 ; AVX-LABEL: insert_test3_sub_ss:
 649 ; AVX:       # BB#0:
 650 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
 651 ; AVX-NEXT:    retq
 652   %1 = fsub <4 x float> %a, %b
 653   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
 654   ret <4 x float> %2
 655 }
 656
 657 define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fmul of %a and %b, then a select with a constant mask whose lane 0 is
     ; false: lane 0 comes from the product, lanes 1-3 from %a. The checks expect one
     ; mulss (vmulss for AVX), then retq.
 658 ; SSE-LABEL: insert_test3_mul_ss:
 659 ; SSE:       # BB#0:
 660 ; SSE-NEXT:    mulss %xmm1, %xmm0
 661 ; SSE-NEXT:    retq
 662 ;
 663 ; AVX-LABEL: insert_test3_mul_ss:
 664 ; AVX:       # BB#0:
 665 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 666 ; AVX-NEXT:    retq
 667   %1 = fmul <4 x float> %a, %b
 668   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
 669   ret <4 x float> %2
 670 }
 671
 672 define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fdiv (%a / %b), then a select with a constant mask whose lane 0 is
     ; false: lane 0 comes from the quotient, lanes 1-3 from %a. The checks expect one
     ; divss (vdivss for AVX), then retq.
 673 ; SSE-LABEL: insert_test3_div_ss:
 674 ; SSE:       # BB#0:
 675 ; SSE-NEXT:    divss %xmm1, %xmm0
 676 ; SSE-NEXT:    retq
 677 ;
 678 ; AVX-LABEL: insert_test3_div_ss:
 679 ; AVX:       # BB#0:
 680 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 681 ; AVX-NEXT:    retq
 682   %1 = fdiv <4 x float> %a, %b
 683   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
 684   ret <4 x float> %2
 685 }
 686
 687 define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fadd of %a and %b, then a select with constant mask
     ; <false, true>: lane 0 comes from the sum, lane 1 from %a. The checks expect one
     ; addsd (vaddsd for AVX), then retq.
 688 ; SSE-LABEL: insert_test3_add_sd:
 689 ; SSE:       # BB#0:
 690 ; SSE-NEXT:    addsd %xmm1, %xmm0
 691 ; SSE-NEXT:    retq
 692 ;
 693 ; AVX-LABEL: insert_test3_add_sd:
 694 ; AVX:       # BB#0:
 695 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 696 ; AVX-NEXT:    retq
 697   %1 = fadd <2 x double> %a, %b
 698   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
 699   ret <2 x double> %2
 700 }
 701
 702 define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fsub (%a - %b), then a select with constant mask <false, true>:
     ; lane 0 comes from the result, lane 1 from %a. The checks expect one subsd
     ; (vsubsd for AVX), then retq.
 703 ; SSE-LABEL: insert_test3_sub_sd:
 704 ; SSE:       # BB#0:
 705 ; SSE-NEXT:    subsd %xmm1, %xmm0
 706 ; SSE-NEXT:    retq
 707 ;
 708 ; AVX-LABEL: insert_test3_sub_sd:
 709 ; AVX:       # BB#0:
 710 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
 711 ; AVX-NEXT:    retq
 712   %1 = fsub <2 x double> %a, %b
 713   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
 714   ret <2 x double> %2
 715 }
 716
 717 define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fmul of %a and %b, then a select with constant mask
     ; <false, true>: lane 0 comes from the product, lane 1 from %a. The checks expect
     ; one mulsd (vmulsd for AVX), then retq.
 718 ; SSE-LABEL: insert_test3_mul_sd:
 719 ; SSE:       # BB#0:
 720 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 721 ; SSE-NEXT:    retq
 722 ;
 723 ; AVX-LABEL: insert_test3_mul_sd:
 724 ; AVX:       # BB#0:
 725 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 726 ; AVX-NEXT:    retq
 727   %1 = fmul <2 x double> %a, %b
 728   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
 729   ret <2 x double> %2
 730 }
 731
 732 define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fdiv (%a / %b), then a select with constant mask <false, true>:
     ; lane 0 comes from the quotient, lane 1 from %a. The checks expect one divsd
     ; (vdivsd for AVX), then retq.
 733 ; SSE-LABEL: insert_test3_div_sd:
 734 ; SSE:       # BB#0:
 735 ; SSE-NEXT:    divsd %xmm1, %xmm0
 736 ; SSE-NEXT:    retq
 737 ;
 738 ; AVX-LABEL: insert_test3_div_sd:
 739 ; AVX:       # BB#0:
 740 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 741 ; AVX-NEXT:    retq
 742   %1 = fdiv <2 x double> %a, %b
 743   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
 744   ret <2 x double> %2
 745 }
 746
 747 define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fadd of %b and %a, then a select with a constant mask whose lane 0 is
     ; false: lane 0 comes from the sum, lanes 1-3 from %b. The SSE checks expect
     ; addss into %xmm1 plus a movaps copy into %xmm0; the AVX checks expect a single
     ; vaddss.
 748 ; SSE-LABEL: insert_test4_add_ss:
 749 ; SSE:       # BB#0:
 750 ; SSE-NEXT:    addss %xmm0, %xmm1
 751 ; SSE-NEXT:    movaps %xmm1, %xmm0
 752 ; SSE-NEXT:    retq
 753 ;
 754 ; AVX-LABEL: insert_test4_add_ss:
 755 ; AVX:       # BB#0:
 756 ; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 757 ; AVX-NEXT:    retq
 758   %1 = fadd <4 x float> %b, %a
 759   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
 760   ret <4 x float> %2
 761 }
 762
 763 define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fsub (%b - %a), then a select with a constant mask whose lane 0 is
     ; false: lane 0 comes from the result, lanes 1-3 from %b. The SSE checks expect
     ; subss into %xmm1 plus a movaps copy into %xmm0; the AVX checks expect a single
     ; vsubss.
 764 ; SSE-LABEL: insert_test4_sub_ss:
 765 ; SSE:       # BB#0:
 766 ; SSE-NEXT:    subss %xmm0, %xmm1
 767 ; SSE-NEXT:    movaps %xmm1, %xmm0
 768 ; SSE-NEXT:    retq
 769 ;
 770 ; AVX-LABEL: insert_test4_sub_ss:
 771 ; AVX:       # BB#0:
 772 ; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
 773 ; AVX-NEXT:    retq
 774   %1 = fsub <4 x float> %b, %a
 775   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
 776   ret <4 x float> %2
 777 }
 778
 779 define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fmul of %b and %a, then a select with a constant mask whose lane 0 is
     ; false: lane 0 comes from the product, lanes 1-3 from %b. The SSE checks expect
     ; mulss into %xmm1 plus a movaps copy into %xmm0; the AVX checks expect a single
     ; vmulss.
 780 ; SSE-LABEL: insert_test4_mul_ss:
 781 ; SSE:       # BB#0:
 782 ; SSE-NEXT:    mulss %xmm0, %xmm1
 783 ; SSE-NEXT:    movaps %xmm1, %xmm0
 784 ; SSE-NEXT:    retq
 785 ;
 786 ; AVX-LABEL: insert_test4_mul_ss:
 787 ; AVX:       # BB#0:
 788 ; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 789 ; AVX-NEXT:    retq
 790   %1 = fmul <4 x float> %b, %a
 791   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
 792   ret <4 x float> %2
 793 }
 794
 795 define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fdiv (%b / %a), then a select with a constant mask whose lane 0 is
     ; false: lane 0 comes from the quotient, lanes 1-3 from %b. The SSE checks expect
     ; divss into %xmm1 plus a movaps copy into %xmm0; the AVX checks expect a single
     ; vdivss.
 796 ; SSE-LABEL: insert_test4_div_ss:
 797 ; SSE:       # BB#0:
 798 ; SSE-NEXT:    divss %xmm0, %xmm1
 799 ; SSE-NEXT:    movaps %xmm1, %xmm0
 800 ; SSE-NEXT:    retq
 801 ;
 802 ; AVX-LABEL: insert_test4_div_ss:
 803 ; AVX:       # BB#0:
 804 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
 805 ; AVX-NEXT:    retq
 806   %1 = fdiv <4 x float> %b, %a
 807   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
 808   ret <4 x float> %2
 809 }
 810
 811 define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fadd of %b and %a, then a select with constant mask
     ; <false, true>: lane 0 comes from the sum, lane 1 from %b. The SSE checks expect
     ; addsd into %xmm1 plus a movaps copy into %xmm0; the AVX checks expect a single
     ; vaddsd.
 812 ; SSE-LABEL: insert_test4_add_sd:
 813 ; SSE:       # BB#0:
 814 ; SSE-NEXT:    addsd %xmm0, %xmm1
 815 ; SSE-NEXT:    movaps %xmm1, %xmm0
 816 ; SSE-NEXT:    retq
 817 ;
 818 ; AVX-LABEL: insert_test4_add_sd:
 819 ; AVX:       # BB#0:
 820 ; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
 821 ; AVX-NEXT:    retq
 822   %1 = fadd <2 x double> %b, %a
 823   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
 824   ret <2 x double> %2
 825 }
 826
 827 define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fsub (%b - %a), then a select with constant mask <false, true>:
     ; lane 0 comes from the result, lane 1 from %b. The SSE checks expect subsd into
     ; %xmm1 plus a movaps copy into %xmm0; the AVX checks expect a single vsubsd.
 828 ; SSE-LABEL: insert_test4_sub_sd:
 829 ; SSE:       # BB#0:
 830 ; SSE-NEXT:    subsd %xmm0, %xmm1
 831 ; SSE-NEXT:    movaps %xmm1, %xmm0
 832 ; SSE-NEXT:    retq
 833 ;
 834 ; AVX-LABEL: insert_test4_sub_sd:
 835 ; AVX:       # BB#0:
 836 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
 837 ; AVX-NEXT:    retq
 838   %1 = fsub <2 x double> %b, %a
 839   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
 840   ret <2 x double> %2
 841 }
 842
 843 define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fmul of %b and %a, then a select with constant mask
     ; <false, true>: lane 0 comes from the product, lane 1 from %b. The SSE checks
     ; expect mulsd into %xmm1 plus a movaps copy into %xmm0; the AVX checks expect a
     ; single vmulsd.
 844 ; SSE-LABEL: insert_test4_mul_sd:
 845 ; SSE:       # BB#0:
 846 ; SSE-NEXT:    mulsd %xmm0, %xmm1
 847 ; SSE-NEXT:    movaps %xmm1, %xmm0
 848 ; SSE-NEXT:    retq
 849 ;
 850 ; AVX-LABEL: insert_test4_mul_sd:
 851 ; AVX:       # BB#0:
 852 ; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
 853 ; AVX-NEXT:    retq
 854   %1 = fmul <2 x double> %b, %a
 855   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
 856   ret <2 x double> %2
 857 }
 858
 859 define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fdiv (%b / %a), then a select with constant mask <false, true>:
     ; lane 0 comes from the quotient, lane 1 from %b. The SSE checks expect divsd
     ; into %xmm1 plus a movaps copy into %xmm0; the AVX checks expect a single vdivsd.
 860 ; SSE-LABEL: insert_test4_div_sd:
 861 ; SSE:       # BB#0:
 862 ; SSE-NEXT:    divsd %xmm0, %xmm1
 863 ; SSE-NEXT:    movaps %xmm1, %xmm0
 864 ; SSE-NEXT:    retq
 865 ;
 866 ; AVX-LABEL: insert_test4_div_sd:
 867 ; AVX:       # BB#0:
 868 ; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
 869 ; AVX-NEXT:    retq
 870   %1 = fdiv <2 x double> %b, %a
 871   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
 872   ret <2 x double> %2
 873 }