test/CodeGen/X86/sse2.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; Tests for SSE2 and below, without SSE3+.
   3 ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 -O3 | FileCheck %s
   4
   5 define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
     ; Loads the <2 x double> at %A, replaces element 0 with the scalar %B and
     ; stores the result to %r. In the shuffle mask <2, 1>, index 2 selects
     ; lane 0 of %tmp7 (the inserted %B) and index 1 selects lane 1 of %tmp3.
     ; The expected assembly performs the replacement with one movlpd that
     ; reads an %esp-relative slot directly.
   6 ; CHECK-LABEL: test1:
   7 ; CHECK:       ## BB#0:
   8 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   9 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
  10 ; CHECK-NEXT:    movapd (%ecx), %xmm0
  11 ; CHECK-NEXT:    movlpd {{[0-9]+}}(%esp), %xmm0
  12 ; CHECK-NEXT:    movapd %xmm0, (%eax)
  13 ; CHECK-NEXT:    retl
  14         %tmp3 = load <2 x double>, <2 x double>* %A, align 16
  15         %tmp7 = insertelement <2 x double> undef, double %B, i32 0
  16         %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
  17         store <2 x double> %tmp9, <2 x double>* %r, align 16
  18         ret void
  19 }
  20
  21 define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
     ; Loads the <2 x double> at %A, replaces element 1 with the scalar %B and
     ; stores the result to %r. In the shuffle mask <0, 2>, index 0 keeps lane 0
     ; of %tmp3 and index 2 selects lane 0 of %tmp7 (the inserted %B).
     ; The expected assembly performs the replacement with one movhpd that
     ; reads an %esp-relative slot directly.
  22 ; CHECK-LABEL: test2:
  23 ; CHECK:       ## BB#0:
  24 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
  25 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
  26 ; CHECK-NEXT:    movapd (%ecx), %xmm0
  27 ; CHECK-NEXT:    movhpd {{[0-9]+}}(%esp), %xmm0
  28 ; CHECK-NEXT:    movapd %xmm0, (%eax)
  29 ; CHECK-NEXT:    retl
  30         %tmp3 = load <2 x double>, <2 x double>* %A, align 16
  31         %tmp7 = insertelement <2 x double> undef, double %B, i32 0
  32         %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
  33         store <2 x double> %tmp9, <2 x double>* %r, align 16
  34         ret void
  35 }
  36
  37
  38 define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind {
     ; Builds <A[0], B[0], A[1], B[1]> one element at a time (four
     ; extractelement / four insertelement instructions) and stores it to %res.
     ; The expected assembly recognises the interleave pattern and emits a
     ; single unpcklps whose second source is a memory operand.
  39 ; CHECK-LABEL: test3:
  40 ; CHECK:       ## BB#0:
  41 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
  42 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
  43 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
  44 ; CHECK-NEXT:    movaps (%edx), %xmm0
  45 ; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
  46 ; CHECK-NEXT:    movaps %xmm0, (%eax)
  47 ; CHECK-NEXT:    retl
  48         %tmp = load <4 x float>, <4 x float>* %B                ; <<4 x float>> [#uses=2]
  49         %tmp3 = load <4 x float>, <4 x float>* %A               ; <<4 x float>> [#uses=2]
  50         %tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0          ; <float> [#uses=1]
  51         %tmp7 = extractelement <4 x float> %tmp, i32 0          ; <float> [#uses=1]
  52         %tmp8 = extractelement <4 x float> %tmp3, i32 1         ; <float> [#uses=1]
  53         %tmp9 = extractelement <4 x float> %tmp, i32 1          ; <float> [#uses=1]
  54         %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0             ; <<4 x float>> [#uses=1]
  55         %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1           ; <<4 x float>> [#uses=1]
  56         %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2           ; <<4 x float>> [#uses=1]
  57         %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3           ; <<4 x float>> [#uses=1]
  58         store <4 x float> %tmp13, <4 x float>* %res
  59         ret void
  60 }
  61
  62 define void @test4(<4 x float> %X, <4 x float>* %res) nounwind {
     ; Shuffles %X against an undef second operand with mask <2, 6, 3, 7> and
     ; stores the result to %res. Indices 6 and 7 refer to the undef operand,
     ; so only result lanes 0 and 2 (X[2] and X[3]) are defined.
     ; The expected assembly is one in-place shufps with selector [2,1,3,3].
  63 ; CHECK-LABEL: test4:
  64 ; CHECK:       ## BB#0:
  65 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
  66 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,1,3,3]
  67 ; CHECK-NEXT:    movaps %xmm0, (%eax)
  68 ; CHECK-NEXT:    retl
  69         %tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >               ; <<4 x float>> [#uses=1]
  70         store <4 x float> %tmp5, <4 x float>* %res
  71         ret void
  72 }
  73
  74 define <4 x i32> @test5(i8** %ptr) nounwind {
     ; Loads a float through the pointer stored at %ptr, places it in lane 0 of
     ; a vector whose other lanes are 0.0, then widens the data in two steps:
     ;   1. as <16 x i8>: interleave its low 8 bytes with zero bytes;
     ;   2. as <8 x i16>: interleave the low 4 words of a zero vector with the
     ;      low 4 words of step 1 (zero vector is the first operand).
     ; The result is returned as <4 x i32>. The expected assembly is a movss
     ; load, a pxor to materialise zero, then punpcklbw and punpcklwd.
  75 ; CHECK-LABEL: test5:
  76 ; CHECK:       ## BB#0:
  77 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
  78 ; CHECK-NEXT:    movl (%eax), %eax
  79 ; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
  80 ; CHECK-NEXT:    pxor %xmm0, %xmm0
  81 ; CHECK-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
  82 ; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
  83 ; CHECK-NEXT:    retl
  84         %tmp = load i8*, i8** %ptr              ; <i8*> [#uses=1]
  85         %tmp.upgrd.1 = bitcast i8* %tmp to float*               ; <float*> [#uses=1]
  86         %tmp.upgrd.2 = load float, float* %tmp.upgrd.1          ; <float> [#uses=1]
  87         %tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0               ; <<4 x float>> [#uses=1]
  88         %tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1               ; <<4 x float>> [#uses=1]
  89         %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2             ; <<4 x float>> [#uses=1]
  90         %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3            ; <<4 x float>> [#uses=1]
  91         %tmp21 = bitcast <4 x float> %tmp11 to <16 x i8>                ; <<16 x i8>> [#uses=1]
  92         %tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 >               ; <<16 x i8>> [#uses=1]
  93         %tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16>          ; <<8 x i16>> [#uses=1]
  94         %tmp.upgrd.4 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 >                ; <<8 x i16>> [#uses=1]
  95         %tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32>            ; <<4 x i32>> [#uses=1]
  96         ret <4 x i32> %tmp36
  97 }
  98
  99 define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind {
     ; Shuffles the vector loaded from %A against undef with mask <0, 5, 6, 7>
     ; and stores it to %res. Only lane 0 is taken from the loaded value; the
     ; other three indices refer to the undef operand.
     ; The expected assembly contains no shuffle instruction at all: just an
     ; aligned load followed by an aligned store.
 100 ; CHECK-LABEL: test6:
 101 ; CHECK:       ## BB#0:
 102 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 103 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 104 ; CHECK-NEXT:    movaps (%ecx), %xmm0
 105 ; CHECK-NEXT:    movaps %xmm0, (%eax)
 106 ; CHECK-NEXT:    retl
 107   %tmp1 = load <4 x float>, <4 x float>* %A            ; <<4 x float>> [#uses=1]
 108   %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >          ; <<4 x float>> [#uses=1]
 109   store <4 x float> %tmp2, <4 x float>* %res
 110   ret void
 111 }
 112
 113 define void @test7() nounwind {
     ; Splat of lane 0 of an all-zero vector (shuffle mask zeroinitializer),
     ; stored to the null pointer. The two instructions below have no explicit
     ; result names, so they are the implicitly numbered values %1 and %2.
     ; The expected assembly materialises zero with xorps and stores it to
     ; absolute address 0.
 114 ; CHECK-LABEL: test7:
 115 ; CHECK:       ## BB#0:
 116 ; CHECK-NEXT:    xorps %xmm0, %xmm0
 117 ; CHECK-NEXT:    movaps %xmm0, 0
 118 ; CHECK-NEXT:    retl
 119   bitcast <4 x i32> zeroinitializer to <4 x float>                ; <<4 x float>>:1 [#uses=1]
 120   shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer         ; <<4 x float>>:2 [#uses=1]
 121   store <4 x float> %2, <4 x float>* null
 122   ret void
 123 }
 124
 125 @x = external global [4 x i32]    ; declaration only (no initializer); its four elements are read by @test8
 126
 127 define <2 x i64> @test8() nounwind {
     ; Loads the four i32 elements of @x with separate scalar loads, assembles
     ; them in order into a <4 x i32> and returns it reinterpreted as <2 x i64>.
     ; The expected assembly fetches the address of @x through its non-lazy
     ; pointer and reads all four elements with a single unaligned movups.
 128 ; CHECK-LABEL: test8:
 129 ; CHECK:       ## BB#0:
 130 ; CHECK-NEXT:    movl L_x$non_lazy_ptr, %eax
 131 ; CHECK-NEXT:    movups (%eax), %xmm0
 132 ; CHECK-NEXT:    retl
 133         %tmp = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @x, i32 0, i32 0)            ; <i32> [#uses=1]
 134         %tmp3 = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @x, i32 0, i32 1)           ; <i32> [#uses=1]
 135         %tmp5 = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @x, i32 0, i32 2)           ; <i32> [#uses=1]
 136         %tmp7 = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @x, i32 0, i32 3)           ; <i32> [#uses=1]
 137         %tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0           ; <<4 x i32>> [#uses=1]
 138         %tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1         ; <<4 x i32>> [#uses=1]
 139         %tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2               ; <<4 x i32>> [#uses=1]
 140         %tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3               ; <<4 x i32>> [#uses=1]
 141         %tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64>          ; <<2 x i64>> [#uses=1]
 142         ret <2 x i64> %tmp16
 143 }
 144
 145 define <4 x float> @test9(i32 %dummy, float %a, float %b, float %c, float %d) nounwind {
     ; Builds <a, b, c, d> from four scalar float arguments that follow an
     ; unused leading i32 argument (%dummy is never referenced in the body).
     ; The expected assembly reads all four floats from the stack with one
     ; unaligned movups.
 146 ; CHECK-LABEL: test9:
 147 ; CHECK:       ## BB#0:
 148 ; CHECK-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
 149 ; CHECK-NEXT:    retl
 150         %tmp = insertelement <4 x float> undef, float %a, i32 0         ; <<4 x float>> [#uses=1]
 151         %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1                ; <<4 x float>> [#uses=1]
 152         %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2              ; <<4 x float>> [#uses=1]
 153         %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3              ; <<4 x float>> [#uses=1]
 154         ret <4 x float> %tmp13
 155 }
 156
 157 define <4 x float> @test10(float %a, float %b, float %c, float %d) nounwind {
     ; Builds <a, b, c, d> from four scalar float arguments, with no leading
     ; integer argument. The expected assembly reads all four floats from the
     ; stack with one movaps, i.e. the aligned form of the load.
     ; NOTE(review): presumably the aligned form is legal here because the
     ; first argument slot is 16-byte aligned on this target - confirm against
     ; the Darwin i386 calling convention.
 158 ; CHECK-LABEL: test10:
 159 ; CHECK:       ## BB#0:
 160 ; CHECK-NEXT:    movaps {{[0-9]+}}(%esp), %xmm0
 161 ; CHECK-NEXT:    retl
 162         %tmp = insertelement <4 x float> undef, float %a, i32 0         ; <<4 x float>> [#uses=1]
 163         %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1                ; <<4 x float>> [#uses=1]
 164         %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2              ; <<4 x float>> [#uses=1]
 165         %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3              ; <<4 x float>> [#uses=1]
 166         ret <4 x float> %tmp13
 167 }
 168
 169 define <2 x double> @test11(double %a, double %b) nounwind {
     ; Builds <a, b> from two scalar double arguments. The expected assembly
     ; reads both doubles from the stack with a single movaps.
 170 ; CHECK-LABEL: test11:
 171 ; CHECK:       ## BB#0:
 172 ; CHECK-NEXT:    movaps {{[0-9]+}}(%esp), %xmm0
 173 ; CHECK-NEXT:    retl
 174         %tmp = insertelement <2 x double> undef, double %a, i32 0               ; <<2 x double>> [#uses=1]
 175         %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1               ; <<2 x double>> [#uses=1]
 176         ret <2 x double> %tmp7
 177 }
 178
 179 define void @test12() nounwind {
     ; Loads a <4 x float> V from the null pointer and forms two shuffles of it:
     ;   %tmp2 = <V[0], V[1], 1.0, 1.0>   (mask <0, 1, 6, 7> against splat 1.0)
     ;   %tmp3 = <V[2], V[3], 0.0, 0.0>   (mask <2, 3, 6, 7> against zero)
     ; then adds them and stores the sum back to null.
     ; The expected assembly moves 64-bit halves for both shuffles (movsd and
     ; unpckhpd) and performs the addition with addps.
 180 ; CHECK-LABEL: test12:
 181 ; CHECK:       ## BB#0:
 182 ; CHECK-NEXT:    movapd 0, %xmm0
 183 ; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
 184 ; CHECK-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
 185 ; CHECK-NEXT:    xorpd %xmm2, %xmm2
 186 ; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
 187 ; CHECK-NEXT:    addps %xmm1, %xmm0
 188 ; CHECK-NEXT:    movaps %xmm0, 0
 189 ; CHECK-NEXT:    retl
 190   %tmp1 = load <4 x float>, <4 x float>* null          ; <<4 x float>> [#uses=2]
 191   %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 >             ; <<4 x float>> [#uses=1]
 192   %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >                ; <<4 x float>> [#uses=1]
 193   %tmp4 = fadd <4 x float> %tmp2, %tmp3            ; <<4 x float>> [#uses=1]
 194   store <4 x float> %tmp4, <4 x float>* null
 195   ret void
 196 }
 197
 198 define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
     ; Shuffles the vectors loaded from %B and %C with mask <1, 4, 1, 5>, giving
     ; <B[1], C[0], B[1], C[1]>, and stores the result to %res. %A is not
     ; referenced by the body.
     ; The expected assembly needs two shufps instructions: one combining the
     ; register with a memory operand, one reordering the result in place.
 199 ; CHECK-LABEL: test13:
 200 ; CHECK:       ## BB#0:
 201 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 202 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 203 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
 204 ; CHECK-NEXT:    movaps (%edx), %xmm0
 205 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],mem[0,1]
 206 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
 207 ; CHECK-NEXT:    movaps %xmm0, (%eax)
 208 ; CHECK-NEXT:    retl
 209   %tmp3 = load <4 x float>, <4 x float>* %B            ; <<4 x float>> [#uses=1]
 210   %tmp5 = load <4 x float>, <4 x float>* %C            ; <<4 x float>> [#uses=1]
 211   %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 >         ; <<4 x float>> [#uses=1]
 212   store <4 x float> %tmp11, <4 x float>* %res
 213   ret void
 214 }
 215
 216 define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
     ; Computes both x + y and x - y on the loaded vectors and returns the low
     ; two lanes of the sum followed by the low two lanes of the difference
     ; (shuffle mask <0, 1, 4, 5>).
     ; The expected assembly joins the two low 64-bit halves with a single
     ; unpcklpd after the addps / subps pair.
 217 ; CHECK-LABEL: test14:
 218 ; CHECK:       ## BB#0:
 219 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 220 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 221 ; CHECK-NEXT:    movaps (%ecx), %xmm1
 222 ; CHECK-NEXT:    movaps (%eax), %xmm2
 223 ; CHECK-NEXT:    movaps %xmm2, %xmm0
 224 ; CHECK-NEXT:    addps %xmm1, %xmm0
 225 ; CHECK-NEXT:    subps %xmm1, %xmm2
 226 ; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 227 ; CHECK-NEXT:    retl
 228   %tmp = load <4 x float>, <4 x float>* %y             ; <<4 x float>> [#uses=2]
 229   %tmp5 = load <4 x float>, <4 x float>* %x            ; <<4 x float>> [#uses=2]
 230   %tmp9 = fadd <4 x float> %tmp5, %tmp             ; <<4 x float>> [#uses=1]
 231   %tmp21 = fsub <4 x float> %tmp5, %tmp            ; <<4 x float>> [#uses=1]
 232   %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >                ; <<4 x float>> [#uses=1]
 233   ret <4 x float> %tmp27
 234 }
 235
 236 define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind {
     ; Returns the high two lanes of the vector at %x followed by the high two
     ; lanes of the vector at %y (shuffle mask <2, 3, 6, 7>).
     ; The expected assembly treats each pair of floats as one 64-bit element
     ; and uses a single unpckhpd with a memory operand.
 237 ; CHECK-LABEL: test15:
 238 ; CHECK:       ## BB#0: ## %entry
 239 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 240 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 241 ; CHECK-NEXT:    movapd (%ecx), %xmm0
 242 ; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
 243 ; CHECK-NEXT:    retl
 244 entry:
 245   %tmp = load <4 x float>, <4 x float>* %y             ; <<4 x float>> [#uses=1]
 246   %tmp3 = load <4 x float>, <4 x float>* %x            ; <<4 x float>> [#uses=1]
 247   %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >           ; <<4 x float>> [#uses=1]
 248   ret <4 x float> %tmp4
 249 }
 250
 251 ; PR8900
 252
 253 define  <2 x double> @test16(<4 x double> * nocapture %srcA, <2 x double>* nocapture %dst) {
     ; Loads the <4 x double> at index 3 from %srcA (32-byte aligned) and
     ; returns its elements 0 and 2 as a <2 x double> (shuffle mask <0, 2>).
     ; %dst is not referenced by the body.
     ; The 256-bit source is wider than an SSE register; the expected assembly
     ; loads one 128-bit half at byte offset 96 and merges element 0 of the
     ; other half straight from memory with unpcklpd.
 254 ; CHECK-LABEL: test16:
 255 ; CHECK:       ## BB#0:
 256 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 257 ; CHECK-NEXT:    movapd 96(%eax), %xmm0
 258 ; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
 259 ; CHECK-NEXT:    retl
 260   %i5 = getelementptr inbounds <4 x double>, <4 x double>* %srcA, i32 3
 261   %i6 = load <4 x double>, <4 x double>* %i5, align 32
 262   %i7 = shufflevector <4 x double> %i6, <4 x double> undef, <2 x i32> <i32 0, i32 2>
 263   ret <2 x double> %i7
 264 }
 265
 266 ; PR9009
 267 define fastcc void @test17() nounwind {
     ; Shuffle whose inputs are almost entirely undef: lanes 0 and 1 come from
     ; %0 (an undef vector with undef inserted at lane 1) and lanes 2 and 3
     ; come from the constant operand, both 32768. The result is bitcast to
     ; <4 x float> and stored through an undef pointer.
     ; The expected assembly folds the whole shuffle into one constant load,
     ; <u,u,32768,32768>, followed by a store.
 268 ; CHECK-LABEL: test17:
 269 ; CHECK:       ## BB#0: ## %entry
 270 ; CHECK-NEXT:    movaps {{.*#+}} xmm0 = <u,u,32768,32768>
 271 ; CHECK-NEXT:    movaps %xmm0, (%eax)
 272 ; CHECK-NEXT:    retl
 273 entry:
 274   %0 = insertelement <4 x i32> undef, i32 undef, i32 1
 275   %1 = shufflevector <4 x i32> <i32 undef, i32 undef, i32 32768, i32 32768>, <4 x i32> %0, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 276   %2 = bitcast <4 x i32> %1 to <4 x float>
 277   store <4 x float> %2, <4 x float> * undef
 278   ret void
 279 }
 280
 281 ; PR9210
 282 define <4 x float> @f(<4 x double>) nounwind {
     ; Truncates a <4 x double> to <4 x float>. The parameter is unnamed, so it
     ; is referred to as %0 in the body.
     ; The expected assembly converts each 128-bit half with cvtpd2ps and joins
     ; the two results with unpcklpd.
 283 ; CHECK-LABEL: f:
 284 ; CHECK:       ## BB#0: ## %entry
 285 ; CHECK-NEXT:    cvtpd2ps %xmm1, %xmm1
 286 ; CHECK-NEXT:    cvtpd2ps %xmm0, %xmm0
 287 ; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 288 ; CHECK-NEXT:    retl
 289 entry:
 290  %double2float.i = fptrunc <4 x double> %0 to <4 x float>
 291  ret <4 x float> %double2float.i
 292 }
 293
 294 define <2 x i64> @test_insert_64_zext(<2 x i64> %i) {
     ; Keeps lane 0 of %i and replaces lane 1 with the constant 0 (shuffle mask
     ; <0, 2>, where index 2 is lane 0 of the constant operand <0, undef>).
     ; The expected assembly is a single register-to-register movq, which
     ; leaves the upper 64 bits zero.
 295 ; CHECK-LABEL: test_insert_64_zext:
 296 ; CHECK:       ## BB#0:
 297 ; CHECK-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
 298 ; CHECK-NEXT:    retl
 299   %1 = shufflevector <2 x i64> %i, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 0, i32 2>
 300   ret <2 x i64> %1
 301 }
 302
 303 define <4 x i32> @PR19721(<4 x i32> %i) {
     ; Reinterprets the vector as one i128, clears its low 32 bits
     ; (-4294967296 is -(2^32): every bit set except the lowest 32) and
     ; reinterprets the result as <4 x i32> again.
     ; The expected assembly keeps the value in the vector register and applies
     ; the mask with one andps against a constant-pool entry.
 304 ; CHECK-LABEL: PR19721:
 305 ; CHECK:       ## BB#0:
 306 ; CHECK-NEXT:    andps LCPI19_0, %xmm0
 307 ; CHECK-NEXT:    retl
 308   %bc = bitcast <4 x i32> %i to i128
 309   %insert = and i128 %bc, -4294967296
 310   %bc2 = bitcast i128 %insert to <4 x i32>
 311   ret <4 x i32> %bc2
 312 }
 313
 314 define <4 x i32> @test_mul(<4 x i32> %x, <4 x i32> %y) {
     ; Element-wise multiply of two <4 x i32> vectors.
     ; The expected assembly has no single 32-bit vector multiply: it runs
     ; pmuludq twice (once on the operands as given, once on copies with the
     ; odd lanes moved down by pshufd [1,1,3,3]), extracts the low 32 bits of
     ; each product with pshufd [0,2,2,3] and re-interleaves them with
     ; punpckldq.
 315 ; CHECK-LABEL: test_mul:
 316 ; CHECK:       ## BB#0:
 317 ; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
 318 ; CHECK-NEXT:    pmuludq %xmm1, %xmm0
 319 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 320 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
 321 ; CHECK-NEXT:    pmuludq %xmm2, %xmm1
 322 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 323 ; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 324 ; CHECK-NEXT:    retl
 325   %m = mul <4 x i32> %x, %y
 326   ret <4 x i32> %m
 327 }