test/CodeGen/X86/vec_shuffle-39.ll

   1 ; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn | FileCheck %s
   2 ; rdar://10050222, rdar://10134392
   3
   4 define <4 x float> @t1(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
   5 entry:
     ; Overwrites the two low float lanes of %a with 64 bits loaded from %p and
     ; keeps lanes 2 and 3 of %a. The expected code is a single movlps that
     ; reads (%rdi) straight into %xmm0, followed by ret.
   6 ; CHECK-LABEL: t1:
   7 ; CHECK: movlps (%rdi), %xmm0
   8 ; CHECK: ret
       ; Load the <1 x i64> payload; "align 1" means no alignment is assumed.
   9   %p.val = load <1 x i64>* %p, align 1
       ; Reinterpret the same 64 bits as two floats.
  10   %0 = bitcast <1 x i64> %p.val to <2 x float>
       ; Widen to four lanes; lanes 2 and 3 of the result are undef.
  11   %shuffle.i = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
       ; Mask <4, 5, 2, 3>: lanes 0-1 come from %shuffle.i, lanes 2-3 from %a.
  12   %shuffle1.i = shufflevector <4 x float> %a, <4 x float> %shuffle.i, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  13   ret <4 x float> %shuffle1.i
  14 }
  15
  16 define <4 x float> @t1a(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
  17 entry:
     ; Variant of the low-half replacement that goes through a scalar double
     ; load instead of a <1 x i64> vector load. The expected code is still a
     ; single movlps from (%rdi) into %xmm0, followed by ret.
  18 ; CHECK-LABEL: t1a:
  19 ; CHECK: movlps (%rdi), %xmm0
  20 ; CHECK: ret
       ; View the pointer as double* and load one double (no explicit alignment).
  21   %0 = bitcast <1 x i64>* %p to double*
  22   %1 = load double* %0
       ; Place the double in lane 0 of a <2 x double>; lane 1 stays undef.
  23   %2 = insertelement <2 x double> undef, double %1, i32 0
       ; Reinterpret as four floats so it can be shuffled with %a.
  24   %3 = bitcast <2 x double> %2 to <4 x float>
       ; Mask <4, 5, 2, 3>: lanes 0-1 come from %3, lanes 2-3 from %a.
  25   %4 = shufflevector <4 x float> %a, <4 x float> %3, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  26   ret <4 x float> %4
  27 }
  28
  29 define void @t2(<1 x i64>* nocapture %p, <4 x float> %a) nounwind {
  30 entry:
     ; Stores the low 64 bits of %a to the memory pointed to by %p, extracting
     ; them as an i64. The expected code is a single movlps from %xmm0 to
     ; (%rdi), followed by ret.
  31 ; CHECK-LABEL: t2:
  32 ; CHECK: movlps %xmm0, (%rdi)
  33 ; CHECK: ret
       ; Reinterpret the four floats as two i64 lanes and take lane 0.
  34   %cast.i = bitcast <4 x float> %a to <2 x i64>
  35   %extract.i = extractelement <2 x i64> %cast.i, i32 0
       ; Address of element 0 of the <1 x i64> at %p, typed as i64*.
  36   %0 = getelementptr inbounds <1 x i64>* %p, i64 0, i64 0
  37   store i64 %extract.i, i64* %0, align 8
  38   ret void
  39 }
  40
  41 define void @t2a(<1 x i64>* nocapture %p, <4 x float> %a) nounwind {
  42 entry:
     ; Variant of the low-half store that extracts the value as a double
     ; instead of an i64. The expected code is still a single movlps from
     ; %xmm0 to (%rdi), followed by ret.
  43 ; CHECK-LABEL: t2a:
  44 ; CHECK: movlps %xmm0, (%rdi)
  45 ; CHECK: ret
       ; View the destination as double*.
  46   %0 = bitcast <1 x i64>* %p to double*
       ; Reinterpret the four floats as two doubles and take lane 0.
  47   %1 = bitcast <4 x float> %a to <2 x double>
  48   %2 = extractelement <2 x double> %1, i32 0
       ; Store the double (no explicit alignment given).
  49   store double %2, double* %0
  50   ret void
  51 }
  52
  53 ; rdar://10436044
  54 define <2 x double> @t3() nounwind readonly {
  55 bb:
     ; Builds a <2 x double> whose lane 0 is a 64-bit value loaded as <2 x i32>
     ; and whose lane 1 is lane 1 of a 128-bit load. The expected code is a
     ; movq load, a punpcklqdq into %xmm0, and a movsd that merges the movq
     ; result into %xmm0.
     ;
     ; The scratch registers are matched with patterns rather than pinned to
     ; fixed names, so the test does not depend on register allocation; the
     ; capture still requires movsd to read the register that movq wrote.
  56 ; CHECK-LABEL: t3:
  57 ; CHECK: movq (%rax), %[[LOW:xmm[0-9]+]]
  58 ; CHECK: punpcklqdq %{{xmm[0-9]+}}, %xmm0
  59 ; CHECK: movsd %[[LOW]], %xmm0
       ; The load addresses are null and undef; this function is only compiled
       ; by the test, never executed.
  60   %tmp0 = load i128* null, align 1
  61   %tmp1 = load <2 x i32>* undef, align 8
       ; Reinterpret the 128-bit integer as a byte vector.
  62   %tmp2 = bitcast i128 %tmp0 to <16 x i8>
       ; Collapse the <2 x i32> into one i64 and put it in lane 0; lane 1 of
       ; %tmp4 stays undef.
  63   %tmp3 = bitcast <2 x i32> %tmp1 to i64
  64   %tmp4 = insertelement <2 x i64> undef, i64 %tmp3, i32 0
       ; Bring both operands to <2 x double> for the shuffle.
  65   %tmp5 = bitcast <16 x i8> %tmp2 to <2 x double>
  66   %tmp6 = bitcast <2 x i64> %tmp4 to <2 x double>
       ; Mask <2, 1>: lane 0 comes from lane 0 of %tmp6, lane 1 from lane 1 of
       ; %tmp5.
  67   %tmp7 = shufflevector <2 x double> %tmp5, <2 x double> %tmp6, <2 x i32> <i32 2, i32 1>
  68   ret <2 x double> %tmp7
  69 }
  70
  71 ; rdar://10450317
  72 define <2 x i64> @t4() nounwind readonly {
  73 bb:
     ; Integer counterpart of the <2 x double> shuffle test: shuffles a vector
     ; built from a 64-bit load with a 128-bit load, both as <2 x i64>. The
     ; expected code is a movq load into %xmm0, a punpcklqdq into some xmm
     ; register, and a movsd from that same register into %xmm0.
  74 ; CHECK-LABEL: t4:
  75 ; CHECK: movq (%rax), %xmm0
  76 ; CHECK: punpcklqdq %{{xmm.}}, %[[XMM:xmm[0-9]]]
  77 ; CHECK: movsd %[[XMM]], %xmm0
       ; The load addresses are null and undef; this function is only compiled
       ; by the test, never executed.
  78   %tmp0 = load i128* null, align 1
  79   %tmp1 = load <2 x i32>* undef, align 8
       ; Reinterpret the 128-bit integer as a byte vector.
  80   %tmp2 = bitcast i128 %tmp0 to <16 x i8>
       ; Collapse the <2 x i32> into one i64 and put it in lane 0; lane 1 of
       ; %tmp4 is never written and stays undef.
  81   %tmp3 = bitcast <2 x i32> %tmp1 to i64
  82   %tmp4 = insertelement <2 x i64> undef, i64 %tmp3, i32 0
  83   %tmp5 = bitcast <16 x i8> %tmp2 to <2 x i64>
       ; Mask <2, 1>: lane 0 comes from lane 0 of %tmp5, lane 1 from lane 1 of
       ; %tmp4 (the undef lane).
  84   %tmp6 = shufflevector <2 x i64> %tmp4, <2 x i64> %tmp5, <2 x i32> <i32 2, i32 1>
  85   ret <2 x i64> %tmp6
  86 }