[AVX512] Enabling bit logic lowering

[oota-llvm.git] / test / CodeGen / X86 / avx-splat.ll
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll

index 36d469417f93edde2987a1072b17d5337e3d0f6a..98c1645b9080134cf0ab137989abef6766f194ed 100644 (file)
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -1,30 +1,24 @@
  ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
  
-; FIXME: use avx versions for punpcklbw, punpckhbw and punpckhwd
  
-; CHECK: vextractf128 $0
-; CHECK-NEXT: punpcklbw
-; CHECK-NEXT: punpckhbw
+; CHECK: vpshufb {{.*}} ## xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
  ; CHECK-NEXT: vinsertf128 $1
-; CHECK-NEXT: vpermilps $85
  define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
  entry:
    %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    ret <32 x i8> %shuffle
  }
  
-; CHECK: vextractf128 $0
-; CHECK-NEXT: punpckhwd
+; CHECK: vpshufb {{.*}} ## xmm0 = xmm0[10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11]
  ; CHECK-NEXT: vinsertf128 $1
-; CHECK-NEXT: vpermilps $85
  define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
  entry:
    %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    ret <16 x i16> %shuffle
  }
  
-; CHECK: vmovd
-; CHECK-NEXT: movlhps
+; CHECK: vmovq
+; CHECK-NEXT: vunpcklpd %xmm
  ; CHECK-NEXT: vinsertf128 $1
  define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
  entry:
@@ -35,7 +29,7 @@ entry:
    ret <4 x i64> %vecinit6.i
  }
  
-; CHECK: vshufpd
+; CHECK: vunpcklpd %xmm
  ; CHECK-NEXT: vinsertf128 $1
  define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
  entry:
@@ -46,13 +40,11 @@ entry:
    ret <4 x double> %vecinit6.i
  }
  
-; Test this simple opt:
+; Test that this turns into a broadcast:
  ;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
-; To:
-;   shuffle (vload ptr)), undef, <1, 1, 1, 1>
-; CHECK: vmovaps
-; CHECK-NEXT: vpextrd
-define void @funcE() nounwind {
+;
+; CHECK: vbroadcastss
+define <8 x float> @funcE() nounwind {
  allocas:
    %udx495 = alloca [18 x [18 x float]], align 32
    br label %for_test505.preheader
@@ -74,6 +66,32 @@ load.i1247:                                       ; preds = %for_exit499
  
  __load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
    %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
-  ret void
+  ret <8 x float> %load_broadcast12281250
+}
+
+; CHECK: vpermilps $4
+; CHECK-NEXT: vinsertf128 $1
+define <8 x float> @funcF(i32 %val) nounwind {
+  %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
+  %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
+  %tmp = bitcast <8 x i32> %ret7 to <8 x float>
+  ret <8 x float> %tmp
+}
+
+; CHECK: vpermilps $0
+; CHECK-NEXT: vinsertf128  $1
+define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x float> %shuffle
+}
+
+; CHECK: vextractf128  $1
+; CHECK-NEXT: vpermilps $85
+; CHECK-NEXT: vinsertf128  $1
+define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  ret <8 x float> %shuffle
  }