X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=test%2FCodeGen%2FX86%2Fvector-shuffle-256-v32.ll;h=f49b0e187030ff59e221368587ae2abf571d565e;hp=bbbe0cd8dfce71b1acbfab0800708453c806eab2;hb=ed7eb85a5d881d54570854b49fda5abacb13ed6c;hpb=1670bbc481e066c1fc98e9ebb507b6ca12206a60 diff --git a/test/CodeGen/X86/vector-shuffle-256-v32.ll b/test/CodeGen/X86/vector-shuffle-256-v32.ll index bbbe0cd8dfc..f49b0e18703 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -1974,3 +1974,48 @@ define <32 x i8> @shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_ %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle } + +define <32 x i8> @insert_dup_mem_v32i8_i32(i32* %ptr) { +; AVX1-LABEL: insert_dup_mem_v32i8_i32: +; AVX1: # BB#0: +; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: insert_dup_mem_v32i8_i32: +; AVX2: # BB#0: +; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0 +; AVX2-NEXT: retq + %tmp = load i32, i32* %ptr, align 4 + %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 + %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8> + %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <32 x i32> zeroinitializer + ret <32 x i8> %tmp3 +} + +define <32 x i8> @insert_dup_mem_v32i8_sext_i8(i8* %ptr) { +; AVX1-LABEL: insert_dup_mem_v32i8_sext_i8: +; AVX1: # BB#0: +; AVX1-NEXT: movsbl (%rdi), %eax +; AVX1-NEXT: vmovd %eax, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: insert_dup_mem_v32i8_sext_i8: +; AVX2: # BB#0: +; AVX2-NEXT: movsbl (%rdi), %eax +; AVX2-NEXT: vmovd %eax, %xmm0 +; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0 +; AVX2-NEXT: retq + %tmp = load i8, i8* %ptr, align 1 + %tmp1 = sext i8 %tmp to i32 + %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0 + %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8> + %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <32 x i32> zeroinitializer + ret <32 x i8> %tmp4 +}