X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=test%2FCodeGen%2FX86%2Fxop-intrinsics-x86_64.ll;h=e96fed36d15c78d669e99dd2551e26ae641a46c9;hp=e154e4a159b5b354b34a63ebe9c0cd7abc08b291;hb=db0def4a5cb14629257780d48b3c1ac28768f9b1;hpb=f3455f13a2c5b85c48aff0923028858ec9f6aa50 diff --git a/test/CodeGen/X86/xop-intrinsics-x86_64.ll b/test/CodeGen/X86/xop-intrinsics-x86_64.ll index e154e4a159b..e96fed36d15 100644 --- a/test/CodeGen/X86/xop-intrinsics-x86_64.ll +++ b/test/CodeGen/X86/xop-intrinsics-x86_64.ll @@ -8,14 +8,14 @@ define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double> define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x double> %a2) { ; CHECK-NOT: vmovaps ; CHECK: vpermil2pd - %vec = load <2 x double>* %a1 + %vec = load <2 x double>, <2 x double>* %a1 %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x double> %a2, i8 1) ; [#uses=1] ret <2 x double> %res } define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double>* %a2) { ; CHECK-NOT: vmovaps ; CHECK: vpermil2pd - %vec = load <2 x double>* %a2 + %vec = load <2 x double>, <2 x double>* %a2 %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %vec, i8 1) ; [#uses=1] ret <2 x double> %res } @@ -31,7 +31,7 @@ define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, <4 x d ; CHECK-NOT: vmovaps ; CHECK: vpermil2pd ; CHECK: ymm - %vec = load <4 x double>* %a1 + %vec = load <4 x double>, <4 x double>* %a1 %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x double> %a2, i8 2) ; ret <4 x double> %res } @@ -39,7 +39,7 @@ define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x d ; CHECK-NOT: vmovaps ; CHECK: vpermil2pd ; CHECK: ymm - %vec = load <4 x double>* %a2 + %vec = load <4 x double>, <4 x double>* %a2 %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %vec, i8 2) ; ret <4 x double> %res } @@ -77,7 +77,7 @@ define <4 x i64> @test_int_x86_xop_vpcmov_256_mr(<4 x i64> %a0, <4 x i64>* %a1, ; CHECK-NOT: vmovaps ; CHECK: vpcmov ; CHECK: ymm - %vec = load <4 x i64>* %a1 + %vec = load <4 x i64>, <4 x i64>* %a1 %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %vec, <4 x i64> %a2) ; ret <4 x i64> %res } @@ -85,7 +85,7 @@ define <4 x i64> @test_int_x86_xop_vpcmov_256_rm(<4 x i64> %a0, <4 x i64> %a1, < ; CHECK-NOT: vmovaps ; CHECK: vpcmov ; CHECK: ymm - %vec = load <4 x i64>* %a2 + %vec = load <4 x i64>, <4 x i64>* %a2 %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %vec) ; ret <4 x i64> %res } @@ -99,7 +99,7 @@ define <16 x i8> @test_int_x86_xop_vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) { define <16 x i8> @test_int_x86_xop_vpcomeqb_mem(<16 x i8> %a0, <16 x i8>* %a1) { ; CHECK-NOT: vmovaps ; CHECK:vpcomeqb - %vec = load <16 x i8>* %a1 + %vec = load <16 x i8>, <16 x i8>* %a1 %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %vec) ; ret <16 x i8> %res } @@ -645,7 +645,7 @@ define <2 x i64> @test_int_x86_xop_vphsubdq(<4 x i32> %a0) { define <2 x i64> @test_int_x86_xop_vphsubdq_mem(<4 x i32>* %a0) { ; CHECK-NOT: vmovaps ; CHECK: vphsubdq - %vec = load <4 x i32>* %a0 + %vec = load <4 x i32>, <4 x i32>* %a0 %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %vec) ; ret <2 x i64> %res } @@ -659,7 +659,7 @@ define <4 x i32> @test_int_x86_xop_vphsubwd(<8 x i16> %a0) { define <4 x i32> @test_int_x86_xop_vphsubwd_mem(<8 x i16>* %a0) { ; CHECK-NOT: vmovaps ; CHECK: vphsubwd - %vec = load <8 x i16>* %a0 + %vec = load <8 x i16>, <8 x i16>* %a0 %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %vec) ; ret <4 x i32> %res } @@ -750,7 +750,7 @@ define <4 x i32> @test_int_x86_xop_vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x define <4 x i32> @test_int_x86_xop_vpmadcswd_mem(<8 x i16> %a0, <8 x i16>* %a1, <4 x i32> %a2) { ; CHECK-NOT: vmovaps ; CHECK: vpmadcswd - %vec = load <8 x i16>* %a1 + %vec = load <8 x i16>, <8 x i16>* %a1 %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %vec, <4 x i32> %a2) ; ret <4 x i32> %res } @@ -764,14 +764,14 @@ define <16 x i8> @test_int_x86_xop_vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8 define <16 x i8> @test_int_x86_xop_vpperm_rm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %a2) { ; CHECK-NOT: vmovaps ; CHECK: vpperm - %vec = load <16 x i8>* %a2 + %vec = load <16 x i8>, <16 x i8>* %a2 %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %vec) ; ret <16 x i8> %res } define <16 x i8> @test_int_x86_xop_vpperm_mr(<16 x i8> %a0, <16 x i8>* %a1, <16 x i8> %a2) { ; CHECK-NOT: vmovaps ; CHECK: vpperm - %vec = load <16 x i8>* %a1 + %vec = load <16 x i8>, <16 x i8>* %a1 %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %vec, <16 x i8> %a2) ; ret <16 x i8> %res } @@ -805,6 +805,34 @@ define <8 x i16> @test_int_x86_xop_vprotw(<8 x i16> %a0, <8 x i16> %a1) { } declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone +define <16 x i8> @test_int_x86_xop_vprotbi(<16 x i8> %a0) { + ; CHECK: vprotb + %res = call <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8> %a0, i8 1) ; + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8>, i8) nounwind readnone + +define <4 x i32> @test_int_x86_xop_vprotdi(<4 x i32> %a0) { + ; CHECK: vprotd + %res = call <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32> %a0, i8 -2) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32>, i8) nounwind readnone + +define <2 x i64> @test_int_x86_xop_vprotqi(<2 x i64> %a0) { + ; CHECK: vprotq + %res = call <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64> %a0, i8 3) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64>, i8) nounwind readnone + +define <8 x i16> @test_int_x86_xop_vprotwi(<8 x i16> %a0) { + ; CHECK: vprotw + %res = call <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16> %a0, i8 -4) ; + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16>, i8) nounwind readnone + define <16 x i8> @test_int_x86_xop_vpshab(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK: vpshab %res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %a0, <16 x i8> %a1) ; @@ -862,14 +890,14 @@ define <8 x i16> @test_int_x86_xop_vpshlw(<8 x i16> %a0, <8 x i16> %a1) { define <8 x i16> @test_int_x86_xop_vpshlw_rm(<8 x i16> %a0, <8 x i16>* %a1) { ; CHECK-NOT: vmovaps ; CHECK: vpshlw - %vec = load <8 x i16>* %a1 + %vec = load <8 x i16>, <8 x i16>* %a1 %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %vec) ; ret <8 x i16> %res } define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) { ; CHECK-NOT: vmovaps ; CHECK: vpshlw - %vec = load <8 x i16>* %a0 + %vec = load <8 x i16>, <8 x i16>* %a0 %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %vec, <8 x i16> %a1) ; ret <8 x i16> %res } @@ -884,7 +912,7 @@ define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0) { define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(float* %a0) { ; CHECK-NOT: mov ; CHECK: vfrczss - %elem = load float* %a0 + %elem = load float, float* %a0 %vec = insertelement <4 x float> undef, float %elem, i32 0 %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %vec) ; ret <4 x float> %res @@ -900,7 +928,7 @@ define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0) { define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(double* %a0) { ; CHECK-NOT: mov ; CHECK: vfrczsd - %elem = load double* %a0 + %elem = load double, double* %a0 %vec = insertelement <2 x double> undef, double %elem, i32 0 %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %vec) ; ret <2 x double> %res @@ -915,7 +943,7 @@ define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) { define <2 x double> @test_int_x86_xop_vfrcz_pd_mem(<2 x double>* %a0) { ; CHECK-NOT: vmovaps ; CHECK: vfrczpd - %vec = load <2 x double>* %a0 + %vec = load <2 x double>, <2 x double>* %a0 %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %vec) ; ret <2 x double> %res } @@ -931,7 +959,7 @@ define <4 x double> @test_int_x86_xop_vfrcz_pd_256_mem(<4 x double>* %a0) { ; CHECK-NOT: vmovaps ; CHECK: vfrczpd ; CHECK: ymm - %vec = load <4 x double>* %a0 + %vec = load <4 x double>, <4 x double>* %a0 %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %vec) ; ret <4 x double> %res } @@ -945,7 +973,7 @@ define <4 x float> @test_int_x86_xop_vfrcz_ps(<4 x float> %a0) { define <4 x float> @test_int_x86_xop_vfrcz_ps_mem(<4 x float>* %a0) { ; CHECK-NOT: vmovaps ; CHECK: vfrczps - %vec = load <4 x float>* %a0 + %vec = load <4 x float>, <4 x float>* %a0 %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %vec) ; ret <4 x float> %res } @@ -961,7 +989,7 @@ define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(<8 x float>* %a0) { ; CHECK-NOT: vmovaps ; CHECK: vfrczps ; CHECK: ymm - %vec = load <8 x float>* %a0 + %vec = load <8 x float>, <8 x float>* %a0 %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %vec) ; ret <8 x float> %res }