define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x double> %a2) {
; CHECK-NOT: vmovaps
; CHECK: vpermil2pd
- %vec = load <2 x double>* %a1
+ %vec = load <2 x double>, <2 x double>* %a1
%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x double> %a2, i8 1) ; [#uses=1]
ret <2 x double> %res
}
define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double>* %a2) {
; CHECK-NOT: vmovaps
; CHECK: vpermil2pd
- %vec = load <2 x double>* %a2
+ %vec = load <2 x double>, <2 x double>* %a2
%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %vec, i8 1) ; [#uses=1]
ret <2 x double> %res
}
; CHECK-NOT: vmovaps
; CHECK: vpermil2pd
; CHECK: ymm
- %vec = load <4 x double>* %a1
+ %vec = load <4 x double>, <4 x double>* %a1
%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x double> %a2, i8 2) ;
ret <4 x double> %res
}
; CHECK-NOT: vmovaps
; CHECK: vpermil2pd
; CHECK: ymm
- %vec = load <4 x double>* %a2
+ %vec = load <4 x double>, <4 x double>* %a2
%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %vec, i8 2) ;
ret <4 x double> %res
}
; CHECK-NOT: vmovaps
; CHECK: vpcmov
; CHECK: ymm
- %vec = load <4 x i64>* %a1
+ %vec = load <4 x i64>, <4 x i64>* %a1
%res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %vec, <4 x i64> %a2) ;
ret <4 x i64> %res
}
; CHECK-NOT: vmovaps
; CHECK: vpcmov
; CHECK: ymm
- %vec = load <4 x i64>* %a2
+ %vec = load <4 x i64>, <4 x i64>* %a2
%res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %vec) ;
ret <4 x i64> %res
}
define <16 x i8> @test_int_x86_xop_vpcomeqb_mem(<16 x i8> %a0, <16 x i8>* %a1) {
; CHECK-NOT: vmovaps
; CHECK:vpcomeqb
- %vec = load <16 x i8>* %a1
+ %vec = load <16 x i8>, <16 x i8>* %a1
%res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %vec) ;
ret <16 x i8> %res
}
define <2 x i64> @test_int_x86_xop_vphsubdq_mem(<4 x i32>* %a0) {
; CHECK-NOT: vmovaps
; CHECK: vphsubdq
- %vec = load <4 x i32>* %a0
+ %vec = load <4 x i32>, <4 x i32>* %a0
%res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %vec) ;
ret <2 x i64> %res
}
define <4 x i32> @test_int_x86_xop_vphsubwd_mem(<8 x i16>* %a0) {
; CHECK-NOT: vmovaps
; CHECK: vphsubwd
- %vec = load <8 x i16>* %a0
+ %vec = load <8 x i16>, <8 x i16>* %a0
%res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %vec) ;
ret <4 x i32> %res
}
define <4 x i32> @test_int_x86_xop_vpmadcswd_mem(<8 x i16> %a0, <8 x i16>* %a1, <4 x i32> %a2) {
; CHECK-NOT: vmovaps
; CHECK: vpmadcswd
- %vec = load <8 x i16>* %a1
+ %vec = load <8 x i16>, <8 x i16>* %a1
%res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %vec, <4 x i32> %a2) ;
ret <4 x i32> %res
}
define <16 x i8> @test_int_x86_xop_vpperm_rm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %a2) {
; CHECK-NOT: vmovaps
; CHECK: vpperm
- %vec = load <16 x i8>* %a2
+ %vec = load <16 x i8>, <16 x i8>* %a2
%res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %vec) ;
ret <16 x i8> %res
}
define <16 x i8> @test_int_x86_xop_vpperm_mr(<16 x i8> %a0, <16 x i8>* %a1, <16 x i8> %a2) {
; CHECK-NOT: vmovaps
; CHECK: vpperm
- %vec = load <16 x i8>* %a1
+ %vec = load <16 x i8>, <16 x i8>* %a1
%res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %vec, <16 x i8> %a2) ;
ret <16 x i8> %res
}
}
declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone
+define <16 x i8> @test_int_x86_xop_vprotbi(<16 x i8> %a0) {
+ ; CHECK: vprotb
+ %res = call <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8> %a0, i8 1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8>, i8) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vprotdi(<4 x i32> %a0) {
+ ; CHECK: vprotd
+ %res = call <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32> %a0, i8 -2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32>, i8) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vprotqi(<2 x i64> %a0) {
+ ; CHECK: vprotq
+ %res = call <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64> %a0, i8 3) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64>, i8) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vprotwi(<8 x i16> %a0) {
+ ; CHECK: vprotw
+ %res = call <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16> %a0, i8 -4) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16>, i8) nounwind readnone
+
define <16 x i8> @test_int_x86_xop_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpshab
%res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %a0, <16 x i8> %a1) ;
define <8 x i16> @test_int_x86_xop_vpshlw_rm(<8 x i16> %a0, <8 x i16>* %a1) {
; CHECK-NOT: vmovaps
; CHECK: vpshlw
- %vec = load <8 x i16>* %a1
+ %vec = load <8 x i16>, <8 x i16>* %a1
%res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %vec) ;
ret <8 x i16> %res
}
define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) {
; CHECK-NOT: vmovaps
; CHECK: vpshlw
- %vec = load <8 x i16>* %a0
+ %vec = load <8 x i16>, <8 x i16>* %a0
%res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %vec, <8 x i16> %a1) ;
ret <8 x i16> %res
}
define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(float* %a0) {
; CHECK-NOT: mov
; CHECK: vfrczss
- %elem = load float* %a0
+ %elem = load float, float* %a0
%vec = insertelement <4 x float> undef, float %elem, i32 0
%res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %vec) ;
ret <4 x float> %res
define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(double* %a0) {
; CHECK-NOT: mov
; CHECK: vfrczsd
- %elem = load double* %a0
+ %elem = load double, double* %a0
%vec = insertelement <2 x double> undef, double %elem, i32 0
%res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %vec) ;
ret <2 x double> %res
define <2 x double> @test_int_x86_xop_vfrcz_pd_mem(<2 x double>* %a0) {
; CHECK-NOT: vmovaps
; CHECK: vfrczpd
- %vec = load <2 x double>* %a0
+ %vec = load <2 x double>, <2 x double>* %a0
%res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %vec) ;
ret <2 x double> %res
}
; CHECK-NOT: vmovaps
; CHECK: vfrczpd
; CHECK: ymm
- %vec = load <4 x double>* %a0
+ %vec = load <4 x double>, <4 x double>* %a0
%res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %vec) ;
ret <4 x double> %res
}
define <4 x float> @test_int_x86_xop_vfrcz_ps_mem(<4 x float>* %a0) {
; CHECK-NOT: vmovaps
; CHECK: vfrczps
- %vec = load <4 x float>* %a0
+ %vec = load <4 x float>, <4 x float>* %a0
%res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %vec) ;
ret <4 x float> %res
}
; CHECK-NOT: vmovaps
; CHECK: vfrczps
; CHECK: ymm
- %vec = load <8 x float>* %a0
+ %vec = load <8 x float>, <8 x float>* %a0
%res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %vec) ;
ret <8 x float> %res
}