From: Craig Topper
Date: Fri, 4 Nov 2011 06:59:49 +0000 (+0000)
Subject: Add intrinsics for X86 vcvtps2ph and vcvtph2ps instructions
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=018262768f8df65f47ab43d8c0731c68d0a5b172;p=oota-llvm.git

Add intrinsics for X86 vcvtps2ph and vcvtph2ps instructions

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143683 91177308-0d34-0410-b5e6-96231b3b80d8
---
Review notes (not part of the commit message):
  * The template parameter lists of the two multiclasses and the template
    arguments of the four defm lines were missing from this copy of the patch
    and have been restored; the arguments mirror the operands of the removed
    VCVTPH2PS*/VCVTPS2PH* definitions and the intrinsics added below.
  * The vcvtps2ph memory form writes to memory, so it is flagged mayStore
    rather than mayLoad.
  * Explanatory comments were added to the new code, so the second hunk's
    new-side line count is 42 and the "index" line for X86InstrSSE.td no
    longer matches the resulting blob.

diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index 09f958c1319..3b11ba56081 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -2013,3 +2013,19 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_wrgsbase_64 : GCCBuiltin<"__builtin_ia32_wrgsbase64">,
               Intrinsic<[], [llvm_i64_ty]>;
 }
+
+//===----------------------------------------------------------------------===//
+// Half float conversion
+
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_vcvtph2ps_128 : GCCBuiltin<"__builtin_ia32_vcvtph2ps">,
+              Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+  def int_x86_vcvtph2ps_256 : GCCBuiltin<"__builtin_ia32_vcvtph2ps256">,
+              Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+  def int_x86_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph">,
+              Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty],
+                        [IntrNoMem]>;
+  def int_x86_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256">,
+              Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty],
+                        [IntrNoMem]>;
+}
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 8f7e27bab57..d2291a246ef 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7365,29 +7365,42 @@ let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
 //===----------------------------------------------------------------------===//
 // Half precision conversion instructions
 //
+// f16c_ph2ps - vcvtph2ps register (rr) and memory (rm) forms.  RC is the
+// destination register class, x86memop the memory operand of the rm form, and
+// Int the intrinsic matched by the rr form.
+multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
 let Predicates = [HasAVX, HasF16C] in {
-  def VCVTPH2PSrm : I<0x13, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
-                     "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
-  def VCVTPH2PSrr : I<0x13, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                     "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
-  def VCVTPH2PSYrm : I<0x13, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
-                     "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
-  def VCVTPH2PSYrr : I<0x13, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
-                     "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
-  def VCVTPS2PHmr : Ii8<0x1D, MRMDestMem, (outs f64mem:$dst),
-                      (ins VR128:$src1, i32i8imm:$src2),
-                      "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
-                      TA, OpSize, VEX;
-  def VCVTPS2PHrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
-                      (ins VR128:$src1, i32i8imm:$src2),
-                      "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
-                      TA, OpSize, VEX;
-  def VCVTPS2PHYmr : Ii8<0x1D, MRMDestMem, (outs f128mem:$dst),
-                      (ins VR256:$src1, i32i8imm:$src2),
-                      "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
-                      TA, OpSize, VEX;
-  def VCVTPS2PHYrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
-                      (ins VR256:$src1, i32i8imm:$src2),
-                      "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
-                      TA, OpSize, VEX;
+  def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
+             "vcvtph2ps\t{$src, $dst|$dst, $src}",
+             [(set RC:$dst, (Int VR128:$src))]>,
+             T8, OpSize, VEX;
+  // Empty pattern list: the memory read is declared by hand via mayLoad.
+  let neverHasSideEffects = 1, mayLoad = 1 in
+  def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+             "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
 }
+}
+
+// f16c_ps2ph - vcvtps2ph register (rr) and memory (mr) forms.  RC is the
+// source register class, x86memop the memory operand of the mr form, and Int
+// the intrinsic matched by the rr form; $src2 is the 8-bit immediate operand.
+multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
+let Predicates = [HasAVX, HasF16C] in {
+  def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
+               (ins RC:$src1, i32i8imm:$src2),
+               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+               [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>,
+               TA, OpSize, VEX;
+  // MRMDestMem form: $dst is the memory operand, so this is a store (mayStore).
+  let neverHasSideEffects = 1, mayStore = 1 in
+  def mr : Ii8<0x1D, MRMDestMem, (outs x86memop:$dst),
+               (ins RC:$src1, i32i8imm:$src2),
+               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+               TA, OpSize, VEX;
+}
+}
+
+defm VCVTPH2PS  : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
+defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>;
+defm VCVTPS2PH  : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
+defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>;