From da394041c409cb06008e60b9f9f845e845215b03 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 9 Oct 2011 07:31:39 +0000 Subject: [PATCH] Add Ivy Bridge 16-bit floating point conversion instructions for the X86 disassembler. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141505 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86.td | 2 ++ lib/Target/X86/X86InstrInfo.td | 1 + lib/Target/X86/X86InstrSSE.td | 32 +++++++++++++++++- lib/Target/X86/X86Subtarget.cpp | 2 ++ lib/Target/X86/X86Subtarget.h | 8 +++-- test/MC/Disassembler/X86/simple-tests.txt | 40 ++++++++++++++++++----- test/MC/Disassembler/X86/x86-32.txt | 24 ++++++++++++++ 7 files changed, 98 insertions(+), 11 deletions(-) diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index c5c6c4d16a1..b7951199e8a 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -100,6 +100,8 @@ def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true", "Support MOVBE instruction">; def FeatureRDRAND : SubtargetFeature<"rdrand", "HasRDRAND", "true", "Support RDRAND instruction">; +def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true", + "Support 16-bit floating point conversion instructions">; //===----------------------------------------------------------------------===// // X86 processors supported. diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 01f09eeac0c..efae5fbbd42 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -475,6 +475,7 @@ def HasFMA3 : Predicate<"Subtarget->hasFMA3()">; def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">; def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">; +def HasF16C : Predicate<"Subtarget->hasF16C()">; def FPStackf32 : Predicate<"!Subtarget->hasXMM()">; def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 2da02120d1f..f02c4ff29d1 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -6753,9 +6753,39 @@ let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in { // Zero All YMM registers def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall", - [(int_x86_avx_vzeroall)]>, TB, VEX, VEX_L, Requires<[HasAVX]>; + [(int_x86_avx_vzeroall)]>, TB, VEX, VEX_L, Requires<[HasAVX]>; // Zero Upper bits of YMM registers def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper", [(int_x86_avx_vzeroupper)]>, TB, VEX, Requires<[HasAVX]>; } + +//===----------------------------------------------------------------------===// +// Half precision conversion instructions +// +let Predicates = [HasF16C] in { + def VCVTPH2PSrm : I<0x13, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), + "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; + def VCVTPH2PSrr : I<0x13, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; + def VCVTPH2PSYrm : I<0x13, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), + "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; + def VCVTPH2PSYrr : I<0x13, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), + "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; + def VCVTPS2PHmr : Ii8<0x1D, MRMDestMem, (outs f64mem:$dst), + (ins VR128:$src1, i32i8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + TA, OpSize, VEX; + def VCVTPS2PHrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src1, i32i8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + TA, OpSize, VEX; + def VCVTPS2PHYmr : Ii8<0x1D, MRMDestMem, (outs f128mem:$dst), + (ins VR256:$src1, i32i8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + TA, OpSize, VEX; + def VCVTPS2PHYrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst), + (ins VR256:$src1, i32i8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + TA, OpSize, VEX; +} diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 2d24a5ed15a..1069dcb08fc 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -206,6 +206,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { HasMOVBE = IsIntel && ((ECX >> 22) & 0x1); ToggleFeature(X86::FeatureMOVBE); HasPOPCNT = IsIntel && ((ECX >> 23) & 0x1); ToggleFeature(X86::FeaturePOPCNT); HasAES = IsIntel && ((ECX >> 25) & 0x1); ToggleFeature(X86::FeatureAES); + HasF16C = IsIntel && ((ECX >> 29) & 0x1); ToggleFeature(X86::FeatureF16C); HasRDRAND = IsIntel && ((ECX >> 30) & 0x1); ToggleFeature(X86::FeatureRDRAND); HasCmpxchg16b = ((ECX >> 13) & 0x1); ToggleFeature(X86::FeatureCMPXCHG16B); @@ -258,6 +259,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, , HasFMA4(false) , HasMOVBE(false) , HasRDRAND(false) + , HasF16C(false) , IsBTMemSlow(false) , IsUAMemFast(false) , HasVectorUAMem(false) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 8a6977cc056..37704dc2343 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -90,12 +90,15 @@ protected: /// HasFMA4 - Target has 4-operand fused multiply-add bool HasFMA4; - /// HasMOVBE - True if the processor has the MOVBE instruction; + /// HasMOVBE - True if the processor has the MOVBE instruction. bool HasMOVBE; - /// HasRDRAND - True if the processor has the RDRAND instruction; + /// HasRDRAND - True if the processor has the RDRAND instruction. bool HasRDRAND; + /// HasF16C - Processor has 16-bit floating point conversion instructions. + bool HasF16C; + /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; @@ -180,6 +183,7 @@ public: bool hasFMA4() const { return HasFMA4; } bool hasMOVBE() const { return HasMOVBE; } bool hasRDRAND() const { return HasRDRAND; } + bool hasF16C() const { return HasF16C; } bool isBTMemSlow() const { return IsBTMemSlow; } bool isUnalignedMemAccessFast() const { return IsUAMemFast; } bool hasVectorUAMem() const { return HasVectorUAMem; } diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt index 39e641b18bf..0b0085842af 100644 --- a/test/MC/Disassembler/X86/simple-tests.txt +++ b/test/MC/Disassembler/X86/simple-tests.txt @@ -432,26 +432,50 @@ # CHECK: xsaveopt (%rax) 0x0f 0xae 0x30 -# CHECK rdfsbasel %eax +# CHECK: rdfsbasel %eax 0xf3 0x0f 0xae 0xc0 -# CHECK rdgsbasel %eax +# CHECK: rdgsbasel %eax 0xf3 0x0f 0xae 0xc8 -# CHECK wrfsbasel %eax +# CHECK: wrfsbasel %eax 0xf3 0x0f 0xae 0xd0 -# CHECK wrgsbasel %eax +# CHECK: wrgsbasel %eax 0xf3 0x0f 0xae 0xd8 -# CHECK rdfsbaseq %rax +# CHECK: rdfsbaseq %rax 0xf3 0x48 0x0f 0xae 0xc0 -# CHECK rdgsbaseq %rax +# CHECK: rdgsbaseq %rax 0xf3 0x48 0x0f 0xae 0xc8 -# CHECK wrfsbaseq %rax +# CHECK: wrfsbaseq %rax 0xf3 0x48 0x0f 0xae 0xd0 -# CHECK wrgsbaseq %rax +# CHECK: wrgsbaseq %rax 0xf3 0x48 0x0f 0xae 0xd8 + +# CHECK: vcvtph2ps %xmm0, %xmm0 +0xc4 0xe2 0x79 0x13 0xc0 + +# CHECK: vcvtph2ps (%rax), %xmm0 +0xc4 0xe2 0x79 0x13 0x00 + +# CHECK: vcvtph2ps %xmm0, %ymm0 +0xc4 0xe2 0x7d 0x13 0xc0 + +# CHECK: vcvtph2ps (%rax), %ymm0 +0xc4 0xe2 0x7d 0x13 0x00 + +# CHECK: vcvtps2ph $0, %xmm0, %xmm0 +0xc4 0xe3 0x79 0x1d 0xc0 0x00 + +# CHECK: vcvtps2ph $0, %xmm0, (%rax) +0xc4 0xe3 0x79 0x1d 0x00 0x00 + +# CHECK: vcvtps2ph $0, %ymm0, %xmm0 +0xc4 0xe3 0x7d 0x1d 0xc0 0x00 + +# CHECK: vcvtps2ph $0, %ymm0, (%rax) +0xc4 0xe3 0x7d 0x1d 0x00 0x00 diff --git a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt index 59438f1d27d..d0b24d2ebe7 100644 --- a/test/MC/Disassembler/X86/x86-32.txt +++ b/test/MC/Disassembler/X86/x86-32.txt @@ -441,3 +441,27 @@ # CHECK: xsaveopt (%eax) 0x0f 0xae 0x30 + +# CHECK: vcvtph2ps %xmm0, %xmm0 +0xc4 0xe2 0x79 0x13 0xc0 + +# CHECK: vcvtph2ps (%eax), %xmm0 +0xc4 0xe2 0x79 0x13 0x00 + +# CHECK: vcvtph2ps %xmm0, %ymm0 +0xc4 0xe2 0x7d 0x13 0xc0 + +# CHECK: vcvtph2ps (%eax), %ymm0 +0xc4 0xe2 0x7d 0x13 0x00 + +# CHECK: vcvtps2ph $0, %xmm0, %xmm0 +0xc4 0xe3 0x79 0x1d 0xc0 0x00 + +# CHECK: vcvtps2ph $0, %xmm0, (%eax) +0xc4 0xe3 0x79 0x1d 0x00 0x00 + +# CHECK: vcvtps2ph $0, %ymm0, %xmm0 +0xc4 0xe3 0x7d 0x1d 0xc0 0x00 + +# CHECK: vcvtps2ph $0, %ymm0, (%eax) +0xc4 0xe3 0x7d 0x1d 0x00 0x00 -- 2.34.1