From 1f6252f0b25b3aebf629db39c3d33606220f55a3 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 21 Nov 2015 12:38:34 +0000 Subject: [PATCH] [X86][SSE] Legal XMM Register Class ordering for SSE1 It turns out we have a number of places that just grab the first type attached to a register class for various reasons. This is fine unless for some reason that type isn't legal on the current target, such as for SSE1 which doesn't support v16i8/v8i16/v4i32/v2i64 - all of which were included before v4f32 in the class. Given that this is such a rare situation I've just re-ordered the types and placed the float types first. Fix for PR16133 Differential Revision: http://reviews.llvm.org/D14787 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253773 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86RegisterInfo.td | 9 +++++---- test/CodeGen/X86/inline-sse.ll | 32 +++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 test/CodeGen/X86/inline-sse.ll diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index de32968a62e..6c74dcbff5b 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -442,10 +442,11 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> { } // Generic vector registers: VR64 and VR128. +// Ensure that float types are declared first - only float is legal on SSE1. def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>; -def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], +def VR128 : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64], 128, (add FR32)>; -def VR256 : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], +def VR256 : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64], 256, (sequence "YMM%u", 0, 15)>; // Status flags registers. 
@@ -468,9 +469,9 @@ def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>; def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)>; // Extended VR128 and VR256 for AVX-512 instructions -def VR128X : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], +def VR128X : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64], 128, (add FR32X)>; -def VR256X : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], +def VR256X : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64], 256, (sequence "YMM%u", 0, 31)>; // Mask registers diff --git a/test/CodeGen/X86/inline-sse.ll b/test/CodeGen/X86/inline-sse.ll new file mode 100644 index 00000000000..08819b85829 --- /dev/null +++ b/test/CodeGen/X86/inline-sse.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64 + +; PR16133 - we must treat XMM registers as v4f32 as SSE1 targets don't permit other vector types. + +define void @nop() nounwind { +; X32-LABEL: nop: +; X32: # BB#0: +; X32-NEXT: pushl %ebp +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: andl $-16, %esp +; X32-NEXT: subl $32, %esp +; X32-NEXT: #APP +; X32-NEXT: #NO_APP +; X32-NEXT: movaps %xmm0, (%esp) +; X32-NEXT: movl %ebp, %esp +; X32-NEXT: popl %ebp +; X32-NEXT: retl +; +; X64-LABEL: nop: +; X64: # BB#0: +; X64-NEXT: #APP +; X64-NEXT: #NO_APP +; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: retq + %1 = alloca <4 x float>, align 16 + %2 = call <4 x float> asm "", "=x,~{dirflag},~{fpsr},~{flags}"() + store <4 x float> %2, <4 x float>* %1, align 16 + ret void +} -- 2.34.1