From 4fd100f9e922b0d087d33c44e0f3eef7d85ba893 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Fri, 20 Feb 2015 02:44:13 +0000 Subject: [PATCH] [x86] Clean up a couple of test cases with the new update script. Split one test case that is only partially tested in 32-bits into two test cases so that the script doesn't generate massive spews of tests for the cases we don't care about. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229955 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/bswap-vector.ll | 469 +++++++++++++++------ test/CodeGen/X86/mmx-arg-passing-x86-64.ll | 64 +++ test/CodeGen/X86/mmx-arg-passing.ll | 80 ++-- 3 files changed, 435 insertions(+), 178 deletions(-) create mode 100644 test/CodeGen/X86/mmx-arg-passing-x86-64.ll diff --git a/test/CodeGen/X86/bswap-vector.ll b/test/CodeGen/X86/bswap-vector.ll index 9dc960d7779..0351b22a700 100644 --- a/test/CodeGen/X86/bswap-vector.ll +++ b/test/CodeGen/X86/bswap-vector.ll @@ -1,7 +1,8 @@ -; RUN: llc < %s -mcpu=x86-64 | FileCheck %s -check-prefix=CHECK-NOSSSE3 -; RUN: llc < %s -mcpu=core2 | FileCheck %s -check-prefix=CHECK-SSSE3 -; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK-AVX2 -; RUN: llc < %s -mcpu=core-avx2 -x86-experimental-vector-widening-legalization | FileCheck %s -check-prefix=CHECK-WIDE-AVX2 +; RUN: llc < %s -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK-NOSSSE3 +; RUN: llc < %s -mcpu=core2 | FileCheck %s --check-prefix=CHECK-SSSE3 +; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK-AVX2 +; RUN: llc < %s -mcpu=core-avx2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE-AVX2 + target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -9,165 +10,381 @@ declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>) declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) -define <8 x i16> @test1(<8 x i16> %v) #0 { +define <8 x i16> @test1(<8 x i16> %v) { +; CHECK-NOSSSE3-LABEL: test1: +; CHECK-NOSSSE3: # BB#0: # %entry +; CHECK-NOSSSE3-NEXT: pextrw $7, %xmm0, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm1 +; CHECK-NOSSSE3-NEXT: pextrw $3, %xmm0, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm2 +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; CHECK-NOSSSE3-NEXT: pextrw $5, %xmm0, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm3 +; CHECK-NOSSSE3-NEXT: pextrw $1, %xmm0, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm1 +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; CHECK-NOSSSE3-NEXT: pextrw $6, %xmm0, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm2 +; CHECK-NOSSSE3-NEXT: pextrw $2, %xmm0, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm3 +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; CHECK-NOSSSE3-NEXT: pextrw $4, %xmm0, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm2 +; CHECK-NOSSSE3-NEXT: movd %xmm0, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm0 +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK-NOSSSE3-NEXT: retq +; +; CHECK-SSSE3-LABEL: test1: +; CHECK-SSSE3: # BB#0: # %entry +; CHECK-SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; CHECK-SSSE3-NEXT: retq +; +; CHECK-AVX2-LABEL: test1: +; CHECK-AVX2: # BB#0: # %entry +; CHECK-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; CHECK-AVX2-NEXT: retq +; +; CHECK-WIDE-AVX2-LABEL: test1: +; CHECK-WIDE-AVX2: # BB#0: # %entry +; CHECK-WIDE-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; CHECK-WIDE-AVX2-NEXT: retq entry: %r = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %v) ret <8 x i16> %r - -; CHECK-NOSSSE3-LABEL: @test1 -; CHECK-NOSSSE3: rolw -; CHECK-NOSSSE3: rolw -; CHECK-NOSSSE3: rolw -; CHECK-NOSSSE3: rolw -; CHECK-NOSSSE3: rolw -; CHECK-NOSSSE3: rolw -; CHECK-NOSSSE3: rolw -; CHECK-NOSSSE3: rolw -; CHECK-NOSSSE3: retq - -; CHECK-SSSE3-LABEL: @test1 -; CHECK-SSSE3: pshufb -; CHECK-SSSE3-NEXT: retq - -; CHECK-AVX2-LABEL: @test1 -; CHECK-AVX2: vpshufb -; CHECK-AVX2-NEXT: retq - -; CHECK-WIDE-AVX2-LABEL: @test1 -; CHECK-WIDE-AVX2: vpshufb -; CHECK-WIDE-AVX2-NEXT: retq } -define <4 x i32> @test2(<4 x i32> %v) #0 { +define <4 x i32> @test2(<4 x i32> %v) { +; CHECK-NOSSSE3-LABEL: test2: +; CHECK-NOSSSE3: # BB#0: # %entry +; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3] +; CHECK-NOSSSE3-NEXT: movd %xmm1, %eax +; CHECK-NOSSSE3-NEXT: bswapl %eax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm1 +; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3] +; CHECK-NOSSSE3-NEXT: movd %xmm2, %eax +; CHECK-NOSSSE3-NEXT: bswapl %eax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm2 +; CHECK-NOSSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; CHECK-NOSSSE3-NEXT: movd %xmm0, %eax +; CHECK-NOSSSE3-NEXT: bswapl %eax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm1 +; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; CHECK-NOSSSE3-NEXT: movd %xmm0, %eax +; CHECK-NOSSSE3-NEXT: bswapl %eax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm0 +; CHECK-NOSSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NOSSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; CHECK-NOSSSE3-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NOSSSE3-NEXT: retq +; +; CHECK-SSSE3-LABEL: test2: +; CHECK-SSSE3: # BB#0: # %entry +; CHECK-SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; CHECK-SSSE3-NEXT: retq +; +; CHECK-AVX2-LABEL: test2: +; CHECK-AVX2: # BB#0: # %entry +; CHECK-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; CHECK-AVX2-NEXT: retq +; +; CHECK-WIDE-AVX2-LABEL: test2: +; CHECK-WIDE-AVX2: # BB#0: # %entry +; CHECK-WIDE-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; CHECK-WIDE-AVX2-NEXT: retq entry: %r = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %v) ret <4 x i32> %r - -; CHECK-NOSSSE3-LABEL: @test2 -; CHECK-NOSSSE3: bswapl -; CHECK-NOSSSE3: bswapl -; CHECK-NOSSSE3: bswapl -; CHECK-NOSSSE3: bswapl -; CHECK-NOSSSE3: retq - -; CHECK-SSSE3-LABEL: @test2 -; CHECK-SSSE3: pshufb -; CHECK-SSSE3-NEXT: retq - -; CHECK-AVX2-LABEL: @test2 -; CHECK-AVX2: vpshufb -; CHECK-AVX2-NEXT: retq - -; CHECK-WIDE-AVX2-LABEL: @test2 -; CHECK-WIDE-AVX2: vpshufb -; CHECK-WIDE-AVX2-NEXT: retq } -define <2 x i64> @test3(<2 x i64> %v) #0 { +define <2 x i64> @test3(<2 x i64> %v) { +; CHECK-NOSSSE3-LABEL: test3: +; CHECK-NOSSSE3: # BB#0: # %entry +; CHECK-NOSSSE3-NEXT: movd %xmm0, %rax +; CHECK-NOSSSE3-NEXT: bswapq %rax +; CHECK-NOSSSE3-NEXT: movd %rax, %xmm1 +; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; CHECK-NOSSSE3-NEXT: movd %xmm0, %rax +; CHECK-NOSSSE3-NEXT: bswapq %rax +; CHECK-NOSSSE3-NEXT: movd %rax, %xmm0 +; CHECK-NOSSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NOSSSE3-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NOSSSE3-NEXT: retq +; +; CHECK-SSSE3-LABEL: test3: +; CHECK-SSSE3: # BB#0: # %entry +; CHECK-SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; CHECK-SSSE3-NEXT: retq +; +; CHECK-AVX2-LABEL: test3: +; CHECK-AVX2: # BB#0: # %entry +; CHECK-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; CHECK-AVX2-NEXT: retq +; +; CHECK-WIDE-AVX2-LABEL: test3: +; CHECK-WIDE-AVX2: # BB#0: # %entry +; CHECK-WIDE-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; CHECK-WIDE-AVX2-NEXT: retq entry: %r = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %v) ret <2 x i64> %r - -; CHECK-NOSSSE3-LABEL: @test3 -; CHECK-NOSSSE3: bswapq -; CHECK-NOSSSE3: bswapq -; CHECK-NOSSSE3: retq - -; CHECK-SSSE3-LABEL: @test3 -; CHECK-SSSE3: pshufb -; CHECK-SSSE3-NEXT: retq - -; CHECK-AVX2-LABEL: @test3 -; CHECK-AVX2: vpshufb -; CHECK-AVX2-NEXT: retq - -; CHECK-WIDE-AVX2-LABEL: @test3 -; CHECK-WIDE-AVX2: vpshufb -; CHECK-WIDE-AVX2-NEXT: retq } declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>) declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>) declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>) -define <16 x i16> @test4(<16 x i16> %v) #0 { +define <16 x i16> @test4(<16 x i16> %v) { +; CHECK-NOSSSE3-LABEL: test4: +; CHECK-NOSSSE3: # BB#0: # %entry +; CHECK-NOSSSE3-NEXT: pextrw $7, %xmm0, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm2 +; CHECK-NOSSSE3-NEXT: pextrw $3, %xmm0, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm3 +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; CHECK-NOSSSE3-NEXT: pextrw $5, %xmm0, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm4 +; CHECK-NOSSSE3-NEXT: pextrw $1, %xmm0, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm2 +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3] +; CHECK-NOSSSE3-NEXT: pextrw $6, %xmm0, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm3 +; CHECK-NOSSSE3-NEXT: pextrw $2, %xmm0, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm4 +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] +; CHECK-NOSSSE3-NEXT: pextrw $4, %xmm0, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm3 +; CHECK-NOSSSE3-NEXT: movd %xmm0, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm0 +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; CHECK-NOSSSE3-NEXT: pextrw $7, %xmm1, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm2 +; CHECK-NOSSSE3-NEXT: pextrw $3, %xmm1, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm3 +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; CHECK-NOSSSE3-NEXT: pextrw $5, %xmm1, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm4 +; CHECK-NOSSSE3-NEXT: pextrw $1, %xmm1, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm2 +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3] +; CHECK-NOSSSE3-NEXT: pextrw $6, %xmm1, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm3 +; CHECK-NOSSSE3-NEXT: pextrw $2, %xmm1, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm4 +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] +; CHECK-NOSSSE3-NEXT: pextrw $4, %xmm1, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm3 +; CHECK-NOSSSE3-NEXT: movd %xmm1, %eax +; CHECK-NOSSSE3-NEXT: rolw $8, %ax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm1 +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] +; CHECK-NOSSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; CHECK-NOSSSE3-NEXT: retq +; +; CHECK-SSSE3-LABEL: test4: +; CHECK-SSSE3: # BB#0: # %entry +; CHECK-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; CHECK-SSSE3-NEXT: pshufb %xmm2, %xmm0 +; CHECK-SSSE3-NEXT: pshufb %xmm2, %xmm1 +; CHECK-SSSE3-NEXT: retq +; +; CHECK-AVX2-LABEL: test4: +; CHECK-AVX2: # BB#0: # %entry +; CHECK-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30] +; CHECK-AVX2-NEXT: retq +; +; CHECK-WIDE-AVX2-LABEL: test4: +; CHECK-WIDE-AVX2: # BB#0: # %entry +; CHECK-WIDE-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30] +; CHECK-WIDE-AVX2-NEXT: retq entry: %r = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %v) ret <16 x i16> %r - -; CHECK-SSSE3-LABEL: @test4 -; CHECK-SSSE3: pshufb -; CHECK-SSSE3: pshufb -; CHECK-SSSE3-NEXT: retq - -; CHECK-AVX2-LABEL: @test4 -; CHECK-AVX2: vpshufb -; CHECK-AVX2-NEXT: retq - -; CHECK-WIDE-AVX2-LABEL: @test4 -; CHECK-WIDE-AVX2: vpshufb -; CHECK-WIDE-AVX2-NEXT: retq } -define <8 x i32> @test5(<8 x i32> %v) #0 { +define <8 x i32> @test5(<8 x i32> %v) { +; CHECK-NOSSSE3-LABEL: test5: +; CHECK-NOSSSE3: # BB#0: # %entry +; CHECK-NOSSSE3-NEXT: movdqa %xmm0, %xmm2 +; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3] +; CHECK-NOSSSE3-NEXT: movd %xmm0, %eax +; CHECK-NOSSSE3-NEXT: bswapl %eax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm0 +; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,2,3] +; CHECK-NOSSSE3-NEXT: movd %xmm3, %eax +; CHECK-NOSSSE3-NEXT: bswapl %eax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm3 +; CHECK-NOSSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] +; CHECK-NOSSSE3-NEXT: movd %xmm2, %eax +; CHECK-NOSSSE3-NEXT: bswapl %eax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm0 +; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] +; CHECK-NOSSSE3-NEXT: movd %xmm2, %eax +; CHECK-NOSSSE3-NEXT: bswapl %eax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm2 +; CHECK-NOSSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; CHECK-NOSSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3] +; CHECK-NOSSSE3-NEXT: movd %xmm2, %eax +; CHECK-NOSSSE3-NEXT: bswapl %eax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm2 +; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3] +; CHECK-NOSSSE3-NEXT: movd %xmm3, %eax +; CHECK-NOSSSE3-NEXT: bswapl %eax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm3 +; CHECK-NOSSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; CHECK-NOSSSE3-NEXT: movd %xmm1, %eax +; CHECK-NOSSSE3-NEXT: bswapl %eax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm2 +; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; CHECK-NOSSSE3-NEXT: movd %xmm1, %eax +; CHECK-NOSSSE3-NEXT: bswapl %eax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm1 +; CHECK-NOSSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; CHECK-NOSSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; CHECK-NOSSSE3-NEXT: movdqa %xmm2, %xmm1 +; CHECK-NOSSSE3-NEXT: retq +; +; CHECK-SSSE3-LABEL: test5: +; CHECK-SSSE3: # BB#0: # %entry +; CHECK-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; CHECK-SSSE3-NEXT: pshufb %xmm2, %xmm0 +; CHECK-SSSE3-NEXT: pshufb %xmm2, %xmm1 +; CHECK-SSSE3-NEXT: retq +; +; CHECK-AVX2-LABEL: test5: +; CHECK-AVX2: # BB#0: # %entry +; CHECK-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28] +; CHECK-AVX2-NEXT: retq +; +; CHECK-WIDE-AVX2-LABEL: test5: +; CHECK-WIDE-AVX2: # BB#0: # %entry +; CHECK-WIDE-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28] +; CHECK-WIDE-AVX2-NEXT: retq entry: %r = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %v) ret <8 x i32> %r - -; CHECK-SSSE3-LABEL: @test5 -; CHECK-SSSE3: pshufb -; CHECK-SSSE3: pshufb -; CHECK-SSSE3-NEXT: retq - -; CHECK-AVX2-LABEL: @test5 -; CHECK-AVX2: vpshufb -; CHECK-AVX2-NEXT: retq - -; CHECK-WIDE-AVX2-LABEL: @test5 -; CHECK-WIDE-AVX2: vpshufb -; CHECK-WIDE-AVX2-NEXT: retq } -define <4 x i64> @test6(<4 x i64> %v) #0 { +define <4 x i64> @test6(<4 x i64> %v) { +; CHECK-NOSSSE3-LABEL: test6: +; CHECK-NOSSSE3: # BB#0: # %entry +; CHECK-NOSSSE3-NEXT: movd %xmm0, %rax +; CHECK-NOSSSE3-NEXT: bswapq %rax +; CHECK-NOSSSE3-NEXT: movd %rax, %xmm2 +; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; CHECK-NOSSSE3-NEXT: movd %xmm0, %rax +; CHECK-NOSSSE3-NEXT: bswapq %rax +; CHECK-NOSSSE3-NEXT: movd %rax, %xmm0 +; CHECK-NOSSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] +; CHECK-NOSSSE3-NEXT: movd %xmm1, %rax +; CHECK-NOSSSE3-NEXT: bswapq %rax +; CHECK-NOSSSE3-NEXT: movd %rax, %xmm3 +; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] +; CHECK-NOSSSE3-NEXT: movd %xmm0, %rax +; CHECK-NOSSSE3-NEXT: bswapq %rax +; CHECK-NOSSSE3-NEXT: movd %rax, %xmm0 +; CHECK-NOSSSE3-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0] +; CHECK-NOSSSE3-NEXT: movdqa %xmm2, %xmm0 +; CHECK-NOSSSE3-NEXT: movdqa %xmm3, %xmm1 +; CHECK-NOSSSE3-NEXT: retq +; +; CHECK-SSSE3-LABEL: test6: +; CHECK-SSSE3: # BB#0: # %entry +; CHECK-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; CHECK-SSSE3-NEXT: pshufb %xmm2, %xmm0 +; CHECK-SSSE3-NEXT: pshufb %xmm2, %xmm1 +; CHECK-SSSE3-NEXT: retq +; +; CHECK-AVX2-LABEL: test6: +; CHECK-AVX2: # BB#0: # %entry +; CHECK-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24] +; CHECK-AVX2-NEXT: retq +; +; CHECK-WIDE-AVX2-LABEL: test6: +; CHECK-WIDE-AVX2: # BB#0: # %entry +; CHECK-WIDE-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24] +; CHECK-WIDE-AVX2-NEXT: retq entry: %r = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %v) ret <4 x i64> %r - -; CHECK-SSSE3-LABEL: @test6 -; CHECK-SSSE3: pshufb -; CHECK-SSSE3: pshufb -; CHECK-SSSE3-NEXT: retq - -; CHECK-AVX2-LABEL: @test6 -; CHECK-AVX2: vpshufb -; CHECK-AVX2-NEXT: retq - -; CHECK-WIDE-AVX2-LABEL: @test6 -; CHECK-WIDE-AVX2: vpshufb -; CHECK-WIDE-AVX2-NEXT: retq } declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>) -define <4 x i16> @test7(<4 x i16> %v) #0 { +define <4 x i16> @test7(<4 x i16> %v) { +; CHECK-NOSSSE3-LABEL: test7: +; CHECK-NOSSSE3: # BB#0: # %entry +; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3] +; CHECK-NOSSSE3-NEXT: movd %xmm1, %eax +; CHECK-NOSSSE3-NEXT: bswapl %eax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm1 +; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3] +; CHECK-NOSSSE3-NEXT: movd %xmm2, %eax +; CHECK-NOSSSE3-NEXT: bswapl %eax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm2 +; CHECK-NOSSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; CHECK-NOSSSE3-NEXT: movd %xmm0, %eax +; CHECK-NOSSSE3-NEXT: bswapl %eax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm1 +; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; CHECK-NOSSSE3-NEXT: movd %xmm0, %eax +; CHECK-NOSSSE3-NEXT: bswapl %eax +; CHECK-NOSSSE3-NEXT: movd %eax, %xmm0 +; CHECK-NOSSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NOSSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; CHECK-NOSSSE3-NEXT: psrld $16, %xmm1 +; CHECK-NOSSSE3-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NOSSSE3-NEXT: retq +; +; CHECK-SSSE3-LABEL: test7: +; CHECK-SSSE3: # BB#0: # %entry +; CHECK-SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; CHECK-SSSE3-NEXT: psrld $16, %xmm0 +; CHECK-SSSE3-NEXT: retq +; +; CHECK-AVX2-LABEL: test7: +; CHECK-AVX2: # BB#0: # %entry +; CHECK-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; CHECK-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0 +; CHECK-AVX2-NEXT: retq +; +; CHECK-WIDE-AVX2-LABEL: test7: +; CHECK-WIDE-AVX2: # BB#0: # %entry +; CHECK-WIDE-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; CHECK-WIDE-AVX2-NEXT: retq entry: %r = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %v) ret <4 x i16> %r - -; CHECK-SSSE3-LABEL: @test7 -; CHECK-SSSE3: pshufb -; CHECK-SSSE3: psrld $16 -; CHECK-SSSE3-NEXT: retq - -; CHECK-AVX2-LABEL: @test7 -; CHECK-AVX2: vpshufb -; CHECK-AVX2: vpsrld $16 -; CHECK-AVX2-NEXT: retq - -; CHECK-WIDE-AVX2-LABEL: @test7 -; CHECK-WIDE-AVX2: vpshufb -; CHECK-WIDE-AVX2-NEXT: retq } - -attributes #0 = { nounwind uwtable } - diff --git a/test/CodeGen/X86/mmx-arg-passing-x86-64.ll b/test/CodeGen/X86/mmx-arg-passing-x86-64.ll new file mode 100644 index 00000000000..b4a51e47602 --- /dev/null +++ b/test/CodeGen/X86/mmx-arg-passing-x86-64.ll @@ -0,0 +1,64 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X86-64 +; +; On Darwin x86-64, v8i8, v4i16, v2i32 values are passed in XMM[0-7]. +; On Darwin x86-64, v1i64 values are passed in 64-bit GPRs. + +@g_v8qi = external global <8 x i8> + +define void @t3() nounwind { +; X86-64-LABEL: t3: +; X86-64: ## BB#0: +; X86-64-NEXT: movq _g_v8qi@{{.*}}(%rip), %rax +; X86-64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X86-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; X86-64-NEXT: pand {{.*}}(%rip), %xmm0 +; X86-64-NEXT: packuswb %xmm0, %xmm0 +; X86-64-NEXT: movd %xmm0, %rax +; X86-64-NEXT: movd %rax, %xmm0 +; X86-64-NEXT: movb $1, %al +; X86-64-NEXT: jmp _pass_v8qi ## TAILCALL + %tmp3 = load <8 x i8>* @g_v8qi, align 8 + %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx + %tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind + ret void +} + +define void @t4(x86_mmx %v1, x86_mmx %v2) nounwind { +; X86-64-LABEL: t4: +; X86-64: ## BB#0: +; X86-64-NEXT: movdq2q %xmm1, %mm0 +; X86-64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) +; X86-64-NEXT: movdq2q %xmm0, %mm0 +; X86-64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) +; X86-64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X86-64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero +; X86-64-NEXT: paddb %xmm0, %xmm1 +; X86-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X86-64-NEXT: pand {{.*}}(%rip), %xmm1 +; X86-64-NEXT: packuswb %xmm1, %xmm1 +; X86-64-NEXT: movd %xmm1, %rax +; X86-64-NEXT: movd %rax, %xmm0 +; X86-64-NEXT: movb $1, %al +; X86-64-NEXT: jmp _pass_v8qi ## TAILCALL + %v1a = bitcast x86_mmx %v1 to <8 x i8> + %v2b = bitcast x86_mmx %v2 to <8 x i8> + %tmp3 = add <8 x i8> %v1a, %v2b + %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx + %tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind + ret void +} + +define void @t5() nounwind { +; X86-64-LABEL: t5: +; X86-64: ## BB#0: +; X86-64-NEXT: pushq %rax +; X86-64-NEXT: xorl %edi, %edi +; X86-64-NEXT: callq _pass_v1di +; X86-64-NEXT: popq %rax +; X86-64-NEXT: retq + call void @pass_v1di( <1 x i64> zeroinitializer ) + ret void +} + +declare i32 @pass_v8qi(...) +declare void @pass_v1di(<1 x i64>) diff --git a/test/CodeGen/X86/mmx-arg-passing.ll b/test/CodeGen/X86/mmx-arg-passing.ll index 728c668edde..4e003107620 100644 --- a/test/CodeGen/X86/mmx-arg-passing.ll +++ b/test/CodeGen/X86/mmx-arg-passing.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | FileCheck %s -check-prefix=X86-32 -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s -check-prefix=X86-64 +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | FileCheck %s --check-prefix=X86-32 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X86-64 ; ; On Darwin x86-32, v8i8, v4i16, v2i32 values are passed in MM[0-2]. ; On Darwin x86-32, v1i64 values are passed in memory. In this example, they @@ -10,64 +10,40 @@ @u1 = external global x86_mmx define void @t1(x86_mmx %v1) nounwind { - store x86_mmx %v1, x86_mmx* @u1, align 8 - ret void - ; X86-32-LABEL: t1: -; X86-32: movq %mm0 - +; X86-32: ## BB#0: +; X86-32-NEXT: movl L_u1$non_lazy_ptr, %eax +; X86-32-NEXT: movq %mm0, (%eax) +; X86-32-NEXT: retl +; ; X86-64-LABEL: t1: -; X86-64: movdq2q %xmm0 -; X86-64: movq %mm0 +; X86-64: ## BB#0: +; X86-64-NEXT: movdq2q %xmm0, %mm0 +; X86-64-NEXT: movq _u1@{{.*}}(%rip), %rax +; X86-64-NEXT: movq %mm0, (%rax) +; X86-64-NEXT: retq + store x86_mmx %v1, x86_mmx* @u1, align 8 + ret void } @u2 = external global x86_mmx define void @t2(<1 x i64> %v1) nounwind { +; X86-32-LABEL: t2: +; X86-32: ## BB#0: +; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-32-NEXT: movl L_u2$non_lazy_ptr, %edx +; X86-32-NEXT: movl %ecx, 4(%edx) +; X86-32-NEXT: movl %eax, (%edx) +; X86-32-NEXT: retl +; +; X86-64-LABEL: t2: +; X86-64: ## BB#0: +; X86-64-NEXT: movq _u2@{{.*}}(%rip), %rax +; X86-64-NEXT: movq %rdi, (%rax) +; X86-64-NEXT: retq %tmp = bitcast <1 x i64> %v1 to x86_mmx store x86_mmx %tmp, x86_mmx* @u2, align 8 ret void - -; X86-32-LABEL: t2: -; X86-32: movl 4(%esp) -; X86-32: movl 8(%esp) - -; X86-64-LABEL: t2: -; X86-64: movq %rdi -} - -@g_v8qi = external global <8 x i8> - -define void @t3() nounwind { -; X86-64-LABEL: t3: -; X86-64-NOT: movdq2q -; X86-64: punpcklbw - %tmp3 = load <8 x i8>* @g_v8qi, align 8 - %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx - %tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind - ret void } - -define void @t4(x86_mmx %v1, x86_mmx %v2) nounwind { -; X86-64-LABEL: t4: -; X86-64: movdq2q -; X86-64: movdq2q -; X86-64-NOT: movdq2q - %v1a = bitcast x86_mmx %v1 to <8 x i8> - %v2b = bitcast x86_mmx %v2 to <8 x i8> - %tmp3 = add <8 x i8> %v1a, %v2b - %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx - %tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind - ret void -} - -define void @t5() nounwind { -; X86-64-LABEL: t5: -; X86-64-NOT: movdq2q -; X86-64: xorl %edi, %edi - call void @pass_v1di( <1 x i64> zeroinitializer ) - ret void -} - -declare i32 @pass_v8qi(...) -declare void @pass_v1di(<1 x i64>) -- 2.34.1