From a9fe27ffb3aeaea3c774d16ea938b10149f348a8 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Sun, 2 Mar 2014 09:19:44 +0000 Subject: [PATCH] AVX-512: Fixed extract_vector_elt for v8i1 vector git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@202624 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 6 ++++-- lib/Target/X86/X86ISelLowering.h | 1 + lib/Target/X86/X86InstrAVX512.td | 5 +++++ lib/Target/X86/X86RegisterInfo.td | 8 ++++---- test/CodeGen/X86/avx512-insert-extract.ll | 24 +++++++++++++++++------ 5 files changed, 32 insertions(+), 12 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 71aee4ff3de..3ae4147a794 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7697,7 +7697,8 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { /// Extract one bit from mask vector, like v16i1 or v8i1. /// AVX-512 feature. -static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) { +SDValue +X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const { SDValue Vec = Op.getOperand(0); SDLoc dl(Vec); MVT VecVT = Vec.getSimpleValueType(); @@ -7717,7 +7718,8 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) { } unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); - unsigned MaxSift = VecVT.getSizeInBits() - 1; + const TargetRegisterClass* rc = getRegClassFor(VecVT); + unsigned MaxSift = rc->getSize()*8 - 1; Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec, DAG.getConstant(MaxSift - IdxVal, MVT::i8)); Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec, diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index af97b15cc19..9bf2b90f8fc 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -867,6 +867,7 @@ namespace llvm { SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const; SDValue
LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 328d74f8d7a..825ea09cfea 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1213,6 +1213,11 @@ def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))), def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))), (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>; +def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))), + (v8i1 (COPY_TO_REGCLASS (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>; + +def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))), + (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>; //===----------------------------------------------------------------------===// // AVX-512 - Aligned and unaligned load and store // diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index a88b2bb1f15..33c402b69a4 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -466,10 +466,10 @@ def VR256X : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], // The size of the all masked registers is 16 bit because we have only one // KMOVW istruction that can store this register in memory, and it writes 2 bytes def VK1 : RegisterClass<"X86", [i1], 16, (sequence "K%u", 0, 7)>; -def VK8 : RegisterClass<"X86", [v8i1], 16, (sequence "K%u", 0, 7)>; -def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)>; +def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK1)> {let Size = 16;} +def VK16 : 
RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;} -def VK1WM : RegisterClass<"X86", [i1], 16, (sub VK1, K0)>; -def VK8WM : RegisterClass<"X86", [v8i1], 16, (sub VK8, K0)>; +def VK1WM : RegisterClass<"X86", [i1], 16, (sub VK1, K0)> {let Size = 16;} +def VK8WM : RegisterClass<"X86", [v8i1], 16, (sub VK8, K0)> {let Size = 16;} def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)>; diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll index 4fca78fe72e..6557ac34935 100644 --- a/test/CodeGen/X86/avx512-insert-extract.ll +++ b/test/CodeGen/X86/avx512-insert-extract.ll @@ -91,7 +91,7 @@ define float @test9(<8 x float> %x, i32 %ind) nounwind { ;CHECK-LABEL: test10 ;CHECK: vmovd ;CHECK: vpermd %zmm -;CHEKK: vmovdz %xmm0, %eax +;CHECK: vmovd %xmm0, %eax ;CHECK: ret define i32 @test10(<16 x i32> %x, i32 %ind) nounwind { %e = extractelement <16 x i32> %x, i32 %ind @@ -100,8 +100,8 @@ define i32 @test10(<16 x i32> %x, i32 %ind) nounwind { ;CHECK-LABEL: test11 ;CHECK: vpcmpltud -;CKECK: kshiftlw $11 -;CKECK: kshiftrw $15 +;CHECK: kshiftlw $11 +;CHECK: kshiftrw $15 ;CHECK: kortestw ;CHECK: je ;CHECK: ret @@ -119,8 +119,8 @@ define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) { ;CHECK-LABEL: test12 ;CHECK: vpcmpgtq -;CKECK: kshiftlw $15 -;CKECK: kshiftrw $15 +;CHECK: kshiftlw $15 +;CHECK: kshiftrw $15 ;CHECK: kortestw ;CHECK: ret @@ -135,7 +135,7 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) { ;CHECK-LABEL: test13 ;CHECK: cmpl ;CHECK: sbbl -;CKECK: orl $65532 +;CHECK: orl $65532 ;CHECK: ret define i16 @test13(i32 %a, i32 %b) { %cmp_res = icmp ult i32 %a, %b @@ -144,5 +144,17 @@ define i16 @test13(i32 %a, i32 %b) { ret i16 %res } +;CHECK-LABEL: test14 +;CHECK: vpcmpgtq +;CHECK: kshiftlw $11 +;CHECK: kshiftrw $15 +;CHECK: kortestw +;CHECK: ret +define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) { + %cmpvector_func.i = icmp slt <8 x i64> %a, %b + %extract24vector_func.i = extractelement 
<8 x i1> %cmpvector_func.i, i32 4 + %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1 + ret i64 %res +} -- 2.34.1