From: Tim Northover Date: Mon, 10 Mar 2014 09:34:07 +0000 (+0000) Subject: AArch64: fix LowerCONCAT_VECTORS for new CodeGen. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=8ca089df49df6807b853dbcb18ab0d2d7dc76c62;p=oota-llvm.git AArch64: fix LowerCONCAT_VECTORS for new CodeGen. The function was making too many assumptions about its input: 1. The NEON_VDUP optimisation was far too aggressive, assuming (I think) that the input would always be BUILD_VECTOR. 2. We were treating most unknown concats as legal (by returning Op rather than SDValue()). I think only concats of pairs of vectors are actually legal. http://llvm.org/PR19094 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@203450 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 447f5005e55..cf2e46dbe39 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2281,19 +2281,20 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { // We custom lower concat_vectors with 4, 8, or 16 operands that are all the // same operand and of type v1* using the DUP instruction. unsigned NumOps = Op->getNumOperands(); - if (NumOps != 4 && NumOps != 8 && NumOps != 16) + if (NumOps == 2) { + assert(Op.getValueType().getSizeInBits() == 128 && "unexpected concat"); return Op; + } + + if (NumOps != 4 && NumOps != 8 && NumOps != 16) + return SDValue(); // Must be a single value for VDUP. - bool isConstant = true; SDValue Op0 = Op.getOperand(0); for (unsigned i = 1; i < NumOps; ++i) { SDValue OpN = Op.getOperand(i); if (Op0 != OpN) - return Op; - - if (!isa<ConstantSDNode>(OpN->getOperand(0))) - isConstant = false; + return SDValue(); } // Verify the value type. 
@@ -2302,22 +2303,22 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { default: llvm_unreachable("Unexpected number of operands"); case 4: if (EltVT != MVT::v1i16 && EltVT != MVT::v1i32) - return Op; + return SDValue(); break; case 8: if (EltVT != MVT::v1i8 && EltVT != MVT::v1i16) - return Op; + return SDValue(); break; case 16: if (EltVT != MVT::v1i8) - return Op; + return SDValue(); break; } SDLoc DL(Op); EVT VT = Op.getValueType(); // VDUP produces better code for constants. - if (isConstant) + if (Op0->getOpcode() == ISD::BUILD_VECTOR) return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Op0->getOperand(0)); return DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, Op0, DAG.getConstant(0, MVT::i64)); diff --git a/test/CodeGen/AArch64/concatvector-bugs.ll b/test/CodeGen/AArch64/concatvector-bugs.ll new file mode 100644 index 00000000000..5889e226564 --- /dev/null +++ b/test/CodeGen/AArch64/concatvector-bugs.ll @@ -0,0 +1,68 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon +; Bug: i8 type in FRP8 register but not registering with register class causes segmentation fault. +; Fix: Removed i8 type from FPR8 register class. 
+ +define void @test_concatvector_v8i8() { +entry.split: + br i1 undef, label %if.then, label %if.end + +if.then: ; preds = %entry.split + unreachable + +if.end: ; preds = %entry.split + br i1 undef, label %if.then9, label %if.end18 + +if.then9: ; preds = %if.end + unreachable + +if.end18: ; preds = %if.end + br label %for.body + +for.body: ; preds = %for.inc, %if.end18 + br i1 false, label %if.then30, label %for.inc + +if.then30: ; preds = %for.body + unreachable + +for.inc: ; preds = %for.body + br i1 undef, label %for.end, label %for.body + +for.end: ; preds = %for.inc + br label %for.body77 + +for.body77: ; preds = %for.body77, %for.end + br i1 undef, label %for.end106, label %for.body77 + +for.end106: ; preds = %for.body77 + br i1 undef, label %for.body130.us.us, label %stmt.for.body130.us.us + +stmt.for.body130.us.us: ; preds = %stmt.for.body130.us.us, %for.end106 + %_p_splat.us = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer + store <8 x i8> %_p_splat.us, <8 x i8>* undef, align 1 + br label %stmt.for.body130.us.us + +for.body130.us.us: ; preds = %for.body130.us.us, %for.end106 + br label %for.body130.us.us +} + +declare <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32>, i32) + +define <8 x i16> @test_splat(i32 %l) nounwind { +; CHECK-LABEL: test_splat: +; CHECK: ret + %lhs = insertelement <1 x i32> undef, i32 %l, i32 0 + %shift = tail call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %lhs, i32 11) + %vec = shufflevector <1 x i16> %shift, <1 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %vec +} + + +define <8 x i16> @test_notsplat(<8 x i16> %a, <8 x i16> %b, i32 %l) nounwind { +; CHECK-LABEL: test_notsplat: +; CHECK: ret +entry: + %lhs = insertelement <1 x i32> undef, i32 %l, i32 0 + %shift = tail call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %lhs, i32 11) + %vec = shufflevector <1 x i16> %shift, <1 x i16> undef, <8 x i32> + ret <8 x i16> %vec +} diff --git 
a/test/CodeGen/AArch64/concatvector-v8i8-bug.ll b/test/CodeGen/AArch64/concatvector-v8i8-bug.ll deleted file mode 100644 index f8854c3b662..00000000000 --- a/test/CodeGen/AArch64/concatvector-v8i8-bug.ll +++ /dev/null @@ -1,47 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -; Bug: i8 type in FRP8 register but not registering with register class causes segmentation fault. -; Fix: Removed i8 type from FPR8 register class. - -define void @test_concatvector_v8i8() { -entry.split: - br i1 undef, label %if.then, label %if.end - -if.then: ; preds = %entry.split - unreachable - -if.end: ; preds = %entry.split - br i1 undef, label %if.then9, label %if.end18 - -if.then9: ; preds = %if.end - unreachable - -if.end18: ; preds = %if.end - br label %for.body - -for.body: ; preds = %for.inc, %if.end18 - br i1 false, label %if.then30, label %for.inc - -if.then30: ; preds = %for.body - unreachable - -for.inc: ; preds = %for.body - br i1 undef, label %for.end, label %for.body - -for.end: ; preds = %for.inc - br label %for.body77 - -for.body77: ; preds = %for.body77, %for.end - br i1 undef, label %for.end106, label %for.body77 - -for.end106: ; preds = %for.body77 - br i1 undef, label %for.body130.us.us, label %stmt.for.body130.us.us - -stmt.for.body130.us.us: ; preds = %stmt.for.body130.us.us, %for.end106 - %_p_splat.us = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer - store <8 x i8> %_p_splat.us, <8 x i8>* undef, align 1 - br label %stmt.for.body130.us.us - -for.body130.us.us: ; preds = %for.body130.us.us, %for.end106 - br label %for.body130.us.us -} -