From: Simon Pilgrim Date: Thu, 16 Apr 2015 08:21:09 +0000 (+0000) Subject: TRUNCATE constant folding - minor fix for rL233224 X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=c7bcb37fd3a16d27753360f69ace0511501fa903 TRUNCATE constant folding - minor fix for rL233224 Fix for test case found by James Molloy - TRUNCATE of constant build vectors can be achieved more simply by replacing the node with a new build vector node of the truncated value type - no need to touch the scalar operands at all. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235079 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 0a97ef81b3c..770f0b226ee 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2851,13 +2851,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, // FIXME: Entirely reasonable to perform folding of other unary // operations here as the need arises. break; + case ISD::TRUNCATE: + // Constant build vector truncation can be done with the original scalar + // operands but with a new build vector with the truncated value type. + return getNode(ISD::BUILD_VECTOR, DL, VT, BV->ops()); case ISD::FNEG: case ISD::FABS: case ISD::FCEIL: case ISD::FTRUNC: case ISD::FFLOOR: case ISD::FP_EXTEND: - case ISD::TRUNCATE: case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { // Let the above scalar folding handle the folding of each element. 
diff --git a/test/CodeGen/AArch64/fold-constants.ll b/test/CodeGen/AArch64/fold-constants.ll new file mode 100644 index 00000000000..2dd0d124593 --- /dev/null +++ b/test/CodeGen/AArch64/fold-constants.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s + +define i64 @dotests_616() { +; CHECK-LABEL: dotests_616 +; CHECK: movi d0, #0000000000000000 +; CHECK-NEXT: umov w8, v0.b[2] +; CHECK-NEXT: sbfx w8, w8, #0, #1 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %0 = bitcast <2 x i64> zeroinitializer to <8 x i16> + %1 = and <8 x i16> zeroinitializer, %0 + %2 = icmp ne <8 x i16> %1, zeroinitializer + %3 = extractelement <8 x i1> %2, i32 2 + %vgetq_lane285 = sext i1 %3 to i16 + %vset_lane = insertelement <4 x i16> undef, i16 %vgetq_lane285, i32 0 + %4 = bitcast <4 x i16> %vset_lane to <1 x i64> + %vget_lane = extractelement <1 x i64> %4, i32 0 + ret i64 %vget_lane +}