From: Pawel Bylica Date: Tue, 21 Apr 2015 06:28:36 +0000 (+0000) Subject: Fix generic shift expansion when shift amount is 0 X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=775c174b7bf5a80b93ad1107c374c1c40a9a8458 Fix generic shift expansion when shift amount is 0 Summary: This fixes http://llvm.org/bugs/show_bug.cgi?id=16439. This is one possible way to approach this. The other would be to split InL>>(nbits-Amt) into (InL>>(nbits-1-Amt))>>1, which is also valid since since we only need to care about Amt up nbits-1. It's hard to tell which one is better since the shift might be expensive if this stage of expansion is not yet a legal machine integer, whereas comparisons with zero are relatively cheap at all sizes, but more expensive than a shift if the shift is on a legal machine type. Patch by Keno Fischer! Test Plan: regression test from http://reviews.llvm.org/D7752 Reviewers: chfast, resistor Reviewed By: chfast, resistor Subscribers: sanjoy, resistor, chfast, llvm-commits Differential Revision: http://reviews.llvm.org/D4978 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235370 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 6643103bcc1..d65bca97f80 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1547,6 +1547,9 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt); SDValue isShort = DAG.getSetCC(dl, getSetCCResultType(ShTy), Amt, NVBitsNode, ISD::SETULT); + SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(ShTy), + Amt, DAG.getConstant(0, ShTy), + ISD::SETEQ); SDValue LoS, HiS, LoL, HiL; switch (N->getOpcode()) { @@ -1556,8 +1559,6 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); HiS = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(ISD::SHL, dl, NVT, InH, Amt), - // FIXME: If Amt is zero, the following shift generates an undefined result - // on some architectures. DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack)); // Long: ShAmt >= NVTBits @@ -1565,7 +1566,8 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part. Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); - Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); + Hi = DAG.getSelect(dl, NVT, isZero, InH, + DAG.getSelect(dl, NVT, isShort, HiS, HiL)); return true; case ISD::SRL: // Short: ShAmt < NVTBits @@ -1580,7 +1582,8 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { HiL = DAG.getConstant(0, NVT); // Hi part is zero. LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part. - Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Lo = DAG.getSelect(dl, NVT, isZero, InL, + DAG.getSelect(dl, NVT, isShort, LoS, LoL)); Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; case ISD::SRA: @@ -1588,8 +1591,6 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); LoS = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), - // FIXME: If Amt is zero, the following shift generates an undefined result - // on some architectures. DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); // Long: ShAmt >= NVTBits @@ -1597,7 +1598,8 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { DAG.getConstant(NVTBits-1, ShTy)); LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part. - Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Lo = DAG.getSelect(dl, NVT, isZero, InL, + DAG.getSelect(dl, NVT, isShort, LoS, LoL)); Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; } diff --git a/test/CodeGen/X86/shift-i256.ll b/test/CodeGen/X86/shift-i256.ll index d5f65a6ed18..866e7e67fb0 100644 --- a/test/CodeGen/X86/shift-i256.ll +++ b/test/CodeGen/X86/shift-i256.ll @@ -1,9 +1,21 @@ -; RUN: llc < %s -march=x86 -; RUN: llc < %s -march=x86-64 +; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -march=x86-64 -O0 | FileCheck %s -check-prefix=CHECK-X64 +; RUN: llc < %s -march=x86-64 -O2 | FileCheck %s -check-prefix=CHECK-X64 -define void @t(i256 %x, i256 %a, i256* nocapture %r) nounwind readnone { +; CHECK-LABEL: shift1 +define void @shift1(i256 %x, i256 %a, i256* nocapture %r) nounwind readnone { entry: %0 = ashr i256 %x, %a store i256 %0, i256* %r ret void } + +; CHECK-LABEL: shift2 +define i256 @shift2(i256 %c) nounwind +{ + %b = shl i256 1, %c ; %c must not be a constant + ; Special case when %c is 0: + ; CHECK-X64: testb [[REG:%r[0-9]+b]], [[REG]] + ; CHECK-X64: cmoveq + ret i256 %b +}