From: Lang Hames Date: Wed, 19 Nov 2014 19:15:41 +0000 (+0000) Subject: [ADT] Fix PR20728 - Incorrect APFloat::fusedMultiplyAdd results for x86_fp80. X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=58c62e1dba9b9104cba04044d8c1678632bae729 [ADT] Fix PR20728 - Incorrect APFloat::fusedMultiplyAdd results for x86_fp80. As detailed at http://llvm.org/PR20728, due to an internal overflow in APFloat::multiplySignificand the APFloat::fusedMultiplyAdd method can return incorrect results for x87DoubleExtended (x86_fp80) values. This commonly manifests as incorrect constant folding of libm fmal calls on x86. E.g. fmal(1.0L, 1.0L, 3.0L) == 0.0L (should be 4.0L) This patch fixes PR20728 by adding an extra bit to the significand for intermediate results of APFloat::multiplySignificand, avoiding the overflow. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222374 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index f63d7748973..295b16c55e2 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -926,7 +926,10 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) assert(semantics == rhs.semantics); precision = semantics->precision; - newPartsCount = partCountForBits(precision * 2); + + // Allocate space for twice as many bits as the original significand, plus one + // extra bit for the addition to overflow into. + newPartsCount = partCountForBits(precision * 2 + 1); if (newPartsCount > 4) fullSignificand = new integerPart[newPartsCount]; @@ -948,11 +951,12 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) // *this = a23 . a22 ... a0 * 2^e1 // rhs = b23 . b22 ... b0 * 2^e2 // the result of multiplication is: - // *this = c47 c46 . c45 ... c0 * 2^(e1+e2) - // Note that there are two significant bits at the left-hand side of the - // radix point. Move the radix point toward left by one bit, and adjust - // exponent accordingly. - exponent += 1; + // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) + // Note that there are three significant bits at the left-hand side of the + // radix point: two for the multiplication, and an overflow bit for the + // addition (that will always be zero at this point). Move the radix point + // toward left by two bits, and adjust exponent accordingly. + exponent += 2; if (addend && addend->isNonZero()) { // The intermediate result of the multiplication has "2 * precision" @@ -964,13 +968,13 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) opStatus status; unsigned int extendedPrecision; - /* Normalize our MSB. */ - extendedPrecision = 2 * precision; - if (omsb != extendedPrecision) { + // Normalize our MSB to one below the top bit to allow for overflow. + extendedPrecision = 2 * precision + 1; + if (omsb != extendedPrecision - 1) { assert(extendedPrecision > omsb); APInt::tcShiftLeft(fullSignificand, newPartsCount, - extendedPrecision - omsb); - exponent -= extendedPrecision - omsb; + (extendedPrecision - 1) - omsb); + exponent -= (extendedPrecision - 1) - omsb; } /* Create new semantics. */ @@ -987,6 +991,14 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); assert(status == opOK); (void)status; + + // Shift the significand of the addend right by one bit. This guarantees + // that the high bit of the significand is zero (same as fullSignificand), + // so the addition will overflow (if it does overflow at all) into the top bit. + lost_fraction = extendedAddend.shiftSignificandRight(1); + assert(lost_fraction == lfExactlyZero && + "Lost precision while shifting addend for fused-multiply-add."); + lost_fraction = addOrSubtractSignificand(extendedAddend, false); /* Restore our state. */ @@ -1002,7 +1014,7 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) // having "precision" significant-bits. First, move the radix point from // poision "2*precision - 1" to "precision - 1". The exponent need to be // adjusted by "2*precision - 1" - "precision - 1" = "precision". - exponent -= precision; + exponent -= precision + 1; // In case MSB resides at the left-hand side of radix point, shift the // mantissa right by some amount to make sure the MSB reside right before diff --git a/unittests/ADT/APFloatTest.cpp b/unittests/ADT/APFloatTest.cpp index a86be5af375..c7ec16ba803 100644 --- a/unittests/ADT/APFloatTest.cpp +++ b/unittests/ADT/APFloatTest.cpp @@ -474,6 +474,18 @@ TEST(APFloatTest, FMA) { f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven); EXPECT_EQ(12.0f, f1.convertToFloat()); } + + { + APFloat M1(APFloat::x87DoubleExtended, 1.0); + APFloat M2(APFloat::x87DoubleExtended, 1.0); + APFloat A(APFloat::x87DoubleExtended, 3.0); + + bool losesInfo = false; + M1.fusedMultiplyAdd(M1, A, APFloat::rmNearestTiesToEven); + M1.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_FALSE(losesInfo); + EXPECT_EQ(4.0f, M1.convertToFloat()); + } } TEST(APFloatTest, MinNum) {