From: Lang Hames <lhames@gmail.com>
Date: Wed, 19 Nov 2014 19:15:41 +0000 (+0000)
Subject: [ADT] Fix PR20728 - Incorrect APFloat::fusedMultiplyAdd results for x86_fp80.
X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=58c62e1dba9b9104cba04044d8c1678632bae729

[ADT] Fix PR20728 - Incorrect APFloat::fusedMultiplyAdd results for x86_fp80.

As detailed at http://llvm.org/PR20728, due to an internal overflow in
APFloat::multiplySignificand the APFloat::fusedMultiplyAdd method can return
incorrect results for x87DoubleExtended (x86_fp80) values. This commonly
manifests as incorrect constant folding of libm fmal calls on x86. E.g.

fmal(1.0L, 1.0L, 3.0L) == 0.0L      (should be 4.0L)

This patch fixes PR20728 by adding an extra bit to the significand for
intermediate results of APFloat::multiplySignificand, avoiding the overflow.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222374 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index f63d7748973..295b16c55e2 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -926,7 +926,10 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
   assert(semantics == rhs.semantics);
 
   precision = semantics->precision;
-  newPartsCount = partCountForBits(precision * 2);
+
+  // Allocate space for twice as many bits as the original significand, plus one
+  // extra bit for the addition to overflow into.
+  newPartsCount = partCountForBits(precision * 2 + 1);
 
   if (newPartsCount > 4)
     fullSignificand = new integerPart[newPartsCount];
@@ -948,11 +951,12 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
   //   *this = a23 . a22 ... a0 * 2^e1
   //     rhs = b23 . b22 ... b0 * 2^e2
   // the result of multiplication is:
-  //   *this = c47 c46 . c45 ... c0 * 2^(e1+e2)
-  // Note that there are two significant bits at the left-hand side of the 
-  // radix point. Move the radix point toward left by one bit, and adjust
-  // exponent accordingly.
-  exponent += 1;
+  //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
+  // Note that there are three significant bits at the left-hand side of the 
+  // radix point: two for the multiplication, and an overflow bit for the
+  // addition (that will always be zero at this point). Move the radix point
+  // toward left by two bits, and adjust exponent accordingly.
+  exponent += 2;
 
   if (addend && addend->isNonZero()) {
     // The intermediate result of the multiplication has "2 * precision" 
@@ -964,13 +968,13 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
     opStatus status;
     unsigned int extendedPrecision;
 
-    /* Normalize our MSB.  */
-    extendedPrecision = 2 * precision;
-    if (omsb != extendedPrecision) {
+    // Normalize our MSB to one below the top bit to allow for overflow.
+    extendedPrecision = 2 * precision + 1;
+    if (omsb != extendedPrecision - 1) {
       assert(extendedPrecision > omsb);
       APInt::tcShiftLeft(fullSignificand, newPartsCount,
-                         extendedPrecision - omsb);
-      exponent -= extendedPrecision - omsb;
+                         (extendedPrecision - 1) - omsb);
+      exponent -= (extendedPrecision - 1) - omsb;
     }
 
     /* Create new semantics.  */
@@ -987,6 +991,14 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
     status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
     assert(status == opOK);
     (void)status;
+
+    // Shift the significand of the addend right by one bit. This guarantees
+    // that the high bit of the significand is zero (same as fullSignificand),
+    // so the addition will overflow (if it does overflow at all) into the top bit.
+    lost_fraction = extendedAddend.shiftSignificandRight(1);
+    assert(lost_fraction == lfExactlyZero &&
+           "Lost precision while shifting addend for fused-multiply-add.");
+
     lost_fraction = addOrSubtractSignificand(extendedAddend, false);
 
     /* Restore our state.  */
@@ -1002,7 +1014,7 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
   // having "precision" significant-bits. First, move the radix point from 
   // poision "2*precision - 1" to "precision - 1". The exponent need to be
   // adjusted by "2*precision - 1" - "precision - 1" = "precision".
-  exponent -= precision;
+  exponent -= precision + 1;
 
   // In case MSB resides at the left-hand side of radix point, shift the
   // mantissa right by some amount to make sure the MSB reside right before
diff --git a/unittests/ADT/APFloatTest.cpp b/unittests/ADT/APFloatTest.cpp
index a86be5af375..c7ec16ba803 100644
--- a/unittests/ADT/APFloatTest.cpp
+++ b/unittests/ADT/APFloatTest.cpp
@@ -474,6 +474,18 @@ TEST(APFloatTest, FMA) {
     f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven);
     EXPECT_EQ(12.0f, f1.convertToFloat());
   }
+
+  {
+    APFloat M1(APFloat::x87DoubleExtended, 1.0);
+    APFloat M2(APFloat::x87DoubleExtended, 1.0);
+    APFloat A(APFloat::x87DoubleExtended, 3.0);
+
+    bool losesInfo = false;
+    M1.fusedMultiplyAdd(M1, A, APFloat::rmNearestTiesToEven);
+    M1.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &losesInfo);
+    EXPECT_FALSE(losesInfo);
+    EXPECT_EQ(4.0f, M1.convertToFloat());
+  }
 }
 
 TEST(APFloatTest, MinNum) {