1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/MathExtras.h"
28 /// A macro used to combine two fcCategory enums into one key which can be used
29 /// in a switch statement to classify how the interaction of two APFloat's
30 /// categories affects an operation.
32 /// TODO: If clang source code is ever allowed to use constexpr in its own
33 /// codebase, change this into a static inline function.
34 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
36 /* Assumed in hexadecimal significand parsing, and conversion to
37 hexadecimal strings. */
38 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
39 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
43 /* Represents floating point arithmetic semantics. */
45 /* The largest E such that 2^E is representable; this matches the
46 definition of IEEE 754. */
47 APFloat::ExponentType maxExponent;
49 /* The smallest E such that 2^E is a normalized number; this
50 matches the definition of IEEE 754. */
51 APFloat::ExponentType minExponent;
53 /* Number of bits in the significand. This includes the integer
55 unsigned int precision;
58 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 };
59 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 };
60 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 };
61 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 };
62 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 };
63 const fltSemantics APFloat::Bogus = { 0, 0, 0 };
65 /* The PowerPC format consists of two doubles. It does not map cleanly
66 onto the usual format above. It is approximated using twice the
67 mantissa bits. Note that for exponents near the double minimum,
68 we no longer can represent the full 106 mantissa bits, so those
69 will be treated as denormal numbers.
71 FIXME: While this approximation is equivalent to what GCC uses for
72 compile-time arithmetic on PPC double-double numbers, it is not able
73 to represent all possible values held by a PPC double-double number,
74 for example: (long double) 1.0 + (long double) 0x1p-106
75 Should this be replaced by a full emulation of PPC double-double? */
76 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 };
78 /* A tight upper bound on number of parts required to hold the value
81 power * 815 / (351 * integerPartWidth) + 1
83 However, whilst the result may require only this many parts,
84 because we are multiplying two values to get it, the
85 multiplication may require an extra part with the excess part
86 being zero (consider the trivial case of 1 * 1, tcFullMultiply
87 requires two parts to hold the single-part result). So we add an
88 extra one to guarantee enough space whilst multiplying. */
89 const unsigned int maxExponent = 16383;
90 const unsigned int maxPrecision = 113;
91 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
92 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
93 / (351 * integerPartWidth));
96 /* A bunch of private, handy routines. */
98 static inline unsigned int
99 partCountForBits(unsigned int bits)
101 return ((bits) + integerPartWidth - 1) / integerPartWidth;
104 /* Returns 0U-9U. Return values >= 10U are not digits. */
105 static inline unsigned int
106 decDigitValue(unsigned int c)
111 /* Return the value of a decimal exponent of the form
114 If the exponent overflows, returns a large exponent with the
117 readExponent(StringRef::iterator begin, StringRef::iterator end)
120 unsigned int absExponent;
121 const unsigned int overlargeExponent = 24000; /* FIXME. */
122 StringRef::iterator p = begin;
124 assert(p != end && "Exponent has no digits");
126 isNegative = (*p == '-');
127 if (*p == '-' || *p == '+') {
129 assert(p != end && "Exponent has no digits");
132 absExponent = decDigitValue(*p++);
133 assert(absExponent < 10U && "Invalid character in exponent");
135 for (; p != end; ++p) {
138 value = decDigitValue(*p);
139 assert(value < 10U && "Invalid character in exponent");
141 value += absExponent * 10;
142 if (absExponent >= overlargeExponent) {
143 absExponent = overlargeExponent;
144 p = end; /* outwit assert below */
150 assert(p == end && "Invalid exponent in exponent");
153 return -(int) absExponent;
155 return (int) absExponent;
158 /* This is ugly and needs cleaning up, but I don't immediately see
159 how whilst remaining safe. */
161 totalExponent(StringRef::iterator p, StringRef::iterator end,
162 int exponentAdjustment)
164 int unsignedExponent;
165 bool negative, overflow;
168 assert(p != end && "Exponent has no digits");
170 negative = *p == '-';
171 if (*p == '-' || *p == '+') {
173 assert(p != end && "Exponent has no digits");
176 unsignedExponent = 0;
178 for (; p != end; ++p) {
181 value = decDigitValue(*p);
182 assert(value < 10U && "Invalid character in exponent");
184 unsignedExponent = unsignedExponent * 10 + value;
185 if (unsignedExponent > 32767) {
191 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
195 exponent = unsignedExponent;
197 exponent = -exponent;
198 exponent += exponentAdjustment;
199 if (exponent > 32767 || exponent < -32768)
204 exponent = negative ? -32768: 32767;
209 static StringRef::iterator
210 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
211 StringRef::iterator *dot)
213 StringRef::iterator p = begin;
215 while (*p == '0' && p != end)
221 assert(end - begin != 1 && "Significand has no digits");
223 while (*p == '0' && p != end)
230 /* Given a normal decimal floating point number of the form
234 where the decimal point and exponent are optional, fill out the
235 structure D. Exponent is appropriate if the significand is
236 treated as an integer, and normalizedExponent if the significand
237 is taken to have the decimal point after a single leading
240 If the value is zero, V->firstSigDigit points to a non-digit, and
241 the return exponent is zero.
244 const char *firstSigDigit;
245 const char *lastSigDigit;
247 int normalizedExponent;
251 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
254 StringRef::iterator dot = end;
255 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
257 D->firstSigDigit = p;
259 D->normalizedExponent = 0;
261 for (; p != end; ++p) {
263 assert(dot == end && "String contains multiple dots");
268 if (decDigitValue(*p) >= 10U)
273 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
274 assert(p != begin && "Significand has no digits");
275 assert((dot == end || p - begin != 1) && "Significand has no digits");
277 /* p points to the first non-digit in the string */
278 D->exponent = readExponent(p + 1, end);
280 /* Implied decimal point? */
285 /* If number is all zeroes accept any exponent. */
286 if (p != D->firstSigDigit) {
287 /* Drop insignificant trailing zeroes. */
292 while (p != begin && *p == '0');
293 while (p != begin && *p == '.');
296 /* Adjust the exponents for any decimal point. */
297 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
298 D->normalizedExponent = (D->exponent +
299 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
300 - (dot > D->firstSigDigit && dot < p)));
306 /* Return the trailing fraction of a hexadecimal number.
307 DIGITVALUE is the first hex digit of the fraction, P points to
310 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
311 unsigned int digitValue)
313 unsigned int hexDigit;
315 /* If the first trailing digit isn't 0 or 8 we can work out the
316 fraction immediately. */
318 return lfMoreThanHalf;
319 else if (digitValue < 8 && digitValue > 0)
320 return lfLessThanHalf;
322 /* Otherwise we need to find the first non-zero digit. */
326 assert(p != end && "Invalid trailing hexadecimal fraction!");
328 hexDigit = hexDigitValue(*p);
330 /* If we ran off the end it is exactly zero or one-half, otherwise
333 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
335 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
338 /* Return the fraction lost were a bignum truncated losing the least
339 significant BITS bits. */
341 lostFractionThroughTruncation(const integerPart *parts,
342 unsigned int partCount,
347 lsb = APInt::tcLSB(parts, partCount);
349 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
351 return lfExactlyZero;
353 return lfExactlyHalf;
354 if (bits <= partCount * integerPartWidth &&
355 APInt::tcExtractBit(parts, bits - 1))
356 return lfMoreThanHalf;
358 return lfLessThanHalf;
361 /* Shift DST right BITS bits noting lost fraction. */
363 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
365 lostFraction lost_fraction;
367 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
369 APInt::tcShiftRight(dst, parts, bits);
371 return lost_fraction;
374 /* Combine the effect of two lost fractions. */
376 combineLostFractions(lostFraction moreSignificant,
377 lostFraction lessSignificant)
379 if (lessSignificant != lfExactlyZero) {
380 if (moreSignificant == lfExactlyZero)
381 moreSignificant = lfLessThanHalf;
382 else if (moreSignificant == lfExactlyHalf)
383 moreSignificant = lfMoreThanHalf;
386 return moreSignificant;
389 /* The error from the true value, in half-ulps, on multiplying two
390 floating point numbers, which differ from the value they
391 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
392 than the returned value.
394 See "How to Read Floating Point Numbers Accurately" by William D
397 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
399 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
401 if (HUerr1 + HUerr2 == 0)
402 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
404 return inexactMultiply + 2 * (HUerr1 + HUerr2);
407 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
408 when the least significant BITS are truncated. BITS cannot be
411 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
413 unsigned int count, partBits;
414 integerPart part, boundary;
419 count = bits / integerPartWidth;
420 partBits = bits % integerPartWidth + 1;
422 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
425 boundary = (integerPart) 1 << (partBits - 1);
430 if (part - boundary <= boundary - part)
431 return part - boundary;
433 return boundary - part;
436 if (part == boundary) {
439 return ~(integerPart) 0; /* A lot. */
442 } else if (part == boundary - 1) {
445 return ~(integerPart) 0; /* A lot. */
450 return ~(integerPart) 0; /* A lot. */
453 /* Place pow(5, power) in DST, and return the number of parts used.
454 DST must be at least one part larger than size of the answer. */
456 powerOf5(integerPart *dst, unsigned int power)
458 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
460 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
461 pow5s[0] = 78125 * 5;
463 unsigned int partsCount[16] = { 1 };
464 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
466 assert(power <= maxExponent);
471 *p1 = firstEightPowers[power & 7];
477 for (unsigned int n = 0; power; power >>= 1, n++) {
482 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
484 pc = partsCount[n - 1];
485 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
487 if (pow5[pc - 1] == 0)
495 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
497 if (p2[result - 1] == 0)
500 /* Now result is in p1 with partsCount parts and p2 is scratch
502 tmp = p1, p1 = p2, p2 = tmp;
509 APInt::tcAssign(dst, p1, result);
514 /* Zero at the end to avoid modular arithmetic when adding one; used
515 when rounding up during hexadecimal output. */
516 static const char hexDigitsLower[] = "0123456789abcdef0";
517 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
518 static const char infinityL[] = "infinity";
519 static const char infinityU[] = "INFINITY";
520 static const char NaNL[] = "nan";
521 static const char NaNU[] = "NAN";
523 /* Write out an integerPart in hexadecimal, starting with the most
524 significant nibble. Write out exactly COUNT hexdigits, return
527 partAsHex (char *dst, integerPart part, unsigned int count,
528 const char *hexDigitChars)
530 unsigned int result = count;
532 assert(count != 0 && count <= integerPartWidth / 4);
534 part >>= (integerPartWidth - 4 * count);
536 dst[count] = hexDigitChars[part & 0xf];
543 /* Write out an unsigned decimal integer. */
545 writeUnsignedDecimal (char *dst, unsigned int n)
561 /* Write out a signed decimal integer. */
563 writeSignedDecimal (char *dst, int value)
567 dst = writeUnsignedDecimal(dst, -(unsigned) value);
569 dst = writeUnsignedDecimal(dst, value);
576 APFloat::initialize(const fltSemantics *ourSemantics)
580 semantics = ourSemantics;
583 significand.parts = new integerPart[count];
587 APFloat::freeSignificand()
590 delete [] significand.parts;
594 APFloat::assign(const APFloat &rhs)
596 assert(semantics == rhs.semantics);
599 category = rhs.category;
600 exponent = rhs.exponent;
601 if (category == fcNormal || category == fcNaN)
602 copySignificand(rhs);
606 APFloat::copySignificand(const APFloat &rhs)
608 assert(category == fcNormal || category == fcNaN);
609 assert(rhs.partCount() >= partCount());
611 APInt::tcAssign(significandParts(), rhs.significandParts(),
615 /* Make this number a NaN, with an arbitrary but deterministic value
616 for the significand. If double or longer, this is a signalling NaN,
617 which may not be ideal. If float, this is QNaN(0). */
618 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
623 integerPart *significand = significandParts();
624 unsigned numParts = partCount();
626 // Set the significand bits to the fill.
627 if (!fill || fill->getNumWords() < numParts)
628 APInt::tcSet(significand, 0, numParts);
630 APInt::tcAssign(significand, fill->getRawData(),
631 std::min(fill->getNumWords(), numParts));
633 // Zero out the excess bits of the significand.
634 unsigned bitsToPreserve = semantics->precision - 1;
635 unsigned part = bitsToPreserve / 64;
636 bitsToPreserve %= 64;
637 significand[part] &= ((1ULL << bitsToPreserve) - 1);
638 for (part++; part != numParts; ++part)
639 significand[part] = 0;
642 unsigned QNaNBit = semantics->precision - 2;
645 // We always have to clear the QNaN bit to make it an SNaN.
646 APInt::tcClearBit(significand, QNaNBit);
648 // If there are no bits set in the payload, we have to set
649 // *something* to make it a NaN instead of an infinity;
650 // conventionally, this is the next bit down from the QNaN bit.
651 if (APInt::tcIsZero(significand, numParts))
652 APInt::tcSetBit(significand, QNaNBit - 1);
654 // We always have to set the QNaN bit to make it a QNaN.
655 APInt::tcSetBit(significand, QNaNBit);
658 // For x87 extended precision, we want to make a NaN, not a
659 // pseudo-NaN. Maybe we should expose the ability to make
661 if (semantics == &APFloat::x87DoubleExtended)
662 APInt::tcSetBit(significand, QNaNBit + 1);
665 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
667 APFloat value(Sem, uninitialized);
668 value.makeNaN(SNaN, Negative, fill);
673 APFloat::operator=(const APFloat &rhs)
676 if (semantics != rhs.semantics) {
678 initialize(rhs.semantics);
687 APFloat::isDenormal() const {
688 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
689 (APInt::tcExtractBit(significandParts(),
690 semantics->precision - 1) == 0);
694 APFloat::isSmallest() const {
695 // The smallest number by magnitude in our format will be the smallest
696 // denormal, i.e. the floating point number with exponent being minimum
697 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
698 return isFiniteNonZero() && exponent == semantics->minExponent &&
699 significandMSB() == 0;
702 bool APFloat::isSignificandAllOnes() const {
703 // Test if the significand excluding the integral bit is all ones. This allows
704 // us to test for binade boundaries.
705 const integerPart *Parts = significandParts();
706 const unsigned PartCount = partCount();
707 for (unsigned i = 0; i < PartCount - 1; i++)
711 // Set the unused high bits to all ones when we compare.
712 const unsigned NumHighBits =
713 PartCount*integerPartWidth - semantics->precision + 1;
714 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
715 "fill than integerPartWidth");
716 const integerPart HighBitFill =
717 ~integerPart(0) << (integerPartWidth - NumHighBits);
718 if (~(Parts[PartCount - 1] | HighBitFill))
724 bool APFloat::isSignificandAllZeros() const {
725 // Test if the significand excluding the integral bit is all zeros. This
726 // allows us to test for binade boundaries.
727 const integerPart *Parts = significandParts();
728 const unsigned PartCount = partCount();
730 for (unsigned i = 0; i < PartCount - 1; i++)
734 const unsigned NumHighBits =
735 PartCount*integerPartWidth - semantics->precision + 1;
736 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
737 "clear than integerPartWidth");
738 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
740 if (Parts[PartCount - 1] & HighBitMask)
747 APFloat::isLargest() const {
748 // The largest number by magnitude in our format will be the floating point
749 // number with maximum exponent and with significand that is all ones.
750 return isFiniteNonZero() && exponent == semantics->maxExponent
751 && isSignificandAllOnes();
755 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
758 if (semantics != rhs.semantics ||
759 category != rhs.category ||
762 if (category==fcZero || category==fcInfinity)
764 else if (category==fcNormal && exponent!=rhs.exponent)
768 const integerPart* p=significandParts();
769 const integerPart* q=rhs.significandParts();
770 for (; i>0; i--, p++, q++) {
778 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
779 initialize(&ourSemantics);
782 exponent = ourSemantics.precision - 1;
783 significandParts()[0] = value;
784 normalize(rmNearestTiesToEven, lfExactlyZero);
787 APFloat::APFloat(const fltSemantics &ourSemantics) {
788 initialize(&ourSemantics);
793 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
794 // Allocates storage if necessary but does not initialize it.
795 initialize(&ourSemantics);
798 APFloat::APFloat(const fltSemantics &ourSemantics,
799 fltCategory ourCategory, bool negative) {
800 initialize(&ourSemantics);
801 category = ourCategory;
803 if (category == fcNormal)
805 else if (ourCategory == fcNaN)
809 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) {
810 initialize(&ourSemantics);
811 convertFromString(text, rmNearestTiesToEven);
814 APFloat::APFloat(const APFloat &rhs) {
815 initialize(rhs.semantics);
824 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
825 void APFloat::Profile(FoldingSetNodeID& ID) const {
826 ID.Add(bitcastToAPInt());
830 APFloat::partCount() const
832 return partCountForBits(semantics->precision + 1);
836 APFloat::semanticsPrecision(const fltSemantics &semantics)
838 return semantics.precision;
842 APFloat::significandParts() const
844 return const_cast<APFloat *>(this)->significandParts();
848 APFloat::significandParts()
851 return significand.parts;
853 return &significand.part;
857 APFloat::zeroSignificand()
860 APInt::tcSet(significandParts(), 0, partCount());
863 /* Increment an fcNormal floating point number's significand. */
865 APFloat::incrementSignificand()
869 carry = APInt::tcIncrement(significandParts(), partCount());
871 /* Our callers should never cause us to overflow. */
876 /* Add the significand of the RHS. Returns the carry flag. */
878 APFloat::addSignificand(const APFloat &rhs)
882 parts = significandParts();
884 assert(semantics == rhs.semantics);
885 assert(exponent == rhs.exponent);
887 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
890 /* Subtract the significand of the RHS with a borrow flag. Returns
893 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
897 parts = significandParts();
899 assert(semantics == rhs.semantics);
900 assert(exponent == rhs.exponent);
902 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
906 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
907 on to the full-precision result of the multiplication. Returns the
910 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
912 unsigned int omsb; // One, not zero, based MSB.
913 unsigned int partsCount, newPartsCount, precision;
914 integerPart *lhsSignificand;
915 integerPart scratch[4];
916 integerPart *fullSignificand;
917 lostFraction lost_fraction;
920 assert(semantics == rhs.semantics);
922 precision = semantics->precision;
923 newPartsCount = partCountForBits(precision * 2);
925 if (newPartsCount > 4)
926 fullSignificand = new integerPart[newPartsCount];
928 fullSignificand = scratch;
930 lhsSignificand = significandParts();
931 partsCount = partCount();
933 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
934 rhs.significandParts(), partsCount, partsCount);
936 lost_fraction = lfExactlyZero;
937 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
938 exponent += rhs.exponent;
940 // Assume the operands involved in the multiplication are single-precision
941 // FP, and the two multiplicants are:
942 // *this = a23 . a22 ... a0 * 2^e1
943 // rhs = b23 . b22 ... b0 * 2^e2
944 // the result of multiplication is:
945 // *this = c47 c46 . c45 ... c0 * 2^(e1+e2)
946 // Note that there are two significant bits at the left-hand side of the
947 // radix point. Move the radix point toward left by one bit, and adjust
948 // exponent accordingly.
952 // The intermediate result of the multiplication has "2 * precision"
953 // signicant bit; adjust the addend to be consistent with mul result.
955 Significand savedSignificand = significand;
956 const fltSemantics *savedSemantics = semantics;
957 fltSemantics extendedSemantics;
959 unsigned int extendedPrecision;
961 /* Normalize our MSB. */
962 extendedPrecision = 2 * precision;
963 if (omsb != extendedPrecision) {
964 assert(extendedPrecision > omsb);
965 APInt::tcShiftLeft(fullSignificand, newPartsCount,
966 extendedPrecision - omsb);
967 exponent -= extendedPrecision - omsb;
970 /* Create new semantics. */
971 extendedSemantics = *semantics;
972 extendedSemantics.precision = extendedPrecision;
974 if (newPartsCount == 1)
975 significand.part = fullSignificand[0];
977 significand.parts = fullSignificand;
978 semantics = &extendedSemantics;
980 APFloat extendedAddend(*addend);
981 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
982 assert(status == opOK);
984 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
986 /* Restore our state. */
987 if (newPartsCount == 1)
988 fullSignificand[0] = significand.part;
989 significand = savedSignificand;
990 semantics = savedSemantics;
992 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
995 // Convert the result having "2 * precision" significant-bits back to the one
996 // having "precision" significant-bits. First, move the radix point from
997 // poision "2*precision - 1" to "precision - 1". The exponent need to be
998 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
999 exponent -= precision;
1001 // In case MSB resides at the left-hand side of radix point, shift the
1002 // mantissa right by some amount to make sure the MSB reside right before
1003 // the radix point (i.e. "MSB . rest-significant-bits").
1005 // Note that the result is not normalized when "omsb < precision". So, the
1006 // caller needs to call APFloat::normalize() if normalized value is expected.
1007 if (omsb > precision) {
1008 unsigned int bits, significantParts;
1011 bits = omsb - precision;
1012 significantParts = partCountForBits(omsb);
1013 lf = shiftRight(fullSignificand, significantParts, bits);
1014 lost_fraction = combineLostFractions(lf, lost_fraction);
1018 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1020 if (newPartsCount > 4)
1021 delete [] fullSignificand;
1023 return lost_fraction;
1026 /* Multiply the significands of LHS and RHS to DST. */
1028 APFloat::divideSignificand(const APFloat &rhs)
1030 unsigned int bit, i, partsCount;
1031 const integerPart *rhsSignificand;
1032 integerPart *lhsSignificand, *dividend, *divisor;
1033 integerPart scratch[4];
1034 lostFraction lost_fraction;
1036 assert(semantics == rhs.semantics);
1038 lhsSignificand = significandParts();
1039 rhsSignificand = rhs.significandParts();
1040 partsCount = partCount();
1043 dividend = new integerPart[partsCount * 2];
1047 divisor = dividend + partsCount;
1049 /* Copy the dividend and divisor as they will be modified in-place. */
1050 for (i = 0; i < partsCount; i++) {
1051 dividend[i] = lhsSignificand[i];
1052 divisor[i] = rhsSignificand[i];
1053 lhsSignificand[i] = 0;
1056 exponent -= rhs.exponent;
1058 unsigned int precision = semantics->precision;
1060 /* Normalize the divisor. */
1061 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1064 APInt::tcShiftLeft(divisor, partsCount, bit);
1067 /* Normalize the dividend. */
1068 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1071 APInt::tcShiftLeft(dividend, partsCount, bit);
1074 /* Ensure the dividend >= divisor initially for the loop below.
1075 Incidentally, this means that the division loop below is
1076 guaranteed to set the integer bit to one. */
1077 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1079 APInt::tcShiftLeft(dividend, partsCount, 1);
1080 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1083 /* Long division. */
1084 for (bit = precision; bit; bit -= 1) {
1085 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1086 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1087 APInt::tcSetBit(lhsSignificand, bit - 1);
1090 APInt::tcShiftLeft(dividend, partsCount, 1);
1093 /* Figure out the lost fraction. */
1094 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1097 lost_fraction = lfMoreThanHalf;
1099 lost_fraction = lfExactlyHalf;
1100 else if (APInt::tcIsZero(dividend, partsCount))
1101 lost_fraction = lfExactlyZero;
1103 lost_fraction = lfLessThanHalf;
1108 return lost_fraction;
1112 APFloat::significandMSB() const
1114 return APInt::tcMSB(significandParts(), partCount());
1118 APFloat::significandLSB() const
1120 return APInt::tcLSB(significandParts(), partCount());
1123 /* Note that a zero result is NOT normalized to fcZero. */
1125 APFloat::shiftSignificandRight(unsigned int bits)
1127 /* Our exponent should not overflow. */
1128 assert((ExponentType) (exponent + bits) >= exponent);
1132 return shiftRight(significandParts(), partCount(), bits);
1135 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1137 APFloat::shiftSignificandLeft(unsigned int bits)
1139 assert(bits < semantics->precision);
1142 unsigned int partsCount = partCount();
1144 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1147 assert(!APInt::tcIsZero(significandParts(), partsCount));
1152 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1156 assert(semantics == rhs.semantics);
1157 assert(category == fcNormal);
1158 assert(rhs.category == fcNormal);
1160 compare = exponent - rhs.exponent;
1162 /* If exponents are equal, do an unsigned bignum comparison of the
1165 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1169 return cmpGreaterThan;
1170 else if (compare < 0)
1176 /* Handle overflow. Sign is preserved. We either become infinity or
1177 the largest finite number. */
1179 APFloat::handleOverflow(roundingMode rounding_mode)
1182 if (rounding_mode == rmNearestTiesToEven ||
1183 rounding_mode == rmNearestTiesToAway ||
1184 (rounding_mode == rmTowardPositive && !sign) ||
1185 (rounding_mode == rmTowardNegative && sign)) {
1186 category = fcInfinity;
1187 return (opStatus) (opOverflow | opInexact);
1190 /* Otherwise we become the largest finite number. */
1191 category = fcNormal;
1192 exponent = semantics->maxExponent;
1193 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1194 semantics->precision);
1199 /* Returns TRUE if, when truncating the current number, with BIT the
1200 new LSB, with the given lost fraction and rounding mode, the result
1201 would need to be rounded away from zero (i.e., by increasing the
1202 signficand). This routine must work for fcZero of both signs, and
1203 fcNormal numbers. */
1205 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1206 lostFraction lost_fraction,
1207 unsigned int bit) const
1209 /* NaNs and infinities should not have lost fractions. */
1210 assert(category == fcNormal || category == fcZero);
1212 /* Current callers never pass this so we don't handle it. */
1213 assert(lost_fraction != lfExactlyZero);
1215 switch (rounding_mode) {
1216 case rmNearestTiesToAway:
1217 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1219 case rmNearestTiesToEven:
1220 if (lost_fraction == lfMoreThanHalf)
1223 /* Our zeroes don't have a significand to test. */
1224 if (lost_fraction == lfExactlyHalf && category != fcZero)
1225 return APInt::tcExtractBit(significandParts(), bit);
1232 case rmTowardPositive:
1233 return sign == false;
1235 case rmTowardNegative:
1236 return sign == true;
1238 llvm_unreachable("Invalid rounding mode found");
1242 APFloat::normalize(roundingMode rounding_mode,
1243 lostFraction lost_fraction)
1245 unsigned int omsb; /* One, not zero, based MSB. */
1248 if (category != fcNormal)
1251 /* Before rounding normalize the exponent of fcNormal numbers. */
1252 omsb = significandMSB() + 1;
1255 /* OMSB is numbered from 1. We want to place it in the integer
1256 bit numbered PRECISION if possible, with a compensating change in
1258 exponentChange = omsb - semantics->precision;
1260 /* If the resulting exponent is too high, overflow according to
1261 the rounding mode. */
1262 if (exponent + exponentChange > semantics->maxExponent)
1263 return handleOverflow(rounding_mode);
1265 /* Subnormal numbers have exponent minExponent, and their MSB
1266 is forced based on that. */
1267 if (exponent + exponentChange < semantics->minExponent)
1268 exponentChange = semantics->minExponent - exponent;
1270 /* Shifting left is easy as we don't lose precision. */
1271 if (exponentChange < 0) {
1272 assert(lost_fraction == lfExactlyZero);
1274 shiftSignificandLeft(-exponentChange);
1279 if (exponentChange > 0) {
1282 /* Shift right and capture any new lost fraction. */
1283 lf = shiftSignificandRight(exponentChange);
1285 lost_fraction = combineLostFractions(lf, lost_fraction);
1287 /* Keep OMSB up-to-date. */
1288 if (omsb > (unsigned) exponentChange)
1289 omsb -= exponentChange;
1295 /* Now round the number according to rounding_mode given the lost
1298 /* As specified in IEEE 754, since we do not trap we do not report
1299 underflow for exact results. */
1300 if (lost_fraction == lfExactlyZero) {
1301 /* Canonicalize zeroes. */
1308 /* Increment the significand if we're rounding away from zero. */
1309 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1311 exponent = semantics->minExponent;
1313 incrementSignificand();
1314 omsb = significandMSB() + 1;
1316 /* Did the significand increment overflow? */
1317 if (omsb == (unsigned) semantics->precision + 1) {
1318 /* Renormalize by incrementing the exponent and shifting our
1319 significand right one. However if we already have the
1320 maximum exponent we overflow to infinity. */
1321 if (exponent == semantics->maxExponent) {
1322 category = fcInfinity;
1324 return (opStatus) (opOverflow | opInexact);
1327 shiftSignificandRight(1);
1333 /* The normal case - we were and are not denormal, and any
1334 significand increment above didn't overflow. */
1335 if (omsb == semantics->precision)
1338 /* We have a non-zero denormal. */
1339 assert(omsb < semantics->precision);
1341 /* Canonicalize zeroes. */
1345 /* The fcZero case is a denormal that underflowed to zero. */
1346 return (opStatus) (opUnderflow | opInexact);
1350 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1352 switch (PackCategoriesIntoKey(category, rhs.category)) {
1354 llvm_unreachable(0);
1356 case PackCategoriesIntoKey(fcNaN, fcZero):
1357 case PackCategoriesIntoKey(fcNaN, fcNormal):
1358 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1359 case PackCategoriesIntoKey(fcNaN, fcNaN):
1360 case PackCategoriesIntoKey(fcNormal, fcZero):
1361 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1362 case PackCategoriesIntoKey(fcInfinity, fcZero):
1365 case PackCategoriesIntoKey(fcZero, fcNaN):
1366 case PackCategoriesIntoKey(fcNormal, fcNaN):
1367 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1369 copySignificand(rhs);
1372 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1373 case PackCategoriesIntoKey(fcZero, fcInfinity):
1374 category = fcInfinity;
1375 sign = rhs.sign ^ subtract;
1378 case PackCategoriesIntoKey(fcZero, fcNormal):
1380 sign = rhs.sign ^ subtract;
1383 case PackCategoriesIntoKey(fcZero, fcZero):
1384 /* Sign depends on rounding mode; handled by caller. */
1387 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1388 /* Differently signed infinities can only be validly
1390 if (((sign ^ rhs.sign)!=0) != subtract) {
1397 case PackCategoriesIntoKey(fcNormal, fcNormal):
1402 /* Add or subtract two normal numbers. */
1404 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1407 lostFraction lost_fraction;
1410 /* Determine if the operation on the absolute values is effectively
1411 an addition or subtraction. */
1412 subtract ^= (sign ^ rhs.sign) ? true : false;
1414 /* Are we bigger exponent-wise than the RHS? */
1415 bits = exponent - rhs.exponent;
1417 /* Subtraction is more subtle than one might naively expect. */
1419 APFloat temp_rhs(rhs);
1423 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1424 lost_fraction = lfExactlyZero;
1425 } else if (bits > 0) {
1426 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1427 shiftSignificandLeft(1);
1430 lost_fraction = shiftSignificandRight(-bits - 1);
1431 temp_rhs.shiftSignificandLeft(1);
1436 carry = temp_rhs.subtractSignificand
1437 (*this, lost_fraction != lfExactlyZero);
1438 copySignificand(temp_rhs);
1441 carry = subtractSignificand
1442 (temp_rhs, lost_fraction != lfExactlyZero);
1445 /* Invert the lost fraction - it was on the RHS and
1447 if (lost_fraction == lfLessThanHalf)
1448 lost_fraction = lfMoreThanHalf;
1449 else if (lost_fraction == lfMoreThanHalf)
1450 lost_fraction = lfLessThanHalf;
1452 /* The code above is intended to ensure that no borrow is
1458 APFloat temp_rhs(rhs);
1460 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1461 carry = addSignificand(temp_rhs);
1463 lost_fraction = shiftSignificandRight(-bits);
1464 carry = addSignificand(rhs);
1467 /* We have a guard bit; generating a carry cannot happen. */
1472 return lost_fraction;
1476 APFloat::multiplySpecials(const APFloat &rhs)
1478 switch (PackCategoriesIntoKey(category, rhs.category)) {
1480 llvm_unreachable(0);
1482 case PackCategoriesIntoKey(fcNaN, fcZero):
1483 case PackCategoriesIntoKey(fcNaN, fcNormal):
1484 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1485 case PackCategoriesIntoKey(fcNaN, fcNaN):
1488 case PackCategoriesIntoKey(fcZero, fcNaN):
1489 case PackCategoriesIntoKey(fcNormal, fcNaN):
1490 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1492 copySignificand(rhs);
1495 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1496 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1497 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1498 category = fcInfinity;
1501 case PackCategoriesIntoKey(fcZero, fcNormal):
1502 case PackCategoriesIntoKey(fcNormal, fcZero):
1503 case PackCategoriesIntoKey(fcZero, fcZero):
1507 case PackCategoriesIntoKey(fcZero, fcInfinity):
1508 case PackCategoriesIntoKey(fcInfinity, fcZero):
1512 case PackCategoriesIntoKey(fcNormal, fcNormal):
1518 APFloat::divideSpecials(const APFloat &rhs)
1520 switch (PackCategoriesIntoKey(category, rhs.category)) {
1522 llvm_unreachable(0);
1524 case PackCategoriesIntoKey(fcNaN, fcZero):
1525 case PackCategoriesIntoKey(fcNaN, fcNormal):
1526 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1527 case PackCategoriesIntoKey(fcNaN, fcNaN):
1528 case PackCategoriesIntoKey(fcInfinity, fcZero):
1529 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1530 case PackCategoriesIntoKey(fcZero, fcInfinity):
1531 case PackCategoriesIntoKey(fcZero, fcNormal):
1534 case PackCategoriesIntoKey(fcZero, fcNaN):
1535 case PackCategoriesIntoKey(fcNormal, fcNaN):
1536 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1538 copySignificand(rhs);
1541 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1545 case PackCategoriesIntoKey(fcNormal, fcZero):
1546 category = fcInfinity;
1549 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1550 case PackCategoriesIntoKey(fcZero, fcZero):
1554 case PackCategoriesIntoKey(fcNormal, fcNormal):
1560 APFloat::modSpecials(const APFloat &rhs)
1562 switch (PackCategoriesIntoKey(category, rhs.category)) {
1564 llvm_unreachable(0);
1566 case PackCategoriesIntoKey(fcNaN, fcZero):
1567 case PackCategoriesIntoKey(fcNaN, fcNormal):
1568 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1569 case PackCategoriesIntoKey(fcNaN, fcNaN):
1570 case PackCategoriesIntoKey(fcZero, fcInfinity):
1571 case PackCategoriesIntoKey(fcZero, fcNormal):
1572 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1575 case PackCategoriesIntoKey(fcZero, fcNaN):
1576 case PackCategoriesIntoKey(fcNormal, fcNaN):
1577 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1579 copySignificand(rhs);
1582 case PackCategoriesIntoKey(fcNormal, fcZero):
1583 case PackCategoriesIntoKey(fcInfinity, fcZero):
1584 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1585 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1586 case PackCategoriesIntoKey(fcZero, fcZero):
1590 case PackCategoriesIntoKey(fcNormal, fcNormal):
1597 APFloat::changeSign()
1599 /* Look mummy, this one's easy. */
1604 APFloat::clearSign()
1606 /* So is this one. */
1611 APFloat::copySign(const APFloat &rhs)
1617 /* Normalized addition or subtraction. */
1619 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1624 fs = addOrSubtractSpecials(rhs, subtract);
1626 /* This return code means it was not a simple case. */
1627 if (fs == opDivByZero) {
1628 lostFraction lost_fraction;
1630 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1631 fs = normalize(rounding_mode, lost_fraction);
1633 /* Can only be zero if we lost no fraction. */
1634 assert(category != fcZero || lost_fraction == lfExactlyZero);
1637 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1638 positive zero unless rounding to minus infinity, except that
1639 adding two like-signed zeroes gives that zero. */
1640 if (category == fcZero) {
1641 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1642 sign = (rounding_mode == rmTowardNegative);
1648 /* Normalized addition. */
1650 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1652 return addOrSubtract(rhs, rounding_mode, false);
1655 /* Normalized subtraction. */
1657 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1659 return addOrSubtract(rhs, rounding_mode, true);
1662 /* Normalized multiply. */
1664 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1669 fs = multiplySpecials(rhs);
1671 if (category == fcNormal) {
1672 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1673 fs = normalize(rounding_mode, lost_fraction);
1674 if (lost_fraction != lfExactlyZero)
1675 fs = (opStatus) (fs | opInexact);
1681 /* Normalized divide. */
1683 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1688 fs = divideSpecials(rhs);
1690 if (category == fcNormal) {
1691 lostFraction lost_fraction = divideSignificand(rhs);
1692 fs = normalize(rounding_mode, lost_fraction);
1693 if (lost_fraction != lfExactlyZero)
1694 fs = (opStatus) (fs | opInexact);
1700 /* Normalized remainder. This is not currently correct in all cases. */
1702 APFloat::remainder(const APFloat &rhs)
1706 unsigned int origSign = sign;
1708 fs = V.divide(rhs, rmNearestTiesToEven);
1709 if (fs == opDivByZero)
1712 int parts = partCount();
1713 integerPart *x = new integerPart[parts];
1715 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1716 rmNearestTiesToEven, &ignored);
1717 if (fs==opInvalidOp)
1720 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1721 rmNearestTiesToEven);
1722 assert(fs==opOK); // should always work
1724 fs = V.multiply(rhs, rmNearestTiesToEven);
1725 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1727 fs = subtract(V, rmNearestTiesToEven);
1728 assert(fs==opOK || fs==opInexact); // likewise
1731 sign = origSign; // IEEE754 requires this
1736 /* Normalized llvm frem (C fmod).
1737 This is not currently correct in all cases. */
1739 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1742 fs = modSpecials(rhs);
1744 if (category == fcNormal && rhs.category == fcNormal) {
1746 unsigned int origSign = sign;
1748 fs = V.divide(rhs, rmNearestTiesToEven);
1749 if (fs == opDivByZero)
1752 int parts = partCount();
1753 integerPart *x = new integerPart[parts];
1755 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1756 rmTowardZero, &ignored);
1757 if (fs==opInvalidOp)
1760 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1761 rmNearestTiesToEven);
1762 assert(fs==opOK); // should always work
1764 fs = V.multiply(rhs, rounding_mode);
1765 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1767 fs = subtract(V, rounding_mode);
1768 assert(fs==opOK || fs==opInexact); // likewise
1771 sign = origSign; // IEEE754 requires this
1777 /* Normalized fused-multiply-add. */
1779 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1780 const APFloat &addend,
1781 roundingMode rounding_mode)
1785 /* Post-multiplication sign, before addition. */
1786 sign ^= multiplicand.sign;
1788 /* If and only if all arguments are normal do we need to do an
1789 extended-precision calculation. */
1790 if (category == fcNormal &&
1791 multiplicand.category == fcNormal &&
1792 addend.category == fcNormal) {
1793 lostFraction lost_fraction;
1795 lost_fraction = multiplySignificand(multiplicand, &addend);
1796 fs = normalize(rounding_mode, lost_fraction);
1797 if (lost_fraction != lfExactlyZero)
1798 fs = (opStatus) (fs | opInexact);
1800 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1801 positive zero unless rounding to minus infinity, except that
1802 adding two like-signed zeroes gives that zero. */
1803 if (category == fcZero && sign != addend.sign)
1804 sign = (rounding_mode == rmTowardNegative);
1806 fs = multiplySpecials(multiplicand);
1808 /* FS can only be opOK or opInvalidOp. There is no more work
1809 to do in the latter case. The IEEE-754R standard says it is
1810 implementation-defined in this case whether, if ADDEND is a
1811 quiet NaN, we raise invalid op; this implementation does so.
1813 If we need to do the addition we can do so with normal
1816 fs = addOrSubtract(addend, rounding_mode, false);
1822 /* Rounding-mode corrrect round to integral value. */
1823 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1826 // If the exponent is large enough, we know that this value is already
1827 // integral, and the arithmetic below would potentially cause it to saturate
1828 // to +/-Inf. Bail out early instead.
1829 if (category == fcNormal && exponent+1 >= (int)semanticsPrecision(*semantics))
1832 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1833 // precision of our format, and then subtract it back off again. The choice
1834 // of rounding modes for the addition/subtraction determines the rounding mode
1835 // for our integral rounding as well.
1836 // NOTE: When the input value is negative, we do subtraction followed by
1837 // addition instead.
1838 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1839 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1840 APFloat MagicConstant(*semantics);
1841 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1842 rmNearestTiesToEven);
1843 MagicConstant.copySign(*this);
1848 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1849 bool inputSign = isNegative();
1851 fs = add(MagicConstant, rounding_mode);
1852 if (fs != opOK && fs != opInexact)
1855 fs = subtract(MagicConstant, rounding_mode);
1857 // Restore the input sign.
1858 if (inputSign != isNegative())
1865 /* Comparison requires normalized numbers. */
1867 APFloat::compare(const APFloat &rhs) const
1871 assert(semantics == rhs.semantics);
1873 switch (PackCategoriesIntoKey(category, rhs.category)) {
1875 llvm_unreachable(0);
1877 case PackCategoriesIntoKey(fcNaN, fcZero):
1878 case PackCategoriesIntoKey(fcNaN, fcNormal):
1879 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1880 case PackCategoriesIntoKey(fcNaN, fcNaN):
1881 case PackCategoriesIntoKey(fcZero, fcNaN):
1882 case PackCategoriesIntoKey(fcNormal, fcNaN):
1883 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1884 return cmpUnordered;
1886 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1887 case PackCategoriesIntoKey(fcInfinity, fcZero):
1888 case PackCategoriesIntoKey(fcNormal, fcZero):
1892 return cmpGreaterThan;
1894 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1895 case PackCategoriesIntoKey(fcZero, fcInfinity):
1896 case PackCategoriesIntoKey(fcZero, fcNormal):
1898 return cmpGreaterThan;
1902 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1903 if (sign == rhs.sign)
1908 return cmpGreaterThan;
1910 case PackCategoriesIntoKey(fcZero, fcZero):
1913 case PackCategoriesIntoKey(fcNormal, fcNormal):
1917 /* Two normal numbers. Do they have the same sign? */
1918 if (sign != rhs.sign) {
1920 result = cmpLessThan;
1922 result = cmpGreaterThan;
1924 /* Compare absolute values; invert result if negative. */
1925 result = compareAbsoluteValue(rhs);
1928 if (result == cmpLessThan)
1929 result = cmpGreaterThan;
1930 else if (result == cmpGreaterThan)
1931 result = cmpLessThan;
1938 /// APFloat::convert - convert a value of one floating point type to another.
1939 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1940 /// records whether the transformation lost information, i.e. whether
1941 /// converting the result back to the original type will produce the
1942 /// original value (this is almost the same as return value==fsOK, but there
1943 /// are edge cases where this is not so).
1946 APFloat::convert(const fltSemantics &toSemantics,
1947 roundingMode rounding_mode, bool *losesInfo)
1949 lostFraction lostFraction;
1950 unsigned int newPartCount, oldPartCount;
1953 const fltSemantics &fromSemantics = *semantics;
1955 lostFraction = lfExactlyZero;
1956 newPartCount = partCountForBits(toSemantics.precision + 1);
1957 oldPartCount = partCount();
1958 shift = toSemantics.precision - fromSemantics.precision;
1960 bool X86SpecialNan = false;
1961 if (&fromSemantics == &APFloat::x87DoubleExtended &&
1962 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1963 (!(*significandParts() & 0x8000000000000000ULL) ||
1964 !(*significandParts() & 0x4000000000000000ULL))) {
1965 // x86 has some unusual NaNs which cannot be represented in any other
1966 // format; note them here.
1967 X86SpecialNan = true;
1970 // If this is a truncation, perform the shift before we narrow the storage.
1971 if (shift < 0 && (category==fcNormal || category==fcNaN))
1972 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1974 // Fix the storage so it can hold to new value.
1975 if (newPartCount > oldPartCount) {
1976 // The new type requires more storage; make it available.
1977 integerPart *newParts;
1978 newParts = new integerPart[newPartCount];
1979 APInt::tcSet(newParts, 0, newPartCount);
1980 if (category==fcNormal || category==fcNaN)
1981 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1983 significand.parts = newParts;
1984 } else if (newPartCount == 1 && oldPartCount != 1) {
1985 // Switch to built-in storage for a single part.
1986 integerPart newPart = 0;
1987 if (category==fcNormal || category==fcNaN)
1988 newPart = significandParts()[0];
1990 significand.part = newPart;
1993 // Now that we have the right storage, switch the semantics.
1994 semantics = &toSemantics;
1996 // If this is an extension, perform the shift now that the storage is
1998 if (shift > 0 && (category==fcNormal || category==fcNaN))
1999 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2001 if (category == fcNormal) {
2002 fs = normalize(rounding_mode, lostFraction);
2003 *losesInfo = (fs != opOK);
2004 } else if (category == fcNaN) {
2005 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2007 // For x87 extended precision, we want to make a NaN, not a special NaN if
2008 // the input wasn't special either.
2009 if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended)
2010 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2012 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2013 // does not give you back the same bits. This is dubious, and we
2014 // don't currently do it. You're really supposed to get
2015 // an invalid operation signal at runtime, but nobody does that.
2025 /* Convert a floating point number to an integer according to the
2026 rounding mode. If the rounded integer value is out of range this
2027 returns an invalid operation exception and the contents of the
2028 destination parts are unspecified. If the rounded value is in
2029 range but the floating point number is not the exact integer, the C
2030 standard doesn't require an inexact exception to be raised. IEEE
2031 854 does require it so we do that.
2033 Note that for conversions to integer type the C standard requires
2034 round-to-zero to always be used. */
2036 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
2038 roundingMode rounding_mode,
2039 bool *isExact) const
2041 lostFraction lost_fraction;
2042 const integerPart *src;
2043 unsigned int dstPartsCount, truncatedBits;
2047 /* Handle the three special cases first. */
2048 if (category == fcInfinity || category == fcNaN)
2051 dstPartsCount = partCountForBits(width);
2053 if (category == fcZero) {
2054 APInt::tcSet(parts, 0, dstPartsCount);
2055 // Negative zero can't be represented as an int.
2060 src = significandParts();
2062 /* Step 1: place our absolute value, with any fraction truncated, in
2065 /* Our absolute value is less than one; truncate everything. */
2066 APInt::tcSet(parts, 0, dstPartsCount);
2067 /* For exponent -1 the integer bit represents .5, look at that.
2068 For smaller exponents leftmost truncated bit is 0. */
2069 truncatedBits = semantics->precision -1U - exponent;
2071 /* We want the most significant (exponent + 1) bits; the rest are
2073 unsigned int bits = exponent + 1U;
2075 /* Hopelessly large in magnitude? */
2079 if (bits < semantics->precision) {
2080 /* We truncate (semantics->precision - bits) bits. */
2081 truncatedBits = semantics->precision - bits;
2082 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
2084 /* We want at least as many bits as are available. */
2085 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
2086 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2091 /* Step 2: work out any lost fraction, and increment the absolute
2092 value if we would round away from zero. */
2093 if (truncatedBits) {
2094 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2096 if (lost_fraction != lfExactlyZero &&
2097 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2098 if (APInt::tcIncrement(parts, dstPartsCount))
2099 return opInvalidOp; /* Overflow. */
2102 lost_fraction = lfExactlyZero;
2105 /* Step 3: check if we fit in the destination. */
2106 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2110 /* Negative numbers cannot be represented as unsigned. */
2114 /* It takes omsb bits to represent the unsigned integer value.
2115 We lose a bit for the sign, but care is needed as the
2116 maximally negative integer is a special case. */
2117 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2120 /* This case can happen because of rounding. */
2125 APInt::tcNegate (parts, dstPartsCount);
2127 if (omsb >= width + !isSigned)
2131 if (lost_fraction == lfExactlyZero) {
2138 /* Same as convertToSignExtendedInteger, except we provide
2139 deterministic values in case of an invalid operation exception,
2140 namely zero for NaNs and the minimal or maximal value respectively
2141 for underflow or overflow.
2142 The *isExact output tells whether the result is exact, in the sense
2143 that converting it back to the original floating point type produces
2144 the original value. This is almost equivalent to result==opOK,
2145 except for negative zeroes.
2148 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2150 roundingMode rounding_mode, bool *isExact) const
2154 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2157 if (fs == opInvalidOp) {
2158 unsigned int bits, dstPartsCount;
2160 dstPartsCount = partCountForBits(width);
2162 if (category == fcNaN)
2167 bits = width - isSigned;
2169 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2170 if (sign && isSigned)
2171 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2177 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2178 an APSInt, whose initial bit-width and signed-ness are used to determine the
2179 precision of the conversion.
2182 APFloat::convertToInteger(APSInt &result,
2183 roundingMode rounding_mode, bool *isExact) const
2185 unsigned bitWidth = result.getBitWidth();
2186 SmallVector<uint64_t, 4> parts(result.getNumWords());
2187 opStatus status = convertToInteger(
2188 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2189 // Keeps the original signed-ness.
2190 result = APInt(bitWidth, parts);
2194 /* Convert an unsigned integer SRC to a floating point number,
2195 rounding according to ROUNDING_MODE. The sign of the floating
2196 point number is not modified. */
2198 APFloat::convertFromUnsignedParts(const integerPart *src,
2199 unsigned int srcCount,
2200 roundingMode rounding_mode)
2202 unsigned int omsb, precision, dstCount;
2204 lostFraction lost_fraction;
2206 category = fcNormal;
2207 omsb = APInt::tcMSB(src, srcCount) + 1;
2208 dst = significandParts();
2209 dstCount = partCount();
2210 precision = semantics->precision;
2212 /* We want the most significant PRECISION bits of SRC. There may not
2213 be that many; extract what we can. */
2214 if (precision <= omsb) {
2215 exponent = omsb - 1;
2216 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2218 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2220 exponent = precision - 1;
2221 lost_fraction = lfExactlyZero;
2222 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2225 return normalize(rounding_mode, lost_fraction);
2229 APFloat::convertFromAPInt(const APInt &Val,
2231 roundingMode rounding_mode)
2233 unsigned int partCount = Val.getNumWords();
2237 if (isSigned && api.isNegative()) {
2242 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2245 /* Convert a two's complement integer SRC to a floating point number,
2246 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2247 integer is signed, in which case it must be sign-extended. */
2249 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2250 unsigned int srcCount,
2252 roundingMode rounding_mode)
2257 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2260 /* If we're signed and negative negate a copy. */
2262 copy = new integerPart[srcCount];
2263 APInt::tcAssign(copy, src, srcCount);
2264 APInt::tcNegate(copy, srcCount);
2265 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2269 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2275 /* FIXME: should this just take a const APInt reference? */
2277 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2278 unsigned int width, bool isSigned,
2279 roundingMode rounding_mode)
2281 unsigned int partCount = partCountForBits(width);
2282 APInt api = APInt(width, makeArrayRef(parts, partCount));
2285 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2290 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2294 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2296 lostFraction lost_fraction = lfExactlyZero;
2297 integerPart *significand;
2298 unsigned int bitPos, partsCount;
2299 StringRef::iterator dot, firstSignificantDigit;
2303 category = fcNormal;
2305 significand = significandParts();
2306 partsCount = partCount();
2307 bitPos = partsCount * integerPartWidth;
2309 /* Skip leading zeroes and any (hexa)decimal point. */
2310 StringRef::iterator begin = s.begin();
2311 StringRef::iterator end = s.end();
2312 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2313 firstSignificantDigit = p;
2316 integerPart hex_value;
2319 assert(dot == end && "String contains multiple dots");
2326 hex_value = hexDigitValue(*p);
2327 if (hex_value == -1U) {
2336 /* Store the number whilst 4-bit nibbles remain. */
2339 hex_value <<= bitPos % integerPartWidth;
2340 significand[bitPos / integerPartWidth] |= hex_value;
2342 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2343 while (p != end && hexDigitValue(*p) != -1U)
2350 /* Hex floats require an exponent but not a hexadecimal point. */
2351 assert(p != end && "Hex strings require an exponent");
2352 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2353 assert(p != begin && "Significand has no digits");
2354 assert((dot == end || p - begin != 1) && "Significand has no digits");
2356 /* Ignore the exponent if we are zero. */
2357 if (p != firstSignificantDigit) {
2360 /* Implicit hexadecimal point? */
2364 /* Calculate the exponent adjustment implicit in the number of
2365 significant digits. */
2366 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2367 if (expAdjustment < 0)
2369 expAdjustment = expAdjustment * 4 - 1;
2371 /* Adjust for writing the significand starting at the most
2372 significant nibble. */
2373 expAdjustment += semantics->precision;
2374 expAdjustment -= partsCount * integerPartWidth;
2376 /* Adjust for the given exponent. */
2377 exponent = totalExponent(p + 1, end, expAdjustment);
2380 return normalize(rounding_mode, lost_fraction);
2384 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2385 unsigned sigPartCount, int exp,
2386 roundingMode rounding_mode)
2388 unsigned int parts, pow5PartCount;
2389 fltSemantics calcSemantics = { 32767, -32767, 0 };
2390 integerPart pow5Parts[maxPowerOfFiveParts];
2393 isNearest = (rounding_mode == rmNearestTiesToEven ||
2394 rounding_mode == rmNearestTiesToAway);
2396 parts = partCountForBits(semantics->precision + 11);
2398 /* Calculate pow(5, abs(exp)). */
2399 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2401 for (;; parts *= 2) {
2402 opStatus sigStatus, powStatus;
2403 unsigned int excessPrecision, truncatedBits;
2405 calcSemantics.precision = parts * integerPartWidth - 1;
2406 excessPrecision = calcSemantics.precision - semantics->precision;
2407 truncatedBits = excessPrecision;
2409 APFloat decSig(calcSemantics, fcZero, sign);
2410 APFloat pow5(calcSemantics, fcZero, false);
2412 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2413 rmNearestTiesToEven);
2414 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2415 rmNearestTiesToEven);
2416 /* Add exp, as 10^n = 5^n * 2^n. */
2417 decSig.exponent += exp;
2419 lostFraction calcLostFraction;
2420 integerPart HUerr, HUdistance;
2421 unsigned int powHUerr;
2424 /* multiplySignificand leaves the precision-th bit set to 1. */
2425 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2426 powHUerr = powStatus != opOK;
2428 calcLostFraction = decSig.divideSignificand(pow5);
2429 /* Denormal numbers have less precision. */
2430 if (decSig.exponent < semantics->minExponent) {
2431 excessPrecision += (semantics->minExponent - decSig.exponent);
2432 truncatedBits = excessPrecision;
2433 if (excessPrecision > calcSemantics.precision)
2434 excessPrecision = calcSemantics.precision;
2436 /* Extra half-ulp lost in reciprocal of exponent. */
2437 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2440 /* Both multiplySignificand and divideSignificand return the
2441 result with the integer bit set. */
2442 assert(APInt::tcExtractBit
2443 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2445 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2447 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2448 excessPrecision, isNearest);
2450 /* Are we guaranteed to round correctly if we truncate? */
2451 if (HUdistance >= HUerr) {
2452 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2453 calcSemantics.precision - excessPrecision,
2455 /* Take the exponent of decSig. If we tcExtract-ed less bits
2456 above we must adjust our exponent to compensate for the
2457 implicit right shift. */
2458 exponent = (decSig.exponent + semantics->precision
2459 - (calcSemantics.precision - excessPrecision));
2460 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2463 return normalize(rounding_mode, calcLostFraction);
2469 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2474 /* Scan the text. */
2475 StringRef::iterator p = str.begin();
2476 interpretDecimal(p, str.end(), &D);
2478 /* Handle the quick cases. First the case of no significant digits,
2479 i.e. zero, and then exponents that are obviously too large or too
2480 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2481 definitely overflows if
2483 (exp - 1) * L >= maxExponent
2485 and definitely underflows to zero where
2487 (exp + 1) * L <= minExponent - precision
2489 With integer arithmetic the tightest bounds for L are
2491 93/28 < L < 196/59 [ numerator <= 256 ]
2492 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2495 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2499 /* Check whether the normalized exponent is high enough to overflow
2500 max during the log-rebasing in the max-exponent check below. */
2501 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2502 fs = handleOverflow(rounding_mode);
2504 /* If it wasn't, then it also wasn't high enough to overflow max
2505 during the log-rebasing in the min-exponent check. Check that it
2506 won't overflow min in either check, then perform the min-exponent
2508 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2509 (D.normalizedExponent + 1) * 28738 <=
2510 8651 * (semantics->minExponent - (int) semantics->precision)) {
2511 /* Underflow to zero and round. */
2513 fs = normalize(rounding_mode, lfLessThanHalf);
2515 /* We can finally safely perform the max-exponent check. */
2516 } else if ((D.normalizedExponent - 1) * 42039
2517 >= 12655 * semantics->maxExponent) {
2518 /* Overflow and round. */
2519 fs = handleOverflow(rounding_mode);
2521 integerPart *decSignificand;
2522 unsigned int partCount;
2524 /* A tight upper bound on number of bits required to hold an
2525 N-digit decimal integer is N * 196 / 59. Allocate enough space
2526 to hold the full significand, and an extra part required by
2528 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2529 partCount = partCountForBits(1 + 196 * partCount / 59);
2530 decSignificand = new integerPart[partCount + 1];
2533 /* Convert to binary efficiently - we do almost all multiplication
2534 in an integerPart. When this would overflow do we do a single
2535 bignum multiplication, and then revert again to multiplication
2536 in an integerPart. */
2538 integerPart decValue, val, multiplier;
2546 if (p == str.end()) {
2550 decValue = decDigitValue(*p++);
2551 assert(decValue < 10U && "Invalid character in significand");
2553 val = val * 10 + decValue;
2554 /* The maximum number that can be multiplied by ten with any
2555 digit added without overflowing an integerPart. */
2556 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2558 /* Multiply out the current part. */
2559 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2560 partCount, partCount + 1, false);
2562 /* If we used another part (likely but not guaranteed), increase
2564 if (decSignificand[partCount])
2566 } while (p <= D.lastSigDigit);
2568 category = fcNormal;
2569 fs = roundSignificandWithExponent(decSignificand, partCount,
2570 D.exponent, rounding_mode);
2572 delete [] decSignificand;
2579 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2581 assert(!str.empty() && "Invalid string length");
2583 /* Handle a leading minus sign. */
2584 StringRef::iterator p = str.begin();
2585 size_t slen = str.size();
2586 sign = *p == '-' ? 1 : 0;
2587 if (*p == '-' || *p == '+') {
2590 assert(slen && "String has no digits");
2593 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2594 assert(slen - 2 && "Invalid string");
2595 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2599 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2602 /* Write out a hexadecimal representation of the floating point value
2603 to DST, which must be of sufficient size, in the C99 form
2604 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2605 excluding the terminating NUL.
2607 If UPPERCASE, the output is in upper case, otherwise in lower case.
2609 HEXDIGITS digits appear altogether, rounding the value if
2610 necessary. If HEXDIGITS is 0, the minimal precision to display the
2611 number precisely is used instead. If nothing would appear after
2612 the decimal point it is suppressed.
2614 The decimal exponent is always printed and has at least one digit.
2615 Zero values display an exponent of zero. Infinities and NaNs
2616 appear as "infinity" or "nan" respectively.
2618 The above rules are as specified by C99. There is ambiguity about
2619 what the leading hexadecimal digit should be. This implementation
2620 uses whatever is necessary so that the exponent is displayed as
2621 stored. This implies the exponent will fall within the IEEE format
2622 range, and the leading hexadecimal digit will be 0 (for denormals),
2623 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2624 any other digits zero).
2627 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2628 bool upperCase, roundingMode rounding_mode) const
2638 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2639 dst += sizeof infinityL - 1;
2643 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2644 dst += sizeof NaNU - 1;
2649 *dst++ = upperCase ? 'X': 'x';
2651 if (hexDigits > 1) {
2653 memset (dst, '0', hexDigits - 1);
2654 dst += hexDigits - 1;
2656 *dst++ = upperCase ? 'P': 'p';
2661 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2667 return static_cast<unsigned int>(dst - p);
2670 /* Does the hard work of outputting the correctly rounded hexadecimal
2671 form of a normal floating point number with the specified number of
2672 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2673 digits necessary to print the value precisely is output. */
2675 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2677 roundingMode rounding_mode) const
2679 unsigned int count, valueBits, shift, partsCount, outputDigits;
2680 const char *hexDigitChars;
2681 const integerPart *significand;
2686 *dst++ = upperCase ? 'X': 'x';
2689 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2691 significand = significandParts();
2692 partsCount = partCount();
2694 /* +3 because the first digit only uses the single integer bit, so
2695 we have 3 virtual zero most-significant-bits. */
2696 valueBits = semantics->precision + 3;
2697 shift = integerPartWidth - valueBits % integerPartWidth;
2699 /* The natural number of digits required ignoring trailing
2700 insignificant zeroes. */
2701 outputDigits = (valueBits - significandLSB () + 3) / 4;
2703 /* hexDigits of zero means use the required number for the
2704 precision. Otherwise, see if we are truncating. If we are,
2705 find out if we need to round away from zero. */
2707 if (hexDigits < outputDigits) {
2708 /* We are dropping non-zero bits, so need to check how to round.
2709 "bits" is the number of dropped bits. */
2711 lostFraction fraction;
2713 bits = valueBits - hexDigits * 4;
2714 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2715 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2717 outputDigits = hexDigits;
2720 /* Write the digits consecutively, and start writing in the location
2721 of the hexadecimal point. We move the most significant digit
2722 left and add the hexadecimal point later. */
2725 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2727 while (outputDigits && count) {
2730 /* Put the most significant integerPartWidth bits in "part". */
2731 if (--count == partsCount)
2732 part = 0; /* An imaginary higher zero part. */
2734 part = significand[count] << shift;
2737 part |= significand[count - 1] >> (integerPartWidth - shift);
2739 /* Convert as much of "part" to hexdigits as we can. */
2740 unsigned int curDigits = integerPartWidth / 4;
2742 if (curDigits > outputDigits)
2743 curDigits = outputDigits;
2744 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2745 outputDigits -= curDigits;
2751 /* Note that hexDigitChars has a trailing '0'. */
2754 *q = hexDigitChars[hexDigitValue (*q) + 1];
2755 } while (*q == '0');
2758 /* Add trailing zeroes. */
2759 memset (dst, '0', outputDigits);
2760 dst += outputDigits;
2763 /* Move the most significant digit to before the point, and if there
2764 is something after the decimal point add it. This must come
2765 after rounding above. */
2772 /* Finally output the exponent. */
2773 *dst++ = upperCase ? 'P': 'p';
2775 return writeSignedDecimal (dst, exponent);
2778 hash_code llvm::hash_value(const APFloat &Arg) {
2779 if (Arg.category != APFloat::fcNormal)
2780 return hash_combine((uint8_t)Arg.category,
2781 // NaN has no sign, fix it at zero.
2782 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2783 Arg.semantics->precision);
2785 // Normal floats need their exponent and significand hashed.
2786 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2787 Arg.semantics->precision, Arg.exponent,
2789 Arg.significandParts(),
2790 Arg.significandParts() + Arg.partCount()));
2793 // Conversion from APFloat to/from host float/double. It may eventually be
2794 // possible to eliminate these and have everybody deal with APFloats, but that
2795 // will take a while. This approach will not easily extend to long double.
2796 // Current implementation requires integerPartWidth==64, which is correct at
2797 // the moment but could be made more general.
2799 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2800 // the actual IEEE respresentations. We compensate for that here.
2803 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2805 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2806 assert(partCount()==2);
2808 uint64_t myexponent, mysignificand;
2810 if (category==fcNormal) {
2811 myexponent = exponent+16383; //bias
2812 mysignificand = significandParts()[0];
2813 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2814 myexponent = 0; // denormal
2815 } else if (category==fcZero) {
2818 } else if (category==fcInfinity) {
2819 myexponent = 0x7fff;
2820 mysignificand = 0x8000000000000000ULL;
2822 assert(category == fcNaN && "Unknown category");
2823 myexponent = 0x7fff;
2824 mysignificand = significandParts()[0];
2828 words[0] = mysignificand;
2829 words[1] = ((uint64_t)(sign & 1) << 15) |
2830 (myexponent & 0x7fffLL);
2831 return APInt(80, words);
2835 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2837 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2838 assert(partCount()==2);
2844 // Convert number to double. To avoid spurious underflows, we re-
2845 // normalize against the "double" minExponent first, and only *then*
2846 // truncate the mantissa. The result of that second conversion
2847 // may be inexact, but should never underflow.
2848 // Declare fltSemantics before APFloat that uses it (and
2849 // saves pointer to it) to ensure correct destruction order.
2850 fltSemantics extendedSemantics = *semantics;
2851 extendedSemantics.minExponent = IEEEdouble.minExponent;
2852 APFloat extended(*this);
2853 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2854 assert(fs == opOK && !losesInfo);
2857 APFloat u(extended);
2858 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2859 assert(fs == opOK || fs == opInexact);
2861 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2863 // If conversion was exact or resulted in a special case, we're done;
2864 // just set the second double to zero. Otherwise, re-convert back to
2865 // the extended format and compute the difference. This now should
2866 // convert exactly to double.
2867 if (u.category == fcNormal && losesInfo) {
2868 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2869 assert(fs == opOK && !losesInfo);
2872 APFloat v(extended);
2873 v.subtract(u, rmNearestTiesToEven);
2874 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2875 assert(fs == opOK && !losesInfo);
2877 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2882 return APInt(128, words);
2886 APFloat::convertQuadrupleAPFloatToAPInt() const
2888 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2889 assert(partCount()==2);
2891 uint64_t myexponent, mysignificand, mysignificand2;
2893 if (category==fcNormal) {
2894 myexponent = exponent+16383; //bias
2895 mysignificand = significandParts()[0];
2896 mysignificand2 = significandParts()[1];
2897 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2898 myexponent = 0; // denormal
2899 } else if (category==fcZero) {
2901 mysignificand = mysignificand2 = 0;
2902 } else if (category==fcInfinity) {
2903 myexponent = 0x7fff;
2904 mysignificand = mysignificand2 = 0;
2906 assert(category == fcNaN && "Unknown category!");
2907 myexponent = 0x7fff;
2908 mysignificand = significandParts()[0];
2909 mysignificand2 = significandParts()[1];
2913 words[0] = mysignificand;
2914 words[1] = ((uint64_t)(sign & 1) << 63) |
2915 ((myexponent & 0x7fff) << 48) |
2916 (mysignificand2 & 0xffffffffffffLL);
2918 return APInt(128, words);
2922 APFloat::convertDoubleAPFloatToAPInt() const
2924 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2925 assert(partCount()==1);
2927 uint64_t myexponent, mysignificand;
2929 if (category==fcNormal) {
2930 myexponent = exponent+1023; //bias
2931 mysignificand = *significandParts();
2932 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2933 myexponent = 0; // denormal
2934 } else if (category==fcZero) {
2937 } else if (category==fcInfinity) {
2941 assert(category == fcNaN && "Unknown category!");
2943 mysignificand = *significandParts();
2946 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2947 ((myexponent & 0x7ff) << 52) |
2948 (mysignificand & 0xfffffffffffffLL))));
2952 APFloat::convertFloatAPFloatToAPInt() const
2954 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2955 assert(partCount()==1);
2957 uint32_t myexponent, mysignificand;
2959 if (category==fcNormal) {
2960 myexponent = exponent+127; //bias
2961 mysignificand = (uint32_t)*significandParts();
2962 if (myexponent == 1 && !(mysignificand & 0x800000))
2963 myexponent = 0; // denormal
2964 } else if (category==fcZero) {
2967 } else if (category==fcInfinity) {
2971 assert(category == fcNaN && "Unknown category!");
2973 mysignificand = (uint32_t)*significandParts();
2976 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2977 (mysignificand & 0x7fffff)));
2981 APFloat::convertHalfAPFloatToAPInt() const
2983 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2984 assert(partCount()==1);
2986 uint32_t myexponent, mysignificand;
2988 if (category==fcNormal) {
2989 myexponent = exponent+15; //bias
2990 mysignificand = (uint32_t)*significandParts();
2991 if (myexponent == 1 && !(mysignificand & 0x400))
2992 myexponent = 0; // denormal
2993 } else if (category==fcZero) {
2996 } else if (category==fcInfinity) {
3000 assert(category == fcNaN && "Unknown category!");
3002 mysignificand = (uint32_t)*significandParts();
3005 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3006 (mysignificand & 0x3ff)));
3009 // This function creates an APInt that is just a bit map of the floating
3010 // point constant as it would appear in memory. It is not a conversion,
3011 // and treating the result as a normal integer is unlikely to be useful.
3014 APFloat::bitcastToAPInt() const
3016 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
3017 return convertHalfAPFloatToAPInt();
3019 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
3020 return convertFloatAPFloatToAPInt();
3022 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
3023 return convertDoubleAPFloatToAPInt();
3025 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
3026 return convertQuadrupleAPFloatToAPInt();
3028 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
3029 return convertPPCDoubleDoubleAPFloatToAPInt();
3031 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
3033 return convertF80LongDoubleAPFloatToAPInt();
3037 APFloat::convertToFloat() const
3039 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
3040 "Float semantics are not IEEEsingle");
3041 APInt api = bitcastToAPInt();
3042 return api.bitsToFloat();
3046 APFloat::convertToDouble() const
3048 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
3049 "Float semantics are not IEEEdouble");
3050 APInt api = bitcastToAPInt();
3051 return api.bitsToDouble();
3054 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3055 /// does not support these bit patterns:
3056 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3057 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3058 /// exponent = 0, integer bit 1 ("pseudodenormal")
3059 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3060 /// At the moment, the first two are treated as NaNs, the second two as Normal.
3062 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
3064 assert(api.getBitWidth()==80);
3065 uint64_t i1 = api.getRawData()[0];
3066 uint64_t i2 = api.getRawData()[1];
3067 uint64_t myexponent = (i2 & 0x7fff);
3068 uint64_t mysignificand = i1;
3070 initialize(&APFloat::x87DoubleExtended);
3071 assert(partCount()==2);
3073 sign = static_cast<unsigned int>(i2>>15);
3074 if (myexponent==0 && mysignificand==0) {
3075 // exponent, significand meaningless
3077 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3078 // exponent, significand meaningless
3079 category = fcInfinity;
3080 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3081 // exponent meaningless
3083 significandParts()[0] = mysignificand;
3084 significandParts()[1] = 0;
3086 category = fcNormal;
3087 exponent = myexponent - 16383;
3088 significandParts()[0] = mysignificand;
3089 significandParts()[1] = 0;
3090 if (myexponent==0) // denormal
3096 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3098 assert(api.getBitWidth()==128);
3099 uint64_t i1 = api.getRawData()[0];
3100 uint64_t i2 = api.getRawData()[1];
3104 // Get the first double and convert to our format.
3105 initFromDoubleAPInt(APInt(64, i1));
3106 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3107 assert(fs == opOK && !losesInfo);
3110 // Unless we have a special case, add in second double.
3111 if (category == fcNormal) {
3112 APFloat v(IEEEdouble, APInt(64, i2));
3113 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3114 assert(fs == opOK && !losesInfo);
3117 add(v, rmNearestTiesToEven);
3122 APFloat::initFromQuadrupleAPInt(const APInt &api)
3124 assert(api.getBitWidth()==128);
3125 uint64_t i1 = api.getRawData()[0];
3126 uint64_t i2 = api.getRawData()[1];
3127 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3128 uint64_t mysignificand = i1;
3129 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3131 initialize(&APFloat::IEEEquad);
3132 assert(partCount()==2);
3134 sign = static_cast<unsigned int>(i2>>63);
3135 if (myexponent==0 &&
3136 (mysignificand==0 && mysignificand2==0)) {
3137 // exponent, significand meaningless
3139 } else if (myexponent==0x7fff &&
3140 (mysignificand==0 && mysignificand2==0)) {
3141 // exponent, significand meaningless
3142 category = fcInfinity;
3143 } else if (myexponent==0x7fff &&
3144 (mysignificand!=0 || mysignificand2 !=0)) {
3145 // exponent meaningless
3147 significandParts()[0] = mysignificand;
3148 significandParts()[1] = mysignificand2;
3150 category = fcNormal;
3151 exponent = myexponent - 16383;
3152 significandParts()[0] = mysignificand;
3153 significandParts()[1] = mysignificand2;
3154 if (myexponent==0) // denormal
3157 significandParts()[1] |= 0x1000000000000LL; // integer bit
3162 APFloat::initFromDoubleAPInt(const APInt &api)
3164 assert(api.getBitWidth()==64);
3165 uint64_t i = *api.getRawData();
3166 uint64_t myexponent = (i >> 52) & 0x7ff;
3167 uint64_t mysignificand = i & 0xfffffffffffffLL;
3169 initialize(&APFloat::IEEEdouble);
3170 assert(partCount()==1);
3172 sign = static_cast<unsigned int>(i>>63);
3173 if (myexponent==0 && mysignificand==0) {
3174 // exponent, significand meaningless
3176 } else if (myexponent==0x7ff && mysignificand==0) {
3177 // exponent, significand meaningless
3178 category = fcInfinity;
3179 } else if (myexponent==0x7ff && mysignificand!=0) {
3180 // exponent meaningless
3182 *significandParts() = mysignificand;
3184 category = fcNormal;
3185 exponent = myexponent - 1023;
3186 *significandParts() = mysignificand;
3187 if (myexponent==0) // denormal
3190 *significandParts() |= 0x10000000000000LL; // integer bit
3195 APFloat::initFromFloatAPInt(const APInt & api)
3197 assert(api.getBitWidth()==32);
3198 uint32_t i = (uint32_t)*api.getRawData();
3199 uint32_t myexponent = (i >> 23) & 0xff;
3200 uint32_t mysignificand = i & 0x7fffff;
3202 initialize(&APFloat::IEEEsingle);
3203 assert(partCount()==1);
3206 if (myexponent==0 && mysignificand==0) {
3207 // exponent, significand meaningless
3209 } else if (myexponent==0xff && mysignificand==0) {
3210 // exponent, significand meaningless
3211 category = fcInfinity;
3212 } else if (myexponent==0xff && mysignificand!=0) {
3213 // sign, exponent, significand meaningless
3215 *significandParts() = mysignificand;
3217 category = fcNormal;
3218 exponent = myexponent - 127; //bias
3219 *significandParts() = mysignificand;
3220 if (myexponent==0) // denormal
3223 *significandParts() |= 0x800000; // integer bit
3228 APFloat::initFromHalfAPInt(const APInt & api)
3230 assert(api.getBitWidth()==16);
3231 uint32_t i = (uint32_t)*api.getRawData();
3232 uint32_t myexponent = (i >> 10) & 0x1f;
3233 uint32_t mysignificand = i & 0x3ff;
3235 initialize(&APFloat::IEEEhalf);
3236 assert(partCount()==1);
3239 if (myexponent==0 && mysignificand==0) {
3240 // exponent, significand meaningless
3242 } else if (myexponent==0x1f && mysignificand==0) {
3243 // exponent, significand meaningless
3244 category = fcInfinity;
3245 } else if (myexponent==0x1f && mysignificand!=0) {
3246 // sign, exponent, significand meaningless
3248 *significandParts() = mysignificand;
3250 category = fcNormal;
3251 exponent = myexponent - 15; //bias
3252 *significandParts() = mysignificand;
3253 if (myexponent==0) // denormal
3256 *significandParts() |= 0x400; // integer bit
3260 /// Treat api as containing the bits of a floating point number. Currently
3261 /// we infer the floating point type from the size of the APInt. The
3262 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3263 /// when the size is anything else).
3265 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api)
3267 if (Sem == &IEEEhalf)
3268 return initFromHalfAPInt(api);
3269 if (Sem == &IEEEsingle)
3270 return initFromFloatAPInt(api);
3271 if (Sem == &IEEEdouble)
3272 return initFromDoubleAPInt(api);
3273 if (Sem == &x87DoubleExtended)
3274 return initFromF80LongDoubleAPInt(api);
3275 if (Sem == &IEEEquad)
3276 return initFromQuadrupleAPInt(api);
3277 if (Sem == &PPCDoubleDouble)
3278 return initFromPPCDoubleDoubleAPInt(api);
3280 llvm_unreachable(0);
3284 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3288 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth));
3290 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth));
3292 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth));
3294 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth));
3297 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth));
3298 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
3300 llvm_unreachable("Unknown floating bit width");
3304 /// Make this number the largest magnitude normal number in the given
3306 void APFloat::makeLargest(bool Negative) {
3307 // We want (in interchange format):
3308 // sign = {Negative}
3310 // significand = 1..1
3311 category = fcNormal;
3313 exponent = semantics->maxExponent;
3315 // Use memset to set all but the highest integerPart to all ones.
3316 integerPart *significand = significandParts();
3317 unsigned PartCount = partCount();
3318 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3320 // Set the high integerPart especially setting all unused top bits for
3321 // internal consistency.
3322 const unsigned NumUnusedHighBits =
3323 PartCount*integerPartWidth - semantics->precision;
3324 significand[PartCount - 1] = ~integerPart(0) >> NumUnusedHighBits;
3327 /// Make this number the smallest magnitude denormal number in the given
3329 void APFloat::makeSmallest(bool Negative) {
3330 // We want (in interchange format):
3331 // sign = {Negative}
3333 // significand = 0..01
3334 category = fcNormal;
3336 exponent = semantics->minExponent;
3337 APInt::tcSet(significandParts(), 1, partCount());
3341 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3342 // We want (in interchange format):
3343 // sign = {Negative}
3345 // significand = 1..1
3346 APFloat Val(Sem, uninitialized);
3347 Val.makeLargest(Negative);
3351 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3352 // We want (in interchange format):
3353 // sign = {Negative}
3355 // significand = 0..01
3356 APFloat Val(Sem, uninitialized);
3357 Val.makeSmallest(Negative);
3361 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3362 APFloat Val(Sem, fcNormal, Negative);
3364 // We want (in interchange format):
3365 // sign = {Negative}
3367 // significand = 10..0
3369 Val.exponent = Sem.minExponent;
3370 Val.zeroSignificand();
3371 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3372 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
3377 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) {
3378 initFromAPInt(&Sem, API);
3381 APFloat::APFloat(float f) {
3382 initFromAPInt(&IEEEsingle, APInt::floatToBits(f));
3385 APFloat::APFloat(double d) {
3386 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d));
3390 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3391 Buffer.append(Str.begin(), Str.end());
3394 /// Removes data from the given significand until it is no more
3395 /// precise than is required for the desired precision.
3396 void AdjustToPrecision(APInt &significand,
3397 int &exp, unsigned FormatPrecision) {
3398 unsigned bits = significand.getActiveBits();
3400 // 196/59 is a very slight overestimate of lg_2(10).
3401 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3403 if (bits <= bitsRequired) return;
3405 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3406 if (!tensRemovable) return;
3408 exp += tensRemovable;
3410 APInt divisor(significand.getBitWidth(), 1);
3411 APInt powten(significand.getBitWidth(), 10);
3413 if (tensRemovable & 1)
3415 tensRemovable >>= 1;
3416 if (!tensRemovable) break;
3420 significand = significand.udiv(divisor);
3422 // Truncate the significand down to its active bit count.
3423 significand = significand.trunc(significand.getActiveBits());
3427 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3428 int &exp, unsigned FormatPrecision) {
3429 unsigned N = buffer.size();
3430 if (N <= FormatPrecision) return;
3432 // The most significant figures are the last ones in the buffer.
3433 unsigned FirstSignificant = N - FormatPrecision;
3436 // FIXME: this probably shouldn't use 'round half up'.
3438 // Rounding down is just a truncation, except we also want to drop
3439 // trailing zeros from the new result.
3440 if (buffer[FirstSignificant - 1] < '5') {
3441 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3444 exp += FirstSignificant;
3445 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3449 // Rounding up requires a decimal add-with-carry. If we continue
3450 // the carry, the newly-introduced zeros will just be truncated.
3451 for (unsigned I = FirstSignificant; I != N; ++I) {
3452 if (buffer[I] == '9') {
3460 // If we carried through, we have exactly one digit of precision.
3461 if (FirstSignificant == N) {
3462 exp += FirstSignificant;
3464 buffer.push_back('1');
3468 exp += FirstSignificant;
3469 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3473 void APFloat::toString(SmallVectorImpl<char> &Str,
3474 unsigned FormatPrecision,
3475 unsigned FormatMaxPadding) const {
3479 return append(Str, "-Inf");
3481 return append(Str, "+Inf");
3483 case fcNaN: return append(Str, "NaN");
3489 if (!FormatMaxPadding)
3490 append(Str, "0.0E+0");
3502 // Decompose the number into an APInt and an exponent.
3503 int exp = exponent - ((int) semantics->precision - 1);
3504 APInt significand(semantics->precision,
3505 makeArrayRef(significandParts(),
3506 partCountForBits(semantics->precision)));
3508 // Set FormatPrecision if zero. We want to do this before we
3509 // truncate trailing zeros, as those are part of the precision.
3510 if (!FormatPrecision) {
3511 // It's an interesting question whether to use the nominal
3512 // precision or the active precision here for denormals.
3514 // FormatPrecision = ceil(significandBits / lg_2(10))
3515 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3518 // Ignore trailing binary zeros.
3519 int trailingZeros = significand.countTrailingZeros();
3520 exp += trailingZeros;
3521 significand = significand.lshr(trailingZeros);
3523 // Change the exponent from 2^e to 10^e.
3526 } else if (exp > 0) {
3528 significand = significand.zext(semantics->precision + exp);
3529 significand <<= exp;
3531 } else { /* exp < 0 */
3534 // We transform this using the identity:
3535 // (N)(2^-e) == (N)(5^e)(10^-e)
3536 // This means we have to multiply N (the significand) by 5^e.
3537 // To avoid overflow, we have to operate on numbers large
3538 // enough to store N * 5^e:
3539 // log2(N * 5^e) == log2(N) + e * log2(5)
3540 // <= semantics->precision + e * 137 / 59
3541 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3543 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3545 // Multiply significand by 5^e.
3546 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3547 significand = significand.zext(precision);
3548 APInt five_to_the_i(precision, 5);
3550 if (texp & 1) significand *= five_to_the_i;
3554 five_to_the_i *= five_to_the_i;
3558 AdjustToPrecision(significand, exp, FormatPrecision);
3560 SmallVector<char, 256> buffer;
3563 unsigned precision = significand.getBitWidth();
3564 APInt ten(precision, 10);
3565 APInt digit(precision, 0);
3567 bool inTrail = true;
3568 while (significand != 0) {
3569 // digit <- significand % 10
3570 // significand <- significand / 10
3571 APInt::udivrem(significand, ten, significand, digit);
3573 unsigned d = digit.getZExtValue();
3575 // Drop trailing zeros.
3576 if (inTrail && !d) exp++;
3578 buffer.push_back((char) ('0' + d));
3583 assert(!buffer.empty() && "no characters in buffer!");
3585 // Drop down to FormatPrecision.
3586 // TODO: don't do more precise calculations above than are required.
3587 AdjustToPrecision(buffer, exp, FormatPrecision);
3589 unsigned NDigits = buffer.size();
3591 // Check whether we should use scientific notation.
3592 bool FormatScientific;
3593 if (!FormatMaxPadding)
3594 FormatScientific = true;
3599 // But we shouldn't make the number look more precise than it is.
3600 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3601 NDigits + (unsigned) exp > FormatPrecision);
3603 // Power of the most significant digit.
3604 int MSD = exp + (int) (NDigits - 1);
3607 FormatScientific = false;
3609 // 765e-5 == 0.00765
3611 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3616 // Scientific formatting is pretty straightforward.
3617 if (FormatScientific) {
3618 exp += (NDigits - 1);
3620 Str.push_back(buffer[NDigits-1]);
3625 for (unsigned I = 1; I != NDigits; ++I)
3626 Str.push_back(buffer[NDigits-1-I]);
3629 Str.push_back(exp >= 0 ? '+' : '-');
3630 if (exp < 0) exp = -exp;
3631 SmallVector<char, 6> expbuf;
3633 expbuf.push_back((char) ('0' + (exp % 10)));
3636 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3637 Str.push_back(expbuf[E-1-I]);
3641 // Non-scientific, positive exponents.
3643 for (unsigned I = 0; I != NDigits; ++I)
3644 Str.push_back(buffer[NDigits-1-I]);
3645 for (unsigned I = 0; I != (unsigned) exp; ++I)
3650 // Non-scientific, negative exponents.
3652 // The number of digits to the left of the decimal point.
3653 int NWholeDigits = exp + (int) NDigits;
3656 if (NWholeDigits > 0) {
3657 for (; I != (unsigned) NWholeDigits; ++I)
3658 Str.push_back(buffer[NDigits-I-1]);
3661 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3665 for (unsigned Z = 1; Z != NZeros; ++Z)
3669 for (; I != NDigits; ++I)
3670 Str.push_back(buffer[NDigits-I-1]);
3673 bool APFloat::getExactInverse(APFloat *inv) const {
3674 // Special floats and denormals have no exact inverse.
3675 if (category != fcNormal)
3678 // Check that the number is a power of two by making sure that only the
3679 // integer bit is set in the significand.
3680 if (significandLSB() != semantics->precision - 1)
3684 APFloat reciprocal(*semantics, 1ULL);
3685 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3688 // Avoid multiplication with a denormal, it is not safe on all platforms and
3689 // may be slower than a normal division.
3690 if (reciprocal.isDenormal())
3693 assert(reciprocal.category == fcNormal &&
3694 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3702 bool APFloat::isSignaling() const {
3706 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3707 // first bit of the trailing significand being 0.
3708 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3711 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3713 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3714 /// appropriate sign switching before/after the computation.
3715 APFloat::opStatus APFloat::next(bool nextDown) {
3716 // If we are performing nextDown, swap sign so we have -x.
3720 // Compute nextUp(x)
3721 opStatus result = opOK;
3723 // Handle each float category separately.
3726 // nextUp(+inf) = +inf
3729 // nextUp(-inf) = -getLargest()
3733 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3734 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3735 // change the payload.
3736 if (isSignaling()) {
3737 result = opInvalidOp;
3738 // For consistency, propogate the sign of the sNaN to the qNaN.
3739 makeNaN(false, isNegative(), 0);
3743 // nextUp(pm 0) = +getSmallest()
3744 makeSmallest(false);
3747 // nextUp(-getSmallest()) = -0
3748 if (isSmallest() && isNegative()) {
3749 APInt::tcSet(significandParts(), 0, partCount());
3755 // nextUp(getLargest()) == INFINITY
3756 if (isLargest() && !isNegative()) {
3757 APInt::tcSet(significandParts(), 0, partCount());
3758 category = fcInfinity;
3759 exponent = semantics->maxExponent + 1;
3763 // nextUp(normal) == normal + inc.
3765 // If we are negative, we need to decrement the significand.
3767 // We only cross a binade boundary that requires adjusting the exponent
3769 // 1. exponent != semantics->minExponent. This implies we are not in the
3770 // smallest binade or are dealing with denormals.
3771 // 2. Our significand excluding the integral bit is all zeros.
3772 bool WillCrossBinadeBoundary =
3773 exponent != semantics->minExponent && isSignificandAllZeros();
3775 // Decrement the significand.
3777 // We always do this since:
3778 // 1. If we are dealing with a non binade decrement, by definition we
3779 // just decrement the significand.
3780 // 2. If we are dealing with a normal -> normal binade decrement, since
3781 // we have an explicit integral bit the fact that all bits but the
3782 // integral bit are zero implies that subtracting one will yield a
3783 // significand with 0 integral bit and 1 in all other spots. Thus we
3784 // must just adjust the exponent and set the integral bit to 1.
3785 // 3. If we are dealing with a normal -> denormal binade decrement,
3786 // since we set the integral bit to 0 when we represent denormals, we
3787 // just decrement the significand.
3788 integerPart *Parts = significandParts();
3789 APInt::tcDecrement(Parts, partCount());
3791 if (WillCrossBinadeBoundary) {
3792 // Our result is a normal number. Do the following:
3793 // 1. Set the integral bit to 1.
3794 // 2. Decrement the exponent.
3795 APInt::tcSetBit(Parts, semantics->precision - 1);
3799 // If we are positive, we need to increment the significand.
3801 // We only cross a binade boundary that requires adjusting the exponent if
3802 // the input is not a denormal and all of said input's significand bits
3803 // are set. If all of said conditions are true: clear the significand, set
3804 // the integral bit to 1, and increment the exponent. If we have a
3805 // denormal always increment since moving denormals and the numbers in the
3806 // smallest normal binade have the same exponent in our representation.
3807 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3809 if (WillCrossBinadeBoundary) {
3810 integerPart *Parts = significandParts();
3811 APInt::tcSet(Parts, 0, partCount());
3812 APInt::tcSetBit(Parts, semantics->precision - 1);
3813 assert(exponent != semantics->maxExponent &&
3814 "We can not increment an exponent beyond the maxExponent allowed"
3815 " by the given floating point semantics.");
3818 incrementSignificand();
3824 // If we are performing nextDown, swap sign so we have -nextUp(-x)